Diffstat (limited to 'src/vm/i386')
-rw-r--r--   src/vm/i386/.gitmirror                      1
-rw-r--r--   src/vm/i386/CLRErrorReporting.vrg           5
-rw-r--r--   src/vm/i386/RedirectedHandledJITCase.asm  136
-rw-r--r--   src/vm/i386/asmconstants.h                485
-rw-r--r--   src/vm/i386/asmhelpers.asm               2400
-rw-r--r--   src/vm/i386/cgencpu.h                     573
-rw-r--r--   src/vm/i386/cgenx86.cpp                  2257
-rw-r--r--   src/vm/i386/excepcpu.h                     87
-rw-r--r--   src/vm/i386/excepx86.cpp                 3734
-rw-r--r--   src/vm/i386/fptext.asm                    277
-rw-r--r--   src/vm/i386/gmsasm.asm                     37
-rw-r--r--   src/vm/i386/gmscpu.h                      140
-rw-r--r--   src/vm/i386/gmsx86.cpp                   1245
-rw-r--r--   src/vm/i386/jithelp.asm                  2574
-rw-r--r--   src/vm/i386/jitinterfacex86.cpp          1922
-rw-r--r--   src/vm/i386/profiler.cpp                  336
-rw-r--r--   src/vm/i386/remotingx86.cpp               225
-rw-r--r--   src/vm/i386/stublinkerx86.cpp            6806
-rw-r--r--   src/vm/i386/stublinkerx86.h               781
-rw-r--r--   src/vm/i386/virtualcallstubcpu.hpp       1077
20 files changed, 25098 insertions, 0 deletions
diff --git a/src/vm/i386/.gitmirror b/src/vm/i386/.gitmirror
new file mode 100644
index 0000000000..f507630f94
--- /dev/null
+++ b/src/vm/i386/.gitmirror
@@ -0,0 +1 @@
+Only contents of this folder, excluding subfolders, will be mirrored by the Git-TFS Mirror. \ No newline at end of file
diff --git a/src/vm/i386/CLRErrorReporting.vrg b/src/vm/i386/CLRErrorReporting.vrg
new file mode 100644
index 0000000000..6e45ba967c
--- /dev/null
+++ b/src/vm/i386/CLRErrorReporting.vrg
@@ -0,0 +1,5 @@
+VSREG 7
+
+[HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services\Eventlog\Application\.NET Runtime 4.0 Error Reporting]
+"EventMessageFile"="[DWFolder.D0DF3458_A845_11D3_8D0A_0050046416B9]DW20.EXE"
+"TypesSupported"=dword:00000007
diff --git a/src/vm/i386/RedirectedHandledJITCase.asm b/src/vm/i386/RedirectedHandledJITCase.asm
new file mode 100644
index 0000000000..80345623e7
--- /dev/null
+++ b/src/vm/i386/RedirectedHandledJITCase.asm
@@ -0,0 +1,136 @@
+; Licensed to the .NET Foundation under one or more agreements.
+; The .NET Foundation licenses this file to you under the MIT license.
+; See the LICENSE file in the project root for more information.
+
+; ==++==
+;
+
+;
+; ==--==
+; ***********************************************************************
+; File: RedirectedHandledJITCase.asm
+;
+; ***********************************************************************
+;
+
+; This contains thread-redirecting helper routines that are 100% x86 assembly
+
+ .586
+ .model flat
+
+ include asmconstants.inc
+
+ option casemap:none
+ .code
+
+EXTERN _GetCurrentSavedRedirectContext@0:PROC
+
+;
+; WARNING!! These functions immediately ruin thread unwindability. This is
+; WARNING!! OK as long as there is a mechanism for saving the thread context
+; WARNING!! prior to running these functions as well as a mechanism for
+; WARNING!! restoring the context prior to any stackwalk. This means that
+; WARNING!! we need to ensure that no GC can occur while the stack is
+; WARNING!! unwalkable. This further means that we cannot allow any exception
+; WARNING!! to occur while the stack is unwalkable.
+;
+
+
+; If you edit this macro, make sure you update GetCONTEXTFromRedirectedStubStackFrame.
+; This function is used by both the personality routine and the debugger to retrieve the original CONTEXT.
+GenerateRedirectedHandledJITCaseStub MACRO reason
+
+EXTERN ?RedirectedHandledJITCaseFor&reason&@Thread@@CGXXZ:proc
+
+ ALIGN 4
+_RedirectedHandledJITCaseFor&reason&_Stub@0 PROC PUBLIC
+
+ push eax ; where to stuff the fake return address
+ push ebp ; save interrupted ebp for stack walk
+ mov ebp, esp
+ sub esp, 4 ; stack slot to save the CONTEXT *
+
+ ;
+ ; Save a copy of the redirect CONTEXT*.
+ ; This is needed for the debugger to unwind the stack.
+ ;
+ call _GetCurrentSavedRedirectContext@0
+
+ mov [ebp-4], eax
+.errnz REDIRECTSTUB_EBP_OFFSET_CONTEXT + 4, REDIRECTSTUB_EBP_OFFSET_CONTEXT has changed - update asm stubs
+
+ ;
+ ; Fetch the interrupted eip and save it as our return address.
+ ;
+ mov eax, [eax + CONTEXT_Eip]
+ mov [ebp+4], eax
+
+ ;
+ ; Call target, which will do whatever we needed to do in the context
+ ; of the target thread, and will RtlRestoreContext when it is done.
+ ;
+ call ?RedirectedHandledJITCaseFor&reason&@Thread@@CGXXZ
+
+ int 3 ; target shouldn't return.
+
+; Put a label here to tell the debugger where the end of this function is.
+PUBLIC _RedirectedHandledJITCaseFor&reason&_StubEnd@0
+_RedirectedHandledJITCaseFor&reason&_StubEnd@0:
+
+_RedirectedHandledJITCaseFor&reason&_Stub@0 ENDP
+
+ENDM
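
For orientation, here is a minimal C sketch (not the actual GetCONTEXTFromRedirectedStubStackFrame) of how a stack walker or personality routine could read the saved CONTEXT* back out of a redirect stub frame, relying only on the [ebp-4] slot established by the macro above (REDIRECTSTUB_EBP_OFFSET_CONTEXT in asmconstants.h):

    #include <windows.h>

    // Sketch only: the stub stores the CONTEXT* at [ebp-4].
    #define REDIRECTSTUB_EBP_OFFSET_CONTEXT (-4)   // must match asmconstants.h

    static CONTEXT* GetRedirectContext(BYTE* stubEbp)
    {
        return *(CONTEXT**)(stubEbp + REDIRECTSTUB_EBP_OFFSET_CONTEXT);
    }
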
+
+; HijackFunctionStart and HijackFunctionEnd are used to tell BBT to keep the hijacking functions together.
+; Debugger uses range to check whether IP falls into one of them (see code:Debugger::s_hijackFunction).
+
+_HijackFunctionStart@0 proc public
+ret
+_HijackFunctionStart@0 endp
+
+GenerateRedirectedHandledJITCaseStub <GCThreadControl>
+GenerateRedirectedHandledJITCaseStub <DbgThreadControl>
+GenerateRedirectedHandledJITCaseStub <UserSuspend>
+GenerateRedirectedHandledJITCaseStub <YieldTask>
+
+; Hijack for exceptions.
+; This can be used to hijack at a 2nd-chance exception and execute the UEF
+
+EXTERN _ExceptionHijackWorker@16:PROC
+
+_ExceptionHijack@0 PROC PUBLIC
+
+ ; This is where we land when we're hijacked from an IP by the debugger.
+ ; The debugger has already pushed the args onto the stack:
+ ; - a CONTEXT
+ ; - an EXCEPTION_RECORD
+ ; - a DWORD used to multiplex the hijack
+ ; - an arbitrary void* data parameter
+ call _ExceptionHijackWorker@16
+
+ ; Don't expect to return from here. Debugger will unhijack us. It has the full
+ ; context and can properly restore us.
+ int 3
+
+; Put a label here to tell the debugger where the end of this function is.
+public _ExceptionHijackEnd@0
+_ExceptionHijackEnd@0:
+
+_ExceptionHijack@0 ENDP
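
A plausible C prototype for the worker this stub calls, reconstructed from the @16 argument size and the comment above; the parameter names and their order are assumptions, not taken from the CLR headers:

    #include <windows.h>

    // Hypothetical shape of _ExceptionHijackWorker@16 (four pointer/DWORD-sized args,
    // assumed to appear in the order the comment lists them):
    void __stdcall ExceptionHijackWorker(CONTEXT*          pContext,
                                         EXCEPTION_RECORD* pRecord,
                                         DWORD             dwHijackReason,  // multiplexes the hijack
                                         void*             pData);          // arbitrary data parameter
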
+
+; It is very important to have a dummy function here.
+; Without it, the image has two labels without any instruction in between:
+; One for the last label in this function, and one for the first function in the image following this asm file.
+; Then the linker is free to remove from PDB the function symbol for the function
+; immediately following this, and replace the reference with the last label in this file.
+; When this happens, BBT loses info about function, moves pieces within the function to random place, and generates bad code.
+_HijackFunctionLast@0 proc public
+ret
+_HijackFunctionLast@0 endp
+
+; This is the first function outside the "keep together range". Used by BBT scripts.
+_HijackFunctionEnd@0 proc public
+ret
+_HijackFunctionEnd@0 endp
+
+END
diff --git a/src/vm/i386/asmconstants.h b/src/vm/i386/asmconstants.h
new file mode 100644
index 0000000000..5fd39d6897
--- /dev/null
+++ b/src/vm/i386/asmconstants.h
@@ -0,0 +1,485 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+// asmconstants.h -
+//
+// This header defines field offsets and constants used by assembly code
+// Be sure to rebuild clr/src/vm/ceemain.cpp after changing this file, to
+// ensure that the constants match the expected C/C++ values
+
+//
+// If you need to figure out a constant that has changed and is causing
+// a compile-time assert, check out USE_COMPILE_TIME_CONSTANT_FINDER.
+// TODO: put the constant finder in a common place so other platforms can use it.
+
+#ifndef _TARGET_X86_
+#error this file should only be used on an X86 platform
+#endif
+
+#include "../../inc/switches.h"
+
+#ifndef ASMCONSTANTS_C_ASSERT
+#define ASMCONSTANTS_C_ASSERT(cond)
+#endif
+
+#ifndef ASMCONSTANTS_RUNTIME_ASSERT
+#define ASMCONSTANTS_RUNTIME_ASSERT(cond)
+#endif
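
The empty defaults above let the header be processed while generating constants for the assembler; a C++ consumer (ceemain.cpp is called out in the header comment) is expected to map the macros onto real checks before including the file. A minimal sketch of what that consumer-side setup could look like (the actual definitions in the CLR sources may differ):

    // Hypothetical consumer-side definitions: turn every offset/size mismatch
    // into a compile error instead of a silently broken assembly stub.
    #include <cassert>
    #define ASMCONSTANTS_C_ASSERT(cond)       static_assert((cond), #cond);
    #define ASMCONSTANTS_RUNTIME_ASSERT(cond) assert(cond);
    #include "asmconstants.h"
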
+
+// Some constants are different in _DEBUG builds. This macro factors out ifdefs from below.
+#ifdef _DEBUG
+#define DBG_FRE(dbg,fre) dbg
+#else
+#define DBG_FRE(dbg,fre) fre
+#endif
+
+//***************************************************************************
+#if defined(_DEBUG) && defined(_TARGET_X86_) && !defined(FEATURE_CORECLR)
+ #define HAS_TRACK_CXX_EXCEPTION_CODE_HACK 1
+ #define TRACK_CXX_EXCEPTION_CODE_HACK
+#else
+ #define HAS_TRACK_CXX_EXCEPTION_CODE_HACK 0
+#endif
+
+#define INITIAL_SUCCESS_COUNT 0x100
+
+#define DynamicHelperFrameFlags_Default 0
+#define DynamicHelperFrameFlags_ObjectArg 1
+#define DynamicHelperFrameFlags_ObjectArg2 2
+
+#ifdef FEATURE_REMOTING
+#define TransparentProxyObject___stubData 0x8
+ASMCONSTANTS_C_ASSERT(TransparentProxyObject___stubData == offsetof(TransparentProxyObject, _stubData))
+
+#define TransparentProxyObject___stub 0x14
+ASMCONSTANTS_C_ASSERT(TransparentProxyObject___stub == offsetof(TransparentProxyObject, _stub))
+
+#define TransparentProxyObject___pMT 0xc
+ASMCONSTANTS_C_ASSERT(TransparentProxyObject___pMT == offsetof(TransparentProxyObject, _pMT))
+#endif // FEATURE_REMOTING
+
+// CONTEXT from rotor_pal.h
+#define CONTEXT_Edi 0x9c
+ASMCONSTANTS_C_ASSERT(CONTEXT_Edi == offsetof(CONTEXT,Edi))
+
+#define CONTEXT_Esi 0xa0
+ASMCONSTANTS_C_ASSERT(CONTEXT_Esi == offsetof(CONTEXT,Esi))
+
+#define CONTEXT_Ebx 0xa4
+ASMCONSTANTS_C_ASSERT(CONTEXT_Ebx == offsetof(CONTEXT,Ebx))
+
+#define CONTEXT_Edx 0xa8
+ASMCONSTANTS_C_ASSERT(CONTEXT_Edx == offsetof(CONTEXT,Edx))
+
+#define CONTEXT_Eax 0xb0
+ASMCONSTANTS_C_ASSERT(CONTEXT_Eax == offsetof(CONTEXT,Eax))
+
+#define CONTEXT_Ebp 0xb4
+ASMCONSTANTS_C_ASSERT(CONTEXT_Ebp == offsetof(CONTEXT,Ebp))
+
+#define CONTEXT_Eip 0xb8
+ASMCONSTANTS_C_ASSERT(CONTEXT_Eip == offsetof(CONTEXT,Eip))
+
+#define CONTEXT_Esp 0xc4
+ASMCONSTANTS_C_ASSERT(CONTEXT_Esp == offsetof(CONTEXT,Esp))
+
+// SYSTEM_INFO from rotor_pal.h
+#define SYSTEM_INFO_dwNumberOfProcessors 20
+ASMCONSTANTS_C_ASSERT(SYSTEM_INFO_dwNumberOfProcessors == offsetof(SYSTEM_INFO,dwNumberOfProcessors))
+
+// SpinConstants from clr/src/vars.h
+#define SpinConstants_dwInitialDuration 0
+ASMCONSTANTS_C_ASSERT(SpinConstants_dwInitialDuration == offsetof(SpinConstants,dwInitialDuration))
+
+#define SpinConstants_dwMaximumDuration 4
+ASMCONSTANTS_C_ASSERT(SpinConstants_dwMaximumDuration == offsetof(SpinConstants,dwMaximumDuration))
+
+#define SpinConstants_dwBackoffFactor 8
+ASMCONSTANTS_C_ASSERT(SpinConstants_dwBackoffFactor == offsetof(SpinConstants,dwBackoffFactor))
+
+// EHContext from clr/src/vm/i386/cgencpu.h
+#define EHContext_Eax 0x00
+ASMCONSTANTS_C_ASSERT(EHContext_Eax == offsetof(EHContext,Eax))
+
+#define EHContext_Ebx 0x04
+ASMCONSTANTS_C_ASSERT(EHContext_Ebx == offsetof(EHContext,Ebx))
+
+#define EHContext_Ecx 0x08
+ASMCONSTANTS_C_ASSERT(EHContext_Ecx == offsetof(EHContext,Ecx))
+
+#define EHContext_Edx 0x0c
+ASMCONSTANTS_C_ASSERT(EHContext_Edx == offsetof(EHContext,Edx))
+
+#define EHContext_Esi 0x10
+ASMCONSTANTS_C_ASSERT(EHContext_Esi == offsetof(EHContext,Esi))
+
+#define EHContext_Edi 0x14
+ASMCONSTANTS_C_ASSERT(EHContext_Edi == offsetof(EHContext,Edi))
+
+#define EHContext_Ebp 0x18
+ASMCONSTANTS_C_ASSERT(EHContext_Ebp == offsetof(EHContext,Ebp))
+
+#define EHContext_Esp 0x1c
+ASMCONSTANTS_C_ASSERT(EHContext_Esp == offsetof(EHContext,Esp))
+
+#define EHContext_Eip 0x20
+ASMCONSTANTS_C_ASSERT(EHContext_Eip == offsetof(EHContext,Eip))
+
+
+// from clr/src/fjit/helperframe.h
+#define SIZEOF_MachState 40
+ASMCONSTANTS_C_ASSERT(SIZEOF_MachState == sizeof(MachState))
+
+#define MachState__pEdi 0
+ASMCONSTANTS_C_ASSERT(MachState__pEdi == offsetof(MachState, _pEdi))
+
+#define MachState__edi 4
+ASMCONSTANTS_C_ASSERT(MachState__edi == offsetof(MachState, _edi))
+
+#define MachState__pEsi 8
+ASMCONSTANTS_C_ASSERT(MachState__pEsi == offsetof(MachState, _pEsi))
+
+#define MachState__esi 12
+ASMCONSTANTS_C_ASSERT(MachState__esi == offsetof(MachState, _esi))
+
+#define MachState__pEbx 16
+ASMCONSTANTS_C_ASSERT(MachState__pEbx == offsetof(MachState, _pEbx))
+
+#define MachState__ebx 20
+ASMCONSTANTS_C_ASSERT(MachState__ebx == offsetof(MachState, _ebx))
+
+#define MachState__pEbp 24
+ASMCONSTANTS_C_ASSERT(MachState__pEbp == offsetof(MachState, _pEbp))
+
+#define MachState__ebp 28
+ASMCONSTANTS_C_ASSERT(MachState__ebp == offsetof(MachState, _ebp))
+
+#define MachState__esp 32
+ASMCONSTANTS_C_ASSERT(MachState__esp == offsetof(MachState, _esp))
+
+#define MachState__pRetAddr 36
+ASMCONSTANTS_C_ASSERT(MachState__pRetAddr == offsetof(MachState, _pRetAddr))
+
+#define LazyMachState_captureEbp 40
+ASMCONSTANTS_C_ASSERT(LazyMachState_captureEbp == offsetof(LazyMachState, captureEbp))
+
+#define LazyMachState_captureEsp 44
+ASMCONSTANTS_C_ASSERT(LazyMachState_captureEsp == offsetof(LazyMachState, captureEsp))
+
+#define LazyMachState_captureEip 48
+ASMCONSTANTS_C_ASSERT(LazyMachState_captureEip == offsetof(LazyMachState, captureEip))
+
+
+#define VASigCookie__StubOffset 4
+ASMCONSTANTS_C_ASSERT(VASigCookie__StubOffset == offsetof(VASigCookie, pNDirectILStub))
+
+#define SIZEOF_TailCallFrame 32
+ASMCONSTANTS_C_ASSERT(SIZEOF_TailCallFrame == sizeof(TailCallFrame))
+
+#define SIZEOF_GSCookie 4
+
+// ICodeManager::SHADOW_SP_IN_FILTER from clr/src/inc/eetwain.h
+#define SHADOW_SP_IN_FILTER_ASM 0x1
+ASMCONSTANTS_C_ASSERT(SHADOW_SP_IN_FILTER_ASM == ICodeManager::SHADOW_SP_IN_FILTER)
+
+// from clr/src/inc/corinfo.h
+#define CORINFO_NullReferenceException_ASM 0
+ASMCONSTANTS_C_ASSERT(CORINFO_NullReferenceException_ASM == CORINFO_NullReferenceException)
+
+#define CORINFO_IndexOutOfRangeException_ASM 3
+ASMCONSTANTS_C_ASSERT(CORINFO_IndexOutOfRangeException_ASM == CORINFO_IndexOutOfRangeException)
+
+#define CORINFO_OverflowException_ASM 4
+ASMCONSTANTS_C_ASSERT(CORINFO_OverflowException_ASM == CORINFO_OverflowException)
+
+#define CORINFO_SynchronizationLockException_ASM 5
+ASMCONSTANTS_C_ASSERT(CORINFO_SynchronizationLockException_ASM == CORINFO_SynchronizationLockException)
+
+#define CORINFO_ArrayTypeMismatchException_ASM 6
+ASMCONSTANTS_C_ASSERT(CORINFO_ArrayTypeMismatchException_ASM == CORINFO_ArrayTypeMismatchException)
+
+#define CORINFO_ArgumentNullException_ASM 8
+ASMCONSTANTS_C_ASSERT(CORINFO_ArgumentNullException_ASM == CORINFO_ArgumentNullException)
+
+#define CORINFO_ArgumentException_ASM 9
+ASMCONSTANTS_C_ASSERT(CORINFO_ArgumentException_ASM == CORINFO_ArgumentException)
+
+
+#ifndef CROSSGEN_COMPILE
+
+// from clr/src/vm/threads.h
+#if defined(TRACK_CXX_EXCEPTION_CODE_HACK) // Is C++ exception code tracking turned on?
+ #define Thread_m_LastCxxSEHExceptionCode 0x20
+ ASMCONSTANTS_C_ASSERT(Thread_m_LastCxxSEHExceptionCode == offsetof(Thread, m_LastCxxSEHExceptionCode))
+
+ #define Thread_m_Context 0x3C
+#else
+ #define Thread_m_Context 0x38
+#endif // TRACK_CXX_EXCEPTION_CODE_HACK
+ASMCONSTANTS_C_ASSERT(Thread_m_Context == offsetof(Thread, m_Context))
+
+#define Thread_m_State 0x04
+ASMCONSTANTS_C_ASSERT(Thread_m_State == offsetof(Thread, m_State))
+#endif // CROSSGEN_COMPILE
+
+#define Thread_m_fPreemptiveGCDisabled 0x08
+#ifndef CROSSGEN_COMPILE
+ASMCONSTANTS_C_ASSERT(Thread_m_fPreemptiveGCDisabled == offsetof(Thread, m_fPreemptiveGCDisabled))
+#endif // CROSSGEN_COMPILE
+
+#define Thread_m_pFrame 0x0C
+#ifndef CROSSGEN_COMPILE
+ASMCONSTANTS_C_ASSERT(Thread_m_pFrame == offsetof(Thread, m_pFrame))
+#endif // CROSSGEN_COMPILE
+
+#ifndef CROSSGEN_COMPILE
+#define Thread_m_dwLockCount 0x18
+ASMCONSTANTS_C_ASSERT(Thread_m_dwLockCount == offsetof(Thread, m_dwLockCount))
+
+#define Thread_m_ThreadId 0x1C
+ASMCONSTANTS_C_ASSERT(Thread_m_ThreadId == offsetof(Thread, m_ThreadId))
+
+#define TS_CatchAtSafePoint_ASM 0x5F
+ASMCONSTANTS_C_ASSERT(Thread::TS_CatchAtSafePoint == TS_CatchAtSafePoint_ASM)
+
+#ifdef FEATURE_HIJACK
+#define TS_Hijacked_ASM 0x80
+ASMCONSTANTS_C_ASSERT(Thread::TS_Hijacked == TS_Hijacked_ASM)
+#endif
+
+#endif // CROSSGEN_COMPILE
+
+
+// from clr/src/vm/appdomain.hpp
+
+#define AppDomain__m_dwId 0x4
+ASMCONSTANTS_C_ASSERT(AppDomain__m_dwId == offsetof(AppDomain, m_dwId));
+
+// from clr/src/vm/ceeload.cpp
+#ifdef FEATURE_MIXEDMODE
+#define IJWNOADThunk__m_cache 0x1C
+ASMCONSTANTS_C_ASSERT(IJWNOADThunk__m_cache == offsetof(IJWNOADThunk, m_cache))
+
+#define IJWNOADThunk__NextCacheOffset 0x8
+ASMCONSTANTS_C_ASSERT(IJWNOADThunk__NextCacheOffset == sizeof(IJWNOADThunkStubCache))
+
+#define IJWNOADThunk__CodeAddrOffsetFromADID 0x4
+ASMCONSTANTS_C_ASSERT(IJWNOADThunk__CodeAddrOffsetFromADID == offsetof(IJWNOADThunkStubCache, m_CodeAddr))
+#endif //FEATURE_MIXEDMODE
+
+// from clr/src/vm/syncblk.h
+#define SizeOfSyncTableEntry_ASM 8
+ASMCONSTANTS_C_ASSERT(sizeof(SyncTableEntry) == SizeOfSyncTableEntry_ASM)
+
+#define SyncBlockIndexOffset_ASM 4
+ASMCONSTANTS_C_ASSERT(sizeof(ObjHeader) - offsetof(ObjHeader, m_SyncBlockValue) == SyncBlockIndexOffset_ASM)
+
+#ifndef __GNUC__
+#define SyncTableEntry_m_SyncBlock 0
+ASMCONSTANTS_C_ASSERT(offsetof(SyncTableEntry, m_SyncBlock) == SyncTableEntry_m_SyncBlock)
+
+#define SyncBlock_m_Monitor 0
+ASMCONSTANTS_C_ASSERT(offsetof(SyncBlock, m_Monitor) == SyncBlock_m_Monitor)
+
+#define AwareLock_m_MonitorHeld 0
+ASMCONSTANTS_C_ASSERT(offsetof(AwareLock, m_MonitorHeld) == AwareLock_m_MonitorHeld)
+#else
+// The following 3 offsets have value of 0, and must be
+// defined to be an empty string. Otherwise, gas may generate assembly
+// code with 0 displacement if 0 is left in the displacement field
+// of an instruction.
+#define SyncTableEntry_m_SyncBlock // 0
+ASMCONSTANTS_C_ASSERT(offsetof(SyncTableEntry, m_SyncBlock) == 0)
+
+#define SyncBlock_m_Monitor // 0
+ASMCONSTANTS_C_ASSERT(offsetof(SyncBlock, m_Monitor) == 0)
+
+#define AwareLock_m_MonitorHeld // 0
+ASMCONSTANTS_C_ASSERT(offsetof(AwareLock, m_MonitorHeld) == 0)
+#endif // !__GNUC__
+
+#define AwareLock_m_HoldingThread 8
+ASMCONSTANTS_C_ASSERT(offsetof(AwareLock, m_HoldingThread) == AwareLock_m_HoldingThread)
+
+#define AwareLock_m_Recursion 4
+ASMCONSTANTS_C_ASSERT(offsetof(AwareLock, m_Recursion) == AwareLock_m_Recursion)
+
+#define BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX_ASM 0x08000000
+ASMCONSTANTS_C_ASSERT(BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX_ASM == BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX)
+
+#define BIT_SBLK_SPIN_LOCK_ASM 0x10000000
+ASMCONSTANTS_C_ASSERT(BIT_SBLK_SPIN_LOCK_ASM == BIT_SBLK_SPIN_LOCK)
+
+#define SBLK_MASK_LOCK_THREADID_ASM 0x000003FF // special value of 0 + 1023 thread ids
+ASMCONSTANTS_C_ASSERT(SBLK_MASK_LOCK_THREADID_ASM == SBLK_MASK_LOCK_THREADID)
+
+#define SBLK_MASK_LOCK_RECLEVEL_ASM 0x0000FC00 // 64 recursion levels
+ASMCONSTANTS_C_ASSERT(SBLK_MASK_LOCK_RECLEVEL_ASM == SBLK_MASK_LOCK_RECLEVEL)
+
+#define SBLK_LOCK_RECLEVEL_INC_ASM 0x00000400 // each level is this much higher than the previous one
+ASMCONSTANTS_C_ASSERT(SBLK_LOCK_RECLEVEL_INC_ASM == SBLK_LOCK_RECLEVEL_INC)
+
+#define BIT_SBLK_IS_HASHCODE_ASM 0x04000000
+ASMCONSTANTS_C_ASSERT(BIT_SBLK_IS_HASHCODE_ASM == BIT_SBLK_IS_HASHCODE)
+
+#define MASK_SYNCBLOCKINDEX_ASM 0x03ffffff // ((1<<SYNCBLOCKINDEX_BITS)-1)
+ASMCONSTANTS_C_ASSERT(MASK_SYNCBLOCKINDEX_ASM == MASK_SYNCBLOCKINDEX)
+
+// BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX_ASM + BIT_SBLK_SPIN_LOCK_ASM +
+// SBLK_MASK_LOCK_THREADID_ASM + SBLK_MASK_LOCK_RECLEVEL_ASM
+#define SBLK_COMBINED_MASK_ASM 0x1800ffff
+ASMCONSTANTS_C_ASSERT(SBLK_COMBINED_MASK_ASM == (BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX + BIT_SBLK_SPIN_LOCK + SBLK_MASK_LOCK_THREADID + SBLK_MASK_LOCK_RECLEVEL))
+
+// BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX_ASM + BIT_SBLK_SPIN_LOCK_ASM
+#define BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX_SPIN_LOCK_ASM 0x18000000
+ASMCONSTANTS_C_ASSERT(BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX_SPIN_LOCK_ASM == (BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX + BIT_SBLK_SPIN_LOCK))
+
+// BIT_SBLK_IS_HASHCODE + BIT_SBLK_SPIN_LOCK
+#define BIT_SBLK_IS_HASHCODE_OR_SPIN_LOCK_ASM 0x14000000
+ASMCONSTANTS_C_ASSERT(BIT_SBLK_IS_HASHCODE_OR_SPIN_LOCK_ASM == (BIT_SBLK_IS_HASHCODE + BIT_SBLK_SPIN_LOCK))
+
+// This is the offset from EBP at which the original CONTEXT is stored in one of the
+// RedirectedHandledJITCase*_Stub functions.
+#define REDIRECTSTUB_EBP_OFFSET_CONTEXT (-4)
+
+#define MethodTable_m_wNumInterfaces 0x0E
+ASMCONSTANTS_C_ASSERT(MethodTable_m_wNumInterfaces == offsetof(MethodTable, m_wNumInterfaces))
+
+#define MethodTable_m_dwFlags 0x0
+ASMCONSTANTS_C_ASSERT(MethodTable_m_dwFlags == offsetof(MethodTable, m_dwFlags))
+
+#define MethodTable_m_pInterfaceMap DBG_FRE(0x28, 0x24)
+ASMCONSTANTS_C_ASSERT(MethodTable_m_pInterfaceMap == offsetof(MethodTable, m_pMultipurposeSlot2))
+
+#define SIZEOF_MethodTable DBG_FRE(0x2C, 0x28)
+ASMCONSTANTS_C_ASSERT(SIZEOF_MethodTable == sizeof(MethodTable))
+
+#define SIZEOF_InterfaceInfo_t 0x4
+ASMCONSTANTS_C_ASSERT(SIZEOF_InterfaceInfo_t == sizeof(InterfaceInfo_t))
+
+#ifdef FEATURE_COMINTEROP
+
+#define SIZEOF_FrameHandlerExRecord 0x0c
+#define OFFSETOF__FrameHandlerExRecord__m_ExReg__Next 0
+#define OFFSETOF__FrameHandlerExRecord__m_ExReg__Handler 4
+#define OFFSETOF__FrameHandlerExRecord__m_pEntryFrame 8
+ASMCONSTANTS_C_ASSERT(SIZEOF_FrameHandlerExRecord == sizeof(FrameHandlerExRecord))
+ASMCONSTANTS_C_ASSERT(OFFSETOF__FrameHandlerExRecord__m_ExReg__Next == offsetof(FrameHandlerExRecord, m_ExReg) + offsetof(EXCEPTION_REGISTRATION_RECORD, Next))
+ASMCONSTANTS_C_ASSERT(OFFSETOF__FrameHandlerExRecord__m_ExReg__Handler == offsetof(FrameHandlerExRecord, m_ExReg) + offsetof(EXCEPTION_REGISTRATION_RECORD, Handler))
+ASMCONSTANTS_C_ASSERT(OFFSETOF__FrameHandlerExRecord__m_pEntryFrame == offsetof(FrameHandlerExRecord, m_pEntryFrame))
+
+#ifdef _DEBUG
+#ifndef STACK_OVERWRITE_BARRIER_SIZE
+#define STACK_OVERWRITE_BARRIER_SIZE 20
+#endif
+#ifndef STACK_OVERWRITE_BARRIER_VALUE
+#define STACK_OVERWRITE_BARRIER_VALUE 0xabcdefab
+#endif
+
+#define SIZEOF_FrameHandlerExRecordWithBarrier 0x5c
+ASMCONSTANTS_C_ASSERT(SIZEOF_FrameHandlerExRecordWithBarrier == sizeof(FrameHandlerExRecordWithBarrier))
+#endif
+
+
+#ifdef MDA_SUPPORTED
+#define SIZEOF_StackImbalanceCookie 0x14
+ASMCONSTANTS_C_ASSERT(SIZEOF_StackImbalanceCookie == sizeof(StackImbalanceCookie))
+
+#define StackImbalanceCookie__m_pMD 0x00
+#define StackImbalanceCookie__m_pTarget 0x04
+#define StackImbalanceCookie__m_dwStackArgSize 0x08
+#define StackImbalanceCookie__m_callConv 0x0c
+#define StackImbalanceCookie__m_dwSavedEsp 0x10
+#define StackImbalanceCookie__HAS_FP_RETURN_VALUE 0x80000000
+
+ASMCONSTANTS_C_ASSERT(StackImbalanceCookie__m_pMD == offsetof(StackImbalanceCookie, m_pMD))
+ASMCONSTANTS_C_ASSERT(StackImbalanceCookie__m_pTarget == offsetof(StackImbalanceCookie, m_pTarget))
+ASMCONSTANTS_C_ASSERT(StackImbalanceCookie__m_dwStackArgSize == offsetof(StackImbalanceCookie, m_dwStackArgSize))
+ASMCONSTANTS_C_ASSERT(StackImbalanceCookie__m_callConv == offsetof(StackImbalanceCookie, m_callConv))
+ASMCONSTANTS_C_ASSERT(StackImbalanceCookie__m_dwSavedEsp == offsetof(StackImbalanceCookie, m_dwSavedEsp))
+ASMCONSTANTS_C_ASSERT(StackImbalanceCookie__HAS_FP_RETURN_VALUE == StackImbalanceCookie::HAS_FP_RETURN_VALUE)
+#endif // MDA_SUPPORTED
+
+#define MethodDesc_m_wFlags DBG_FRE(0x1a, 0x06)
+ASMCONSTANTS_C_ASSERT(MethodDesc_m_wFlags == offsetof(MethodDesc, m_wFlags))
+
+#define MethodDesc_mdcClassification 7
+ASMCONSTANTS_C_ASSERT(MethodDesc_mdcClassification == mdcClassification)
+
+#define MethodDesc_mcComInterop 6
+ASMCONSTANTS_C_ASSERT(MethodDesc_mcComInterop == mcComInterop)
+
+#define ComPlusCallMethodDesc__m_pComPlusCallInfo DBG_FRE(0x1C, 0x8)
+ASMCONSTANTS_C_ASSERT(ComPlusCallMethodDesc__m_pComPlusCallInfo == offsetof(ComPlusCallMethodDesc, m_pComPlusCallInfo))
+
+#define ComPlusCallInfo__m_pRetThunk 0x10
+ASMCONSTANTS_C_ASSERT(ComPlusCallInfo__m_pRetThunk == offsetof(ComPlusCallInfo, m_pRetThunk))
+
+#endif // FEATURE_COMINTEROP
+
+#define NonTrivialInterfaceCastFlags (0x00080000 + 0x40000000 + 0x00400000)
+ASMCONSTANTS_C_ASSERT(NonTrivialInterfaceCastFlags == MethodTable::public_enum_flag_NonTrivialInterfaceCast)
+
+#define ASM__VTABLE_SLOTS_PER_CHUNK 8
+ASMCONSTANTS_C_ASSERT(ASM__VTABLE_SLOTS_PER_CHUNK == VTABLE_SLOTS_PER_CHUNK)
+
+#define ASM__VTABLE_SLOTS_PER_CHUNK_LOG2 3
+ASMCONSTANTS_C_ASSERT(ASM__VTABLE_SLOTS_PER_CHUNK_LOG2 == VTABLE_SLOTS_PER_CHUNK_LOG2)
+
+#define TLS_GETTER_MAX_SIZE_ASM DBG_FRE(0x20, 0x10)
+ASMCONSTANTS_C_ASSERT(TLS_GETTER_MAX_SIZE_ASM == TLS_GETTER_MAX_SIZE)
+
+#define JIT_TailCall_StackOffsetToFlags 0x08
+
+#define CallDescrData__pSrc 0x00
+#define CallDescrData__numStackSlots 0x04
+#define CallDescrData__pArgumentRegisters 0x08
+#define CallDescrData__fpReturnSize 0x0C
+#define CallDescrData__pTarget 0x10
+#ifndef __GNUC__
+#define CallDescrData__returnValue 0x18
+#else
+#define CallDescrData__returnValue 0x14
+#endif
+
+ASMCONSTANTS_C_ASSERT(CallDescrData__pSrc == offsetof(CallDescrData, pSrc))
+ASMCONSTANTS_C_ASSERT(CallDescrData__numStackSlots == offsetof(CallDescrData, numStackSlots))
+ASMCONSTANTS_C_ASSERT(CallDescrData__pArgumentRegisters == offsetof(CallDescrData, pArgumentRegisters))
+ASMCONSTANTS_C_ASSERT(CallDescrData__fpReturnSize == offsetof(CallDescrData, fpReturnSize))
+ASMCONSTANTS_C_ASSERT(CallDescrData__pTarget == offsetof(CallDescrData, pTarget))
+ASMCONSTANTS_C_ASSERT(CallDescrData__returnValue == offsetof(CallDescrData, returnValue))
+
+#undef ASMCONSTANTS_C_ASSERT
+#undef ASMCONSTANTS_RUNTIME_ASSERT
+
+// #define USE_COMPILE_TIME_CONSTANT_FINDER // Uncomment this line to use the constant finder
+#if defined(__cplusplus) && defined(USE_COMPILE_TIME_CONSTANT_FINDER)
+// This class causes the compiler to emit an error with the constant we're interested in
+// in the error message. This is useful if a size or offset changes. To use, comment out
+// the compile-time assert that is firing, enable the constant finder, add the appropriate
+// constant to find to BogusFunction(), and build.
+//
+// Here's a sample compiler error:
+// d:\dd\clr\src\ndp\clr\src\vm\i386\asmconstants.h(326) : error C2248: 'FindCompileTimeConstant<N>::FindCompileTimeConstant' : cannot access private member declared in class 'FindCompileTimeConstant<N>'
+// with
+// [
+// N=1520
+// ]
+// d:\dd\clr\src\ndp\clr\src\vm\i386\asmconstants.h(321) : see declaration of 'FindCompileTimeConstant<N>::FindCompileTimeConstant'
+// with
+// [
+// N=1520
+// ]
+template<size_t N>
+class FindCompileTimeConstant
+{
+private:
+ FindCompileTimeConstant();
+};
+
+void BogusFunction()
+{
+ // Sample usage to generate the error
+ FindCompileTimeConstant<offsetof(AppDomain, m_dwId)> bogus_variable;
+}
+#endif // defined(__cplusplus) && defined(USE_COMPILE_TIME_CONSTANT_FINDER)
diff --git a/src/vm/i386/asmhelpers.asm b/src/vm/i386/asmhelpers.asm
new file mode 100644
index 0000000000..66a22b7962
--- /dev/null
+++ b/src/vm/i386/asmhelpers.asm
@@ -0,0 +1,2400 @@
+; Licensed to the .NET Foundation under one or more agreements.
+; The .NET Foundation licenses this file to you under the MIT license.
+; See the LICENSE file in the project root for more information.
+
+; ==++==
+;
+
+;
+; ==--==
+;
+; FILE: asmhelpers.asm
+;
+; *** NOTE: If you make changes to this file, propagate the changes to
+; asmhelpers.s in this directory
+;
+
+;
+; ======================================================================================
+
+ .586
+ .model flat
+
+include asmconstants.inc
+
+ assume fs: nothing
+ option casemap:none
+ .code
+
+EXTERN __imp__RtlUnwind@16:DWORD
+ifdef _DEBUG
+EXTERN _HelperMethodFrameConfirmState@20:PROC
+endif
+ifdef FEATURE_MIXEDMODE
+EXTERN _IJWNOADThunkJumpTargetHelper@4:PROC
+endif
+EXTERN _StubRareEnableWorker@4:PROC
+ifdef FEATURE_COMINTEROP
+EXTERN _StubRareDisableHRWorker@4:PROC
+endif ; FEATURE_COMINTEROP
+EXTERN _StubRareDisableTHROWWorker@4:PROC
+EXTERN __imp__TlsGetValue@4:DWORD
+TlsGetValue PROTO stdcall
+ifdef FEATURE_HIJACK
+EXTERN _OnHijackWorker@4:PROC
+endif ;FEATURE_HIJACK
+EXTERN _COMPlusEndCatch@20:PROC
+EXTERN _COMPlusFrameHandler:PROC
+ifdef FEATURE_COMINTEROP
+EXTERN _COMPlusFrameHandlerRevCom:PROC
+endif ; FEATURE_COMINTEROP
+EXTERN __alloca_probe:PROC
+EXTERN _NDirectImportWorker@4:PROC
+EXTERN _UMThunkStubRareDisableWorker@8:PROC
+ifndef FEATURE_IMPLICIT_TLS
+ifdef ENABLE_GET_THREAD_GENERIC_FULL_CHECK
+; This is defined in C (threads.cpp) and enforces EE_THREAD_NOT_REQUIRED contracts
+GetThreadGenericFullCheck EQU ?GetThreadGenericFullCheck@@YGPAVThread@@XZ
+EXTERN GetThreadGenericFullCheck:PROC
+endif ; ENABLE_GET_THREAD_GENERIC_FULL_CHECK
+
+EXTERN _gThreadTLSIndex:DWORD
+EXTERN _gAppDomainTLSIndex:DWORD
+endif ; FEATURE_IMPLICIT_TLS
+
+EXTERN _VarargPInvokeStubWorker@12:PROC
+EXTERN _GenericPInvokeCalliStubWorker@12:PROC
+
+; To debug that LastThrownObjectException really is EXCEPTION_COMPLUS
+ifdef TRACK_CXX_EXCEPTION_CODE_HACK
+EXTERN __imp____CxxFrameHandler:PROC
+endif
+
+EXTERN _GetThread@0:PROC
+EXTERN _GetAppDomain@0:PROC
+
+ifdef MDA_SUPPORTED
+EXTERN _PInvokeStackImbalanceWorker@8:PROC
+endif
+
+ifndef FEATURE_CORECLR
+EXTERN _CopyCtorCallStubWorker@4:PROC
+endif
+
+EXTERN _PreStubWorker@8:PROC
+
+ifdef FEATURE_COMINTEROP
+EXTERN _CLRToCOMWorker@8:PROC
+endif
+
+ifdef FEATURE_REMOTING
+EXTERN _TransparentProxyStubWorker@8:PROC
+endif
+
+ifdef FEATURE_PREJIT
+EXTERN _ExternalMethodFixupWorker@16:PROC
+EXTERN _VirtualMethodFixupWorker@8:PROC
+EXTERN _StubDispatchFixupWorker@16:PROC
+endif
+
+ifdef FEATURE_COMINTEROP
+EXTERN _ComPreStubWorker@8:PROC
+endif
+
+ifdef FEATURE_READYTORUN
+EXTERN _DynamicHelperWorker@20:PROC
+endif
+
+ifdef FEATURE_REMOTING
+EXTERN _InContextTPQuickDispatchAsmStub@0:PROC
+endif
+
+EXTERN @JIT_InternalThrow@4:PROC
+
+EXTERN @ProfileEnter@8:PROC
+EXTERN @ProfileLeave@8:PROC
+EXTERN @ProfileTailcall@8:PROC
+
+UNREFERENCED macro arg
+ local unref
+ unref equ size arg
+endm
+
+FASTCALL_FUNC macro FuncName,cbArgs
+FuncNameReal EQU @&FuncName&@&cbArgs
+FuncNameReal proc public
+endm
+
+FASTCALL_ENDFUNC macro
+FuncNameReal endp
+endm
+
+ifdef FEATURE_COMINTEROP
+ifdef _DEBUG
+ CPFH_STACK_SIZE equ SIZEOF_FrameHandlerExRecord + STACK_OVERWRITE_BARRIER_SIZE*4
+else ; _DEBUG
+ CPFH_STACK_SIZE equ SIZEOF_FrameHandlerExRecord
+endif ; _DEBUG
+
+PUSH_CPFH_FOR_COM macro trashReg, pFrameBaseReg, pFrameOffset
+
+ ;
+ ; Setup the FrameHandlerExRecord
+ ;
+ push dword ptr [pFrameBaseReg + pFrameOffset]
+ push _COMPlusFrameHandlerRevCom
+ mov trashReg, fs:[0]
+ push trashReg
+ mov fs:[0], esp
+
+ifdef _DEBUG
+ mov trashReg, STACK_OVERWRITE_BARRIER_SIZE
+@@:
+ push STACK_OVERWRITE_BARRIER_VALUE
+ dec trashReg
+ jnz @B
+endif ; _DEBUG
+
+endm ; PUSH_CPFH_FOR_COM
+
+
+POP_CPFH_FOR_COM macro trashReg
+
+ ;
+ ; Unlink FrameHandlerExRecord from FS:0 chain
+ ;
+ifdef _DEBUG
+ add esp, STACK_OVERWRITE_BARRIER_SIZE*4
+endif
+ mov trashReg, [esp + OFFSETOF__FrameHandlerExRecord__m_ExReg__Next]
+ mov fs:[0], trashReg
+ add esp, SIZEOF_FrameHandlerExRecord
+
+endm ; POP_CPFH_FOR_COM
+endif ; FEATURE_COMINTEROP
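
The record these macros build on the stack has the shape implied by SIZEOF_FrameHandlerExRecord and the OFFSETOF__FrameHandlerExRecord__* constants in asmconstants.h. A sketch with stand-in type names (the real declarations live in the C++ exception-handling code):

    // Next/Handler pair that FS:[0] points to (stand-in declaration):
    struct SehRegistrationRecord
    {
        struct SehRegistrationRecord* Next;      // +0
        void*                         Handler;   // +4
    };

    // The 0x0c-byte record PUSH_CPFH_FOR_COM builds and links into the SEH chain:
    struct FrameHandlerExRecordSketch
    {
        struct SehRegistrationRecord m_ExReg;       // +0: Next, +4: Handler
        void*                        m_pEntryFrame; // +8: Frame* taken from the stub frame
    };
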
+
+;
+; FramedMethodFrame prolog
+;
+STUB_PROLOG macro
+ ; push ebp-frame
+ push ebp
+ mov ebp,esp
+
+ ; save CalleeSavedRegisters
+ push ebx
+ push esi
+ push edi
+
+ ; push ArgumentRegisters
+ push ecx
+ push edx
+endm
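
For reference, the frame this prolog leaves behind (and that STUB_EPILOG unwinds) looks like this, lowest address first; a sketch with illustrative field types:

    // Stack picture after STUB_PROLOG, starting at ESP:
    struct StubPrologFrame
    {
        unsigned edx, ecx;        // ArgumentRegisters (pushed last)
        unsigned edi, esi, ebx;   // CalleeSavedRegisters
        unsigned savedEbp;        // caller's EBP; the new EBP points here
        unsigned retAddr;         // return address into the caller
    };
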
+
+;
+; FramedMethodFrame epilog
+;
+STUB_EPILOG macro
+ ; pop ArgumentRegisters
+ pop edx
+ pop ecx
+
+ ; pop CalleeSavedRegisters
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+endm
+
+;
+; FramedMethodFrame epilog
+;
+STUB_EPILOG_RETURN macro
+ ; pop ArgumentRegisters
+ add esp, 8
+
+ ; pop CalleeSavedRegisters
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+endm
+
+STUB_PROLOG_2_HIDDEN_ARGS macro
+
+ ;
+ ; The stub arguments are where we want to setup the TransitionBlock. We will
+ ; setup the TransitionBlock later once we can trash them
+ ;
+ ; push ebp-frame
+ ; push ebp
+ ; mov ebp,esp
+
+ ; save CalleeSavedRegisters
+ ; push ebx
+
+ push esi
+ push edi
+
+ ; push ArgumentRegisters
+ push ecx
+ push edx
+
+ mov ecx, [esp + 4*4]
+ mov edx, [esp + 5*4]
+
+ ; Set up a proper EBP frame now that the stub arguments can be trashed
+ mov [esp + 4*4],ebx
+ mov [esp + 5*4],ebp
+ lea ebp, [esp + 5*4]
+endm
+
+ResetCurrentContext PROC stdcall public
+ LOCAL ctrlWord:WORD
+
+ ; Clear the direction flag (used for rep instructions)
+ cld
+
+ fnstcw ctrlWord
+ fninit ; reset FPU
+ and ctrlWord, 0f00h ; preserve precision and rounding control
+ or ctrlWord, 007fh ; mask all exceptions
+ fldcw ctrlWord ; preserve precision control
+ RET
+ResetCurrentContext ENDP
+
+;Incoming:
+; ESP+4: Pointer to buffer to which FPU state should be saved
+_CaptureFPUContext@4 PROC public
+
+ mov ecx, [esp+4]
+ fnstenv [ecx]
+ retn 4
+
+_CaptureFPUContext@4 ENDP
+
+; Incoming:
+; ESP+4: Pointer to buffer from which FPU state should be restored
+_RestoreFPUContext@4 PROC public
+
+ mov ecx, [esp+4]
+ fldenv [ecx]
+ retn 4
+
+_RestoreFPUContext@4 ENDP
+
+ifndef FEATURE_CORECLR
+ifdef _DEBUG
+; For C++ exceptions, we desperately need to know the SEH code. This allows us to properly
+; distinguish managed exceptions from C++ exceptions from standard SEH like hard stack overflow.
+; We do this by providing our own handler that squirrels away the exception code and then
+; defers to the C++ service. Fortunately, two symbols exist for the C++ handler.
+___CxxFrameHandler3 PROC public
+
+ ; We don't know what arguments are passed to us (except for the first arg on stack)
+ ; It turns out that EAX is part of the non-standard calling convention of this
+ ; function.
+
+ push eax
+ push edx
+
+ cmp dword ptr [_gThreadTLSIndex], -1
+ je Chain ; CLR is not initialized yet
+
+ call _GetThread@0
+
+ test eax, eax ; not a managed thread
+ jz Chain
+
+ mov edx, [esp + 0ch] ; grab the first argument
+ mov edx, [edx] ; grab the SEH exception code
+
+ mov dword ptr [eax + Thread_m_LastCxxSEHExceptionCode], edx
+
+Chain:
+
+ pop edx
+
+ ; [esp] contains the value of EAX we must restore. We would like
+ ; [esp] to contain the address of the real imported CxxFrameHandler
+ ; so we can chain to it.
+
+ mov eax, [__imp____CxxFrameHandler]
+ mov eax, [eax]
+ xchg [esp], eax
+
+ ret
+
+___CxxFrameHandler3 ENDP
+endif ; _DEBUG
+endif ; FEATURE_CORECLR
+
+; Register CLR exception handlers defined on the C++ side with SAFESEH.
+; Note that these directives must be in a file that defines symbols that will be used during linking,
+; otherwise it's possible that the resulting .obj will be completely ignored by the linker and these
+; directives will have no effect.
+COMPlusFrameHandler proto c
+.safeseh COMPlusFrameHandler
+
+COMPlusNestedExceptionHandler proto c
+.safeseh COMPlusNestedExceptionHandler
+
+FastNExportExceptHandler proto c
+.safeseh FastNExportExceptHandler
+
+UMThunkPrestubHandler proto c
+.safeseh UMThunkPrestubHandler
+
+ifdef FEATURE_COMINTEROP
+COMPlusFrameHandlerRevCom proto c
+.safeseh COMPlusFrameHandlerRevCom
+endif
+
+; Note that RtlUnwind trashes EBX, ESI and EDI, so this wrapper preserves them
+CallRtlUnwind PROC stdcall public USES ebx esi edi, pEstablisherFrame :DWORD, callback :DWORD, pExceptionRecord :DWORD, retVal :DWORD
+
+ push retVal
+ push pExceptionRecord
+ push callback
+ push pEstablisherFrame
+ call dword ptr [__imp__RtlUnwind@16]
+
+ ; return 1
+ push 1
+ pop eax
+
+ RET
+CallRtlUnwind ENDP
+
+_ResumeAtJitEHHelper@4 PROC public
+ mov edx, [esp+4] ; edx = pContext (EHContext*)
+
+ mov ebx, [edx+EHContext_Ebx]
+ mov esi, [edx+EHContext_Esi]
+ mov edi, [edx+EHContext_Edi]
+ mov ebp, [edx+EHContext_Ebp]
+ mov ecx, [edx+EHContext_Esp]
+ mov eax, [edx+EHContext_Eip]
+ mov [ecx-4], eax
+ mov eax, [edx+EHContext_Eax]
+ mov [ecx-8], eax
+ mov eax, [edx+EHContext_Ecx]
+ mov [ecx-0Ch], eax
+ mov eax, [edx+EHContext_Edx]
+ mov [ecx-10h], eax
+ lea esp, [ecx-10h]
+ pop edx
+ pop ecx
+ pop eax
+ ret
+_ResumeAtJitEHHelper@4 ENDP
+
+; int __stdcall CallJitEHFilterHelper(size_t *pShadowSP, EHContext *pContext);
+; on entry, only the pContext->Esp, Ebx, Esi, Edi, Ebp, and Eip are initialized
+_CallJitEHFilterHelper@8 PROC public
+ push ebp
+ mov ebp, esp
+ push ebx
+ push esi
+ push edi
+
+ pShadowSP equ [ebp+8]
+ pContext equ [ebp+12]
+
+ mov eax, pShadowSP ; Write esp-4 to the shadowSP slot
+ test eax, eax
+ jz DONE_SHADOWSP_FILTER
+ mov ebx, esp
+ sub ebx, 4
+ or ebx, SHADOW_SP_IN_FILTER_ASM
+ mov [eax], ebx
+ DONE_SHADOWSP_FILTER:
+
+ mov edx, [pContext]
+ mov eax, [edx+EHContext_Eax]
+ mov ebx, [edx+EHContext_Ebx]
+ mov esi, [edx+EHContext_Esi]
+ mov edi, [edx+EHContext_Edi]
+ mov ebp, [edx+EHContext_Ebp]
+
+ call dword ptr [edx+EHContext_Eip]
+ifdef _DEBUG
+ nop ; Indicate that it is OK to call managed code directly from here
+endif
+
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp ; don't use 'leave' here, as ebp has been trashed
+ retn 8
+_CallJitEHFilterHelper@8 ENDP
+
+
+; void __stdcall CallJITEHFinallyHelper(size_t *pShadowSP, EHContext *pContext);
+; on entry, only the pContext->Esp, Ebx, Esi, Edi, Ebp, and Eip are initialized
+_CallJitEHFinallyHelper@8 PROC public
+ push ebp
+ mov ebp, esp
+ push ebx
+ push esi
+ push edi
+
+ pShadowSP equ [ebp+8]
+ pContext equ [ebp+12]
+
+ mov eax, pShadowSP ; Write esp-4 to the shadowSP slot
+ test eax, eax
+ jz DONE_SHADOWSP_FINALLY
+ mov ebx, esp
+ sub ebx, 4
+ mov [eax], ebx
+ DONE_SHADOWSP_FINALLY:
+
+ mov edx, [pContext]
+ mov eax, [edx+EHContext_Eax]
+ mov ebx, [edx+EHContext_Ebx]
+ mov esi, [edx+EHContext_Esi]
+ mov edi, [edx+EHContext_Edi]
+ mov ebp, [edx+EHContext_Ebp]
+ call dword ptr [edx+EHContext_Eip]
+ifdef _DEBUG
+ nop ; Indicate that it is OK to call managed code directly from here
+endif
+
+ ; Reflect the changes to the context and only update non-volatile registers.
+ ; This will be used later to update REGDISPLAY
+ mov edx, [esp+12+12]
+ mov [edx+EHContext_Ebx], ebx
+ mov [edx+EHContext_Esi], esi
+ mov [edx+EHContext_Edi], edi
+ mov [edx+EHContext_Ebp], ebp
+
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp ; don't use 'leave' here, as ebp has been trashed
+ retn 8
+_CallJitEHFinallyHelper@8 ENDP
+
+
+_GetSpecificCpuTypeAsm@0 PROC public
+ push ebx ; ebx is trashed by the cpuid calls
+
+ ; See if the chip supports CPUID
+ pushfd
+ pop ecx ; Get the EFLAGS
+ mov eax, ecx ; Save for later testing
+ xor ecx, 200000h ; Invert the ID bit.
+ push ecx
+ popfd ; Save the updated flags.
+ pushfd
+ pop ecx ; Retrieve the updated flags
+ xor ecx, eax ; Test if it actually changed (bit set means yes)
+ push eax
+ popfd ; Restore the flags
+
+ test ecx, 200000h
+ jz Assume486
+
+ xor eax, eax
+ cpuid
+
+ test eax, eax
+ jz Assume486 ; brif CPUID1 not allowed
+
+ mov eax, 1
+ cpuid
+
+ ; filter out everything except family and model
+ ; Note that some multi-procs have different stepping numbers for each proc
+ and eax, 0ff0h
+
+ jmp CpuTypeDone
+
+Assume486:
+ mov eax, 0400h ; report 486
+CpuTypeDone:
+ pop ebx
+ retn
+_GetSpecificCpuTypeAsm@0 ENDP
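
A rough C equivalent of the helper above (a sketch; it omits the EFLAGS.ID probe, which the assembly needs only because it cannot assume CPUID exists):

    #include <intrin.h>

    unsigned GetSpecificCpuTypeSketch(void)
    {
        int info[4];
        __cpuid(info, 0);
        if (info[0] == 0)
            return 0x0400;          // report a 486 if leaf 1 is not available
        __cpuid(info, 1);
        return info[0] & 0x0ff0;    // keep family and model, drop the stepping
    }
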
+
+; DWORD __stdcall GetSpecificCpuFeaturesAsm(DWORD *pInfo);
+_GetSpecificCpuFeaturesAsm@4 PROC public
+ push ebx ; ebx is trashed by the cpuid calls
+
+ ; See if the chip supports CPUID
+ pushfd
+ pop ecx ; Get the EFLAGS
+ mov eax, ecx ; Save for later testing
+ xor ecx, 200000h ; Invert the ID bit.
+ push ecx
+ popfd ; Save the updated flags.
+ pushfd
+ pop ecx ; Retrieve the updated flags
+ xor ecx, eax ; Test if it actually changed (bit set means yes)
+ push eax
+ popfd ; Restore the flags
+
+ test ecx, 200000h
+ jz CpuFeaturesFail
+
+ xor eax, eax
+ cpuid
+
+ test eax, eax
+ jz CpuFeaturesDone ; br if CPUID1 not allowed
+
+ mov eax, 1
+ cpuid
+ mov eax, edx ; return all feature flags
+ mov edx, [esp+8]
+ test edx, edx
+ jz CpuFeaturesDone
+ mov [edx],ebx ; return additional useful information
+ jmp CpuFeaturesDone
+
+CpuFeaturesFail:
+ xor eax, eax ; Nothing to report
+CpuFeaturesDone:
+ pop ebx
+ retn 4
+_GetSpecificCpuFeaturesAsm@4 ENDP
+
+
+;-----------------------------------------------------------------------
+; The out-of-line portion of the code to enable preemptive GC.
+; After the work is done, the code jumps back to the "pRejoinPoint"
+; which should be emitted right after the inline part is generated.
+;
+; Assumptions:
+; ebx = Thread
+; Preserves
+; all registers except ecx.
+;
+;-----------------------------------------------------------------------
+_StubRareEnable proc public
+ push eax
+ push edx
+
+ push ebx
+ call _StubRareEnableWorker@4
+
+ pop edx
+ pop eax
+ retn
+_StubRareEnable ENDP
+
+ifdef FEATURE_COMINTEROP
+_StubRareDisableHR proc public
+ push edx
+
+ push ebx ; Thread
+ call _StubRareDisableHRWorker@4
+
+ pop edx
+ retn
+_StubRareDisableHR ENDP
+endif ; FEATURE_COMINTEROP
+
+_StubRareDisableTHROW proc public
+ push eax
+ push edx
+
+ push ebx ; Thread
+ call _StubRareDisableTHROWWorker@4
+
+ pop edx
+ pop eax
+ retn
+_StubRareDisableTHROW endp
+
+
+ifdef FEATURE_MIXEDMODE
+; VOID __stdcall IJWNOADThunkJumpTarget(void);
+; This routine is used by the IJWNOADThunk to determine the callsite of the domain-specific stub to call.
+_IJWNOADThunkJumpTarget@0 proc public
+
+ push ebp
+ mov ebp, esp
+
+ ; EAX contains IJWNOADThunk*
+ ; Must retain ebx, ecx, edx, esi, edi.
+
+ ; save ebx - holds the IJWNOADThunk*
+ ; save ecx - holds the current AppDomain ID.
+ ; save edx - holds the cached AppDomain ID.
+ push ebx
+ push ecx
+
+ ; put the IJWNOADThunk into ebx for safe keeping
+ mov ebx, eax
+
+ ; get thread - assumes registers are preserved
+ call _GetThread@0
+
+ ; if thread is null, go down un-optimized path
+ test eax,eax
+ jz cachemiss
+
+ ; get current domain - assumes registers are preserved
+ call _GetAppDomain@0
+
+ ; if domain is null, go down un-optimized path
+ test eax,eax
+ jz cachemiss
+
+ ; get the current appdomain id
+ mov ecx, [eax + AppDomain__m_dwId]
+
+ ; test it against each cache location
+ mov eax, ebx
+ add eax, IJWNOADThunk__m_cache
+ cmp ecx, [eax]
+ je cachehit
+
+ add eax, IJWNOADThunk__NextCacheOffset
+ cmp ecx, [eax]
+ je cachehit
+
+ add eax, IJWNOADThunk__NextCacheOffset
+ cmp ecx, [eax]
+ je cachehit
+
+ add eax, IJWNOADThunk__NextCacheOffset
+ cmp ecx, [eax]
+ je cachehit
+
+cachemiss:
+ ; save extra registers
+ push edx
+ push esi
+ push edi
+
+ ; call unoptimized path
+ push ebx ; only arg is IJWNOADThunk*
+ call _IJWNOADThunkJumpTargetHelper@4
+
+ ; restore extra registers
+ pop edi
+ pop esi
+ pop edx
+
+ ; jump back up to the epilog
+ jmp complete
+
+cachehit:
+ ; found a matching ADID, get the code addr.
+ mov eax, [eax + IJWNOADThunk__CodeAddrOffsetFromADID]
+
+ ; if the callsite is null, go down the un-optimized path
+ test eax, eax
+ jz cachemiss
+
+complete:
+ ; restore regs
+ pop ecx
+ pop ebx
+
+ mov esp, ebp
+ pop ebp
+
+ ; Jump to callsite
+ jmp eax
+
+ ; This will never be executed. It is just to help out stack-walking logic
+ ; which disassembles the epilog to unwind the stack.
+ ret
+_IJWNOADThunkJumpTarget@0 endp
+
+endif
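
In C terms the cache probe above behaves roughly as follows (sketch; the struct is a stand-in matching IJWNOADThunk__m_cache, IJWNOADThunk__NextCacheOffset == 8, and IJWNOADThunk__CodeAddrOffsetFromADID == 4 from asmconstants.h):

    // Four {appdomain id, code address} pairs starting at offset 0x1C in the thunk.
    struct AdidCodePair { unsigned long adid; void* codeAddr; };

    static void* LookupCachedTarget(unsigned char* pThunk, unsigned long currentAdid)
    {
        struct AdidCodePair* cache = (struct AdidCodePair*)(pThunk + 0x1C); // IJWNOADThunk__m_cache
        for (int i = 0; i < 4; i++)
            if (cache[i].adid == currentAdid)
                return cache[i].codeAddr;  // NULL here means: fall back to the slow helper
        return 0;                          // no match: fall back to the slow helper
    }
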
+
+InternalExceptionWorker proc public
+ pop edx ; recover RETADDR
+ add esp, eax ; release caller's args
+ push edx ; restore RETADDR
+ jmp @JIT_InternalThrow@4
+InternalExceptionWorker endp
+
+; EAX -> number of caller arg bytes on the stack that we must remove before going
+; to the throw helper, which assumes the stack is clean.
+_ArrayOpStubNullException proc public
+ ; kFactorReg and kTotalReg could not have been modified, but let's pop
+ ; them anyway for consistency and to avoid future bugs.
+ pop esi
+ pop edi
+ mov ecx, CORINFO_NullReferenceException_ASM
+ jmp InternalExceptionWorker
+_ArrayOpStubNullException endp
+
+; EAX -> number of caller arg bytes on the stack that we must remove before going
+; to the throw helper, which assumes the stack is clean.
+_ArrayOpStubRangeException proc public
+ ; kFactorReg and kTotalReg could not have been modified, but let's pop
+ ; them anyway for consistency and to avoid future bugs.
+ pop esi
+ pop edi
+ mov ecx, CORINFO_IndexOutOfRangeException_ASM
+ jmp InternalExceptionWorker
+_ArrayOpStubRangeException endp
+
+; EAX -> number of caller arg bytes on the stack that we must remove before going
+; to the throw helper, which assumes the stack is clean.
+_ArrayOpStubTypeMismatchException proc public
+ ; kFactorReg and kTotalReg could not have been modified, but let's pop
+ ; them anyway for consistency and to avoid future bugs.
+ pop esi
+ pop edi
+ mov ecx, CORINFO_ArrayTypeMismatchException_ASM
+ jmp InternalExceptionWorker
+_ArrayOpStubTypeMismatchException endp
+
+;------------------------------------------------------------------------------
+; This helper routine enregisters the appropriate arguments and makes the
+; actual call.
+;------------------------------------------------------------------------------
+; void STDCALL CallDescrWorkerInternal(CallDescrWorkerParams * pParams)
+CallDescrWorkerInternal PROC stdcall public USES EBX,
+ pParams: DWORD
+
+ mov ebx, pParams
+
+ mov ecx, [ebx+CallDescrData__numStackSlots]
+ mov eax, [ebx+CallDescrData__pSrc] ; copy the stack
+ test ecx, ecx
+ jz donestack
+ lea eax, [eax+4*ecx-4] ; last argument
+ push dword ptr [eax]
+ dec ecx
+ jz donestack
+ sub eax, 4
+ push dword ptr [eax]
+ dec ecx
+ jz donestack
+stackloop:
+ sub eax, 4
+ push dword ptr [eax]
+ dec ecx
+ jnz stackloop
+donestack:
+
+ ; now we must push each field of the ArgumentRegister structure
+ mov eax, [ebx+CallDescrData__pArgumentRegisters]
+ mov edx, dword ptr [eax]
+ mov ecx, dword ptr [eax+4]
+
+ call [ebx+CallDescrData__pTarget]
+ifdef _DEBUG
+ nop ; This is a tag that we use in an assert. Fcalls expect to
+ ; be called from Jitted code or from certain blessed call sites like
+ ; this one. (See HelperMethodFrame::InsureInit)
+endif
+
+ ; Save FP return value if necessary
+ mov ecx, [ebx+CallDescrData__fpReturnSize]
+ cmp ecx, 0
+ je ReturnsInt
+
+ cmp ecx, 4
+ je ReturnsFloat
+ cmp ecx, 8
+ je ReturnsDouble
+ ; unexpected
+ jmp Epilog
+
+ReturnsInt:
+ mov [ebx+CallDescrData__returnValue], eax
+ mov [ebx+CallDescrData__returnValue+4], edx
+
+Epilog:
+ RET
+
+ReturnsFloat:
+ fstp dword ptr [ebx+CallDescrData__returnValue] ; Spill the Float return value
+ jmp Epilog
+
+ReturnsDouble:
+ fstp qword ptr [ebx+CallDescrData__returnValue] ; Spill the Double return value
+ jmp Epilog
+
+CallDescrWorkerInternal endp
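
The structure this routine consumes is implied by the CallDescrData__* offsets in asmconstants.h; a sketch of the layout (field types are approximations):

    // CallDescrData as seen by CallDescrWorkerInternal (sketch):
    struct CallDescrDataSketch
    {
        void*              pSrc;               // +0x00: stack arguments, pushed last slot first
        unsigned           numStackSlots;      // +0x04: number of 4-byte stack slots
        unsigned*          pArgumentRegisters; // +0x08: [0] -> EDX, [1] -> ECX
        unsigned           fpReturnSize;       // +0x0c: 0, 4 (float) or 8 (double)
        void*              pTarget;            // +0x10: code address to call
        unsigned long long returnValue;        // +0x18 MSVC / +0x14 GCC (8-byte alignment differs)
    };
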
+
+ifdef _DEBUG
+; int __fastcall HelperMethodFrameRestoreState(HelperMethodFrame*, struct MachState *)
+FASTCALL_FUNC HelperMethodFrameRestoreState,8
+ mov eax, edx ; eax = MachState*
+else
+; int __fastcall HelperMethodFrameRestoreState(struct MachState *)
+FASTCALL_FUNC HelperMethodFrameRestoreState,4
+ mov eax, ecx ; eax = MachState*
+endif
+ ; restore the registers from the m_MachState structure. Note that
+ ; we only do this for registers that were not saved on the stack
+ ; at the time the machine state snapshot was taken.
+
+ cmp [eax+MachState__pRetAddr], 0
+
+ifdef _DEBUG
+ jnz noConfirm
+ push ebp
+ push ebx
+ push edi
+ push esi
+ push ecx ; HelperFrame*
+ call _HelperMethodFrameConfirmState@20
+ ; on return, eax = MachState*
+ cmp [eax+MachState__pRetAddr], 0
+noConfirm:
+endif
+
+ jz doRet
+
+ lea edx, [eax+MachState__esi] ; Did we have to spill ESI
+ cmp [eax+MachState__pEsi], edx
+ jnz SkipESI
+ mov esi, [edx] ; Then restore it
+SkipESI:
+
+ lea edx, [eax+MachState__edi] ; Did we have to spill EDI
+ cmp [eax+MachState__pEdi], edx
+ jnz SkipEDI
+ mov edi, [edx] ; Then restore it
+SkipEDI:
+
+ lea edx, [eax+MachState__ebx] ; Did we have to spill EBX
+ cmp [eax+MachState__pEbx], edx
+ jnz SkipEBX
+ mov ebx, [edx] ; Then restore it
+SkipEBX:
+
+ lea edx, [eax+MachState__ebp] ; Did we have to spill EBP
+ cmp [eax+MachState__pEbp], edx
+ jnz SkipEBP
+ mov ebp, [edx] ; Then restore it
+SkipEBP:
+
+doRet:
+ xor eax, eax
+ retn
+FASTCALL_ENDFUNC HelperMethodFrameRestoreState
+
+
+ifndef FEATURE_IMPLICIT_TLS
+;---------------------------------------------------------------------------
+; Portable GetThread() function: used if no platform-specific optimizations apply.
+; This is in assembly code because we count on edx not getting trashed on calls
+; to this function.
+;---------------------------------------------------------------------------
+; Thread* __stdcall GetThreadGeneric(void);
+GetThreadGeneric PROC stdcall public USES ecx edx
+
+ifdef _DEBUG
+ cmp dword ptr [_gThreadTLSIndex], -1
+ jnz @F
+ int 3
+@@:
+endif
+ifdef ENABLE_GET_THREAD_GENERIC_FULL_CHECK
+ ; non-PAL, debug-only GetThreadGeneric should defer to GetThreadGenericFullCheck
+ ; to do extra contract enforcement. (See GetThreadGenericFullCheck for details.)
+ ; This code is intentionally not added to asmhelpers.s, as this enforcement is only
+ ; implemented for non-PAL builds.
+ call GetThreadGenericFullCheck
+else
+ push dword ptr [_gThreadTLSIndex]
+ call dword ptr [__imp__TlsGetValue@4]
+endif
+ ret
+GetThreadGeneric ENDP
+
+;---------------------------------------------------------------------------
+; Portable GetAppdomain() function: used if no platform-specific optimizations apply.
+; This is in assembly code because we count on edx not getting trashed on calls
+; to this function.
+;---------------------------------------------------------------------------
+; Appdomain* __stdcall GetAppDomainGeneric(void);
+GetAppDomainGeneric PROC stdcall public USES ecx edx
+
+ifdef _DEBUG
+ cmp dword ptr [_gAppDomainTLSIndex], -1
+ jnz @F
+ int 3
+@@:
+endif
+
+ push dword ptr [_gAppDomainTLSIndex]
+ call dword ptr [__imp__TlsGetValue@4]
+ ret
+GetAppDomainGeneric ENDP
+endif
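
Ignoring the debug-only checks, the two portable helpers reduce to plain TLS lookups; they are written in assembly only so callers can rely on ECX/EDX surviving the call. A sketch (Thread, AppDomain and the two TLS index globals are the CLR declarations the assembly externs refer to):

    #include <windows.h>

    class Thread;  class AppDomain;                    // CLR types (declarations only)
    extern DWORD gThreadTLSIndex, gAppDomainTLSIndex;  // CLR globals (see externs above)

    Thread*    GetThreadGenericSketch()    { return (Thread*)   TlsGetValue(gThreadTLSIndex); }
    AppDomain* GetAppDomainGenericSketch() { return (AppDomain*)TlsGetValue(gAppDomainTLSIndex); }
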
+
+
+ifdef FEATURE_HIJACK
+
+; A JITted method's return address was hijacked to return to us here.
+; VOID OnHijackTripThread()
+OnHijackTripThread PROC stdcall public
+
+ ; Don't fiddle with this unless you change HijackFrame::UpdateRegDisplay
+ ; and HijackArgs
+ push eax ; make room for the real return address (Eip)
+ push ebp
+ push eax
+ push ecx
+ push edx
+ push ebx
+ push esi
+ push edi
+
+ ; unused space for floating point state
+ sub esp,12
+
+ push esp
+ call _OnHijackWorker@4
+
+ ; unused space for floating point state
+ add esp,12
+
+ pop edi
+ pop esi
+ pop ebx
+ pop edx
+ pop ecx
+ pop eax
+ pop ebp
+ retn ; return to the correct place, adjusted by our caller
+OnHijackTripThread ENDP
+
+; VOID OnHijackFPTripThread()
+OnHijackFPTripThread PROC stdcall public
+
+ ; Don't fiddle with this unless you change HijackFrame::UpdateRegDisplay
+ ; and HijackArgs
+ push eax ; make room for the real return address (Eip)
+ push ebp
+ push eax
+ push ecx
+ push edx
+ push ebx
+ push esi
+ push edi
+
+ sub esp,12
+
+ ; save top of the floating point stack (there is return value passed in it)
+ ; save full 10 bytes to avoid precision loss
+ fstp tbyte ptr [esp]
+
+ push esp
+ call _OnHijackWorker@4
+
+ ; restore top of the floating point stack
+ fld tbyte ptr [esp]
+
+ add esp,12
+
+ pop edi
+ pop esi
+ pop ebx
+ pop edx
+ pop ecx
+ pop eax
+ pop ebp
+ retn ; return to the correct place, adjusted by our caller
+OnHijackFPTripThread ENDP
+
+endif ; FEATURE_HIJACK
+
+
+; Note that the debugger skips this entirely when doing SetIP,
+; since COMPlusCheckForAbort should always return 0. Excep.cpp:LeaveCatch
+; asserts that to be true. If this ends up doing more work, then the
+; debugger may need additional support.
+; void __stdcall JIT_EndCatch();
+JIT_EndCatch PROC stdcall public
+
+ ; make temp storage for return address, and push the address of that
+ ; as the last arg to COMPlusEndCatch
+ mov ecx, [esp]
+ push ecx;
+ push esp;
+
+ ; push the rest of COMPlusEndCatch's args, right-to-left
+ push esi
+ push edi
+ push ebx
+ push ebp
+
+ call _COMPlusEndCatch@20 ; returns old esp value in eax, stores jump address
+ ; now eax = new esp, [esp] = new eip
+
+ pop edx ; edx = new eip
+ mov esp, eax ; esp = new esp
+ jmp edx ; eip = new eip
+
+JIT_EndCatch ENDP
+
+;==========================================================================
+; This function is reached only via the embedded ImportThunkGlue code inside
+; an NDirectMethodDesc. Its purpose is to load the DLL associated with an
+; N/Direct method, then backpatch the DLL target into the methoddesc.
+;
+; Initial state:
+;
+; Preemptive GC is *enabled*: we are actually in an unmanaged state.
+;
+;
+; [esp+...] - The *unmanaged* parameters to the DLL target.
+; [esp+4] - Return address back into the JIT'ted code that made
+; the DLL call.
+; [esp] - Contains the "return address." Because we got here
+; thru a call embedded inside a MD, this "return address"
+; gives us an easy way to find the MD (which was the
+; whole purpose of the embedded call maneuver).
+;
+;
+;
+;==========================================================================
+_NDirectImportThunk@0 proc public
+
+ ; Preserve argument registers
+ push ecx
+ push edx
+
+ ; Invoke the function that does the real work.
+ push eax
+ call _NDirectImportWorker@4
+
+ ; Restore argument registers
+ pop edx
+ pop ecx
+
+ ; If we got back from NDirectImportWorker, the MD has been successfully
+ ; linked and "eax" contains the DLL target. Proceed to execute the
+ ; original DLL call.
+ jmp eax ; Jump to DLL target
+_NDirectImportThunk@0 endp
+
+;==========================================================================
+; The call in the fixup precode initially points to this function.
+; The purpose of this function is to load the MethodDesc and forward the call to the prestub.
+_PrecodeFixupThunk@0 proc public
+
+ pop eax ; Pop the return address. It points right after the call instruction in the precode.
+ push esi
+ push edi
+
+ ; Inline computation done by FixupPrecode::GetMethodDesc()
+ movzx esi,byte ptr [eax+2] ; m_PrecodeChunkIndex
+ movzx edi,byte ptr [eax+1] ; m_MethodDescChunkIndex
+ mov eax,dword ptr [eax+esi*8+3]
+ lea eax,[eax+edi*4]
+
+ pop edi
+ pop esi
+ jmp _ThePreStub@0
+
+_PrecodeFixupThunk@0 endp
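
The four instructions tagged "Inline computation done by FixupPrecode::GetMethodDesc()" translate to roughly the following C (illustrative sketch; pAfterCall is the popped return address, i.e. the byte just past the call instruction inside the precode):

    static void* MethodDescFromFixupPrecode(unsigned char* pAfterCall)
    {
        unsigned precodeChunkIndex    = pAfterCall[2];   // m_PrecodeChunkIndex
        unsigned methodDescChunkIndex = pAfterCall[1];   // m_MethodDescChunkIndex

        // base of the MethodDesc chunk is stored at [eax + esi*8 + 3]
        unsigned char* chunkBase = *(unsigned char**)(pAfterCall + precodeChunkIndex * 8 + 3);
        return chunkBase + methodDescChunkIndex * 4;     // lea eax, [eax + edi*4]
    }
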
+
+; LPVOID __stdcall CTPMethodTable__CallTargetHelper2(
+; const void *pTarget,
+; LPVOID pvFirst,
+; LPVOID pvSecond)
+CTPMethodTable__CallTargetHelper2 proc stdcall public,
+ pTarget : DWORD,
+ pvFirst : DWORD,
+ pvSecond : DWORD
+ mov ecx, pvFirst
+ mov edx, pvSecond
+
+ call pTarget
+ifdef _DEBUG
+ nop ; Mark this as a special call site that can
+ ; directly call unmanaged code
+endif
+ ret
+CTPMethodTable__CallTargetHelper2 endp
+
+; LPVOID __stdcall CTPMethodTable__CallTargetHelper3(
+; const void *pTarget,
+; LPVOID pvFirst,
+; LPVOID pvSecond,
+; LPVOID pvThird)
+CTPMethodTable__CallTargetHelper3 proc stdcall public,
+ pTarget : DWORD,
+ pvFirst : DWORD,
+ pvSecond : DWORD,
+ pvThird : DWORD
+ push pvThird
+
+ mov ecx, pvFirst
+ mov edx, pvSecond
+
+ call pTarget
+ifdef _DEBUG
+ nop ; Mark this as a special call site that can
+ ; directly call unmanaged code
+endif
+ ret
+CTPMethodTable__CallTargetHelper3 endp
+
+
+; void __stdcall setFPReturn(int fpSize, INT64 retVal)
+_setFPReturn@12 proc public
+ mov ecx, [esp+4]
+
+ ; leave the return value in eax:edx if it is not the floating point case
+ mov eax, [esp+8]
+ mov edx, [esp+12]
+
+ cmp ecx, 4
+ jz setFPReturn4
+
+ cmp ecx, 8
+ jnz setFPReturnNot8
+ fld qword ptr [esp+8]
+setFPReturnNot8:
+ retn 12
+
+setFPReturn4:
+ fld dword ptr [esp+8]
+ retn 12
+_setFPReturn@12 endp
+
+; void __stdcall getFPReturn(int fpSize, INT64 *pretVal)
+_getFPReturn@8 proc public
+ mov ecx, [esp+4]
+ mov eax, [esp+8]
+ cmp ecx, 4
+ jz getFPReturn4
+
+ cmp ecx, 8
+ jnz getFPReturnNot8
+ fstp qword ptr [eax]
+getFPReturnNot8:
+ retn 8
+
+getFPReturn4:
+ fstp dword ptr [eax]
+ retn 8
+_getFPReturn@8 endp
+
+; void __stdcall UM2MThunk_WrapperHelper(void *pThunkArgs,
+; int argLen,
+; void *pAddr,
+; UMEntryThunk *pEntryThunk,
+; Thread *pThread)
+UM2MThunk_WrapperHelper proc stdcall public,
+ pThunkArgs : DWORD,
+ argLen : DWORD,
+ pAddr : DWORD,
+ pEntryThunk : DWORD,
+ pThread : DWORD
+ UNREFERENCED argLen
+
+ push ebx
+
+ mov eax, pEntryThunk
+ mov ecx, pThread
+ mov ebx, pThunkArgs
+ call pAddr
+
+ pop ebx
+
+ ret
+UM2MThunk_WrapperHelper endp
+
+; VOID __cdecl UMThunkStubRareDisable()
+;<TODO>
+; @todo: this is very similar to StubRareDisable
+;</TODO>
+_UMThunkStubRareDisable proc public
+ push eax
+ push ecx
+
+ push eax ; Push the UMEntryThunk
+ push ecx ; Push thread
+ call _UMThunkStubRareDisableWorker@8
+
+ pop ecx
+ pop eax
+ retn
+_UMThunkStubRareDisable endp
+
+
+;+----------------------------------------------------------------------------
+;
+; Method: CRemotingServices::CheckForContextMatch public
+;
+; Synopsis: This code generates a check to see if the current context and
+; the context of the proxy match.
+;
+;+----------------------------------------------------------------------------
+;
+; returns zero if contexts match
+; returns non-zero if contexts do not match
+;
+; UINT_PTR __stdcall CRemotingServices__CheckForContextMatch(Object* pStubData)
+ifdef FEATURE_REMOTING
+_CRemotingServices__CheckForContextMatch@4 proc public
+ push ebx ; spill ebx
+ mov ebx, [eax+4] ; Get the internal context id by unboxing
+ ; the stub data
+ call _GetThread@0 ; Get the current thread, assumes that the
+ ; registers are preserved
+ mov eax, [eax+Thread_m_Context] ; Get the current context from the
+ ; thread
+ sub eax, ebx ; Get the pointer to the context from the
+ ; proxy and compare with the current context
+ pop ebx ; restore the value of ebx
+ retn
+_CRemotingServices__CheckForContextMatch@4 endp
+endif ; FEATURE_REMOTING
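
In C the check above amounts to the following (sketch; Thread, GetThread and m_Context are the CLR declarations, and the +4 matches the unboxing done by "mov ebx, [eax+4]"):

    #include <windows.h>

    // Returns 0 iff the proxy's context equals the calling thread's current context.
    static UINT_PTR CheckForContextMatchSketch(void* pStubData /* boxed context id, passed in EAX */)
    {
        UINT_PTR proxyContext   = *(UINT_PTR*)((unsigned char*)pStubData + 4); // unbox the stub data
        UINT_PTR currentContext = (UINT_PTR)GetThread()->m_Context;            // Thread_m_Context
        return currentContext - proxyContext;                                  // zero means match
    }
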
+
+;+----------------------------------------------------------------------------
+;
+; Method: CRemotingServices::DispatchInterfaceCall public
+;
+; Synopsis:
+; Push that method desc on the stack and jump to the
+; transparent proxy stub to execute the call.
+;            WARNING!! This MethodDesc is not the MethodDesc in the vtable
+;            of the object; instead, it is the MethodDesc in the vtable of
+;            the interface class. Since we use the MethodDesc only to probe
+;            the stack via the signature of the method call, we are safe.
+;            It would not be safe to use it for any object vtable/class
+;            specific information.
+;
+;
+;+----------------------------------------------------------------------------
+; void __stdcall CRemotingServices__DispatchInterfaceCall()
+ifdef FEATURE_REMOTING
+_CRemotingServices__DispatchInterfaceCall@0 proc public
+ ; push MethodDesc* passed in eax by precode and forward to the worker
+ push eax
+
+ ; NOTE: At this point the stack looks like
+ ;
+ ; esp---> saved MethodDesc of Interface method
+ ; return addr of calling function
+ ;
+ mov eax, [ecx + TransparentProxyObject___stubData]
+ call [ecx + TransparentProxyObject___stub]
+ifdef _DEBUG
+ nop ; Mark this as a special call site that can directly
+ ; call managed code
+endif
+ test eax, eax
+ jnz CtxMismatch
+ jmp _InContextTPQuickDispatchAsmStub@0
+
+CtxMismatch:
+ pop eax ; restore MethodDesc *
+ jmp _TransparentProxyStub_CrossContext@0 ; jump to slow TP stub
+_CRemotingServices__DispatchInterfaceCall@0 endp
+endif ; FEATURE_REMOTING
+
+
+;+----------------------------------------------------------------------------
+;
+; Method: CRemotingServices::CallFieldGetter private
+;
+; Synopsis: Calls the field getter function (Object::__FieldGetter) in
+; managed code by setting up the stack and calling the target
+;
+;
+;+----------------------------------------------------------------------------
+; void __stdcall CRemotingServices__CallFieldGetter(
+; MethodDesc *pMD,
+; LPVOID pThis,
+; LPVOID pFirst,
+; LPVOID pSecond,
+; LPVOID pThird)
+ifdef FEATURE_REMOTING
+CRemotingServices__CallFieldGetter proc stdcall public,
+ pMD : DWORD,
+ pThis : DWORD,
+ pFirst : DWORD,
+ pSecond : DWORD,
+ pThird : DWORD
+
+ push [pSecond] ; push the second argument on the stack
+ push [pThird] ; push the third argument on the stack
+
+ mov ecx, [pThis] ; enregister pThis, the 'this' pointer
+ mov edx, [pFirst] ; enregister pFirst, the first argument
+
+ mov eax, [pMD] ; load MethodDesc of object::__FieldGetter
+ call _TransparentProxyStub_CrossContext@0 ; call the TP stub
+
+ ret
+CRemotingServices__CallFieldGetter endp
+endif ; FEATURE_REMOTING
+
+;+----------------------------------------------------------------------------
+;
+; Method: CRemotingServices::CallFieldSetter private
+;
+; Synopsis: Calls the field setter function (Object::__FieldSetter) in
+; managed code by setting up the stack and calling the target
+;
+;
+;+----------------------------------------------------------------------------
+; void __stdcall CRemotingServices__CallFieldSetter(
+; MethodDesc *pMD,
+; LPVOID pThis,
+; LPVOID pFirst,
+; LPVOID pSecond,
+; LPVOID pThird)
+ifdef FEATURE_REMOTING
+CRemotingServices__CallFieldSetter proc stdcall public,
+ pMD : DWORD,
+ pThis : DWORD,
+ pFirst : DWORD,
+ pSecond : DWORD,
+ pThird : DWORD
+
+ push [pSecond] ; push the field name (second arg)
+ push [pThird] ; push the object (third arg) on the stack
+
+ mov ecx, [pThis] ; enregister pThis, the 'this' pointer
+ mov edx, [pFirst] ; enregister the first argument
+
+    mov     eax, [pMD]               ; load MethodDesc of object::__FieldSetter
+ call _TransparentProxyStub_CrossContext@0 ; call the TP stub
+
+ ret
+CRemotingServices__CallFieldSetter endp
+endif ; FEATURE_REMOTING
+
+;+----------------------------------------------------------------------------
+;
+; Method: CTPMethodTable::GenericCheckForContextMatch private
+;
+; Synopsis: Calls the stub in the TP & returns TRUE if the contexts
+; match, FALSE otherwise.
+;
+; Note: 1. Called during FieldSet/Get, used for proxy extensibility
+;
+;+----------------------------------------------------------------------------
+; BOOL __stdcall CTPMethodTable__GenericCheckForContextMatch(Object* orTP)
+ifdef FEATURE_REMOTING
+CTPMethodTable__GenericCheckForContextMatch proc stdcall public uses ecx, tp : DWORD
+
+ mov ecx, [tp]
+ mov eax, [ecx + TransparentProxyObject___stubData]
+ call [ecx + TransparentProxyObject___stub]
+ifdef _DEBUG
+ nop ; Mark this as a special call site that can directly
+ ; call managed code
+endif
+ test eax, eax
+ mov eax, 0
+ setz al
+ ; NOTE: In the CheckForXXXMatch stubs (for URT ctx/ Ole32 ctx) eax is
+ ; non-zero if contexts *do not* match & zero if they do.
+ ret
+CTPMethodTable__GenericCheckForContextMatch endp
+endif ; FEATURE_REMOTING
+
+
+; void __stdcall JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle)
+_JIT_ProfilerEnterLeaveTailcallStub@4 proc public
+ ; this function must preserve all registers, including scratch
+ retn 4
+_JIT_ProfilerEnterLeaveTailcallStub@4 endp
+
+;
+; Used to get the current instruction pointer value
+;
+; UINT_PTR __stdcall GetCurrentIP(void);
+_GetCurrentIP@0 proc public
+ mov eax, [esp]
+ retn
+_GetCurrentIP@0 endp
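+
+; Note: the value returned in EAX is the return address pushed by the call to
+; GetCurrentIP, i.e. the address of the instruction immediately following that
+; call in the caller.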
+
+; LPVOID __stdcall GetCurrentSP(void);
+_GetCurrentSP@0 proc public
+ mov eax, esp
+ retn
+_GetCurrentSP@0 endp
+
+
+; void __stdcall ProfileEnterNaked(FunctionIDOrClientID functionIDOrClientID);
+_ProfileEnterNaked@4 proc public
+ push esi
+ push edi
+
+ ;
+ ; Push in reverse order the fields of ProfilePlatformSpecificData
+ ;
+ push dword ptr [esp+8] ; EIP of the managed code that we return to. -- struct ip field
+ push ebp ; Methods are always EBP framed
+ add [esp], 8 ; Skip past the return IP, straight to the stack args that were passed to our caller
+ ; Skip past saved EBP value: 4 bytes
+ ; - plus return address from caller's caller: 4 bytes
+ ;
+    ; Assuming Foo() calls Bar(), and Bar() calls ProfileEnterNaked() as illustrated (stack
+ ; grows up). We want to get what Foo() passed on the stack to Bar(), so we need to pass
+ ; the return address from caller's caller which is Foo() in this example.
+ ;
+ ; ProfileEnterNaked()
+ ; Bar()
+ ; Foo()
+ ;
+ ; [ESP] is now the ESP of caller's caller pointing to the arguments to the caller.
+
+ push ecx ; -- struct ecx field
+ push edx ; -- struct edx field
+ push eax ; -- struct eax field
+ push 0 ; Create buffer space in the structure -- struct floatingPointValuePresent field
+ push 0 ; Create buffer space in the structure -- struct floatBuffer field
+ push 0 ; Create buffer space in the structure -- struct doubleBuffer2 field
+ push 0 ; Create buffer space in the structure -- struct doubleBuffer1 field
+ push 0 ; Create buffer space in the structure -- struct functionId field
+
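+    ; At this point the ProfilePlatformSpecificData image just built on the
+    ; stack is laid out as (offsets from ESP): +0 functionId, +4 doubleBuffer1,
+    ; +8 doubleBuffer2, +12 floatBuffer, +16 floatingPointValuePresent,
+    ; +20 eax, +24 edx, +28 ecx, +32 ebp, +36 ip -- 40 bytes in total.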
+ mov edx, esp ; the address of the Platform structure
+ mov ecx, [esp+52]; The functionIDOrClientID parameter that was pushed to FunctionEnter
+ ; Skip past ProfilePlatformSpecificData we pushed: 40 bytes
+ ; - plus saved edi, esi : 8 bytes
+ ; - plus return address from caller: 4 bytes
+
+ call @ProfileEnter@8
+
+ add esp, 20 ; Remove buffer space
+ pop eax
+ pop edx
+ pop ecx
+    add     esp, 8      ; Skip the ebp and ip fields of the struct
+ pop edi
+ pop esi
+
+ retn 4
+_ProfileEnterNaked@4 endp
+
+; void __stdcall ProfileLeaveNaked(FunctionIDOrClientID functionIDOrClientID);
+_ProfileLeaveNaked@4 proc public
+ push ecx ; We do not strictly need to save ECX, however
+                        ; emitNoGChelper(CORINFO_HELP_PROF_FCN_LEAVE) returns true in the JIT compiler
+ push edx ; Return value may be in EAX:EDX
+
+ ;
+ ; Push in reverse order the fields of ProfilePlatformSpecificData
+ ;
+ push dword ptr [esp+8] ; EIP of the managed code that we return to. -- struct ip field
+ push ebp ; Methods are always EBP framed
+ add [esp], 8 ; Skip past the return IP, straight to the stack args that were passed to our caller
+ ; Skip past saved EBP value: 4 bytes
+ ; - plus return address from caller's caller: 4 bytes
+ ;
+    ; Assuming Foo() calls Bar(), and Bar() calls ProfileLeaveNaked() as illustrated (stack
+    ; grows up). We want to get what Foo() passed on the stack to Bar(), so we need to pass
+    ; the return address from caller's caller which is Foo() in this example.
+    ;
+    ; ProfileLeaveNaked()
+ ; Bar()
+ ; Foo()
+ ;
+ ; [ESP] is now the ESP of caller's caller pointing to the arguments to the caller.
+
+ push ecx ; -- struct ecx field
+ push edx ; -- struct edx field
+ push eax ; -- struct eax field
+
+ ; Check if we need to save off any floating point registers
+ fstsw ax
+ and ax, 3800h ; Check the top-of-fp-stack bits
+ cmp ax, 0 ; If non-zero, we have something to save
+ jnz SaveFPReg
+
+ push 0 ; Create buffer space in the structure -- struct floatingPointValuePresent field
+ push 0 ; Create buffer space in the structure -- struct floatBuffer field
+ push 0 ; Create buffer space in the structure -- struct doubleBuffer2 field
+ push 0 ; Create buffer space in the structure -- struct doubleBuffer1 field
+ jmp Continue
+
+SaveFPReg:
+ push 1 ; mark that a float value is present -- struct floatingPointValuePresent field
+ sub esp, 4 ; Make room for the FP value
+ fst dword ptr [esp] ; Copy the FP value to the buffer as a float -- struct floatBuffer field
+ sub esp, 8 ; Make room for the FP value
+ fstp qword ptr [esp] ; Copy FP values to the buffer as a double -- struct doubleBuffer1 and doubleBuffer2 fields
+
+Continue:
+ push 0 ; Create buffer space in the structure -- struct functionId field
+
+ mov edx, esp ; the address of the Platform structure
+ mov ecx, [esp+52]; The clientData that was pushed to FunctionEnter
+ ; Skip past ProfilePlatformSpecificData we pushed: 40 bytes
+ ; - plus saved edx, ecx : 8 bytes
+ ; - plus return address from caller: 4 bytes
+
+ call @ProfileLeave@8
+
+ ;
+    ; Now see if we have to restore any floating point registers
+ ;
+
+ cmp [esp + 16], 0
+ jz NoRestore
+
+ fld qword ptr [esp + 4]
+
+NoRestore:
+
+ add esp, 20 ; Remove buffer space
+ pop eax
+    add     esp, 16     ; Skip the edx, ecx, ebp and ip fields of the struct
+ pop edx
+ pop ecx
+ retn 4
+_ProfileLeaveNaked@4 endp
+
+
+; void __stdcall ProfileTailcallNaked(FunctionIDOrClientID functionIDOrClientID);
+_ProfileTailcallNaked@4 proc public
+ push ecx
+ push edx
+
+ ;
+ ; Push in reverse order the fields of ProfilePlatformSpecificData
+ ;
+ push dword ptr [esp+8] ; EIP of the managed code that we return to. -- struct ip field
+ push ebp ; Methods are always EBP framed
+ add [esp], 8 ; Skip past the return IP, straight to the stack args that were passed to our caller
+ ; Skip past saved EBP value: 4 bytes
+ ; - plus return address from caller's caller: 4 bytes
+ ;
+    ; Assuming Foo() calls Bar(), and Bar() calls ProfileTailcallNaked() as illustrated (stack
+    ; grows up). We want to get what Foo() passed on the stack to Bar(), so we need to pass
+    ; the return address from caller's caller which is Foo() in this example.
+    ;
+    ; ProfileTailcallNaked()
+ ; Bar()
+ ; Foo()
+ ;
+ ; [ESP] is now the ESP of caller's caller pointing to the arguments to the caller.
+
+ push ecx ; -- struct ecx field
+ push edx ; -- struct edx field
+ push eax ; -- struct eax field
+ push 0 ; Create buffer space in the structure -- struct floatingPointValuePresent field
+ push 0 ; Create buffer space in the structure -- struct floatBuffer field
+ push 0 ; Create buffer space in the structure -- struct doubleBuffer2 field
+ push 0 ; Create buffer space in the structure -- struct doubleBuffer1 field
+ push 0 ; Create buffer space in the structure -- struct functionId field
+
+ mov edx, esp ; the address of the Platform structure
+ mov ecx, [esp+52]; The clientData that was pushed to FunctionEnter
+ ; Skip past ProfilePlatformSpecificData we pushed: 40 bytes
+ ; - plus saved edx, ecx : 8 bytes
+ ; - plus return address from caller: 4 bytes
+
+ call @ProfileTailcall@8
+
+    add     esp, 40     ; Remove the entire ProfilePlatformSpecificData struct
+ pop edx
+ pop ecx
+ retn 4
+_ProfileTailcallNaked@4 endp
+
+;==========================================================================
+; Invoked for vararg forward P/Invoke calls as a stub.
+; Except for secret return buffer, arguments come on the stack so EDX is available as scratch.
+; EAX - the NDirectMethodDesc
+; ECX - may be return buffer address
+; [ESP + 4] - the VASigCookie
+;
+_VarargPInvokeStub@0 proc public
+ ; EDX <- VASigCookie
+ mov edx, [esp + 4] ; skip retaddr
+
+ mov edx, [edx + VASigCookie__StubOffset]
+ test edx, edx
+
+ jz GoCallVarargWorker
+ ; ---------------------------------------
+
+ ; EAX contains MD ptr for the IL stub
+ jmp edx
+
+GoCallVarargWorker:
+ ;
+ ; MD ptr in EAX, VASigCookie ptr at [esp+4]
+ ;
+
+ STUB_PROLOG
+
+ mov esi, esp
+
+ ; save pMD
+ push eax
+
+ push eax ; pMD
+ push dword ptr [esi + 4*7] ; pVaSigCookie
+ push esi ; pTransitionBlock
+
+ call _VarargPInvokeStubWorker@12
+
+ ; restore pMD
+ pop eax
+
+ STUB_EPILOG
+
+ ; jump back to the helper - this time it won't come back here as the stub already exists
+ jmp _VarargPInvokeStub@0
+
+_VarargPInvokeStub@0 endp
+
+;==========================================================================
+; Invoked for marshaling-required unmanaged CALLI calls as a stub.
+; EAX - the unmanaged target
+; ECX, EDX - arguments
+; [ESP + 4] - the VASigCookie
+;
+_GenericPInvokeCalliHelper@0 proc public
+ ; save the target
+ push eax
+
+ ; EAX <- VASigCookie
+ mov eax, [esp + 8] ; skip target and retaddr
+
+ mov eax, [eax + VASigCookie__StubOffset]
+ test eax, eax
+
+ jz GoCallCalliWorker
+ ; ---------------------------------------
+
+ push eax
+
+ ; stack layout at this point:
+ ;
+ ; | ... |
+ ; | stack arguments | ESP + 16
+ ; +----------------------+
+ ; | VASigCookie* | ESP + 12
+ ; +----------------------+
+ ; | return address | ESP + 8
+ ; +----------------------+
+ ; | CALLI target address | ESP + 4
+ ; +----------------------+
+ ; | stub entry point | ESP + 0
+ ; ------------------------
+
+ ; remove VASigCookie from the stack
+ mov eax, [esp + 8]
+ mov [esp + 12], eax
+
+ ; move stub entry point below the RA
+ mov eax, [esp]
+ mov [esp + 8], eax
+
+ ; load EAX with the target address
+ pop eax
+ pop eax
+
+ ; stack layout at this point:
+ ;
+ ; | ... |
+ ; | stack arguments | ESP + 8
+ ; +----------------------+
+ ; | return address | ESP + 4
+ ; +----------------------+
+ ; | stub entry point | ESP + 0
+ ; ------------------------
+
+ ; CALLI target address is in EAX
+ ret
+
+GoCallCalliWorker:
+ ; the target is on the stack and will become m_Datum of PInvokeCalliFrame
+ ; call the stub generating worker
+ pop eax
+
+ ;
+ ; target ptr in EAX, VASigCookie ptr in EDX
+ ;
+
+ STUB_PROLOG
+
+ mov esi, esp
+
+ ; save target
+ push eax
+
+ push eax ; unmanaged target
+ push dword ptr [esi + 4*7] ; pVaSigCookie (first stack argument)
+ push esi ; pTransitionBlock
+
+ call _GenericPInvokeCalliStubWorker@12
+
+ ; restore target
+ pop eax
+
+ STUB_EPILOG
+
+ ; jump back to the helper - this time it won't come back here as the stub already exists
+ jmp _GenericPInvokeCalliHelper@0
+
+_GenericPInvokeCalliHelper@0 endp
+
+ifdef MDA_SUPPORTED
+
+;==========================================================================
+; Invoked from on-the-fly generated stubs when the stack imbalance MDA is
+; enabled. The common low-level work for both direct P/Invoke and unmanaged
+; delegate P/Invoke happens here. PInvokeStackImbalanceWorker is where the
+; actual imbalance check is implemented.
+; [ESP + 4] - the StackImbalanceCookie
+; [EBP + 8] - stack arguments (EBP frame pushed by the calling stub)
+;
+_PInvokeStackImbalanceHelper@0 proc public
+ ; StackImbalanceCookie to EBX
+ push ebx
+ lea ebx, [esp + 8]
+
+ push esi
+ push edi
+
+ ; copy stack args
+ mov edx, ecx
+ mov ecx, [ebx + StackImbalanceCookie__m_dwStackArgSize]
+ sub esp, ecx
+
+ shr ecx, 2
+ lea edi, [esp]
+ lea esi, [ebp + 8]
+
+ cld
+ rep movsd
+
+ ; record pre-call ESP
+ mov [ebx + StackImbalanceCookie__m_dwSavedEsp], esp
+
+ ; call the target (restore ECX in case it's a thiscall)
+ mov ecx, edx
+ call [ebx + StackImbalanceCookie__m_pTarget]
+
+ ; record post-call ESP and restore ESP to pre-pushed state
+ mov ecx, esp
+ lea esp, [ebp - SIZEOF_StackImbalanceCookie - 16] ; 4 DWORDs and the cookie have been pushed
+
+ ; save return value
+ push eax
+ push edx
+ sub esp, 12
+
+.errnz (StackImbalanceCookie__HAS_FP_RETURN_VALUE AND 00ffffffh), HAS_FP_RETURN_VALUE has changed - update asm code
+
+ ; save top of the floating point stack if the target has FP retval
+ test byte ptr [ebx + StackImbalanceCookie__m_callConv + 3], (StackImbalanceCookie__HAS_FP_RETURN_VALUE SHR 24)
+ jz noFPURetVal
+ fstp tbyte ptr [esp] ; save full 10 bytes to avoid precision loss
+noFPURetVal:
+
+ ; call PInvokeStackImbalanceWorker(StackImbalanceCookie *pSICookie, DWORD dwPostESP)
+ push ecx
+ push ebx
+ call _PInvokeStackImbalanceWorker@8
+
+ ; restore return value
+ test byte ptr [ebx + StackImbalanceCookie__m_callConv + 3], (StackImbalanceCookie__HAS_FP_RETURN_VALUE SHR 24)
+ jz noFPURetValToRestore
+ fld tbyte ptr [esp]
+noFPURetValToRestore:
+
+ add esp, 12
+ pop edx
+ pop eax
+
+ ; restore registers
+ pop edi
+ pop esi
+
+ pop ebx
+
+ ; EBP frame and original stack arguments will be removed by the caller
+ ret
+_PInvokeStackImbalanceHelper@0 endp
+
+endif ; MDA_SUPPORTED
+
+ifdef FEATURE_COMINTEROP
+
+;==========================================================================
+; This is a fast alternative to CallDescr* tailored specifically for
+; COM to CLR calls. Stack arguments don't come in a contiguous buffer
+; and the secret argument can be passed in EAX.
+;
+
+; extern "C" ARG_SLOT __fastcall COMToCLRDispatchHelper(
+; INT_PTR dwArgECX, ; ecx
+; INT_PTR dwArgEDX, ; edx
+; PCODE pTarget, ; [esp + 4]
+; PCODE pSecretArg, ; [esp + 8]
+; INT_PTR *pInputStack, ; [esp + c]
+; WORD wOutputStackSlots, ; [esp +10]
+; UINT16 *pOutputStackOffsets, ; [esp +14]
+; Frame *pCurFrame); ; [esp +18]
+
+FASTCALL_FUNC COMToCLRDispatchHelper, 32
+
+ ; ecx: dwArgECX
+ ; edx: dwArgEDX
+
+ offset_pTarget equ 4
+ offset_pSecretArg equ 8
+ offset_pInputStack equ 0Ch
+ offset_wOutputStackSlots equ 10h
+ offset_pOutputStackOffsets equ 14h
+ offset_pCurFrame equ 18h
+
+ movzx eax, word ptr [esp + offset_wOutputStackSlots]
+ test eax, eax
+ jnz CopyStackArgs
+
+ ; There are no stack args to copy and ECX and EDX are already setup
+ ; with the correct arguments for the callee, so we just have to
+ ; push the CPFH and make the call.
+
+ PUSH_CPFH_FOR_COM eax, esp, offset_pCurFrame ; trashes eax
+
+ mov eax, [esp + offset_pSecretArg + CPFH_STACK_SIZE]
+ call [esp + offset_pTarget + CPFH_STACK_SIZE]
+ifdef _DEBUG
+ nop ; This is a tag that we use in an assert.
+endif
+
+ POP_CPFH_FOR_COM ecx ; trashes ecx
+
+ ret 18h
+
+
+CopyStackArgs:
+ ; eax: num stack slots
+ ; ecx: dwArgECX
+ ; edx: dwArgEDX
+
+ push ebp
+ mov ebp, esp
+ push ebx
+ push esi
+ push edi
+
+ ebpFrame_adjust equ 4h
+ ebp_offset_pCurFrame equ ebpFrame_adjust + offset_pCurFrame
+
+ PUSH_CPFH_FOR_COM ebx, ebp, ebp_offset_pCurFrame ; trashes ebx
+
+ mov edi, [ebp + ebpFrame_adjust + offset_pOutputStackOffsets]
+ mov esi, [ebp + ebpFrame_adjust + offset_pInputStack]
+
+ ; eax: num stack slots
+ ; ecx: dwArgECX
+ ; edx: dwArgEDX
+ ; edi: pOutputStackOffsets
+ ; esi: pInputStack
+
+CopyStackLoop:
+ dec eax
+ movzx ebx, word ptr [edi + 2 * eax] ; ebx <- input stack offset
+ push [esi + ebx] ; stack <- value on the input stack
+ jnz CopyStackLoop
+
+ ; ECX and EDX are setup with the correct arguments for the callee,
+ ; and we've copied the stack arguments over as well, so now it's
+ ; time to make the call.
+
+ mov eax, [ebp + ebpFrame_adjust + offset_pSecretArg]
+ call [ebp + ebpFrame_adjust + offset_pTarget]
+ifdef _DEBUG
+ nop ; This is a tag that we use in an assert.
+endif
+
+ POP_CPFH_FOR_COM ecx ; trashes ecx
+
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+
+ ret 18h
+
+FASTCALL_ENDFUNC
+
+endif ; FEATURE_COMINTEROP
+
+ifndef FEATURE_CORECLR
+
+;==========================================================================
+; This is a small stub whose purpose is to record the current stack pointer and
+; call CopyCtorCallStubWorker to invoke copy constructors and destructors
+; as appropriate. This stub operates on arguments already pushed to the
+; stack by the JITted IL stub and must not create a new frame, i.e. it must
+; tail call to the target so that the target sees the arguments that the copy
+; ctors have been called on.
+;
+_CopyCtorCallStub@0 proc public
+ ; there may be an argument in ecx - save it
+ push ecx
+
+ ; push pointer to arguments
+ lea edx, [esp + 8]
+ push edx
+
+ call _CopyCtorCallStubWorker@4
+
+ ; restore ecx and tail call to the target
+ pop ecx
+ jmp eax
+_CopyCtorCallStub@0 endp
+
+endif ; !FEATURE_CORECLR
+
+ifdef FEATURE_PREJIT
+
+;==========================================================================
+_StubDispatchFixupStub@0 proc public
+
+ STUB_PROLOG
+
+ mov esi, esp
+
+ push 0
+ push 0
+
+ push eax ; siteAddrForRegisterIndirect (for tailcalls)
+ push esi ; pTransitionBlock
+
+ call _StubDispatchFixupWorker@16
+
+ STUB_EPILOG
+
+_StubDispatchFixupPatchLabel@0:
+public _StubDispatchFixupPatchLabel@0
+
+ ; Tailcall target
+ jmp eax
+
+ ; This will never be executed. It is just to help out stack-walking logic
+ ; which disassembles the epilog to unwind the stack.
+ ret
+
+_StubDispatchFixupStub@0 endp
+
+;==========================================================================
+_ExternalMethodFixupStub@0 proc public
+
+ pop eax ; pop off the return address to the stub
+ ; leaving the actual caller's return address on top of the stack
+
+ STUB_PROLOG
+
+ mov esi, esp
+
+ ; EAX is return address into CORCOMPILE_EXTERNAL_METHOD_THUNK. Subtract 5 to get start address.
+ sub eax, 5
+
+ push 0
+ push 0
+
+ push eax
+
+ ; pTransitionBlock
+ push esi
+
+ call _ExternalMethodFixupWorker@16
+
+ ; eax now contains replacement stub. PreStubWorker will never return
+ ; NULL (it throws an exception if stub creation fails.)
+
+ ; From here on, mustn't trash eax
+
+ STUB_EPILOG
+
+_ExternalMethodFixupPatchLabel@0:
+public _ExternalMethodFixupPatchLabel@0
+
+ ; Tailcall target
+ jmp eax
+
+ ; This will never be executed. It is just to help out stack-walking logic
+ ; which disassembles the epilog to unwind the stack.
+ ret
+
+_ExternalMethodFixupStub@0 endp
+
+ifdef FEATURE_READYTORUN
+;==========================================================================
+_DelayLoad_MethodCall@0 proc public
+
+ STUB_PROLOG_2_HIDDEN_ARGS
+
+ mov esi, esp
+
+ push ecx
+ push edx
+
+ push eax
+
+ ; pTransitionBlock
+ push esi
+
+ call _ExternalMethodFixupWorker@16
+
+ ; eax now contains replacement stub. PreStubWorker will never return
+ ; NULL (it throws an exception if stub creation fails.)
+
+ ; From here on, mustn't trash eax
+
+ STUB_EPILOG
+
+ ; Share the patch label
+ jmp _ExternalMethodFixupPatchLabel@0
+
+ ; This will never be executed. It is just to help out stack-walking logic
+ ; which disassembles the epilog to unwind the stack.
+ ret
+
+_DelayLoad_MethodCall@0 endp
+endif
+
+;=======================================================================================
+; The call in softbound vtable slots initially points to this function.
+; The purpose of this function is to transfer control to the right target and
+; to optionally patch the target of the jump so that we do not take this slow path again.
+;
+_VirtualMethodFixupStub@0 proc public
+
+ pop eax ; Pop the return address. It points right after the call instruction in the thunk.
+ sub eax,5 ; Calculate the address of the thunk
+
+ ; Push ebp frame to get good callstack under debugger
+ push ebp
+ mov ebp, esp
+
+ ; Preserve argument registers
+ push ecx
+ push edx
+
+ push eax ; address of the thunk
+ push ecx ; this ptr
+ call _VirtualMethodFixupWorker@8
+
+ ; Restore argument registers
+ pop edx
+ pop ecx
+
+ ; Pop ebp frame
+ pop ebp
+
+_VirtualMethodFixupPatchLabel@0:
+public _VirtualMethodFixupPatchLabel@0
+
+ ; Proceed to execute the actual method.
+ jmp eax
+
+ ; This will never be executed. It is just to help out stack-walking logic
+ ; which disassembles the epilog to unwind the stack.
+ ret
+
+_VirtualMethodFixupStub@0 endp
+
+endif ; FEATURE_PREJIT
+
+;==========================================================================
+; The prestub
+_ThePreStub@0 proc public
+
+ STUB_PROLOG
+
+ mov esi, esp
+
+ ; EAX contains MethodDesc* from the precode. Push it here as argument
+ ; for PreStubWorker
+ push eax
+
+ push esi
+
+ call _PreStubWorker@8
+
+ ; eax now contains replacement stub. PreStubWorker will never return
+ ; NULL (it throws an exception if stub creation fails.)
+
+ ; From here on, mustn't trash eax
+
+ STUB_EPILOG
+
+ ; Tailcall target
+ jmp eax
+
+ ; This will never be executed. It is just to help out stack-walking logic
+ ; which disassembles the epilog to unwind the stack.
+ ret
+
+_ThePreStub@0 endp
+
+; This method does nothing. It's just a fixed function for the debugger to put a breakpoint
+; on so that it can trace a call target.
+_ThePreStubPatch@0 proc public
+ ; make sure that the basic block is unique
+ test eax,34
+_ThePreStubPatchLabel@0:
+public _ThePreStubPatchLabel@0
+ ret
+_ThePreStubPatch@0 endp
+
+ifdef FEATURE_COMINTEROP
+;==========================================================================
+; CLR -> COM generic or late-bound call
+_GenericComPlusCallStub@0 proc public
+
+ STUB_PROLOG
+
+ ; pTransitionBlock
+ mov esi, esp
+
+ ; return value
+ sub esp, 8
+
+ ; save pMD
+ mov ebx, eax
+
+ push eax ; pMD
+ push esi ; pTransitionBlock
+ call _CLRToCOMWorker@8
+
+ push eax
+ call _setFPReturn@12 ; pop & set the return value
+
+ ; From here on, mustn't trash eax:edx
+
+ ; Get pComPlusCallInfo for return thunk
+ mov ecx, [ebx + ComPlusCallMethodDesc__m_pComPlusCallInfo]
+
+ STUB_EPILOG_RETURN
+
+ ; Tailcall return thunk
+ jmp [ecx + ComPlusCallInfo__m_pRetThunk]
+
+ ; This will never be executed. It is just to help out stack-walking logic
+ ; which disassembles the epilog to unwind the stack.
+ ret
+
+_GenericComPlusCallStub@0 endp
+endif ; FEATURE_COMINTEROP
+
+ifdef FEATURE_REMOTING
+_TransparentProxyStub@0 proc public
+ ; push slot passed in eax
+ push eax
+
+ ; Move into eax the stub data and call the stub
+ mov eax, [ecx + TransparentProxyObject___stubData]
+ call [ecx + TransparentProxyObject___stub]
+ifdef _DEBUG
+ nop ; Mark this as a special call site that can directly
+ ; call managed code
+endif
+ test eax, eax
+ jnz CtxMismatch2
+
+ mov eax, [ecx + TransparentProxyObject___pMT]
+
+ push ebx ; spill EBX
+
+ ; Convert the slot number into the code address
+ ; See MethodTable.h for details on vtable layout
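+    ; For illustration (hypothetical chunk size): if ASM__VTABLE_SLOTS_PER_CHUNK
+    ; were 8, slot 11 would give indirectionSlotNumber 1 and offsetInChunk 3.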
+
+ mov ebx, [esp + 4] ; Reload the slot
+ shr ebx, ASM__VTABLE_SLOTS_PER_CHUNK_LOG2 ; indirectionSlotNumber
+
+ mov eax,[eax + ebx*4 + SIZEOF_MethodTable]
+
+ mov ebx, [esp + 4] ; use unchanged slot from above
+ and ebx, ASM__VTABLE_SLOTS_PER_CHUNK-1 ; offsetInChunk
+ mov eax, [eax + ebx*4]
+
+ ; At this point, eax contains the code address
+
+ ; Restore EBX
+ pop ebx
+
+ ; Remove the slot number from the stack
+ lea esp, [esp+4]
+
+ jmp eax
+
+ ; CONTEXT MISMATCH CASE, call out to the real proxy to dispatch
+
+CtxMismatch2:
+ pop eax ; restore MethodDesc *
+ jmp _TransparentProxyStub_CrossContext@0 ; jump to slow TP stub
+
+_TransparentProxyStub@0 endp
+
+_TransparentProxyStub_CrossContext@0 proc public
+
+ STUB_PROLOG
+
+ ; pTransitionBlock
+ mov esi, esp
+
+ ; return value
+ sub esp, 3*4 ; 64-bit return value + cb stack pop
+
+ push eax ; pMD
+ push esi ; pTransitionBlock
+ call _TransparentProxyStubWorker@8
+
+ pop ebx ; cbStackPop
+
+ push eax
+ call _setFPReturn@12 ; pop & set the return value
+
+ ; From here on, mustn't trash eax:edx
+ mov ecx, ebx ; cbStackPop
+
+ mov ebx, [esp+6*4] ; get retaddr
+ mov [esp+6*4+ecx], ebx ; put it where it belongs
+
+ STUB_EPILOG_RETURN
+
+ add esp, ecx ; pop all the args
+ ret
+
+_TransparentProxyStub_CrossContext@0 endp
+
+; This method does nothing. It's just a fixed function for the debugger to put a breakpoint
+; on so that it can trace a call target.
+_TransparentProxyStubPatch@0 proc public
+ ; make sure that the basic block is unique
+ test eax,12
+_TransparentProxyStubPatchLabel@0:
+public _TransparentProxyStubPatchLabel@0
+ ret
+_TransparentProxyStubPatch@0 endp
+
+endif ; FEATURE_REMOTING
+
+ifdef FEATURE_COMINTEROP
+;--------------------------------------------------------------------------
+; This is the code that all com call method stubs run initially.
+; Most of the real work occurs in ComPreStubWorker(), a C++ routine.
+; The template only does the part that absolutely has to be in assembly
+; language.
+;--------------------------------------------------------------------------
+_ComCallPreStub@0 proc public
+ pop eax ;ComCallMethodDesc*
+
+ ; push ebp-frame
+ push ebp
+ mov ebp,esp
+
+ ; save CalleeSavedRegisters
+ push ebx
+ push esi
+ push edi
+
+ push eax ; ComCallMethodDesc*
+ sub esp, 5*4 ; next, vtable, gscookie, 64-bit error return
+
+ lea edi, [esp]
+ lea esi, [esp+3*4]
+
+ push edi ; pErrorReturn
+ push esi ; pFrame
+ call _ComPreStubWorker@8
+
+    ; eax now contains replacement stub. ComPreStubWorker will return NULL if stub creation fails
+ cmp eax, 0
+ je nostub ; oops we could not create a stub
+
+ add esp, 6*4
+
+ ; pop CalleeSavedRegisters
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+
+ jmp eax ; Reexecute with replacement stub.
+ ; We will never get here. This "ret" is just so that code-disassembling
+ ; profilers know to stop disassembling any further
+ ret
+
+nostub:
+
+    ; Even though ComPreStubWorker sets a 64-bit value as the error return code,
+    ; only the lower 32 bits contain useful data. The reason for this is that the
+    ; possible error return types are: a failure HRESULT, 0, and floating point 0.
+    ; In each case, the data fits in 32 bits. We therefore use the upper half of
+    ; the return value to store the number of bytes to pop.
+ mov eax, [edi]
+ mov edx, [edi+4]
+
+ add esp, 6*4
+
+ ; pop CalleeSavedRegisters
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+
+ pop ecx ; return address
+ add esp, edx ; pop bytes of the stack
+ push ecx ; return address
+
+ ; We need to deal with the case where the method is PreserveSig=true and has an 8
+ ; byte return type. There are 2 types of 8 byte return types: integer and floating point.
+ ; For integer 8 byte return types, we always return 0 in case of failure. For floating
+ ; point return types, we return the value in the floating point register. In both cases
+ ; edx should be 0.
+ xor edx, edx ; edx <-- 0
+
+ ret
+
+_ComCallPreStub@0 endp
+endif ; FEATURE_COMINTEROP
+
+ifdef FEATURE_READYTORUN
+;==========================================================================
+; Define helpers for delay loading of readytorun helpers
+
+DYNAMICHELPER macro frameFlags, suffix
+
+_DelayLoad_Helper&suffix&@0 proc public
+
+ STUB_PROLOG_2_HIDDEN_ARGS
+
+ mov esi, esp
+
+ push frameFlags
+ push ecx ; module
+ push edx ; section index
+
+ push eax ; indirection cell address.
+ push esi ; pTransitionBlock
+
+ call _DynamicHelperWorker@20
+ test eax,eax
+ jnz @F
+
+ mov eax, [esi] ; The result is stored in the argument area of the transition block
+ STUB_EPILOG_RETURN
+ ret
+
+@@:
+ STUB_EPILOG
+ jmp eax
+
+_DelayLoad_Helper&suffix&@0 endp
+
+ endm
+
+DYNAMICHELPER DynamicHelperFrameFlags_Default
+DYNAMICHELPER DynamicHelperFrameFlags_ObjectArg, _Obj
+DYNAMICHELPER <DynamicHelperFrameFlags_ObjectArg OR DynamicHelperFrameFlags_ObjectArg2>, _ObjObj
+
+endif ; FEATURE_READYTORUN
+
+ end
diff --git a/src/vm/i386/cgencpu.h b/src/vm/i386/cgencpu.h
new file mode 100644
index 0000000000..2da98821bc
--- /dev/null
+++ b/src/vm/i386/cgencpu.h
@@ -0,0 +1,573 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+// CGENX86.H -
+//
+// Various helper routines for generating x86 assembly code.
+//
+// DO NOT INCLUDE THIS FILE DIRECTLY - ALWAYS USE CGENSYS.H INSTEAD
+//
+
+
+
+#ifndef _TARGET_X86_
+#error Should only include "cgenx86.h" for X86 builds
+#endif // _TARGET_X86_
+
+#ifndef __cgenx86_h__
+#define __cgenx86_h__
+
+#include "utilcode.h"
+
+// Given a return address retrieved during stackwalk,
+// this is the offset by which it should be decremented to land somewhere within the call instruction.
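+// For example, for a 5-byte "call rel32" the return address points at the
+// instruction after the call, so subtracting 1 moves it back onto the last
+// byte of the call instruction itself.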
+#define STACKWALK_CONTROLPC_ADJUST_OFFSET 1
+
+// preferred alignment for data
+#define DATA_ALIGNMENT 4
+
+class MethodDesc;
+class FramedMethodFrame;
+class Module;
+class ComCallMethodDesc;
+class BaseDomain;
+
+// CPU-dependent functions
+Stub * GenerateInitPInvokeFrameHelper();
+
+#ifdef MDA_SUPPORTED
+EXTERN_C void STDCALL PInvokeStackImbalanceHelper(void);
+#endif // MDA_SUPPORTED
+
+#ifndef FEATURE_CORECLR
+EXTERN_C void STDCALL CopyCtorCallStub(void);
+#endif // !FEATURE_CORECLR
+
+BOOL Runtime_Test_For_SSE2();
+
+#ifdef CROSSGEN_COMPILE
+#define GetEEFuncEntryPoint(pfn) 0x1001
+#else
+#define GetEEFuncEntryPoint(pfn) GFN_TADDR(pfn)
+#endif
+
+//**********************************************************************
+// To be used with GetSpecificCpuInfo()
+
+#define CPU_X86_FAMILY(cpuType) (((cpuType) & 0x0F00) >> 8)
+#define CPU_X86_MODEL(cpuType) (((cpuType) & 0x00F0) >> 4)
+// Stepping is masked out by GetSpecificCpuInfo()
+// #define CPU_X86_STEPPING(cpuType) (((cpuType) & 0x000F) )
+
+#define CPU_X86_USE_CMOV(cpuFeat) ((cpuFeat & 0x00008001) == 0x00008001)
+#define CPU_X86_USE_SSE2(cpuFeat) (((cpuFeat & 0x04000000) == 0x04000000) && Runtime_Test_For_SSE2())
+
+// Values for CPU_X86_FAMILY(cpuType)
+#define CPU_X86_486 4
+#define CPU_X86_PENTIUM 5
+#define CPU_X86_PENTIUM_PRO 6
+#define CPU_X86_PENTIUM_4 0xF
+
+// Values for CPU_X86_MODEL(cpuType) for CPU_X86_PENTIUM_PRO
+#define CPU_X86_MODEL_PENTIUM_PRO_BANIAS    9 // Pentium M (Mobile PPro with P4 features)
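+// For illustration: a dwCPUType of 0x690 (Pentium M / Banias) decodes as
+// CPU_X86_FAMILY == 6 (CPU_X86_PENTIUM_PRO) and CPU_X86_MODEL == 9.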
+
+#define COMMETHOD_PREPAD 8 // # extra bytes to allocate in addition to sizeof(ComCallMethodDesc)
+#ifdef FEATURE_COMINTEROP
+#define COMMETHOD_CALL_PRESTUB_SIZE 5 // x86: CALL(E8) xx xx xx xx
+#define COMMETHOD_CALL_PRESTUB_ADDRESS_OFFSET 1 // the offset of the call target address inside the prestub
+#endif // FEATURE_COMINTEROP
+
+#define STACK_ALIGN_SIZE 4
+
+#define JUMP_ALLOCATE_SIZE 8 // # bytes to allocate for a jump instruction
+#define BACK_TO_BACK_JUMP_ALLOCATE_SIZE 8 // # bytes to allocate for a back to back jump instruction
+
+#define HAS_COMPACT_ENTRYPOINTS 1
+
+// Needed for PInvoke inlining in ngened images
+#define HAS_NDIRECT_IMPORT_PRECODE 1
+
+#ifdef FEATURE_REMOTING
+#define HAS_REMOTING_PRECODE 1
+#endif
+#ifdef FEATURE_PREJIT
+#define HAS_FIXUP_PRECODE 1
+#define HAS_FIXUP_PRECODE_CHUNKS 1
+#endif
+
+// A ThisPtrRetBufPrecode is necessary for closed delegates over static methods with a return buffer
+#define HAS_THISPTR_RETBUF_PRECODE 1
+
+#define CODE_SIZE_ALIGN 4
+#define CACHE_LINE_SIZE 32 // As per Intel Optimization Manual the cache line size is 32 bytes
+#define LOG2SLOT LOG2_PTRSIZE
+
+#define ENREGISTERED_RETURNTYPE_MAXSIZE 8
+#define ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE 4
+#define CALLDESCR_ARGREGS 1 // CallDescrWorker has ArgumentRegister parameter
+
+// Max size of patched TLS helpers
+#ifdef _DEBUG
+// Debug build needs extra space for last error trashing
+#define TLS_GETTER_MAX_SIZE 0x20
+#else
+#define TLS_GETTER_MAX_SIZE 0x10
+#endif
+
+//=======================================================================
+// IMPORTANT: This value is used to figure out how much to allocate
+// for a fixed array of FieldMarshaler's. That means it must be at least
+// as large as the largest FieldMarshaler subclass. This requirement
+// is guarded by an assert.
+//=======================================================================
+#define MAXFIELDMARSHALERSIZE 24
+
+//**********************************************************************
+// Parameter size
+//**********************************************************************
+
+typedef INT32 StackElemType;
+#define STACK_ELEM_SIZE sizeof(StackElemType)
+
+
+
+#include "stublinkerx86.h"
+
+
+
+// !! This expression assumes STACK_ELEM_SIZE is a power of 2.
+#define StackElemSize(parmSize) (((parmSize) + STACK_ELEM_SIZE - 1) & ~((ULONG)(STACK_ELEM_SIZE - 1)))
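+// For illustration, with STACK_ELEM_SIZE == 4:
+//   StackElemSize(1) == 4, StackElemSize(4) == 4, StackElemSize(5) == 8, StackElemSize(13) == 16.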
+
+
+//**********************************************************************
+// Frames
+//**********************************************************************
+//--------------------------------------------------------------------
+// This represents some of the FramedMethodFrame fields that are
+// stored at negative offsets.
+//--------------------------------------------------------------------
+typedef DPTR(struct CalleeSavedRegisters) PTR_CalleeSavedRegisters;
+struct CalleeSavedRegisters {
+ INT32 edi;
+ INT32 esi;
+ INT32 ebx;
+ INT32 ebp;
+};
+
+//--------------------------------------------------------------------
+// This represents the arguments that are stored in volatile registers.
+// This should not overlap the CalleeSavedRegisters since those are already
+// saved separately and it would be wasteful to save the same register twice.
+// If we do use a non-volatile register as an argument, then the ArgIterator
+// will probably have to communicate this back to the PromoteCallerStack
+// routine to avoid a double promotion.
+//--------------------------------------------------------------------
+#define ENUM_ARGUMENT_REGISTERS() \
+ ARGUMENT_REGISTER(ECX) \
+ ARGUMENT_REGISTER(EDX)
+
+#define ENUM_ARGUMENT_REGISTERS_BACKWARD() \
+ ARGUMENT_REGISTER(EDX) \
+ ARGUMENT_REGISTER(ECX)
+
+typedef DPTR(struct ArgumentRegisters) PTR_ArgumentRegisters;
+struct ArgumentRegisters {
+ #define ARGUMENT_REGISTER(regname) INT32 regname;
+ ENUM_ARGUMENT_REGISTERS_BACKWARD()
+ #undef ARGUMENT_REGISTER
+};
+#define NUM_ARGUMENT_REGISTERS 2
+
+#define SCRATCH_REGISTER_X86REG kEAX
+
+#define THIS_REG ECX
+#define THIS_kREG kECX
+
+#define ARGUMENT_REG1 ECX
+#define ARGUMENT_REG2 EDX
+
+// forward decl
+struct REGDISPLAY;
+typedef REGDISPLAY *PREGDISPLAY;
+
+// Sufficient context for Try/Catch restoration.
+struct EHContext {
+ INT32 Eax;
+ INT32 Ebx;
+ INT32 Ecx;
+ INT32 Edx;
+ INT32 Esi;
+ INT32 Edi;
+ INT32 Ebp;
+ INT32 Esp;
+ INT32 Eip;
+
+ void Setup(PCODE resumePC, PREGDISPLAY regs);
+ void UpdateFrame(PREGDISPLAY regs);
+
+ inline TADDR GetSP() {
+ LIMITED_METHOD_CONTRACT;
+ return (TADDR)Esp;
+ }
+ inline void SetSP(LPVOID esp) {
+ LIMITED_METHOD_CONTRACT;
+ Esp = (INT32)(size_t)esp;
+ }
+
+ inline LPVOID GetFP() {
+ LIMITED_METHOD_CONTRACT;
+ return (LPVOID)(UINT_PTR)Ebp;
+ }
+
+ inline void SetArg(LPVOID arg) {
+ LIMITED_METHOD_CONTRACT;
+ Eax = (INT32)(size_t)arg;
+ }
+
+ inline void Init()
+ {
+ LIMITED_METHOD_CONTRACT;
+ Eax = 0;
+ Ebx = 0;
+ Ecx = 0;
+ Edx = 0;
+ Esi = 0;
+ Edi = 0;
+ Ebp = 0;
+ Esp = 0;
+ Eip = 0;
+ }
+};
+
+#define ARGUMENTREGISTERS_SIZE sizeof(ArgumentRegisters)
+
+//**********************************************************************
+// Exception handling
+//**********************************************************************
+
+inline PCODE GetIP(const CONTEXT * context) {
+ LIMITED_METHOD_DAC_CONTRACT;
+
+ return PCODE(context->Eip);
+}
+
+inline void SetIP(CONTEXT *context, PCODE eip) {
+ LIMITED_METHOD_DAC_CONTRACT;
+
+ context->Eip = (DWORD)eip;
+}
+
+inline TADDR GetSP(const CONTEXT * context) {
+ LIMITED_METHOD_DAC_CONTRACT;
+
+ return (TADDR)(context->Esp);
+}
+
+EXTERN_C LPVOID STDCALL GetCurrentSP();
+
+inline void SetSP(CONTEXT *context, TADDR esp) {
+ LIMITED_METHOD_DAC_CONTRACT;
+
+ context->Esp = (DWORD)esp;
+}
+
+inline void SetFP(CONTEXT *context, TADDR ebp) {
+ LIMITED_METHOD_DAC_CONTRACT;
+
+ context->Ebp = (INT32)ebp;
+}
+
+inline TADDR GetFP(const CONTEXT * context)
+{
+ LIMITED_METHOD_DAC_CONTRACT;
+
+ return (TADDR)context->Ebp;
+}
+
+// Get Rel32 destination, emit jumpStub if necessary
+inline INT32 rel32UsingJumpStub(INT32 UNALIGNED * pRel32, PCODE target, MethodDesc *pMethod = NULL, LoaderAllocator *pLoaderAllocator = NULL)
+{
+ // We do not need jump stubs on i386
+ LIMITED_METHOD_CONTRACT;
+
+ TADDR baseAddr = (TADDR)pRel32 + 4;
+ return (INT32)(target - baseAddr);
+}
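+
+// For illustration (hypothetical addresses): if the rel32 field lives at 0x1001
+// (the byte after an E8/E9 opcode at 0x1000) and target is 0x2000, then
+// baseAddr is 0x1005 and the function returns 0xFFB.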
+
+#ifdef FEATURE_COMINTEROP
+inline void emitCOMStubCall (ComCallMethodDesc *pCOMMethod, PCODE target)
+{
+ WRAPPER_NO_CONTRACT;
+
+ BYTE *pBuffer = (BYTE*)pCOMMethod - COMMETHOD_CALL_PRESTUB_SIZE;
+
+ pBuffer[0] = X86_INSTR_CALL_REL32; //CALLNEAR32
+ *((LPVOID*)(1+pBuffer)) = (LPVOID) (((LPBYTE)target) - (pBuffer+5));
+
+ _ASSERTE(IS_ALIGNED(pBuffer + COMMETHOD_CALL_PRESTUB_ADDRESS_OFFSET, sizeof(void*)) &&
+ *((SSIZE_T*)(pBuffer + COMMETHOD_CALL_PRESTUB_ADDRESS_OFFSET)) == ((LPBYTE)target - (LPBYTE)pCOMMethod));
+}
+#endif // FEATURE_COMINTEROP
+
+//------------------------------------------------------------------------
+WORD GetUnpatchedCodeData(LPCBYTE pAddr);
+
+//------------------------------------------------------------------------
+inline WORD GetUnpatchedOpcodeWORD(LPCBYTE pAddr)
+{
+ WRAPPER_NO_CONTRACT;
+ if (CORDebuggerAttached())
+ {
+ return GetUnpatchedCodeData(pAddr);
+ }
+ else
+ {
+ return *((WORD *)pAddr);
+ }
+}
+
+//------------------------------------------------------------------------
+inline BYTE GetUnpatchedOpcodeBYTE(LPCBYTE pAddr)
+{
+ WRAPPER_NO_CONTRACT;
+ if (CORDebuggerAttached())
+ {
+ return (BYTE) GetUnpatchedCodeData(pAddr);
+ }
+ else
+ {
+ return *pAddr;
+ }
+}
+
+//------------------------------------------------------------------------
+// The following must be a distinguishable set of instruction sequences for
+// various stub dispatch calls.
+//
+// An x86 JIT which uses full stub dispatch must generate only
+// the following stub dispatch calls:
+//
+// (1) isCallRelativeIndirect:
+// call dword ptr [rel32] ; FF 15 ---rel32----
+// (2) isCallRelative:
+// call abc ; E8 ---rel32----
+// (3) isCallRegisterIndirect:
+// 3-byte nop ;
+// call dword ptr [eax] ; FF 10
+//
+// NOTE: You must be sure that pRetAddr is a true return address for
+// a stub dispatch call.
+
+BOOL isCallRelativeIndirect(const BYTE *pRetAddr);
+BOOL isCallRelative(const BYTE *pRetAddr);
+BOOL isCallRegisterIndirect(const BYTE *pRetAddr);
+
+inline BOOL isCallRelativeIndirect(const BYTE *pRetAddr)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ BOOL fRet = (GetUnpatchedOpcodeWORD(&pRetAddr[-6]) == X86_INSTR_CALL_IND);
+ _ASSERTE(!fRet || !isCallRelative(pRetAddr));
+ _ASSERTE(!fRet || !isCallRegisterIndirect(pRetAddr));
+ return fRet;
+}
+
+inline BOOL isCallRelative(const BYTE *pRetAddr)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ BOOL fRet = (GetUnpatchedOpcodeBYTE(&pRetAddr[-5]) == X86_INSTR_CALL_REL32);
+ _ASSERTE(!fRet || !isCallRelativeIndirect(pRetAddr));
+ _ASSERTE(!fRet || !isCallRegisterIndirect(pRetAddr));
+ return fRet;
+}
+
+inline BOOL isCallRegisterIndirect(const BYTE *pRetAddr)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ BOOL fRet = (GetUnpatchedOpcodeWORD(&pRetAddr[-5]) == X86_INSTR_NOP3_1)
+ && (GetUnpatchedOpcodeBYTE(&pRetAddr[-3]) == X86_INSTR_NOP3_3)
+ && (GetUnpatchedOpcodeWORD(&pRetAddr[-2]) == X86_INSTR_CALL_IND_EAX);
+ _ASSERTE(!fRet || !isCallRelative(pRetAddr));
+ _ASSERTE(!fRet || !isCallRelativeIndirect(pRetAddr));
+ return fRet;
+}
+
+//------------------------------------------------------------------------
+inline void emitJump(LPBYTE pBuffer, LPVOID target)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ pBuffer[0] = X86_INSTR_JMP_REL32; //JUMPNEAR32
+ *((LPVOID*)(1+pBuffer)) = (LPVOID) (((LPBYTE)target) - (pBuffer+5));
+}
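+
+// For illustration (hypothetical addresses, assuming X86_INSTR_JMP_REL32 is the
+// 0xE9 near-jump opcode): emitJump(pBuffer = 0x1000, target = 0x2000) writes the
+// bytes E9 FB 0F 00 00, i.e. rel32 = target - (pBuffer + 5) = 0xFFB.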
+
+//------------------------------------------------------------------------
+inline void emitJumpInd(LPBYTE pBuffer, LPVOID target)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ *((WORD*)pBuffer) = X86_INSTR_JMP_IND; // 0x25FF jmp dword ptr[addr32]
+ *((LPVOID*)(2+pBuffer)) = target;
+}
+
+//------------------------------------------------------------------------
+inline PCODE isJump(PCODE pCode)
+{
+ LIMITED_METHOD_DAC_CONTRACT;
+ return *PTR_BYTE(pCode) == X86_INSTR_JMP_REL32;
+}
+
+//------------------------------------------------------------------------
+// Given the same pBuffer that was used by emitJump this method
+// decodes the instructions and returns the jump target
+inline PCODE decodeJump(PCODE pCode)
+{
+ LIMITED_METHOD_DAC_CONTRACT;
+ CONSISTENCY_CHECK(*PTR_BYTE(pCode) == X86_INSTR_JMP_REL32);
+ return rel32Decode(pCode+1);
+}
+
+//
+// On IA64 back to back jumps should be separated by a nop bundle to get
+// the best performance from the hardware's branch prediction logic.
+// For all other platforms back to back jumps don't require anything special.
+// That is why we have these wrapper functions that call emitJump and decodeJump.
+//
+
+//------------------------------------------------------------------------
+inline void emitBackToBackJump(LPBYTE pBuffer, LPVOID target)
+{
+ WRAPPER_NO_CONTRACT;
+ emitJump(pBuffer, target);
+}
+
+//------------------------------------------------------------------------
+inline PCODE isBackToBackJump(PCODE pBuffer)
+{
+ WRAPPER_NO_CONTRACT;
+ SUPPORTS_DAC;
+ return isJump(pBuffer);
+}
+
+//------------------------------------------------------------------------
+inline PCODE decodeBackToBackJump(PCODE pBuffer)
+{
+ WRAPPER_NO_CONTRACT;
+ SUPPORTS_DAC;
+ return decodeJump(pBuffer);
+}
+
+EXTERN_C void __stdcall setFPReturn(int fpSize, INT64 retVal);
+EXTERN_C void __stdcall getFPReturn(int fpSize, INT64 *pretval);
+
+
+// SEH info forward declarations
+
+inline BOOL IsUnmanagedValueTypeReturnedByRef(UINT sizeofvaluetype)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ // odd-sized small structures are not
+ // enregistered e.g. struct { char a,b,c; }
+ return (sizeofvaluetype > 8) ||
+        (sizeofvaluetype & (sizeofvaluetype - 1)); // non-zero if the size is not a power of two
+}
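+
+// For illustration: sizes 1, 2, 4 and 8 are returned in registers, while sizes
+// 3, 5, 6, 7 and anything larger than 8 bytes are returned by hidden reference.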
+
+#include <pshpack1.h>
+DECLSPEC_ALIGN(4) struct UMEntryThunkCode
+{
+    BYTE            m_alignpad[2];  // used to guarantee alignment of backpatched portion
+ BYTE m_movEAX; //MOV EAX,imm32
+ LPVOID m_uet; // pointer to start of this structure
+ BYTE m_jmp; //JMP NEAR32
+ const BYTE * m_execstub; // pointer to destination code // make sure the backpatched portion is dword aligned.
+
+ void Encode(BYTE* pTargetCode, void* pvSecretParam);
+
+ LPCBYTE GetEntryPoint() const
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ return (LPCBYTE)&m_movEAX;
+ }
+
+ static int GetEntryPointOffset()
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ return 2;
+ }
+};
+#include <poppack.h>
+
+struct HijackArgs
+{
+ DWORD FPUState[3]; // 12 bytes for FPU state (10 bytes for FP top-of-stack + 2 bytes padding)
+ DWORD Edi;
+ DWORD Esi;
+ DWORD Ebx;
+ DWORD Edx;
+ DWORD Ecx;
+ union
+ {
+ DWORD Eax;
+ size_t ReturnValue[1];
+ };
+ DWORD Ebp;
+ union
+ {
+ DWORD Eip;
+ size_t ReturnAddress;
+ };
+};
+
+// ClrFlushInstructionCache is used when we want to call FlushInstructionCache
+// for a specific architecture in the common code, but not for other architectures.
+// On IA64 ClrFlushInstructionCache calls the Kernel FlushInstructionCache function
+// to flush the instruction cache.
+// We call ClrFlushInstructionCache whenever we create or modify code in the heap.
+// Currently ClrFlushInstructionCache has no effect on X86
+//
+
+inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode)
+{
+ // FlushInstructionCache(GetCurrentProcess(), pCodeAddr, sizeOfCode);
+ MemoryBarrier();
+ return TRUE;
+}
+
+#ifndef FEATURE_IMPLICIT_TLS
+//
+// JIT HELPER ALIASING FOR PORTABILITY.
+//
+// Create alias for optimized implementations of helpers provided on this platform
+//
+
+#define JIT_MonEnter JIT_MonEnterWorker
+#define JIT_MonEnterWorker JIT_MonEnterWorker
+#define JIT_MonReliableEnter JIT_MonReliableEnter
+#define JIT_MonTryEnter JIT_MonTryEnter
+#define JIT_MonExit JIT_MonExitWorker
+#define JIT_MonExitWorker JIT_MonExitWorker
+#define JIT_MonEnterStatic JIT_MonEnterStatic
+#define JIT_MonExitStatic JIT_MonExitStatic
+
+#endif
+
+// optimized static helpers generated dynamically at runtime
+// #define JIT_GetSharedGCStaticBase
+// #define JIT_GetSharedNonGCStaticBase
+// #define JIT_GetSharedGCStaticBaseNoCtor
+// #define JIT_GetSharedNonGCStaticBaseNoCtor
+
+#define JIT_ChkCastClass JIT_ChkCastClass
+#define JIT_ChkCastClassSpecial JIT_ChkCastClassSpecial
+#define JIT_IsInstanceOfClass JIT_IsInstanceOfClass
+#define JIT_ChkCastInterface JIT_ChkCastInterface
+#define JIT_IsInstanceOfInterface JIT_IsInstanceOfInterface
+#define JIT_NewCrossContext JIT_NewCrossContext
+#define JIT_Stelem_Ref JIT_Stelem_Ref
+
+#endif // __cgenx86_h__
diff --git a/src/vm/i386/cgenx86.cpp b/src/vm/i386/cgenx86.cpp
new file mode 100644
index 0000000000..ff2f2df5a3
--- /dev/null
+++ b/src/vm/i386/cgenx86.cpp
@@ -0,0 +1,2257 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+// CGENX86.CPP -
+//
+// Various helper routines for generating x86 assembly code.
+//
+//
+
+// Precompiled Header
+
+#include "common.h"
+
+#include "field.h"
+#include "stublink.h"
+#include "cgensys.h"
+#include "frames.h"
+#include "excep.h"
+#include "dllimport.h"
+#include "comdelegate.h"
+#include "log.h"
+#include "security.h"
+#include "comdelegate.h"
+#include "array.h"
+#include "jitinterface.h"
+#include "codeman.h"
+#ifdef FEATURE_REMOTING
+#include "remoting.h"
+#endif
+#include "dbginterface.h"
+#include "eeprofinterfaces.h"
+#include "eeconfig.h"
+#include "asmconstants.h"
+#include "class.h"
+#include "virtualcallstub.h"
+#include "mdaassistants.h"
+#include "jitinterface.h"
+
+#ifdef FEATURE_COMINTEROP
+#include "comtoclrcall.h"
+#include "runtimecallablewrapper.h"
+#include "comcache.h"
+#include "olevariant.h"
+#endif // FEATURE_COMINTEROP
+
+#ifdef FEATURE_PREJIT
+#include "compile.h"
+#endif
+
+#include "stublink.inl"
+
+extern "C" DWORD STDCALL GetSpecificCpuTypeAsm(void);
+extern "C" DWORD STDCALL GetSpecificCpuFeaturesAsm(DWORD *pInfo);
+
+// NOTE on Frame Size C_ASSERT usage in this file
+// If the frame size changes then the stubs have to be revisited for correctness:
+// revisit the logic and then update the constants so that the C_ASSERT will again fire
+// if someone changes the frame size. You are expected to keep this hard-coded constant
+// up to date so that changes in the frame size trigger errors at compile time if the code is not altered.
+
+void generate_noref_copy (unsigned nbytes, StubLinkerCPU* sl);
+
+#ifndef DACCESS_COMPILE
+
+//=============================================================================
+// Runtime test to see if the OS has enabled support for the SSE2 instructions
+//
+//
+BOOL Runtime_Test_For_SSE2()
+{
+#ifdef FEATURE_CORESYSTEM
+ return TRUE;
+#else
+
+ BOOL result = IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE);
+
+ if (result == FALSE)
+ return FALSE;
+
+ // **********************************************************************
+ // *** ***
+ // *** IMPORTANT NOTE: ***
+ // *** ***
+ // *** All of these RunningOnXXX APIs return true when ***
+ // *** the OS that you are running on is that OS or later. ***
+ // *** For example RunningOnWin2003() will return true ***
+ // *** when you are running on Win2k3, Vista, Win7 or later. ***
+ // *** ***
+ // **********************************************************************
+
+
+    // Windows 7 and later should always be using SSE2 instructions;
+    //  this is true for both native and Wow64
+ //
+ if (RunningOnWin7())
+ return TRUE;
+
+ if (RunningInWow64())
+ {
+        // There is an issue with saving/restoring the SSE2 registers under wow64,
+        // so we figure out if we are running on an impacted OS and Service Pack level.
+ // See DevDiv Bugs 89587 for the wow64 bug.
+ //
+
+ _ASSERTE(ExOSInfoAvailable()); // This is always available on Vista and later
+
+ //
+ // The issue is fixed in Windows Server 2008 or Vista/SP1
+ //
+ // It is not fixed in Vista/RTM, so check for that case
+ //
+ if ((ExOSInfoRunningOnServer() == FALSE))
+ {
+ OSVERSIONINFOEX osvi;
+
+ ZeroMemory(&osvi, sizeof(OSVERSIONINFOEX));
+ osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX);
+ osvi.wServicePackMajor = 0;
+
+ DWORDLONG dwlConditionMask = 0;
+ VER_SET_CONDITION( dwlConditionMask, CLR_VER_SERVICEPACKMAJOR, VER_EQUAL);
+
+ if (VerifyVersionInfo(&osvi, CLR_VER_SERVICEPACKMAJOR, dwlConditionMask))
+ result = FALSE;
+ }
+ }
+
+ return result;
+#endif
+}
+
+//---------------------------------------------------------------
+// Returns the type of CPU (the value of x in x86)
+// (Please note that it returns 6 for P5-II)
+//---------------------------------------------------------------
+void GetSpecificCpuInfo(CORINFO_CPU * cpuInfo)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ static CORINFO_CPU val = { 0, 0, 0 };
+
+ if (val.dwCPUType)
+ {
+ *cpuInfo = val;
+ return;
+ }
+
+ CORINFO_CPU tempVal;
+ tempVal.dwCPUType = GetSpecificCpuTypeAsm(); // written in ASM & doesn't participate in contracts
+ _ASSERTE(tempVal.dwCPUType);
+
+#ifdef _DEBUG
+ {
+ SO_NOT_MAINLINE_REGION();
+
+ /* Set Family+Model+Stepping string (eg., x690 for Banias, or xF30 for P4 Prescott)
+ * instead of Family only
+ */
+
+ const DWORD cpuDefault = 0xFFFFFFFF;
+ static ConfigDWORD cpuFamily;
+ DWORD configCpuFamily = cpuFamily.val_DontUse_(CLRConfig::INTERNAL_CPUFamily, cpuDefault);
+ if (configCpuFamily != cpuDefault)
+ {
+ assert((configCpuFamily & 0xFFF) == configCpuFamily);
+ tempVal.dwCPUType = (tempVal.dwCPUType & 0xFFFF0000) | configCpuFamily;
+ }
+ }
+#endif
+
+ tempVal.dwFeatures = GetSpecificCpuFeaturesAsm(&tempVal.dwExtendedFeatures); // written in ASM & doesn't participate in contracts
+
+#ifdef _DEBUG
+ {
+ SO_NOT_MAINLINE_REGION();
+
+ /* Set the 32-bit feature mask
+ */
+
+ const DWORD cpuFeaturesDefault = 0xFFFFFFFF;
+ static ConfigDWORD cpuFeatures;
+ DWORD configCpuFeatures = cpuFeatures.val_DontUse_(CLRConfig::INTERNAL_CPUFeatures, cpuFeaturesDefault);
+ if (configCpuFeatures != cpuFeaturesDefault)
+ {
+ tempVal.dwFeatures = configCpuFeatures;
+ }
+ }
+#endif
+
+ val = *cpuInfo = tempVal;
+}
+
+#endif // #ifndef DACCESS_COMPILE
+
+
+//---------------------------------------------------------------------------------------
+//
+// Initialize the EHContext using the resume PC and the REGDISPLAY. The EHContext is currently used in two
+// scenarios: to store the register state before calling an EH clause, and to retrieve the ambient SP of a
+// particular stack frame. resumePC means different things in the two scenarios. In the former case, it
+// is the IP at which we are going to resume execution when we call an EH clause. In the latter case, it
+// is just the current IP.
+//
+// Arguments:
+// resumePC - refer to the comment above
+// regs - This is the REGDISPLAY obtained from the CrawlFrame used in the stackwalk. It represents the
+// stack frame of the method containing the EH clause we are about to call. For getting the
+// ambient SP, this is the stack frame we are interested in.
+//
+
+void EHContext::Setup(PCODE resumePC, PREGDISPLAY regs)
+{
+ LIMITED_METHOD_DAC_CONTRACT;
+
+ // EAX ECX EDX are scratch
+ this->Esp = regs->Esp;
+ this->Ebx = *regs->pEbx;
+ this->Esi = *regs->pEsi;
+ this->Edi = *regs->pEdi;
+ this->Ebp = *regs->pEbp;
+
+ this->Eip = (ULONG)(size_t)resumePC;
+}
+
+//
+// Update the registers using new context
+//
+// This is necessary to reflect GC pointer changes during the middle of an unwind inside a
+// finally clause, because:
+// 1. The GC won't see the part of the stack inside the try (which has thrown an exception) that is
+//    already unwound, and thus won't update GC pointers for this portion of the stack, but rather
+//    the call stack in the finally.
+// 2. Upon return of the finally, the unwind process continues and unwinds the stack based on the
+//    part of the stack inside the try and won't see the updated values in the finally.
+// As a result, we need to manually update the context using register values upon return of the finally.
+//
+// Note that we only update the registers for the finally clause because
+// 1. For filter handlers, the stack walker is able to see the whole stack (including the try part)
+//    with the help of ExceptionFilterFrame, as filter handlers are called in the first pass.
+// 2. For catch handlers, the current unwinding is already finished.
+//
+void EHContext::UpdateFrame(PREGDISPLAY regs)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ // EAX ECX EDX are scratch.
+ // No need to update ESP as unwinder takes care of that for us
+
+ LOG((LF_EH, LL_INFO1000, "Updating saved EBX: *%p= %p\n", regs->pEbx, this->Ebx));
+ LOG((LF_EH, LL_INFO1000, "Updating saved ESI: *%p= %p\n", regs->pEsi, this->Esi));
+ LOG((LF_EH, LL_INFO1000, "Updating saved EDI: *%p= %p\n", regs->pEdi, this->Edi));
+ LOG((LF_EH, LL_INFO1000, "Updating saved EBP: *%p= %p\n", regs->pEbp, this->Ebp));
+
+ *regs->pEbx = this->Ebx;
+ *regs->pEsi = this->Esi;
+ *regs->pEdi = this->Edi;
+ *regs->pEbp = this->Ebp;
+}
+
+void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+ CONTRACT_VOID
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_ANY;
+ HOST_NOCALLS;
+ SUPPORTS_DAC;
+ }
+ CONTRACT_END;
+
+ ENABLE_FORBID_GC_LOADER_USE_IN_THIS_SCOPE();
+
+ MethodDesc * pFunc = GetFunction();
+ _ASSERTE(pFunc != NULL);
+ UpdateRegDisplayHelper(pRD, pFunc->CbStackPop());
+
+ RETURN;
+}
+
+void TransitionFrame::UpdateRegDisplayHelper(const PREGDISPLAY pRD, UINT cbStackPop)
+{
+ CONTRACT_VOID
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_ANY;
+ HOST_NOCALLS;
+ SUPPORTS_DAC;
+ }
+ CONTRACT_END;
+
+ CalleeSavedRegisters* regs = GetCalleeSavedRegisters();
+
+ // reset pContext; it's only valid for active (top-most) frame
+
+ pRD->pContext = NULL;
+
+ pRD->pEdi = (DWORD*) &regs->edi;
+ pRD->pEsi = (DWORD*) &regs->esi;
+ pRD->pEbx = (DWORD*) &regs->ebx;
+ pRD->pEbp = (DWORD*) &regs->ebp;
+ pRD->PCTAddr = GetReturnAddressPtr();
+ pRD->ControlPC = *PTR_PCODE(pRD->PCTAddr);
+ pRD->Esp = (DWORD)(pRD->PCTAddr + sizeof(TADDR) + cbStackPop);
+
+ RETURN;
+}
+
+void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+ CONTRACT_VOID
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_ANY;
+ HOST_NOCALLS;
+ PRECONDITION(m_MachState.isValid()); // InsureInit has been called
+ SUPPORTS_DAC;
+ }
+ CONTRACT_END;
+
+ ENABLE_FORBID_GC_LOADER_USE_IN_THIS_SCOPE();
+
+ // reset pContext; it's only valid for active (top-most) frame
+ pRD->pContext = NULL;
+
+#ifdef DACCESS_COMPILE
+
+ //
+ // In the dac case we may have gotten here
+ // without the frame being initialized, so
+ // try and initialize on the fly.
+ //
+
+ if (!m_MachState.isValid())
+ {
+ MachState unwindState;
+
+ InsureInit(false, &unwindState);
+ pRD->PCTAddr = dac_cast<TADDR>(unwindState.pRetAddr());
+ pRD->ControlPC = unwindState.GetRetAddr();
+ pRD->Esp = unwindState._esp;
+
+ // Get some special host instance memory
+ // so we have a place to point to.
+ // This host memory has no target address
+ // and so won't be looked up or used for
+ // anything else.
+ MachState* thisState = (MachState*)
+ DacAllocHostOnlyInstance(sizeof(*thisState), true);
+
+ thisState->_edi = unwindState._edi;
+ pRD->pEdi = (DWORD *)&thisState->_edi;
+ thisState->_esi = unwindState._esi;
+ pRD->pEsi = (DWORD *)&thisState->_esi;
+ thisState->_ebx = unwindState._ebx;
+ pRD->pEbx = (DWORD *)&thisState->_ebx;
+ thisState->_ebp = unwindState._ebp;
+ pRD->pEbp = (DWORD *)&thisState->_ebp;
+
+ // InsureInit always sets m_RegArgs to zero
+ // in the real code. I'm not sure exactly
+ // what should happen in the on-the-fly case,
+ // but go with what would happen from an InsureInit.
+ RETURN;
+ }
+
+#endif // #ifdef DACCESS_COMPILE
+
+ // DACCESS: The MachState pointers are kept as PTR_TADDR so
+ // the host pointers here refer to the appropriate size and
+ // these casts are not a problem.
+ pRD->pEdi = (DWORD*) m_MachState.pEdi();
+ pRD->pEsi = (DWORD*) m_MachState.pEsi();
+ pRD->pEbx = (DWORD*) m_MachState.pEbx();
+ pRD->pEbp = (DWORD*) m_MachState.pEbp();
+ pRD->PCTAddr = dac_cast<TADDR>(m_MachState.pRetAddr());
+ pRD->ControlPC = m_MachState.GetRetAddr();
+ pRD->Esp = (DWORD) m_MachState.esp();
+
+ RETURN;
+}
+
+#ifdef _DEBUG_IMPL
+// Confirm that if the machine state was not initialized, then
+// any unspilled callee saved registers did not change
+EXTERN_C MachState* STDCALL HelperMethodFrameConfirmState(HelperMethodFrame* frame, void* esiVal, void* ediVal, void* ebxVal, void* ebpVal)
+ {
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_ANY;
+ DEBUG_ONLY;
+ }
+ CONTRACTL_END;
+
+ MachState* state = frame->MachineState();
+
+ // if we've already executed this check once for this helper method frame then
+ // we don't do the check again because it is very expensive.
+ if (frame->HaveDoneConfirmStateCheck())
+ {
+ return state;
+ }
+
+ // probe to avoid a kazillion violations in the code that follows.
+ BEGIN_DEBUG_ONLY_CODE;
+ if (!state->isValid())
+ {
+ frame->InsureInit(false, NULL);
+ _ASSERTE(state->_pEsi != &state->_esi || state->_esi == (TADDR)esiVal);
+ _ASSERTE(state->_pEdi != &state->_edi || state->_edi == (TADDR)ediVal);
+ _ASSERTE(state->_pEbx != &state->_ebx || state->_ebx == (TADDR)ebxVal);
+ _ASSERTE(state->_pEbp != &state->_ebp || state->_ebp == (TADDR)ebpVal);
+ }
+ END_DEBUG_ONLY_CODE;
+
+ // set that we have executed this check once for this helper method frame.
+ frame->SetHaveDoneConfirmStateCheck();
+
+ return state;
+}
+#endif
+
+void ExternalMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+ CONTRACT_VOID
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_ANY;
+ HOST_NOCALLS;
+ SUPPORTS_DAC;
+ }
+ CONTRACT_END;
+
+ UpdateRegDisplayHelper(pRD, CbStackPopUsingGCRefMap(GetGCRefMap()));
+
+ RETURN;
+}
+
+
+void StubDispatchFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+ CONTRACT_VOID
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_ANY;
+ HOST_NOCALLS;
+ SUPPORTS_DAC;
+ }
+ CONTRACT_END;
+
+ PTR_BYTE pGCRefMap = GetGCRefMap();
+ if (pGCRefMap != NULL)
+ {
+ UpdateRegDisplayHelper(pRD, CbStackPopUsingGCRefMap(pGCRefMap));
+ }
+ else
+ if (GetFunction() != NULL)
+ {
+ FramedMethodFrame::UpdateRegDisplay(pRD);
+ }
+ else
+ {
+ UpdateRegDisplayHelper(pRD, 0);
+
+ // If we do not have owning MethodDesc, we need to pretend that
+ // the call happened on the call instruction to get the ESP unwound properly.
+ //
+ // This path is hit when we are throwing null reference exception from
+ // code:VSD_ResolveWorker or code:StubDispatchFixupWorker
+ pRD->ControlPC = GetAdjustedCallAddress(pRD->ControlPC);
+ }
+
+ RETURN;
+}
+
+PCODE StubDispatchFrame::GetReturnAddress()
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ }
+ CONTRACTL_END;
+
+ PCODE retAddress = FramedMethodFrame::GetReturnAddress();
+ if (GetFunction() == NULL && GetGCRefMap() == NULL)
+ {
+ // See comment in code:StubDispatchFrame::UpdateRegDisplay
+ retAddress = GetAdjustedCallAddress(retAddress);
+ }
+ return retAddress;
+}
+
+void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+ CONTRACT_VOID
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_ANY;
+ HOST_NOCALLS;
+ SUPPORTS_DAC;
+ }
+ CONTRACT_END;
+
+ CalleeSavedRegisters* regs = GetCalleeSavedRegisters();
+
+ // reset pContext; it's only valid for active (top-most) frame
+ pRD->pContext = NULL;
+
+ pRD->pEdi = (DWORD*) &regs->edi;
+ pRD->pEsi = (DWORD*) &regs->esi;
+ pRD->pEbx = (DWORD*) &regs->ebx;
+ pRD->pEbp = (DWORD*) &regs->ebp;
+ pRD->PCTAddr = GetReturnAddressPtr();
+ pRD->ControlPC = *PTR_PCODE(pRD->PCTAddr);
+ pRD->Esp = m_Esp;
+ RETURN;
+}
+
+void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+ CONTRACT_VOID
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ // We should skip over InlinedCallFrame if it is not active.
+ // It will be part of a JITed method's frame, and the stack-walker
+ // can handle such a case.
+#ifdef PROFILING_SUPPORTED
+ PRECONDITION(CORProfilerStackSnapshotEnabled() || InlinedCallFrame::FrameHasActiveCall(this));
+#endif
+ HOST_NOCALLS;
+ MODE_ANY;
+ SUPPORTS_DAC;
+ }
+ CONTRACT_END;
+
+ // @TODO: Remove this after the debugger is fixed to avoid stack-walks from bad places
+ // @TODO: This may be still needed for sampling profilers
+ if (!InlinedCallFrame::FrameHasActiveCall(this))
+ {
+ LOG((LF_CORDB, LL_ERROR, "WARNING: InlinedCallFrame::UpdateRegDisplay called on inactive frame %p\n", this));
+ return;
+ }
+
+ DWORD stackArgSize = (DWORD) dac_cast<TADDR>(m_Datum);
+
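+ // m_Datum is overloaded: small values hold the outgoing stack argument size directly, and a value
+ // that does not fit in 16 bits is actually a pointer to the NDirectMethodDesc from which the size
+ // is recovered below.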
+ if (stackArgSize & ~0xFFFF)
+ {
+ NDirectMethodDesc * pMD = PTR_NDirectMethodDesc(m_Datum);
+
+ /* if this is not an NDirect frame, something is really wrong */
+
+ _ASSERTE(pMD->SanityCheck() && pMD->IsNDirect());
+
+ stackArgSize = pMD->GetStackArgumentSize();
+ }
+
+ // reset pContext; it's only valid for active (top-most) frame
+ pRD->pContext = NULL;
+
+
+ pRD->pEbp = (DWORD*) &m_pCalleeSavedFP;
+
+ /* The return address is just above the "ESP" */
+ pRD->PCTAddr = PTR_HOST_MEMBER_TADDR(InlinedCallFrame, this,
+ m_pCallerReturnAddress);
+ pRD->ControlPC = *PTR_PCODE(pRD->PCTAddr);
+
+ /* Now we need to pop off the outgoing arguments */
+ pRD->Esp = (DWORD) dac_cast<TADDR>(m_pCallSiteSP) + stackArgSize;
+ RETURN;
+}
+
+#ifdef FEATURE_HIJACK
+//==========================
+// Resumable Exception Frame
+//
+TADDR ResumableFrame::GetReturnAddressPtr()
+{
+ LIMITED_METHOD_DAC_CONTRACT;
+ return dac_cast<TADDR>(m_Regs) + offsetof(CONTEXT, Eip);
+}
+
+void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+ CONTRACT_VOID
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_ANY;
+ HOST_NOCALLS;
+ SUPPORTS_DAC;
+ }
+ CONTRACT_END;
+
+ // reset pContext; it's only valid for active (top-most) frame
+ pRD->pContext = NULL;
+
+ CONTEXT* pUnwoundContext = m_Regs;
+
+#if !defined(DACCESS_COMPILE)
+ // The "pContextForUnwind" field is only used on x86: it is initialized only for this platform,
+ // and it is used only under the STACKWALKER_MAY_POP_FRAMES preprocessor define, which is
+ // defined for x86 only (refer to its definition in stackwalk.cpp).
+ if (pRD->pContextForUnwind != NULL)
+ {
+ pUnwoundContext = pRD->pContextForUnwind;
+
+ pUnwoundContext->Eax = m_Regs->Eax;
+ pUnwoundContext->Ecx = m_Regs->Ecx;
+ pUnwoundContext->Edx = m_Regs->Edx;
+
+ pUnwoundContext->Edi = m_Regs->Edi;
+ pUnwoundContext->Esi = m_Regs->Esi;
+ pUnwoundContext->Ebx = m_Regs->Ebx;
+ pUnwoundContext->Ebp = m_Regs->Ebp;
+ pUnwoundContext->Eip = m_Regs->Eip;
+ }
+#endif // !defined(DACCESS_COMPILE)
+
+ pRD->pEax = &pUnwoundContext->Eax;
+ pRD->pEcx = &pUnwoundContext->Ecx;
+ pRD->pEdx = &pUnwoundContext->Edx;
+
+ pRD->pEdi = &pUnwoundContext->Edi;
+ pRD->pEsi = &pUnwoundContext->Esi;
+ pRD->pEbx = &pUnwoundContext->Ebx;
+ pRD->pEbp = &pUnwoundContext->Ebp;
+
+ pRD->ControlPC = pUnwoundContext->Eip;
+ pRD->PCTAddr = dac_cast<TADDR>(m_Regs) + offsetof(CONTEXT, Eip);
+
+ pRD->Esp = m_Regs->Esp;
+
+ RETURN;
+}
+
+// The HijackFrame has to know the registers that are pushed by OnHijackTripThread
+void HijackFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+ CONTRACTL {
+ NOTHROW;
+ GC_NOTRIGGER;
+ HOST_NOCALLS;
+ SUPPORTS_DAC;
+ }
+ CONTRACTL_END;
+
+ // This only describes the top-most frame
+ pRD->pContext = NULL;
+
+ pRD->pEdi = &m_Args->Edi;
+ pRD->pEsi = &m_Args->Esi;
+ pRD->pEbx = &m_Args->Ebx;
+ pRD->pEdx = &m_Args->Edx;
+ pRD->pEcx = &m_Args->Ecx;
+ pRD->pEax = &m_Args->Eax;
+
+ pRD->pEbp = &m_Args->Ebp;
+ pRD->PCTAddr = dac_cast<TADDR>(m_Args) + offsetof(HijackArgs, Eip);
+ pRD->ControlPC = *PTR_PCODE(pRD->PCTAddr);
+ pRD->Esp = (DWORD)(pRD->PCTAddr + sizeof(TADDR));
+}
+
+#endif // FEATURE_HIJACK
+
+void PInvokeCalliFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+ CONTRACT_VOID
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_ANY;
+ HOST_NOCALLS;
+ SUPPORTS_DAC;
+ }
+ CONTRACT_END;
+
+ VASigCookie *pVASigCookie = GetVASigCookie();
+ UpdateRegDisplayHelper(pRD, pVASigCookie->sizeOfArgs+sizeof(int));
+
+ RETURN;
+}
+
+void TailCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+ CONTRACT_VOID
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_ANY;
+ HOST_NOCALLS;
+ SUPPORTS_DAC;
+ }
+ CONTRACT_END;
+
+ // reset pContext; it's only valid for active (top-most) frame
+ pRD->pContext = NULL;
+
+ pRD->pEdi = (DWORD*)&m_regs.edi;
+ pRD->pEsi = (DWORD*)&m_regs.esi;
+ pRD->pEbx = (DWORD*)&m_regs.ebx;
+ pRD->pEbp = (DWORD*)&m_regs.ebp;
+
+ pRD->PCTAddr = GetReturnAddressPtr();
+ pRD->ControlPC = *PTR_PCODE(pRD->PCTAddr);
+ pRD->Esp = (DWORD)(pRD->PCTAddr + sizeof(TADDR));
+
+ RETURN;
+}
+
+//------------------------------------------------------------------------
+// This is declared as returning WORD instead of PRD_TYPE because of
+// header issues with cgencpu.h including dbginterface.h.
+WORD GetUnpatchedCodeData(LPCBYTE pAddr)
+{
+#ifndef _TARGET_X86_
+#error Make sure this works before porting to platforms other than x86.
+#endif
+ CONTRACT(WORD) {
+ NOTHROW;
+ GC_NOTRIGGER;
+ PRECONDITION(CORDebuggerAttached());
+ PRECONDITION(CheckPointer(pAddr));
+ SO_TOLERANT;
+ } CONTRACT_END;
+
+ // Ordering is because x86 is little-endian.
+ BYTE bLow = pAddr[0];
+ BYTE bHigh = pAddr[1];
+
+#ifndef DACCESS_COMPILE
+ // Need to make sure that the code we're reading is free of breakpoint patches.
+ PRD_TYPE unpatchedOpcode;
+ if (g_pDebugInterface->CheckGetPatchedOpcode((CORDB_ADDRESS_TYPE *)pAddr,
+ &unpatchedOpcode))
+ {
+ // PRD_TYPE is supposed to be an opaque debugger structure representing data to remove a patch.
+ // Although PRD_TYPE is currently typedef'ed to be a DWORD_PTR, it's actually semantically just a BYTE.
+ // (since a patch on x86 is just an 0xCC instruction).
+ // Ideally, the debugger subsystem would expose a patch-code stripper that returns BYTE/WORD/etc, and
+ // not force us to crack it ourselves here.
+ bLow = (BYTE) unpatchedOpcode;
+ }
+ //
+#endif
+
+ WORD w = bLow + (bHigh << 8);
+ RETURN w;
+}
+
+
+#ifndef DACCESS_COMPILE
+
+//-------------------------------------------------------------------------
+// One-time creation of special prestub to initialize UMEntryThunks.
+//-------------------------------------------------------------------------
+Stub *GenerateUMThunkPrestub()
+{
+ CONTRACT(Stub*)
+ {
+ STANDARD_VM_CHECK;
+ POSTCONDITION(CheckPointer(RETVAL));
+ }
+ CONTRACT_END;
+
+ CPUSTUBLINKER sl;
+ CPUSTUBLINKER *psl = &sl;
+
+ CodeLabel* rgRareLabels[] = { psl->NewCodeLabel(),
+ psl->NewCodeLabel(),
+ psl->NewCodeLabel()
+ };
+
+
+ CodeLabel* rgRejoinLabels[] = { psl->NewCodeLabel(),
+ psl->NewCodeLabel(),
+ psl->NewCodeLabel()
+ };
+
+ // emit the initial prolog
+ psl->EmitComMethodStubProlog(UMThkCallFrame::GetMethodFrameVPtr(), rgRareLabels, rgRejoinLabels, FALSE /*Don't profile*/);
+
+ // mov ecx, [esi+UMThkCallFrame.pUMEntryThunk]
+ psl->X86EmitIndexRegLoad(kECX, kESI, UMThkCallFrame::GetOffsetOfUMEntryThunk());
+
+ // The call conv is a __stdcall
+ psl->X86EmitPushReg(kECX);
+
+ // call UMEntryThunk::DoRunTimeInit
+ psl->X86EmitCall(psl->NewExternalCodeLabel((LPVOID)UMEntryThunk::DoRunTimeInit), 4);
+
+ // mov eax, [esi+UMThkCallFrame.pUMEntryThunk]
+ psl->X86EmitIndexRegLoad(kEAX, kESI, UMThkCallFrame::GetOffsetOfUMEntryThunk());
+
+ // lea eax, [eax + UMEntryThunk.m_code] // point to the fixed-up UMEntryThunk
+ psl->X86EmitOp(0x8d, kEAX, kEAX,
+ UMEntryThunk::GetCodeOffset() + UMEntryThunkCode::GetEntryPointOffset());
+
+ psl->EmitComMethodStubEpilog(UMThkCallFrame::GetMethodFrameVPtr(), rgRareLabels, rgRejoinLabels, FALSE /*Don't profile*/);
+
+ RETURN psl->Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap());
+}
+
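+//-------------------------------------------------------------------------
+// Generates the stub that initializes an InlinedCallFrame for an inlined P/Invoke call.
+// On entry EDI holds the address of the frame (its vptr position, not its negspace); the stub
+// fills in the GS cookie, frame vptr, frame link, callee-saved EBP and a zero return address,
+// links the frame onto the current Thread's frame chain, and leaves the Thread in ESI.
+//-------------------------------------------------------------------------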
+Stub *GenerateInitPInvokeFrameHelper()
+{
+ CONTRACT(Stub*)
+ {
+ STANDARD_VM_CHECK;
+ POSTCONDITION(CheckPointer(RETVAL));
+ }
+ CONTRACT_END;
+
+ CPUSTUBLINKER sl;
+ CPUSTUBLINKER *psl = &sl;
+
+ CORINFO_EE_INFO::InlinedCallFrameInfo FrameInfo;
+ InlinedCallFrame::GetEEInfo(&FrameInfo);
+
+ // EDI contains address of the frame on stack (the frame ptr, not its negspace)
+ unsigned negSpace = FrameInfo.offsetOfFrameVptr;
+
+ // mov esi, GetThread()
+ psl->X86EmitCurrentThreadFetch(kESI, (1<<kEDI)|(1<<kEBX)|(1<<kECX)|(1<<kEDX));
+
+ // mov [edi + FrameInfo.offsetOfGSCookie], GetProcessGSCookie()
+ psl->X86EmitOffsetModRM(0xc7, (X86Reg)0x0, kEDI, FrameInfo.offsetOfGSCookie - negSpace);
+ psl->Emit32(GetProcessGSCookie());
+
+ // mov [edi + FrameInfo.offsetOfFrameVptr], InlinedCallFrame::GetMethodFrameVPtr()
+ psl->X86EmitOffsetModRM(0xc7, (X86Reg)0x0, kEDI, FrameInfo.offsetOfFrameVptr - negSpace);
+ psl->Emit32(InlinedCallFrame::GetMethodFrameVPtr());
+
+ // mov eax, [esi + offsetof(Thread, m_pFrame)]
+ // mov [edi + FrameInfo.offsetOfFrameLink], eax
+ psl->X86EmitIndexRegLoad(kEAX, kESI, offsetof(Thread, m_pFrame));
+ psl->X86EmitIndexRegStore(kEDI, FrameInfo.offsetOfFrameLink - negSpace, kEAX);
+
+ // mov [edi + FrameInfo.offsetOfCalleeSavedFP], ebp
+ psl->X86EmitIndexRegStore(kEDI, FrameInfo.offsetOfCalleeSavedFP - negSpace, kEBP);
+
+ // mov [edi + FrameInfo.offsetOfReturnAddress], 0
+ psl->X86EmitOffsetModRM(0xc7, (X86Reg)0x0, kEDI, FrameInfo.offsetOfReturnAddress - negSpace);
+ psl->Emit32(0);
+
+ // mov [esi + offsetof(Thread, m_pFrame)], edi
+ psl->X86EmitIndexRegStore(kESI, offsetof(Thread, m_pFrame), kEDI);
+
+ // leave current Thread in ESI
+ psl->X86EmitReturn(0);
+
+ // A single process-wide stub that will never unload
+ RETURN psl->Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap());
+}
+
+#ifdef FEATURE_INCLUDE_ALL_INTERFACES
+
+static void STDCALL LeaveRuntimeHelperWithFrame (Thread *pThread, size_t target, Frame *pFrame)
+{
+ CONTRACTL
+ {
+ THROWS;
+ GC_TRIGGERS;
+ MODE_PREEMPTIVE;
+ ENTRY_POINT;
+ }
+ CONTRACTL_END;
+
+ Thread::LeaveRuntimeThrowComplus(target);
+ GCX_COOP_THREAD_EXISTS(pThread);
+ pFrame->Push(pThread);
+
+}
+
+static void STDCALL EnterRuntimeHelperWithFrame (Thread *pThread, Frame *pFrame)
+{
+ // make sure we restore the original Win32 last error before leaving this function - we are
+ // called right after returning from the P/Invoke target and the error has not been saved yet
+ BEGIN_PRESERVE_LAST_ERROR;
+
+ CONTRACTL
+ {
+ THROWS;
+ GC_TRIGGERS;
+ MODE_PREEMPTIVE;
+ ENTRY_POINT;
+ }
+ CONTRACTL_END;
+
+ {
+ HRESULT hr = Thread::EnterRuntimeNoThrow();
+ GCX_COOP_THREAD_EXISTS(pThread);
+ if (FAILED(hr))
+ {
+ INSTALL_UNWIND_AND_CONTINUE_HANDLER;
+ ThrowHR (hr);
+ UNINSTALL_UNWIND_AND_CONTINUE_HANDLER;
+ }
+
+ pFrame->Pop(pThread);
+ }
+
+ END_PRESERVE_LAST_ERROR;
+}
+
+// "ip" is the return address
+// This function disassembles the code at the return address to determine
+// how many arguments to pop off.
+// Returns the number of DWORDs that should be popped off on return.
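+//
+// For example, a caller-pop (cdecl) call site that passed 12 bytes of arguments is typically
+// followed by "add esp, 0Ch" (83 C4 0C), for which this function returns 3.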
+
+static int STDCALL GetStackSizeForVarArgCall(BYTE* ip)
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ SO_TOLERANT;
+ }
+ CONTRACTL_END;
+
+ int retValue = 0;
+ //BEGIN_ENTRYPOINT_VOIDRET;
+
+ // The instruction immediately following the call may be a move into esp used for
+ // P/Invoke stack resilience. For caller-pop calls it's always mov esp, [ebp-n].
+ if (ip[0] == 0x8b)
+ {
+ if (ip[1] == 0x65)
+ {
+ // mov esp, [ebp+disp8]
+ ip += 3;
+ }
+ else if (ip[1] == 0xa5)
+ {
+ // mov esp, [ebp+disp32]
+ ip += 6;
+ }
+ }
+
+ if (ip[0] == 0x81 && ip[1] == 0xc4)
+ {
+ // add esp, imm32
+ retValue = (*(int*)&ip[2])/4;
+ }
+ else if (ip[0] == 0x83 && ip[1] == 0xc4)
+ {
+ // add esp, imm8
+ retValue = ip[2]/4;
+ }
+ else if (ip[0] == 0x59)
+ {
+ // pop ecx
+ retValue = 1;
+ }
+ else
+ {
+ retValue = 0;
+ }
+ //END_ENTRYPOINT_VOIDRET;
+ return retValue;
+}
+
+void LeaveRuntimeStackProbeOnly()
+{
+ CONTRACTL {
+ THROWS;
+ GC_TRIGGERS;
+ ENTRY_POINT;
+ }
+ CONTRACTL_END;
+
+#ifdef FEATURE_STACK_PROBE
+ RetailStackProbe(ADJUST_PROBE(DEFAULT_ENTRY_PROBE_AMOUNT));
+#endif
+}
+
+//-----------------------------------------------------------------------------
+// Hosting stub for calls from CLR code to unmanaged code
+//
+// We push a LeaveRuntimeFrame, and then re-push all the arguments.
+// Note that we have to support all the different native calling conventions
+// viz. stdcall, thiscall, cdecl, varargs
+
+#if 0
+
+This is a diagrammatic description of what the stub does:
+
+ (lower addresses)
+
+ | |
+ +----------------+ <--- ESP
+ | |
+ | copied |
+ | arguments |
+ | |
+ | |
+ +----------------+
+ | EDX |
+ | ECX |
+ +----------------+
+| | | GSCookie |
+| | +----------------+ <--- ESI
+| | | vptr |
+| | +----------------+
+| | | m_Next |
+| | +----------------+
+| | | EDI | Scratch register
+| | | ESI | For LeaveRuntimeFrame*
+| | | EBX | For Thread*
+| | +----------------+ <--- EBP
+| | | EBP |
++----------------+ <---ESP +----------------+
+| ret addr | | ret addr |
++----------------+ +----------------+
+| | | |
+| arguments | | arguments |
+| | | |
+| | | |
++----------------+ +----------------+
+| | | |
+| caller's frame | | caller's frame |
+| | | |
+
+ (higher addresses)
+
+ Stack on entry Stack before the call
+ to this stub. to unmanaged code.
+
+#endif
+
+//-----------------------------------------------------------------------------
+// This the layout of the frame of the stub
+
+struct StubForHostStackFrame
+{
+ LPVOID m_outgingArgs[1];
+ ArgumentRegisters m_argumentRegisters;
+ GSCookie m_gsCookie;
+ LeaveRuntimeFrame m_LeaveRuntimeFrame;
+ CalleeSavedRegisters m_calleeSavedRegisters;
+ LPVOID m_retAddr;
+ LPVOID m_incomingArgs[1];
+
+public:
+
+ // Where does the FP/EBP point to?
+ static INT32 GetFPpositionOffset()
+ {
+ LIMITED_METHOD_CONTRACT;
+ return offsetof(StubForHostStackFrame, m_calleeSavedRegisters) +
+ offsetof(CalleeSavedRegisters, ebp);
+ }
+
+ static INT32 GetFPrelOffsOfArgumentRegisters()
+ {
+ LIMITED_METHOD_CONTRACT;
+ return offsetof(StubForHostStackFrame, m_argumentRegisters) - GetFPpositionOffset();
+ }
+
+ static INT32 GetFPrelOffsOfCalleeSavedRegisters()
+ {
+ LIMITED_METHOD_CONTRACT;
+ return offsetof(StubForHostStackFrame, m_calleeSavedRegisters) - GetFPpositionOffset();
+ }
+
+ static INT32 GetFPrelOffsOfRetAddr()
+ {
+ LIMITED_METHOD_CONTRACT;
+ return offsetof(StubForHostStackFrame, m_retAddr) - GetFPpositionOffset();
+ }
+
+ static INT32 GetFPrelOffsOfIncomingArgs()
+ {
+ LIMITED_METHOD_CONTRACT;
+ return offsetof(StubForHostStackFrame, m_incomingArgs) - GetFPpositionOffset();
+ }
+};
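+// Note: the fields above are declared from lower to higher stack addresses, mirroring the
+// "Stack before the call to unmanaged code" column of the diagram above.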
+
+static Stub *GenerateStubForHostWorker(LoaderHeap *pHeap,
+ LPVOID pNativeTarget, // NULL to fetch from the last pushed argument (COM)
+ Stub *pInnerStub, // stub to call instead of pNativeTarget, or NULL
+ LONG dwComSlot, // only valid if pNativeTarget is NULL
+ WORD wStackArgumentSize, // -1 for varargs
+ WORD wStackPopSize) // 0 for cdecl
+{
+ STANDARD_VM_CONTRACT;
+
+ // We need to call LeaveRuntime before the target, and EnterRuntime after the target
+ CPUSTUBLINKER sl;
+
+ sl.X86EmitPushEBPframe();
+
+ // save EBX, ESI, EDI
+ sl.X86EmitPushReg(kEBX);
+ sl.X86EmitPushReg(kESI);
+ sl.X86EmitPushReg(kEDI);
+
+ // Frame
+ sl.X86EmitPushReg(kDummyPushReg); // m_Next
+ sl.X86EmitPushImm32((UINT)(size_t)LeaveRuntimeFrame::GetMethodFrameVPtr());
+
+ // mov esi, esp; esi is Frame
+ sl.X86EmitMovRegSP(kESI);
+
+ sl.X86EmitPushImmPtr((LPVOID)GetProcessGSCookie());
+
+ // Save outgoing arguments on the stack
+ sl.X86EmitPushReg(kECX);
+ sl.X86EmitPushReg(kEDX);
+
+ INT32 offs = 0;
+ if (wStackArgumentSize == (WORD)-1)
+ {
+ // Re-push the return address as an argument to GetStackSizeForVarArgCall()
+ // This will return the number of stack arguments (in DWORDs)
+ sl.X86EmitIndexPush(kEBP, StubForHostStackFrame::GetFPrelOffsOfRetAddr());
+ sl.X86EmitCall(sl.NewExternalCodeLabel((LPVOID)GetStackSizeForVarArgCall), 4);
+
+ // We generate the following code sequence to re-push all the arguments
+ //
+ // Note that we cannot use "sub ESP, EAX" as ESP might jump past the
+ // stack guard-page.
+ //
+ // cmp EAX, 0
+ // LoopTop:
+ // jz LoopDone
+ // push dword ptr[EBP + EAX*4 + 4]
+ // sub EAX, 1
+ // jmp LoopTop
+ // LoopDone:
+ // ...
+
+ sl.X86EmitCmpRegImm32(kEAX, 0);
+ CodeLabel * pLoopTop = sl.EmitNewCodeLabel();
+ CodeLabel * pLoopDone = sl.NewCodeLabel();
+ sl.X86EmitCondJump(pLoopDone, X86CondCode::kJZ);
+ sl.X86EmitBaseIndexPush(kEBP, kEAX, 4, StubForHostStackFrame::GetFPrelOffsOfIncomingArgs() - sizeof(LPVOID));
+ sl.X86EmitSubReg(kEAX, 1);
+ sl.X86EmitNearJump(pLoopTop);
+ sl.EmitLabel(pLoopDone);
+ }
+ else
+ {
+ offs = StubForHostStackFrame::GetFPrelOffsOfIncomingArgs() + wStackArgumentSize;
+
+ int numStackSlots = wStackArgumentSize / sizeof(LPVOID);
+ for (int i = 0; i < numStackSlots; i++) {
+ offs -= sizeof(LPVOID);
+ sl.X86EmitIndexPush(kEBP, offs);
+ }
+ }
+
+ //-------------------------------------------------------------------------
+
+ // EBX has Thread*
+ // X86TLSFetch_TRASHABLE_REGS will get trashed
+ sl.X86EmitCurrentThreadFetch(kEBX, 0);
+
+ if (pNativeTarget != NULL)
+ {
+ // push Frame
+ sl.X86EmitPushReg(kESI);
+
+ // push target
+ if (pNativeTarget == (LPVOID)-1)
+ {
+ // target comes right above arguments
+ sl.X86EmitIndexPush(kEBP, StubForHostStackFrame::GetFPrelOffsOfIncomingArgs() + wStackArgumentSize);
+ }
+ else
+ {
+ // target is fixed
+ sl.X86EmitPushImm32((UINT)(size_t)pNativeTarget);
+ }
+ }
+ else
+ {
+ // mov eax, [first_arg]
+ // mov eax, [eax]
+ // push [eax + slot_offset]
+ sl.X86EmitIndexRegLoad(kEAX, kEBP, offs);
+ sl.X86EmitIndexRegLoad(kEAX, kEAX, 0);
+ sl.X86EmitIndexPush(kEAX, sizeof(LPVOID) * dwComSlot);
+
+ // push Frame
+ sl.X86EmitPushReg(kESI);
+ // push [esp + 4]
+ sl.X86EmitEspOffset(0xff, (X86Reg)6, 4);
+ }
+
+ // push Thread
+ sl.X86EmitPushReg(kEBX);
+ sl.X86EmitCall(sl.NewExternalCodeLabel((LPVOID)LeaveRuntimeHelperWithFrame), 0xc);
+
+ //-------------------------------------------------------------------------
+ // call NDirect
+ // See diagram above to see what the stack looks like at this point
+
+ // Restore outgoing arguments
+ unsigned offsToArgRegs = StubForHostStackFrame::GetFPrelOffsOfArgumentRegisters();
+ sl.X86EmitIndexRegLoad(kECX, kEBP, offsToArgRegs + offsetof(ArgumentRegisters, ECX));
+ sl.X86EmitIndexRegLoad(kEDX, kEBP, offsToArgRegs + offsetof(ArgumentRegisters, EDX));
+
+ if (pNativeTarget != NULL || pInnerStub != NULL)
+ {
+ if (pNativeTarget == (LPVOID)-1)
+ {
+ // mov eax, target
+ sl.X86EmitIndexRegLoad(kEAX, kEBP, StubForHostStackFrame::GetFPrelOffsOfIncomingArgs() + wStackArgumentSize);
+ // call eax
+ sl.Emit16(X86_INSTR_CALL_EAX);
+ }
+ else
+ {
+ if (pNativeTarget == NULL)
+ {
+ // pop target and discard it (we go to the inner stub)
+ _ASSERTE(pInnerStub != NULL);
+ sl.X86EmitPopReg(kEAX);
+ }
+
+ LPVOID pTarget = (pInnerStub != NULL ? (LPVOID)pInnerStub->GetEntryPoint() : pNativeTarget);
+ sl.X86EmitCall(sl.NewExternalCodeLabel(pTarget), wStackPopSize / 4);
+ }
+ }
+ else
+ {
+ // pop target
+ sl.X86EmitPopReg(kEAX);
+ // call eax
+ sl.Emit16(X86_INSTR_CALL_EAX);
+ }
+
+ //-------------------------------------------------------------------------
+ // Save return value registers and call EnterRuntimeHelperWithFrame
+ //
+
+ sl.X86EmitPushReg(kEAX);
+ sl.X86EmitPushReg(kEDX);
+
+ // push Frame
+ sl.X86EmitPushReg(kESI);
+ // push Thread
+ sl.X86EmitPushReg(kEBX);
+ // call EnterRuntime
+ sl.X86EmitCall(sl.NewExternalCodeLabel((LPVOID)EnterRuntimeHelperWithFrame), 8);
+
+ sl.X86EmitPopReg(kEDX);
+ sl.X86EmitPopReg(kEAX);
+
+ //-------------------------------------------------------------------------
+ // Tear down the frame
+ //
+
+ sl.EmitCheckGSCookie(kESI, LeaveRuntimeFrame::GetOffsetOfGSCookie());
+
+ // lea esp, [ebp - offsToCalleeSavedRegs]
+ unsigned offsToCalleeSavedRegs = StubForHostStackFrame::GetFPrelOffsOfCalleeSavedRegisters();
+ sl.X86EmitIndexLea((X86Reg)kESP_Unsafe, kEBP, offsToCalleeSavedRegs);
+
+ sl.X86EmitPopReg(kEDI);
+ sl.X86EmitPopReg(kESI);
+ sl.X86EmitPopReg(kEBX);
+
+ sl.X86EmitPopReg(kEBP);
+
+ // ret [wStackPopSize]
+ sl.X86EmitReturn(wStackPopSize);
+
+ if (pInnerStub != NULL)
+ {
+ // this stub calls another stub
+ return sl.LinkInterceptor(pHeap, pInnerStub, pNativeTarget);
+ }
+ else
+ {
+ return sl.Link(pHeap);
+ }
+}
+
+
+//-----------------------------------------------------------------------------
+Stub *NDirectMethodDesc::GenerateStubForHost(LPVOID pNativeTarget, Stub *pInnerStub)
+{
+ STANDARD_VM_CONTRACT;
+
+ // We need to call LeaveRuntime before the target, and EnterRuntime after the target
+
+ if (IsQCall())
+ {
+ // We need just the stack probe for QCalls
+ CPUSTUBLINKER sl;
+ sl.X86EmitCall(sl.NewExternalCodeLabel((LPVOID)LeaveRuntimeStackProbeOnly), 0);
+
+ sl.X86EmitNearJump(sl.NewExternalCodeLabel((LPVOID)pNativeTarget));
+
+ return sl.Link(GetLoaderAllocator()->GetStubHeap());
+ }
+
+ WORD wArgSize = (IsVarArgs() ? (WORD)-1 : GetStackArgumentSize());
+ WORD wPopSize = ((IsStdCall() || IsThisCall()) ? GetStackArgumentSize() : 0);
+
+ return GenerateStubForHostWorker(GetDomain()->GetLoaderAllocator()->GetStubHeap(),
+ pNativeTarget,
+ pInnerStub,
+ 0,
+ wArgSize,
+ wPopSize);
+}
+
+
+#ifdef FEATURE_COMINTEROP
+
+//-----------------------------------------------------------------------------
+Stub *ComPlusCallInfo::GenerateStubForHost(LoaderHeap *pHeap, Stub *pInnerStub)
+{
+ STANDARD_VM_CONTRACT;
+
+ WORD wArgSize = GetStackArgumentSize();
+
+ return GenerateStubForHostWorker(pHeap,
+ NULL,
+ pInnerStub,
+ m_cachedComSlot,
+ wArgSize,
+ wArgSize); // always stdcall
+}
+
+#endif // FEATURE_COMINTEROP
+
+//-----------------------------------------------------------------------------
+// static
+Stub *COMDelegate::GenerateStubForHost(MethodDesc *pInvokeMD, MethodDesc *pStubMD, LPVOID pNativeTarget, Stub *pInnerStub)
+{
+ STANDARD_VM_CONTRACT;
+
+ // get unmanaged calling convention from pInvokeMD's metadata
+ PInvokeStaticSigInfo sigInfo(pInvokeMD);
+ CorPinvokeMap callConv = sigInfo.GetCallConv();
+
+ WORD wArgSize = pStubMD->AsDynamicMethodDesc()->GetNativeStackArgSize();
+ WORD wPopSize = (callConv == pmCallConvCdecl ? 0 : wArgSize);
+
+ return GenerateStubForHostWorker(NULL, // we want to free this stub when the delegate dies
+ pNativeTarget,
+ pInnerStub,
+ 0,
+ wArgSize,
+ wPopSize);
+}
+
+//-----------------------------------------------------------------------------
+// static
+Stub *NDirect::GenerateStubForHost(Module *pModule, CorUnmanagedCallingConvention callConv, WORD wArgSize)
+{
+ STANDARD_VM_CONTRACT;
+
+ // This one is for unmanaged CALLI where the target is passed as last argument
+ // (first pushed to stack)
+
+ WORD wPopSize = (callConv == IMAGE_CEE_CS_CALLCONV_C ? 0 : (wArgSize + STACK_ELEM_SIZE));
+
+ return GenerateStubForHostWorker(pModule->GetDomain()->GetLoaderAllocator()->GetStubHeap(),
+ (LPVOID)-1,
+ NULL,
+ 0,
+ wArgSize,
+ wPopSize);
+}
+
+#endif // FEATURE_INCLUDE_ALL_INTERFACES
+
+
+#ifdef MDA_SUPPORTED
+
+//-----------------------------------------------------------------------------
+Stub *NDirectMethodDesc::GenerateStubForMDA(LPVOID pNativeTarget, Stub *pInnerStub, BOOL fCalledByStub)
+{
+ STANDARD_VM_CONTRACT;
+
+ CPUSTUBLINKER sl;
+ sl.X86EmitPushEBPframe();
+
+ DWORD callConv = (DWORD)(IsThisCall() ? pmCallConvThiscall : (IsStdCall() ? pmCallConvStdcall : pmCallConvCdecl));
+ _ASSERTE((callConv & StackImbalanceCookie::HAS_FP_RETURN_VALUE) == 0);
+
+ MetaSig msig(this);
+ if (msig.HasFPReturn())
+ {
+ // check for the HRESULT swapping impl flag
+ DWORD dwImplFlags;
+ IfFailThrow(GetMDImport()->GetMethodImplProps(GetMemberDef(), NULL, &dwImplFlags));
+
+ if (dwImplFlags & miPreserveSig)
+ {
+ // pass a flag to PInvokeStackImbalanceHelper that it should save & restore FPU return value
+ callConv |= StackImbalanceCookie::HAS_FP_RETURN_VALUE;
+ }
+ }
+
+ // init StackImbalanceCookie
+ sl.X86EmitPushReg(kEAX); // m_dwSavedEsp (just making space)
+ sl.X86EmitPushImm32(callConv); // m_callConv
+
+ if (IsVarArgs())
+ {
+ // Re-push the return address as an argument to GetStackSizeForVarArgCall()
+ if (fCalledByStub)
+ {
+ // We will be called by another stub that doesn't know the stack size,
+ // so we need to skip a frame to get to the managed caller.
+ sl.X86EmitIndexRegLoad(kEAX, kEBP, 0);
+ sl.X86EmitIndexPush(kEAX, 4);
+ }
+ else
+ {
+ sl.X86EmitIndexPush(kEBP, 4);
+ }
+
+ // This will return the number of stack arguments (in DWORDs)
+ sl.X86EmitCall(sl.NewExternalCodeLabel((LPVOID)GetStackSizeForVarArgCall), 4);
+
+ // shl eax,2
+ sl.Emit16(0xe0c1);
+ sl.Emit8(0x02);
+
+ sl.X86EmitPushReg(kEAX); // m_dwStackArgSize
+ }
+ else
+ {
+ sl.X86EmitPushImm32(GetStackArgumentSize()); // m_dwStackArgSize
+ }
+
+ LPVOID pTarget = (pInnerStub != NULL ? (LPVOID)pInnerStub->GetEntryPoint() : pNativeTarget);
+ sl.X86EmitPushImmPtr(pTarget); // m_pTarget
+ sl.X86EmitPushImmPtr(this); // m_pMD
+
+ // stack layout at this point
+
+ // | ... |
+ // | stack arguments | EBP + 8
+ // +-----------------------+
+ // | return address | EBP + 4
+ // +-----------------------+
+ // | saved EBP | EBP + 0
+ // +-----------------------+
+ // | SIC::m_dwSavedEsp |
+ // | SIC::m_callConv |
+ // | SIC::m_dwStackArgSize |
+ // | SIC::m_pTarget |
+ // | SIC::m_pMD | EBP - 20
+ // ------------------------
+
+ // call the helper
+ sl.X86EmitCall(sl.NewExternalCodeLabel(PInvokeStackImbalanceHelper), sizeof(StackImbalanceCookie));
+
+ // pop StackImbalanceCookie
+ sl.X86EmitMovSPReg(kEBP);
+
+ sl.X86EmitPopReg(kEBP);
+ sl.X86EmitReturn((IsStdCall() || IsThisCall()) ? GetStackArgumentSize() : 0);
+
+ if (pInnerStub)
+ {
+ return sl.LinkInterceptor(GetLoaderAllocator()->GetStubHeap(), pInnerStub, pNativeTarget);
+ }
+ else
+ {
+ return sl.Link(GetLoaderAllocator()->GetStubHeap());
+ }
+}
+
+//-----------------------------------------------------------------------------
+// static
+Stub *COMDelegate::GenerateStubForMDA(MethodDesc *pInvokeMD, MethodDesc *pStubMD, LPVOID pNativeTarget, Stub *pInnerStub)
+{
+ STANDARD_VM_CONTRACT;
+
+ WORD wStackArgSize = pStubMD->AsDynamicMethodDesc()->GetNativeStackArgSize();
+
+ // get unmanaged calling convention from pInvokeMD's metadata
+ PInvokeStaticSigInfo sigInfo(pInvokeMD);
+ DWORD callConv = (DWORD)sigInfo.GetCallConv();
+ _ASSERTE((callConv & StackImbalanceCookie::HAS_FP_RETURN_VALUE) == 0);
+
+ MetaSig msig(pInvokeMD);
+ if (msig.HasFPReturn())
+ {
+ // pass a flag to PInvokeStackImbalanceHelper that it should save & restore FPU return value
+ callConv |= StackImbalanceCookie::HAS_FP_RETURN_VALUE;
+ }
+
+ CPUSTUBLINKER sl;
+ sl.X86EmitPushEBPframe();
+
+ LPVOID pTarget = (pInnerStub != NULL ? (LPVOID)pInnerStub->GetEntryPoint() : pNativeTarget);
+
+ // init StackImbalanceCookie
+ sl.X86EmitPushReg(kEAX); // m_dwSavedEsp (just making space)
+ sl.X86EmitPushImm32(callConv); // m_callConv
+ sl.X86EmitPushImm32(wStackArgSize); // m_dwStackArgSize
+ sl.X86EmitPushImmPtr(pTarget); // m_pTarget
+ sl.X86EmitPushImmPtr(pInvokeMD); // m_pMD
+
+ // stack layout at this point
+
+ // | ... |
+ // | stack arguments | EBP + 8
+ // +-----------------------+
+ // | return address | EBP + 4
+ // +-----------------------+
+ // | saved EBP | EBP + 0
+ // +-----------------------+
+ // | SIC::m_dwSavedEsp |
+ // | SIC::m_callConv |
+ // | SIC::m_dwStackArgSize |
+ // | SIC::m_pTarget |
+ // | SIC::m_pMD | EBP - 20
+ // ------------------------
+
+ // call the helper
+ sl.X86EmitCall(sl.NewExternalCodeLabel(PInvokeStackImbalanceHelper), sizeof(StackImbalanceCookie));
+
+ // pop StackImbalanceCookie
+ sl.X86EmitMovSPReg(kEBP);
+
+ sl.X86EmitPopReg(kEBP);
+ sl.X86EmitReturn(callConv == pmCallConvCdecl ? 0 : wStackArgSize);
+
+ if (pInnerStub != NULL)
+ {
+ return sl.LinkInterceptor(pInnerStub, pNativeTarget);
+ }
+ else
+ {
+ return sl.Link(); // don't use loader heap as we want to be able to free the stub
+ }
+}
+
+#endif // MDA_SUPPORTED
+
+extern "C" VOID STDCALL StubRareEnableWorker(Thread *pThread)
+{
+ WRAPPER_NO_CONTRACT;
+
+ //printf("RareEnable\n");
+ pThread->RareEnablePreemptiveGC();
+}
+
+
+
+
+// Disable when calling into managed code from a place that fails via Exceptions
+extern "C" VOID STDCALL StubRareDisableTHROWWorker(Thread *pThread)
+{
+ STATIC_CONTRACT_THROWS;
+ STATIC_CONTRACT_GC_TRIGGERS;
+
+ // Do not add a CONTRACT here. We haven't set up SEH. We rely
+ // on HandleThreadAbort and COMPlusThrowBoot dealing with this situation properly.
+
+ // WARNING!!!!
+ // when we start executing here, we are actually in cooperative mode. But we
+ // haven't synchronized with the barrier to reentry yet. So we are in a highly
+ // dangerous mode. If we call managed code, we will potentially be active in
+ // the GC heap, even as GCs are occurring!
+
+ // Check for ShutDown scenario. This happens only when we have initiated shutdown
+ // and someone is trying to call in after the CLR is suspended. In that case, we
+ // must either raise an unmanaged exception or return an HRESULT, depending on the
+ // expectations of our caller.
+ if (!CanRunManagedCode())
+ {
+ // DO NOT IMPROVE THIS EXCEPTION! It cannot be a managed exception. It
+ // cannot be a real exception object because we cannot execute any managed
+ // code here.
+ pThread->m_fPreemptiveGCDisabled = 0;
+ COMPlusThrowBoot(E_PROCESS_SHUTDOWN_REENTRY);
+ }
+
+ // We must do the following in this order, because otherwise we would be constructing
+ // the exception for the abort without synchronizing with the GC. Also, we have no
+ // CLR SEH set up, despite the fact that we may throw a ThreadAbortException.
+ pThread->RareDisablePreemptiveGC();
+ pThread->HandleThreadAbort();
+}
+
+// Note that this logic is copied below, in PopSEHRecords
+__declspec(naked)
+VOID __cdecl PopSEHRecords(LPVOID pTargetSP)
+{
+ // No CONTRACT possible on naked functions
+ STATIC_CONTRACT_NOTHROW;
+ STATIC_CONTRACT_GC_NOTRIGGER;
+
+ __asm{
+ mov ecx, [esp+4] ;; ecx <- pTargetSP
+ mov eax, fs:[0] ;; get current SEH record
+ poploop:
+ cmp eax, ecx
+ jge done
+ mov eax, [eax] ;; get next SEH record
+ jmp poploop
+ done:
+ mov fs:[0], eax
+ retn
+ }
+}
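+
+// For reference, the asm loop above is roughly equivalent to this sketch (illustration only):
+//
+//   EXCEPTION_REGISTRATION_RECORD *pRecord = (EXCEPTION_REGISTRATION_RECORD *)__readfsdword(0);
+//   while (pRecord < (EXCEPTION_REGISTRATION_RECORD *)pTargetSP)
+//       pRecord = pRecord->Next;
+//   __writefsdword(0, (DWORD)pRecord);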
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// JITInterface
+//
+//////////////////////////////////////////////////////////////////////////////
+
+/*********************************************************************/
+#ifdef EnC_SUPPORTED
+#pragma warning (disable : 4731)
+void ResumeAtJit(PCONTEXT pContext, LPVOID oldESP)
+{
+ // No CONTRACT here, because we can't run the risk of it pushing any SEH into the
+ // current method.
+
+ STATIC_CONTRACT_NOTHROW;
+ STATIC_CONTRACT_GC_NOTRIGGER;
+
+#ifdef _DEBUG
+ DWORD curESP;
+ __asm mov curESP, esp
+#endif
+
+ if (oldESP)
+ {
+ _ASSERTE(curESP < (DWORD)(size_t)oldESP);
+ // should have popped the SEH records by now as stack has been overwritten
+ _ASSERTE(GetCurrentSEHRecord() > oldESP);
+ }
+
+ // For the "push Eip, ..., ret"
+ _ASSERTE(curESP < pContext->Esp - sizeof(DWORD));
+ pContext->Esp -= sizeof(DWORD);
+
+ __asm {
+ mov ebp, pContext
+
+ // Push Eip onto the targetESP, so that the final "ret" will consume it
+ mov ecx, [ebp]CONTEXT.Esp
+ mov edx, [ebp]CONTEXT.Eip
+ mov [ecx], edx
+
+ // Restore all registers except Esp, Ebp, Eip
+ mov eax, [ebp]CONTEXT.Eax
+ mov ebx, [ebp]CONTEXT.Ebx
+ mov ecx, [ebp]CONTEXT.Ecx
+ mov edx, [ebp]CONTEXT.Edx
+ mov esi, [ebp]CONTEXT.Esi
+ mov edi, [ebp]CONTEXT.Edi
+
+ push [ebp]CONTEXT.Esp // pContext->Esp is (targetESP-sizeof(DWORD))
+ push [ebp]CONTEXT.Ebp
+ pop ebp
+ pop esp
+
+ // esp is (targetESP-sizeof(DWORD)), and [esp] is the targetEIP.
+ // The ret will set eip to targetEIP and esp will be automatically
+ // incremented to targetESP
+
+ ret
+ }
+}
+#pragma warning (default : 4731)
+#endif // EnC_SUPPORTED
+
+
+#pragma warning(push)
+#pragma warning(disable: 4035)
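+// Warning 4035 ("no return value") is disabled on purpose: getcpuid and getextcpuid fall off the
+// end with their return value already in EAX (the EAX output of the CPUID instruction).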
+DWORD getcpuid(DWORD arg, unsigned char result[16])
+{
+ LIMITED_METHOD_CONTRACT
+
+ __asm
+ {
+ push ebx
+ push esi
+ mov eax, arg
+ cpuid
+ mov esi, result
+ mov [esi+ 0], eax
+ mov [esi+ 4], ebx
+ mov [esi+ 8], ecx
+ mov [esi+12], edx
+ pop esi
+ pop ebx
+ }
+}
+
+// The following function uses the Deterministic Cache Parameter leaves to determine the cache hierarchy information on Prescott & above platforms.
+// This function takes 3 arguments:
+// Arg1 is an input to ECX. Used as an index to specify which cache level to return information on by CPUID.
+// Arg2 is an input to EAX. For deterministic cache enumeration, we pass in 4H in arg2.
+// Arg3 is a pointer to the return buffer.
+// No need to check whether or not CPUID is supported because we have already called CPUID with success to come here.
+
+DWORD getextcpuid(DWORD arg1, DWORD arg2, unsigned char result[16])
+{
+ LIMITED_METHOD_CONTRACT
+
+ __asm
+ {
+ push ebx
+ push esi
+ mov ecx, arg1
+ mov eax, arg2
+ cpuid
+ mov esi, result
+ mov [esi+ 0], eax
+ mov [esi+ 4], ebx
+ mov [esi+ 8], ecx
+ mov [esi+12], edx
+ pop esi
+ pop ebx
+ }
+}
+
+#pragma warning(pop)
+
+
+// This function returns the number of logical processors on a given physical chip. If it cannot
+// determine the number of logical cpus, or the machine is not populated uniformly with the same
+// type of processors, this function returns 1.
+DWORD GetLogicalCpuCount()
+{
+ // No CONTRACT possible because GetLogicalCpuCount uses SEH
+
+ STATIC_CONTRACT_THROWS;
+ STATIC_CONTRACT_GC_NOTRIGGER;
+
+ static DWORD val = 0;
+
+ // cache value for later re-use
+ if (val)
+ {
+ return val;
+ }
+
+ struct Param : DefaultCatchFilterParam
+ {
+ DWORD retVal;
+ } param;
+ param.pv = COMPLUS_EXCEPTION_EXECUTE_HANDLER;
+ param.retVal = 1;
+
+ PAL_TRY(Param *, pParam, &param)
+ {
+ unsigned char buffer[16];
+
+ DWORD maxCpuId = getcpuid(0, buffer);
+
+ if (maxCpuId < 1)
+ goto lDone;
+
+ DWORD* dwBuffer = (DWORD*)buffer;
+
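+ // CPUID leaf 0 returns the vendor string in EBX:EDX:ECX ("GenuineIntel" on Intel parts).
+ // getcpuid stores EBX, ECX and EDX at offsets 4, 8 and 12, so when read back as little-endian
+ // DWORDs the three pieces compare equal to the multi-character constants 'uneG', 'letn' and 'Ieni'.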
+ if (dwBuffer[1] == 'uneG') {
+ if (dwBuffer[3] == 'Ieni') {
+ if (dwBuffer[2] == 'letn') { // get SMT/multicore enumeration for Intel EM64T
+
+ // TODO: Currently GetLogicalCpuCountFromOS() and GetLogicalCpuCountFallback() are broken on
+ // multi-core processors, but we never call into those two functions since we don't halve the
+ // gen0 size when it's a Prescott or above processor. We keep the old version here for earlier
+ // generation (Northwood based) systems; perf data suggests that on those systems, halving the
+ // gen0 size still boosts performance (e.g. Biztalk improves by about 17%), so on those earlier
+ // (Northwood based) systems we still go ahead and halve the gen0 size. The logic in
+ // GetLogicalCpuCountFromOS() and GetLogicalCpuCountFallback() works fine for those earlier
+ // generation systems.
+ // If it's a Prescott or above processor or a multi-core part, perf data suggests that not halving
+ // the gen0 size at all gives us overall better performance.
+ // This is going to be fixed with a new version in the Orcas time frame.
+
+ if( (maxCpuId > 3) && (maxCpuId < 0x80000000) )
+ goto lDone;
+
+ val = GetLogicalCpuCountFromOS(); //try to obtain HT enumeration from OS API
+ if (val )
+ {
+ pParam->retVal = val; // OS API HT enumeration successful, we are Done
+ goto lDone;
+ }
+
+ val = GetLogicalCpuCountFallback(); // OS API failed, Fallback to HT enumeration using CPUID
+ if( val )
+ pParam->retVal = val;
+ }
+ }
+ }
+lDone: ;
+ }
+ PAL_EXCEPT_FILTER(DefaultCatchFilter)
+ {
+ }
+ PAL_ENDTRY
+
+ if (val == 0)
+ {
+ val = param.retVal;
+ }
+
+ return param.retVal;
+}
+
+void UMEntryThunkCode::Encode(BYTE* pTargetCode, void* pvSecretParam)
+{
+ LIMITED_METHOD_CONTRACT;
+
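+ // The thunk laid out below is, in bytes:
+ //   [CC CC]        int 3 ; int 3      (debug-only alignment pad)
+ //   B8 <imm32>     mov  eax, pvSecretParam
+ //   E9 <rel32>     jmp  pTargetCode
+ // Decode() relies on this exact layout (and on the mov starting at an address that is 2 mod 4)
+ // to recover the UMEntryThunk from a callback pointer.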
+#ifdef _DEBUG
+ m_alignpad[0] = X86_INSTR_INT3;
+ m_alignpad[1] = X86_INSTR_INT3;
+#endif // _DEBUG
+ m_movEAX = X86_INSTR_MOV_EAX_IMM32;
+ m_uet = pvSecretParam;
+ m_jmp = X86_INSTR_JMP_REL32;
+ m_execstub = (BYTE*) ((pTargetCode) - (4+((BYTE*)&m_execstub)));
+
+ FlushInstructionCache(GetCurrentProcess(),GetEntryPoint(),sizeof(UMEntryThunkCode));
+}
+
+UMEntryThunk* UMEntryThunk::Decode(LPVOID pCallback)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ if (*((BYTE*)pCallback) != X86_INSTR_MOV_EAX_IMM32 ||
+ ( ((size_t)pCallback) & 3) != 2) {
+ return NULL;
+ }
+ return *(UMEntryThunk**)( 1 + (BYTE*)pCallback );
+}
+
+BOOL DoesSlotCallPrestub(PCODE pCode)
+{
+ CONTRACTL {
+ NOTHROW;
+ GC_NOTRIGGER;
+ SO_TOLERANT;
+ PRECONDITION(pCode != NULL);
+ PRECONDITION(pCode != GetPreStubEntryPoint());
+ } CONTRACTL_END;
+
+ // x86 has the following possible sequences for prestub logic:
+ // 1. slot -> temporary entrypoint -> prestub
+ // 2. slot -> precode -> prestub
+ // 3. slot -> precode -> jumprel32 (NGEN case) -> prestub
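+ //
+ // The byte-pattern checks below distinguish the two precode shapes: a fixup precode starts with
+ // "call rel32" (E8), while the other form is "mov eax, imm32" (B8), a two-byte register move (89 /r)
+ // and "jmp rel32" (E9), with an optional extra "jmp rel32" hop in the NGEN case.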
+
+#ifdef HAS_COMPACT_ENTRYPOINTS
+ if (MethodDescChunk::GetMethodDescFromCompactEntryPoint(pCode, TRUE) != NULL)
+ {
+ return TRUE;
+ }
+#endif // HAS_COMPACT_ENTRYPOINTS
+
+ if (!IS_ALIGNED(pCode, PRECODE_ALIGNMENT))
+ {
+ return FALSE;
+ }
+
+#ifdef HAS_FIXUP_PRECODE
+ if (*PTR_BYTE(pCode) == X86_INSTR_CALL_REL32)
+ {
+ // Note that call could have been patched to jmp in the meantime
+ pCode = rel32Decode(pCode+1);
+
+ // NGEN case
+ if (*PTR_BYTE(pCode) == X86_INSTR_JMP_REL32) {
+ pCode = rel32Decode(pCode+1);
+ }
+
+ return pCode == (TADDR)PrecodeFixupThunk;
+ }
+#endif
+
+ if (*PTR_BYTE(pCode) != X86_INSTR_MOV_EAX_IMM32 ||
+ *PTR_BYTE(pCode+5) != X86_INSTR_MOV_RM_R ||
+ *PTR_BYTE(pCode+7) != X86_INSTR_JMP_REL32)
+ {
+ return FALSE;
+ }
+ pCode = rel32Decode(pCode+8);
+
+ // NGEN case
+ if (*PTR_BYTE(pCode) == X86_INSTR_JMP_REL32) {
+ pCode = rel32Decode(pCode+1);
+ }
+
+ return pCode == GetPreStubEntryPoint();
+}
+
+//==========================================================================================
+// In an NGen image, virtual slots inherited from cross-module dependencies point to jump thunks.
+// These jump thunks initially point to VirtualMethodFixupStub, which transfers control here.
+// This method, 'VirtualMethodFixupWorker', will patch the jump thunk to point to the actual
+// inherited method body once the precode has been executed and we have a stable entry point.
+//
+EXTERN_C PVOID STDCALL VirtualMethodFixupWorker(Object * pThisPtr, CORCOMPILE_VIRTUAL_IMPORT_THUNK *pThunk)
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_COOPERATIVE;
+ ENTRY_POINT;
+ }
+ CONTRACTL_END;
+
+ _ASSERTE(pThisPtr != NULL);
+ VALIDATEOBJECT(pThisPtr);
+
+ MethodTable * pMT = pThisPtr->GetTrueMethodTable();
+
+ WORD slotNumber = pThunk->slotNum;
+ _ASSERTE(slotNumber != (WORD)-1);
+
+ PCODE pCode = pMT->GetRestoredSlot(slotNumber);
+
+ if (!DoesSlotCallPrestub(pCode))
+ {
+ // Skip fixup precode jump for better perf
+ PCODE pDirectTarget = Precode::TryToSkipFixupPrecode(pCode);
+ if (pDirectTarget != NULL)
+ pCode = pDirectTarget;
+
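+ // Patch the thunk from "call VirtualMethodFixupStub" to "jmp <target>" by building the new
+ // 8-byte value on the side and swapping it in with a single aligned interlocked
+ // compare-exchange, so racing threads see either the old call or the complete jmp.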
+ INT64 oldValue = *(INT64*)pThunk;
+ BYTE* pOldValue = (BYTE*)&oldValue;
+
+ if (pOldValue[0] == X86_INSTR_CALL_REL32)
+ {
+ INT64 newValue = oldValue;
+ BYTE* pNewValue = (BYTE*)&newValue;
+ pNewValue[0] = X86_INSTR_JMP_REL32;
+
+ INT_PTR pcRelOffset = (BYTE*)pCode - &pThunk->callJmp[5];
+ *(INT32 *)(&pNewValue[1]) = (INT32) pcRelOffset;
+
+ _ASSERTE(IS_ALIGNED(pThunk, sizeof(INT64)));
+ if (EnsureWritableExecutablePagesNoThrow(pThunk, sizeof(INT64)))
+ FastInterlockCompareExchangeLong((INT64*)pThunk, newValue, oldValue);
+
+ FlushInstructionCache(GetCurrentProcess(), pThunk, 8);
+ }
+ }
+
+ return PVOID(pCode);
+}
+
+
+#ifdef FEATURE_READYTORUN
+
+//
+// Allocation of dynamic helpers
+//
+
+#define DYNAMIC_HELPER_ALIGNMENT sizeof(TADDR)
+
+#define BEGIN_DYNAMIC_HELPER_EMIT(size) \
+ SIZE_T cb = size; \
+ SIZE_T cbAligned = ALIGN_UP(cb, DYNAMIC_HELPER_ALIGNMENT); \
+ BYTE * pStart = (BYTE *)(void *)pAllocator->GetDynamicHelpersHeap()->AllocAlignedMem(cbAligned, DYNAMIC_HELPER_ALIGNMENT); \
+ BYTE * p = pStart;
+
+#define END_DYNAMIC_HELPER_EMIT() \
+ _ASSERTE(pStart + cb == p); \
+ while (p < pStart + cbAligned) *p++ = X86_INSTR_INT3; \
+ ClrFlushInstructionCache(pStart, cbAligned); \
+ return (PCODE)pStart
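+
+// BEGIN_DYNAMIC_HELPER_EMIT reserves an aligned block of 'size' bytes from the loader allocator's
+// dynamic helpers heap and leaves 'p' pointing at its start; END_DYNAMIC_HELPER_EMIT asserts that
+// exactly 'size' bytes were emitted, pads the rest of the aligned block with int 3, flushes the
+// instruction cache and returns the start of the block.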
+
+PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, PCODE target)
+{
+ STANDARD_VM_CONTRACT;
+
+ BEGIN_DYNAMIC_HELPER_EMIT(10);
+
+ *p++ = 0xB9; // mov ecx, XXXXXX
+ *(INT32 *)p = (INT32)arg;
+ p += 4;
+
+ *p++ = X86_INSTR_JMP_REL32; // jmp rel32
+ *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target);
+ p += 4;
+
+ END_DYNAMIC_HELPER_EMIT();
+}
+
+void DynamicHelpers::EmitHelperWithArg(BYTE*& p, LoaderAllocator * pAllocator, TADDR arg, PCODE target)
+{
+ CONTRACTL
+ {
+ GC_NOTRIGGER;
+ PRECONDITION(p != NULL && target != NULL);
+ }
+ CONTRACTL_END;
+
+ // Move an argument into the second argument register and jump to a target function.
+
+ *p++ = 0xBA; // mov edx, XXXXXX
+ *(INT32 *)p = (INT32)arg;
+ p += 4;
+
+ *p++ = X86_INSTR_JMP_REL32; // jmp rel32
+ *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target);
+ p += 4;
+}
+
+PCODE DynamicHelpers::CreateHelperWithArg(LoaderAllocator * pAllocator, TADDR arg, PCODE target)
+{
+ BEGIN_DYNAMIC_HELPER_EMIT(10);
+
+ EmitHelperWithArg(p, pAllocator, arg, target);
+
+ END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, TADDR arg2, PCODE target)
+{
+ BEGIN_DYNAMIC_HELPER_EMIT(15);
+
+ *p++ = 0xB9; // mov ecx, XXXXXX
+ *(INT32 *)p = (INT32)arg;
+ p += 4;
+
+ *p++ = 0xBA; // mov edx, XXXXXX
+ *(INT32 *)p = (INT32)arg2;
+ p += 4;
+
+ *p++ = X86_INSTR_JMP_REL32; // jmp rel32
+ *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target);
+ p += 4;
+
+ END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateHelperArgMove(LoaderAllocator * pAllocator, TADDR arg, PCODE target)
+{
+ BEGIN_DYNAMIC_HELPER_EMIT(12);
+
+ *(UINT16 *)p = 0xD18B; // mov edx, ecx
+ p += 2;
+
+ *p++ = 0xB9; // mov ecx, XXXXXX
+ *(INT32 *)p = (INT32)arg;
+ p += 4;
+
+ *p++ = X86_INSTR_JMP_REL32; // jmp rel32
+ *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target);
+ p += 4;
+
+ END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateReturn(LoaderAllocator * pAllocator)
+{
+ BEGIN_DYNAMIC_HELPER_EMIT(1);
+
+ *p++ = 0xC3; // ret
+
+ END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateReturnConst(LoaderAllocator * pAllocator, TADDR arg)
+{
+ BEGIN_DYNAMIC_HELPER_EMIT(6);
+
+ *p++ = 0xB8; // mov eax, XXXXXX
+ *(INT32 *)p = (INT32)arg;
+ p += 4;
+
+ *p++ = 0xC3; // ret
+
+ END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateReturnIndirConst(LoaderAllocator * pAllocator, TADDR arg, INT8 offset)
+{
+ BEGIN_DYNAMIC_HELPER_EMIT((offset != 0) ? 9 : 6);
+
+ *p++ = 0xA1; // mov eax, [XXXXXX]
+ *(INT32 *)p = (INT32)arg;
+ p += 4;
+
+ if (offset != 0)
+ {
+ // add eax, <offset>
+ *p++ = 0x83;
+ *p++ = 0xC0;
+ *p++ = offset;
+ }
+
+ *p++ = 0xC3; // ret
+
+ END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateHelperWithTwoArgs(LoaderAllocator * pAllocator, TADDR arg, PCODE target)
+{
+ BEGIN_DYNAMIC_HELPER_EMIT(12);
+
+ // pop eax
+ *p++ = 0x58;
+
+ // push arg
+ *p++ = 0x68;
+ *(INT32 *)p = arg;
+ p += 4;
+
+ // push eax
+ *p++ = 0x50;
+
+ *p++ = X86_INSTR_JMP_REL32; // jmp rel32
+ *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target);
+ p += 4;
+
+ END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateHelperWithTwoArgs(LoaderAllocator * pAllocator, TADDR arg, TADDR arg2, PCODE target)
+{
+ BEGIN_DYNAMIC_HELPER_EMIT(17);
+
+ // pop eax
+ *p++ = 0x58;
+
+ // push arg
+ *p++ = 0x68;
+ *(INT32 *)p = arg;
+ p += 4;
+
+ // push arg2
+ *p++ = 0x68;
+ *(INT32 *)p = arg2;
+ p += 4;
+
+ // push eax
+ *p++ = 0x50;
+
+ *p++ = X86_INSTR_JMP_REL32; // jmp rel32
+ *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target);
+ p += 4;
+
+ END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateDictionaryLookupHelper(LoaderAllocator * pAllocator, CORINFO_RUNTIME_LOOKUP * pLookup, DWORD dictionaryIndexAndSlot, Module * pModule)
+{
+ STANDARD_VM_CONTRACT;
+
+ PCODE helperAddress = (pLookup->helper == CORINFO_HELP_RUNTIMEHANDLE_METHOD ?
+ GetEEFuncEntryPoint(JIT_GenericHandleMethodWithSlotAndModule) :
+ GetEEFuncEntryPoint(JIT_GenericHandleClassWithSlotAndModule));
+
+ GenericHandleArgs * pArgs = (GenericHandleArgs *)(void *)pAllocator->GetDynamicHelpersHeap()->AllocAlignedMem(sizeof(GenericHandleArgs), DYNAMIC_HELPER_ALIGNMENT);
+ pArgs->dictionaryIndexAndSlot = dictionaryIndexAndSlot;
+ pArgs->signature = pLookup->signature;
+ pArgs->module = (CORINFO_MODULE_HANDLE)pModule;
+
+ // It's available only via the run-time helper function
+ if (pLookup->indirections == CORINFO_USEHELPER)
+ {
+ BEGIN_DYNAMIC_HELPER_EMIT(10);
+
+ // ecx contains the generic context parameter
+ // mov edx,pArgs
+ // jmp helperAddress
+ EmitHelperWithArg(p, pAllocator, (TADDR)pArgs, helperAddress);
+
+ END_DYNAMIC_HELPER_EMIT();
+ }
+ else
+ {
+ int indirectionsSize = 0;
+ for (WORD i = 0; i < pLookup->indirections; i++)
+ indirectionsSize += (pLookup->offsets[i] >= 0x80 ? 6 : 3);
+
+ int codeSize = indirectionsSize + (pLookup->testForNull ? 21 : 3);
+
+ BEGIN_DYNAMIC_HELPER_EMIT(codeSize);
+
+ if (pLookup->testForNull)
+ {
+ // ecx contains the generic context parameter. Save a copy of it in the eax register
+ // mov eax,ecx
+ *(UINT16*)p = 0xc889; p += 2;
+ }
+
+ for (WORD i = 0; i < pLookup->indirections; i++)
+ {
+ // mov ecx, dword ptr [ecx+offset]
+ if (pLookup->offsets[i] >= 0x80)
+ {
+ *(UINT16*)p = 0x898b; p += 2;
+ *(UINT32*)p = (UINT32)pLookup->offsets[i]; p += 4;
+ }
+ else
+ {
+ *(UINT16*)p = 0x498b; p += 2;
+ *p++ = (BYTE)pLookup->offsets[i];
+ }
+ }
+
+ // No null test required
+ if (!pLookup->testForNull)
+ {
+ // No fixups needed for R2R
+
+ // mov eax,ecx
+ *(UINT16*)p = 0xc889; p += 2;
+ *p++ = 0xC3; // ret
+ }
+ else
+ {
+ // ecx contains the value of the dictionary slot entry
+
+ _ASSERTE(pLookup->indirections != 0);
+
+ // test ecx,ecx
+ *(UINT16*)p = 0xc985; p += 2;
+
+ // je 'HELPER_CALL' (a jump of 3 bytes)
+ *(UINT16*)p = 0x0374; p += 2;
+
+ // mov eax,ecx
+ *(UINT16*)p = 0xc889; p += 2;
+ *p++ = 0xC3; // ret
+
+ // 'HELPER_CALL'
+ {
+ // Put the generic context back into ecx (it was previously saved in eax)
+ // mov ecx,eax
+ *(UINT16*)p = 0xc189; p += 2;
+
+ // mov edx,pArgs
+ // jmp helperAddress
+ EmitHelperWithArg(p, pAllocator, (TADDR)pArgs, helperAddress);
+ }
+ }
+
+ END_DYNAMIC_HELPER_EMIT();
+ }
+}
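+
+// For reference, the stub emitted above behaves roughly like this sketch (illustration only;
+// ecx holds the generic context on entry, and pArgs/helperAddress are baked into the code):
+//
+//   lookup(ctx):
+//       saved = ctx                                   // only when testForNull
+//       for each of the pLookup->indirections offsets:
+//           ctx = *(ctx + offset)
+//       if (!testForNull || ctx != 0) return ctx
+//       ctx = saved; edx = pArgs; jmp helperAddress   // tail-call the runtime helper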
+
+#endif // FEATURE_READYTORUN
+
+
+#endif // DACCESS_COMPILE
diff --git a/src/vm/i386/excepcpu.h b/src/vm/i386/excepcpu.h
new file mode 100644
index 0000000000..3f2f0810a7
--- /dev/null
+++ b/src/vm/i386/excepcpu.h
@@ -0,0 +1,87 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+//
+// EXCEPX86.H -
+//
+// This header file is optionally included from Excep.h if the target platform is x86
+//
+
+
+#ifndef __excepx86_h__
+#define __excepx86_h__
+
+#include "corerror.h" // HResults for the COM+ Runtime
+
+#include "../dlls/mscorrc/resource.h"
+
+#define THROW_CONTROL_FOR_THREAD_FUNCTION ThrowControlForThread
+
+#define STATUS_CLR_GCCOVER_CODE STATUS_PRIVILEGED_INSTRUCTION
+
+class Thread;
+
+#if defined(_MSC_VER)
+#pragma warning(disable:4733) // Inline asm assigning to `FS:0` : handler not registered as safe handler
+ // Actually, the handler getting set is properly registered
+#endif
+
+#define INSTALL_EXCEPTION_HANDLING_RECORD(record) \
+ { \
+ PEXCEPTION_REGISTRATION_RECORD __record = (record); \
+ _ASSERTE(__record < GetCurrentSEHRecord()); \
+ __record->Next = (PEXCEPTION_REGISTRATION_RECORD)__readfsdword(0); \
+ __writefsdword(0, (DWORD)__record); \
+ }
+
+//
+// Note: this only pops a handler from the top of the stack. It will not remove a record from the middle of the
+// chain, and I can assure you that you don't want to do that anyway.
+//
+#define UNINSTALL_EXCEPTION_HANDLING_RECORD(record) \
+ { \
+ PEXCEPTION_REGISTRATION_RECORD __record = (record); \
+ _ASSERTE(__record == GetCurrentSEHRecord()); \
+ __writefsdword(0, (DWORD)__record->Next); \
+ }
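+
+// Illustrative usage (a sketch, not taken from any particular caller; SomeHandler is a hypothetical
+// handler routine). The record must live on the stack of the installing function, below the current
+// top of the SEH chain, and install/uninstall must be strictly nested:
+//
+//   EXCEPTION_REGISTRATION_RECORD record;
+//   record.Handler = (PEXCEPTION_ROUTINE)SomeHandler;
+//   INSTALL_EXCEPTION_HANDLING_RECORD(&record);
+//   ... code that may raise an SEH exception ...
+//   UNINSTALL_EXCEPTION_HANDLING_RECORD(&record);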
+
+// stackOverwriteBarrier is used to detect overwriting of the stack, which would mess up handler registration
+#if defined(_DEBUG)
+#define DECLARE_CPFH_EH_RECORD(pCurThread) \
+ FrameHandlerExRecordWithBarrier *___pExRecordWithBarrier = (FrameHandlerExRecordWithBarrier *)_alloca(sizeof(FrameHandlerExRecordWithBarrier)); \
+ for (int ___i =0; ___i < STACK_OVERWRITE_BARRIER_SIZE; ___i++) \
+ ___pExRecordWithBarrier->m_StackOverwriteBarrier[___i] = STACK_OVERWRITE_BARRIER_VALUE; \
+ FrameHandlerExRecord *___pExRecord = &(___pExRecordWithBarrier->m_ExRecord); \
+ ___pExRecord->m_ExReg.Handler = (PEXCEPTION_ROUTINE)COMPlusFrameHandler; \
+ ___pExRecord->m_pEntryFrame = (pCurThread)->GetFrame();
+
+#else
+#define DECLARE_CPFH_EH_RECORD(pCurThread) \
+ FrameHandlerExRecord *___pExRecord = (FrameHandlerExRecord *)_alloca(sizeof(FrameHandlerExRecord)); \
+ ___pExRecord->m_ExReg.Handler = (PEXCEPTION_ROUTINE)COMPlusFrameHandler; \
+ ___pExRecord->m_pEntryFrame = (pCurThread)->GetFrame();
+
+#endif
+
+//
+// Retrieves the redirected CONTEXT* from the stack frame of one of the
+// RedirectedHandledJITCaseForXXX_Stub's.
+//
+PTR_CONTEXT GetCONTEXTFromRedirectedStubStackFrame(CONTEXT * pContext);
+
+PEXCEPTION_REGISTRATION_RECORD GetCurrentSEHRecord();
+PEXCEPTION_REGISTRATION_RECORD GetFirstCOMPlusSEHRecord(Thread*);
+
+// Determine the address of the instruction that made the current call.
+inline
+PCODE GetAdjustedCallAddress(PCODE returnAddress)
+{
+ LIMITED_METHOD_CONTRACT;
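+ // A "call rel32" instruction is 5 bytes on x86, so backing up 5 bytes from the return address
+ // lands on the call instruction that made the current call.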
+ return returnAddress - 5;
+}
+
+BOOL AdjustContextForVirtualStub(EXCEPTION_RECORD *pExceptionRecord, CONTEXT *pContext);
+
+#endif // __excepx86_h__
diff --git a/src/vm/i386/excepx86.cpp b/src/vm/i386/excepx86.cpp
new file mode 100644
index 0000000000..27c923b749
--- /dev/null
+++ b/src/vm/i386/excepx86.cpp
@@ -0,0 +1,3734 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+//
+
+/* EXCEP.CPP:
+ *
+ */
+#include "common.h"
+
+#include "frames.h"
+#include "excep.h"
+#include "object.h"
+#include "field.h"
+#include "dbginterface.h"
+#include "cgensys.h"
+#include "comutilnative.h"
+#include "sigformat.h"
+#include "siginfo.hpp"
+#include "gc.h"
+#include "eedbginterfaceimpl.h" //so we can clearexception in COMPlusThrow
+#include "perfcounters.h"
+#include "eventtrace.h"
+#include "eetoprofinterfacewrapper.inl"
+#include "eedbginterfaceimpl.inl"
+#include "dllimportcallback.h"
+#include "threads.h"
+#ifdef FEATURE_REMOTING
+#include "appdomainhelper.h"
+#endif
+#include "eeconfig.h"
+#include "vars.hpp"
+#include "generics.h"
+#include "securityprincipal.h"
+
+#include "asmconstants.h"
+#include "virtualcallstub.h"
+
+MethodDesc * GetUserMethodForILStub(Thread * pThread, UINT_PTR uStubSP, MethodDesc * pILStubMD, Frame ** ppFrameOut);
+
+#if !defined(DACCESS_COMPILE)
+
+#define FORMAT_MESSAGE_BUFFER_LENGTH 1024
+
+BOOL ComPlusFrameSEH(EXCEPTION_REGISTRATION_RECORD*);
+PEXCEPTION_REGISTRATION_RECORD GetPrevSEHRecord(EXCEPTION_REGISTRATION_RECORD*);
+
+extern "C" {
+// in asmhelpers.asm:
+VOID STDCALL ResumeAtJitEHHelper(EHContext *pContext);
+int STDCALL CallJitEHFilterHelper(size_t *pShadowSP, EHContext *pContext);
+VOID STDCALL CallJitEHFinallyHelper(size_t *pShadowSP, EHContext *pContext);
+
+BOOL CallRtlUnwind(EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame,
+ void *callback,
+ EXCEPTION_RECORD *pExceptionRecord,
+ void *retval);
+
+BOOL CallRtlUnwindSafe(EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame,
+ void *callback,
+ EXCEPTION_RECORD *pExceptionRecord,
+ void *retval);
+}
+
+static inline BOOL
+CPFH_ShouldUnwindStack(const EXCEPTION_RECORD * pCER) {
+
+ LIMITED_METHOD_CONTRACT;
+
+ _ASSERTE(pCER != NULL);
+
+ // We can only unwind those exceptions whose context/record we don't need for a
+ // rethrow: COM+ exceptions and stack overflow. For all the others, we
+ // need to keep the context around for a rethrow, which means they can't
+ // be unwound.
+ if (IsComPlusException(pCER) || pCER->ExceptionCode == STATUS_STACK_OVERFLOW)
+ return TRUE;
+ else
+ return FALSE;
+}
+
+static inline BOOL IsComPlusNestedExceptionRecord(EXCEPTION_REGISTRATION_RECORD* pEHR)
+{
+ LIMITED_METHOD_CONTRACT;
+ if (pEHR->Handler == (PEXCEPTION_ROUTINE)COMPlusNestedExceptionHandler)
+ return TRUE;
+ return FALSE;
+}
+
+EXCEPTION_REGISTRATION_RECORD *TryFindNestedEstablisherFrame(EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame)
+{
+ LIMITED_METHOD_CONTRACT;
+ while (pEstablisherFrame->Handler != (PEXCEPTION_ROUTINE)COMPlusNestedExceptionHandler) {
+ pEstablisherFrame = pEstablisherFrame->Next;
+ if (pEstablisherFrame == EXCEPTION_CHAIN_END) return 0;
+ }
+ return pEstablisherFrame;
+}
+
+#ifdef _DEBUG
+// Stores the last handler we went to, so that if we didn't get an endcatch and the
+// stack is corrupted, we can figure out who did it.
+static MethodDesc *gLastResumedExceptionFunc = NULL;
+static DWORD gLastResumedExceptionHandler = 0;
+#endif
+
+//---------------------------------------------------------------------
+// void RtlUnwindCallback()
+// call back function after global unwind, rtlunwind calls this function
+//---------------------------------------------------------------------
+static void RtlUnwindCallback()
+{
+ LIMITED_METHOD_CONTRACT;
+ _ASSERTE(!"Should never get here");
+}
+
+BOOL NExportSEH(EXCEPTION_REGISTRATION_RECORD* pEHR)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ if ((LPVOID)pEHR->Handler == (LPVOID)UMThunkPrestubHandler)
+ {
+ return TRUE;
+ }
+ return FALSE;
+}
+
+BOOL FastNExportSEH(EXCEPTION_REGISTRATION_RECORD* pEHR)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ if ((LPVOID)pEHR->Handler == (LPVOID)FastNExportExceptHandler)
+ return TRUE;
+ return FALSE;
+}
+
+BOOL ReverseCOMSEH(EXCEPTION_REGISTRATION_RECORD* pEHR)
+{
+ LIMITED_METHOD_CONTRACT;
+
+#ifdef FEATURE_COMINTEROP
+ if ((LPVOID)pEHR->Handler == (LPVOID)COMPlusFrameHandlerRevCom)
+ return TRUE;
+#endif // FEATURE_COMINTEROP
+ return FALSE;
+}
+
+
+//
+// Returns true if the given SEH handler is one of our SEH handlers that is responsible for managing exceptions in
+// regions of managed code.
+//
+BOOL IsUnmanagedToManagedSEHHandler(EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame)
+{
+ WRAPPER_NO_CONTRACT;
+
+ //
+ // ComPlusFrameSEH() is for COMPlusFrameHandler & COMPlusNestedExceptionHandler.
+ // FastNExportSEH() is for FastNExportExceptHandler.
+ // NExportSEH() is for UMThunkPrestubHandler.
+ //
+ return (ComPlusFrameSEH(pEstablisherFrame) || FastNExportSEH(pEstablisherFrame) || NExportSEH(pEstablisherFrame) || ReverseCOMSEH(pEstablisherFrame));
+}
+
+Frame *GetCurrFrame(EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame)
+{
+ Frame *pFrame;
+ WRAPPER_NO_CONTRACT;
+ _ASSERTE(IsUnmanagedToManagedSEHHandler(pEstablisherFrame));
+ if (NExportSEH(pEstablisherFrame))
+ pFrame = ((ComToManagedExRecord *)pEstablisherFrame)->GetCurrFrame();
+ else
+ pFrame = ((FrameHandlerExRecord *)pEstablisherFrame)->GetCurrFrame();
+
+ _ASSERTE(GetThread() == NULL || GetThread()->GetFrame() <= pFrame);
+
+ return pFrame;
+}
+
+EXCEPTION_REGISTRATION_RECORD* GetNextCOMPlusSEHRecord(EXCEPTION_REGISTRATION_RECORD* pRec) {
+ WRAPPER_NO_CONTRACT;
+ if (pRec == EXCEPTION_CHAIN_END)
+ return EXCEPTION_CHAIN_END;
+
+ do {
+ _ASSERTE(pRec != 0);
+ pRec = pRec->Next;
+ } while (pRec != EXCEPTION_CHAIN_END && !IsUnmanagedToManagedSEHHandler(pRec));
+
+ _ASSERTE(pRec == EXCEPTION_CHAIN_END || IsUnmanagedToManagedSEHHandler(pRec));
+ return pRec;
+}
+
+
+/*
+ * GetClrSEHRecordServicingStackPointer
+ *
+ * This function searches all the Frame SEH records and finds the one that is
+ * currently signed up to do all exception handling for the given stack pointer
+ * on the given thread.
+ *
+ * Parameters:
+ * pThread - The thread to search on.
+ * pStackPointer - The stack location that we are finding the Frame SEH Record for.
+ *
+ * Returns
+ * A pointer to the SEH record, or EXCEPTION_CHAIN_END if none was found.
+ *
+ */
+
+PEXCEPTION_REGISTRATION_RECORD
+GetClrSEHRecordServicingStackPointer(Thread *pThread,
+ void *pStackPointer)
+{
+ ThreadExceptionState* pExState = pThread->GetExceptionState();
+
+ //
+ // We can only do this if there is a context in the pExInfo. There are cases (most notably the
+ // EEPolicy::HandleFatalError case) where we don't have that. In these cases we will return
+ // no enclosing handler since we cannot accurately determine the FS:0 entry which services
+ // this stack address.
+ //
+ // The side effect of this is that for these cases, the debugger cannot intercept
+ // the exception
+ //
+ CONTEXT* pContextRecord = pExState->GetContextRecord();
+ if (pContextRecord == NULL)
+ {
+ return EXCEPTION_CHAIN_END;
+ }
+
+ void *exceptionSP = dac_cast<PTR_VOID>(GetSP(pContextRecord));
+
+
+ //
+ // Now set the establishing frame. What this means in English is that we need to find
+ // the fs:0 entry that handles exceptions for the place on the stack given in stackPointer.
+ //
+ PEXCEPTION_REGISTRATION_RECORD pSEHRecord = GetFirstCOMPlusSEHRecord(pThread);
+
+ while (pSEHRecord != EXCEPTION_CHAIN_END)
+ {
+
+ //
+ // Skip any SEHRecord which is not a CLR record or was pushed after the exception
+ // on this thread occurred.
+ //
+ if (IsUnmanagedToManagedSEHHandler(pSEHRecord) && (exceptionSP <= (void *)pSEHRecord))
+ {
+ Frame *pFrame = GetCurrFrame(pSEHRecord);
+ //
+ // Arcane knowledge here. All Frame records are stored on the stack by the runtime
+ // in ever decreasing address space. So, we merely have to search back until
+ // we find the first frame record with a higher stack value to find the
+ // establishing frame for the given stack address.
+ //
+ if (((void *)pFrame) >= pStackPointer)
+ {
+ break;
+ }
+
+ }
+
+ pSEHRecord = GetNextCOMPlusSEHRecord(pSEHRecord);
+ }
+
+ return pSEHRecord;
+}
+
+#ifdef _DEBUG
+// We've determined during a stack walk that managed code is transitioning to unmanaged (EE) code. Check that the
+// state of the EH chain is correct.
+//
+// For x86, check that we do INSTALL_COMPLUS_EXCEPTION_HANDLER before calling managed code. This check should be
+// done for all managed code sites, not just transitions. But this will catch most problem cases.
+void VerifyValidTransitionFromManagedCode(Thread *pThread, CrawlFrame *pCF)
+{
+ WRAPPER_NO_CONTRACT;
+
+ _ASSERTE(ExecutionManager::IsManagedCode(GetControlPC(pCF->GetRegisterSet())));
+
+ // Cannot get to the TEB of other threads. So ignore them.
+ if (pThread != GetThread())
+ {
+ return;
+ }
+
+ // Find the EH record guarding the current region of managed code, based on the CrawlFrame passed in.
+ PEXCEPTION_REGISTRATION_RECORD pEHR = GetCurrentSEHRecord();
+
+ while ((pEHR != EXCEPTION_CHAIN_END) && ((ULONG_PTR)pEHR < GetRegdisplaySP(pCF->GetRegisterSet())))
+ {
+ pEHR = pEHR->Next;
+ }
+
+ // VerifyValidTransitionFromManagedCode can be called before the CrawlFrame's MethodDesc is initialized.
+ // Fix that if necessary for the consistency check.
+ MethodDesc * pFunction = pCF->GetFunction();
+ if ((!IsUnmanagedToManagedSEHHandler(pEHR)) && // Will the assert fire? If not, don't waste our time.
+ (pFunction == NULL))
+ {
+ _ASSERTE(pCF->GetRegisterSet());
+ PCODE ip = GetControlPC(pCF->GetRegisterSet());
+ pFunction = ExecutionManager::GetCodeMethodDesc(ip);
+ _ASSERTE(pFunction);
+ }
+
+ // Great, we've got the EH record that's next up the stack from the current SP (which is in managed code). That
+ // had better be a record for one of our handlers responsible for handling exceptions in managed code. If it's
+ // not, then someone made it into managed code without setting up one of our EH handlers, and that's really
+ // bad.
+ CONSISTENCY_CHECK_MSGF(IsUnmanagedToManagedSEHHandler(pEHR),
+ ("Invalid transition into managed code!\n\n"
+ "We're walking this thread's stack and we've reached a managed frame at Esp=0x%p. "
+ "(The method is %s::%s) "
+ "The very next FS:0 record (0x%p) up from this point on the stack should be one of "
+ "our 'unmanaged to managed SEH handlers', but it's not... it's something else, and "
+ "that's very bad. It indicates that someone managed to call into managed code without "
+ "setting up the proper exception handling.\n\n"
+ "Get a good unmanaged stack trace for this thread. All FS:0 records are on the stack, "
+ "so you can see who installed the last handler. Somewhere between that function and "
+ "where the thread is now is where the bad transition occurred.\n\n"
+ "A little extra info: FS:0 = 0x%p, pEHR->Handler = 0x%p\n",
+ GetRegdisplaySP(pCF->GetRegisterSet()),
+ pFunction->m_pszDebugClassName,
+ pFunction->m_pszDebugMethodName,
+ pEHR,
+ GetCurrentSEHRecord(),
+ pEHR->Handler));
+}
+
+#endif
+
+//================================================================================
+
+// There are some things that should never be true when handling an
+// exception. This function checks for them. Will assert or trap
+// if it finds an error.
+static inline void
+CPFH_VerifyThreadIsInValidState(Thread* pThread, DWORD exceptionCode, EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame) {
+ WRAPPER_NO_CONTRACT;
+
+ if ( exceptionCode == STATUS_BREAKPOINT
+ || exceptionCode == STATUS_SINGLE_STEP) {
+ return;
+ }
+
+#ifdef _DEBUG
+ // check for overwriting of stack
+ CheckStackBarrier(pEstablisherFrame);
+ // trigger check for bad fs:0 chain
+ GetCurrentSEHRecord();
+#endif
+
+ if (!g_fEEShutDown) {
+ // An exception on the GC thread, or while holding the thread store lock, will likely lock out the entire process.
+ if (::IsGCThread() || ThreadStore::HoldingThreadStore())
+ {
+ _ASSERTE(!"Exception during garbage collection or while holding thread store");
+ EEPOLICY_HANDLE_FATAL_ERROR(COR_E_EXECUTIONENGINE);
+ }
+ }
+}
+
+
+#ifdef FEATURE_HIJACK
+void
+CPFH_AdjustContextForThreadSuspensionRace(CONTEXT *pContext, Thread *pThread)
+{
+ WRAPPER_NO_CONTRACT;
+
+ PCODE f_IP = GetIP(pContext);
+ if (Thread::IsAddrOfRedirectFunc((PVOID)f_IP)) {
+
+ // This is a very rare case where we tried to redirect a thread that was
+ // just about to dispatch an exception, and our update of EIP took, but
+ // the thread continued dispatching the exception.
+ //
+ // If this should happen (very rare) then we fix it up here.
+ //
+ _ASSERTE(pThread->GetSavedRedirectContext());
+ SetIP(pContext, GetIP(pThread->GetSavedRedirectContext()));
+ STRESS_LOG1(LF_EH, LL_INFO100, "CPFH_AdjustContextForThreadSuspensionRace: Case 1 setting IP = %x\n", pContext->Eip);
+ }
+
+ if (f_IP == GetEEFuncEntryPoint(THROW_CONTROL_FOR_THREAD_FUNCTION)) {
+
+ // This is a very rare case where we tried to redirect a thread that was
+ // just about to dispatch an exception, and our update of EIP took, but
+ // the thread continued dispatching the exception.
+ //
+ // If this should happen (very rare) then we fix it up here.
+ //
+ SetIP(pContext, GetIP(pThread->m_OSContext));
+ STRESS_LOG1(LF_EH, LL_INFO100, "CPFH_AdjustContextForThreadSuspensionRace: Case 2 setting IP = %x\n", pContext->Eip);
+ }
+
+// We have another even rarer race condition:
+// - A) On thread A, Debugger puts an int 3 in the code stream at address X
+// - A) We hit it and then begin dispatching an exception. The eip will be X + 1 (int3 is special)
+// - B) Meanwhile, thread B redirects A's eip to Y. (Although A is really somewhere
+// in the kernel, it looks like it's still in user code, so it can fall under the
+// HandledJitCase and can be redirected)
+// - A) The OS, trying to be nice, expects we have a breakpoint exception at X+1,
+// but does -1 on the address since it knows int3 will leave the eip +1.
+// So the context structure it will pass to the Handler is ideally (X+1)-1 = X
+//
+// ** Here's the race: Since thread B redirected A, the eip is actually Y (not X+1),
+// but the kernel still touches it up to Y-1. So there's a window between when we hit a
+// bp and when the handler gets called that this can happen.
+// This causes an unhandled BP (since the debugger doesn't recognize the bp at Y-1)
+//
+// So what to do: If we land at Y-1 (ie, if f_IP+1 is the addr of a Redirected Func),
+// then restore the EIP back to X. This will skip the redirection.
+// Fortunately, this only occurs in cases where it's ok
+// to skip. The debugger will recognize the patch and handle it.
+
+ if (Thread::IsAddrOfRedirectFunc((PVOID)(f_IP + 1))) {
+ _ASSERTE(pThread->GetSavedRedirectContext());
+ SetIP(pContext, GetIP(pThread->GetSavedRedirectContext()) - 1);
+ STRESS_LOG1(LF_EH, LL_INFO100, "CPFH_AdjustContextForThreadSuspensionRace: Case 3 setting IP = %x\n", pContext->Eip);
+ }
+
+ if (f_IP + 1 == GetEEFuncEntryPoint(THROW_CONTROL_FOR_THREAD_FUNCTION)) {
+ SetIP(pContext, GetIP(pThread->m_OSContext) - 1);
+ STRESS_LOG1(LF_EH, LL_INFO100, "CPFH_AdjustContextForThreadSuspensionRace: Case 4 setting IP = %x\n", pContext->Eip);
+ }
+}
+#endif // FEATURE_HIJACK
+
+
+// We want to leave true null reference exceptions alone. But if we are
+// trashing memory, we don't want the application to swallow it. The 0x100
+// below will give us false positives for debugging, if the app is accessing
+// a field more than 256 bytes down an object, where the reference is null.
+//
+// Removed use of the IgnoreUnmanagedExceptions reg key...simply return false now.
+//
+static inline BOOL
+CPFH_ShouldIgnoreException(EXCEPTION_RECORD *pExceptionRecord) {
+ LIMITED_METHOD_CONTRACT;
+ return FALSE;
+}
+
+static inline void
+CPFH_UpdatePerformanceCounters() {
+ WRAPPER_NO_CONTRACT;
+ COUNTER_ONLY(GetPerfCounters().m_Excep.cThrown++);
+}
+
+
+//******************************************************************************
+EXCEPTION_DISPOSITION COMPlusAfterUnwind(
+ EXCEPTION_RECORD *pExceptionRecord,
+ EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame,
+ ThrowCallbackType& tct)
+{
+ WRAPPER_NO_CONTRACT;
+
+ // Note: we've completed the unwind pass up to the establisher frame, and we're headed off to finish our
+ // cleanup and end up back in jitted code. Any more FS0 handlers pushed from this point on out will _not_ be
+ // unwound. We go ahead and assert right here that indeed there are no handlers below the establisher frame
+ // before we go any further.
+ _ASSERTE(pEstablisherFrame == GetCurrentSEHRecord());
+
+ Thread* pThread = GetThread();
+
+ _ASSERTE(tct.pCurrentExceptionRecord == pEstablisherFrame);
+
+ NestedHandlerExRecord nestedHandlerExRecord;
+ nestedHandlerExRecord.Init((PEXCEPTION_ROUTINE)COMPlusNestedExceptionHandler, GetCurrFrame(pEstablisherFrame));
+
+ // ... and now, put the nested record back on.
+ INSTALL_EXCEPTION_HANDLING_RECORD(&(nestedHandlerExRecord.m_ExReg));
+
+ // We entered COMPlusAfterUnwind in PREEMP, but we need to be in COOP from here on out
+ GCX_COOP_NO_DTOR();
+
+ tct.bIsUnwind = TRUE;
+ tct.pProfilerNotify = NULL;
+
+ LOG((LF_EH, LL_INFO100, "COMPlusFrameHandler: unwinding\n"));
+
+ tct.bUnwindStack = CPFH_ShouldUnwindStack(pExceptionRecord);
+
+ LOG((LF_EH, LL_INFO1000, "COMPlusAfterUnwind: going to: pFunc:%#X, pStack:%#X\n",
+ tct.pFunc, tct.pStack));
+
+ // TODO: UnwindFrames ends up calling into StackWalkFrames which is SO_INTOLERANT
+ // as is UnwindFrames, etc... Should we make COMPlusAfterUnwind SO_INTOLERANT???
+ ANNOTATION_VIOLATION(SOToleranceViolation);
+
+ UnwindFrames(pThread, &tct);
+
+#ifdef DEBUGGING_SUPPORTED
+ ExInfo* pExInfo = pThread->GetExceptionState()->GetCurrentExceptionTracker();
+ if (pExInfo->m_ValidInterceptionContext)
+ {
+ // By now we should have all unknown FS:[0] handlers unwound along with the managed Frames until
+ // the interception point. We can now pop nested exception handlers and resume at interception context.
+ EHContext context = pExInfo->m_InterceptionContext;
+ pExInfo->m_InterceptionContext.Init();
+ pExInfo->m_ValidInterceptionContext = FALSE;
+
+ UnwindExceptionTrackerAndResumeInInterceptionFrame(pExInfo, &context);
+ }
+#endif // DEBUGGING_SUPPORTED
+
+ _ASSERTE(!"Should not get here");
+ return ExceptionContinueSearch;
+} // EXCEPTION_DISPOSITION COMPlusAfterUnwind()
+
+#ifdef DEBUGGING_SUPPORTED
+
+//---------------------------------------------------------------------------------------
+//
+// This function is called to intercept an exception and start an unwind.
+//
+// Arguments:
+// pCurrentEstablisherFrame - the exception registration record covering the stack range
+// containing the interception point
+// pExceptionRecord - EXCEPTION_RECORD of the exception being intercepted
+//
+// Return Value:
+// ExceptionContinueSearch if the exception cannot be intercepted
+//
+// Notes:
+// If the exception is intercepted, this function never returns.
+//
+
+EXCEPTION_DISPOSITION ClrDebuggerDoUnwindAndIntercept(EXCEPTION_REGISTRATION_RECORD *pCurrentEstablisherFrame,
+ EXCEPTION_RECORD *pExceptionRecord)
+{
+ WRAPPER_NO_CONTRACT;
+
+ if (!CheckThreadExceptionStateForInterception())
+ {
+ return ExceptionContinueSearch;
+ }
+
+ Thread* pThread = GetThread();
+ ThreadExceptionState* pExState = pThread->GetExceptionState();
+
+ EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame;
+ ThrowCallbackType tct;
+ tct.Init();
+
+ pExState->GetDebuggerState()->GetDebuggerInterceptInfo(&pEstablisherFrame,
+ &(tct.pFunc),
+ &(tct.dHandler),
+ &(tct.pStack),
+ NULL,
+ &(tct.pBottomFrame)
+ );
+
+ //
+ // If the handler that we've selected as the handler for the target frame of the unwind is in fact above the
+ // handler that we're currently executing in, then use the current handler instead. Why? Our handlers for
+ // nested exceptions actually process managed frames that live above them, up to the COMPlusFrameHandler that
+ // pushed the nested handler. If the user selects a frame above the nested handler, then we will have selected
+ // the COMPlusFrameHandler above the current nested handler. But we don't want to ask RtlUnwind to unwind past
+ // the nested handler that we're currently executing in.
+ //
+ if (pEstablisherFrame > pCurrentEstablisherFrame)
+ {
+ // This should only happen if we're in a COMPlusNestedExceptionHandler.
+ _ASSERTE(IsComPlusNestedExceptionRecord(pCurrentEstablisherFrame));
+
+ pEstablisherFrame = pCurrentEstablisherFrame;
+ }
+
+#ifdef _DEBUG
+ tct.pCurrentExceptionRecord = pEstablisherFrame;
+#endif
+
+ LOG((LF_EH|LF_CORDB, LL_INFO100, "ClrDebuggerDoUnwindAndIntercept: Intercepting at %s\n", tct.pFunc->m_pszDebugMethodName));
+ LOG((LF_EH|LF_CORDB, LL_INFO100, "\t\t: pFunc is 0x%X\n", tct.pFunc));
+ LOG((LF_EH|LF_CORDB, LL_INFO100, "\t\t: pStack is 0x%X\n", tct.pStack));
+
+ CallRtlUnwindSafe(pEstablisherFrame, RtlUnwindCallback, pExceptionRecord, 0);
+
+ ExInfo* pExInfo = pThread->GetExceptionState()->GetCurrentExceptionTracker();
+ if (pExInfo->m_ValidInterceptionContext)
+ {
+ // By now we should have all unknown FS:[0] handlers unwound along with the managed Frames until
+ // the interception point. We can now pop nested exception handlers and resume at interception context.
+ GCX_COOP();
+ EHContext context = pExInfo->m_InterceptionContext;
+ pExInfo->m_InterceptionContext.Init();
+ pExInfo->m_ValidInterceptionContext = FALSE;
+
+ UnwindExceptionTrackerAndResumeInInterceptionFrame(pExInfo, &context);
+ }
+
+ // on x86 at least, RtlUnwind always returns
+
+ // Note: we've completed the unwind pass up to the establisher frame, and we're headed off to finish our
+ // cleanup and end up back in jitted code. Any more FS0 handlers pushed from this point on out will _not_ be
+ // unwound.
+ return COMPlusAfterUnwind(pExState->GetExceptionRecord(), pEstablisherFrame, tct);
+} // EXCEPTION_DISPOSITION ClrDebuggerDoUnwindAndIntercept()
+
+#endif // DEBUGGING_SUPPORTED
+
+// This is a wrapper around the assembly routine that invokes RtlUnwind in the OS.
+// When we invoke RtlUnwind, the OS will modify the ExceptionFlags field in the
+// exception record to reflect unwind. Since we call RtlUnwind in the first pass
+// with a valid exception record when we find an exception handler AND because RtlUnwind
+// returns on x86, the OS would have flagged the exception record for unwind.
+//
+// In case the exception is rethrown from the catch/filter-handler AND it's a non-COMPLUS
+// exception, the runtime will use the reference to the saved exception record to reraise
+// the exception, as part of rethrow fixup. Since the OS would have modified the exception record
+// to reflect unwind, this wrapper will "reset" the ExceptionFlags field when RtlUnwind returns.
+// Otherwise, the rethrow will result in a second pass, as opposed to a first, since the ExceptionFlags
+// would indicate an unwind.
+//
+// This rethrow issue does not affect COMPLUS exceptions since we always create a brand new exception
+// record for them in RaiseTheExceptionInternalOnly.
+BOOL CallRtlUnwindSafe(EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame,
+ void *callback,
+ EXCEPTION_RECORD *pExceptionRecord,
+ void *retval)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ // Save the ExceptionFlags value before invoking RtlUnwind.
+ DWORD dwExceptionFlags = pExceptionRecord->ExceptionFlags;
+
+ BOOL fRetVal = CallRtlUnwind(pEstablisherFrame, callback, pExceptionRecord, retval);
+
+ // Reset ExceptionFlags field, if applicable
+ if (pExceptionRecord->ExceptionFlags != dwExceptionFlags)
+ {
+ // We would expect the 32bit OS to have set the unwind flag at this point.
+ _ASSERTE(pExceptionRecord->ExceptionFlags & EXCEPTION_UNWINDING);
+ LOG((LF_EH, LL_INFO100, "CallRtlUnwindSafe: Resetting ExceptionFlags from %lu to %lu\n", pExceptionRecord->ExceptionFlags, dwExceptionFlags));
+ pExceptionRecord->ExceptionFlags = dwExceptionFlags;
+ }
+
+ return fRetVal;
+}
+
+//******************************************************************************
+// The essence of the first pass handler (after we've decided to actually do
+// the first pass handling).
+//******************************************************************************
+inline EXCEPTION_DISPOSITION __cdecl
+CPFH_RealFirstPassHandler( // ExceptionContinueSearch, etc.
+ EXCEPTION_RECORD *pExceptionRecord, // The exception record, with exception type.
+ EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame, // Exception frame on whose behalf this is called.
+ CONTEXT *pContext, // Context from the exception.
+ void *pDispatcherContext, // @todo
+ BOOL bAsynchronousThreadStop, // @todo
+ BOOL fPGCDisabledOnEntry) // @todo
+{
+ // We don't want to use a runtime contract here since this codepath is used during
+ // the processing of a hard SO. Contracts use a significant amount of stack
+ // which we can't afford for those cases.
+ STATIC_CONTRACT_THROWS;
+ STATIC_CONTRACT_GC_TRIGGERS;
+ STATIC_CONTRACT_MODE_COOPERATIVE;
+ STATIC_CONTRACT_SO_TOLERANT;
+
+#ifdef _DEBUG
+ static int breakOnFirstPass = -1;
+
+ if (breakOnFirstPass == -1)
+ breakOnFirstPass = CLRConfig::GetConfigValue(CLRConfig::INTERNAL_BreakOnFirstPass);
+
+ if (breakOnFirstPass != 0)
+ {
+ _ASSERTE(!"First pass exception handler");
+ }
+#endif
+
+ EXCEPTION_DISPOSITION retval;
+ DWORD exceptionCode = pExceptionRecord->ExceptionCode;
+ Thread *pThread = GetThread();
+
+#ifdef _DEBUG
+ static int breakOnSO = -1;
+
+ if (breakOnSO == -1)
+ breakOnSO = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_BreakOnSO);
+
+ if (breakOnSO != 0 && exceptionCode == STATUS_STACK_OVERFLOW)
+ {
+ DebugBreak(); // ASSERTing will overwrite the guard region
+ }
+#endif
+
+ // We always want to be in co-operative mode when we run this function, and whenever we return
+ // from it we want to go to pre-emptive mode because we are returning to the OS.
+ _ASSERTE(pThread->PreemptiveGCDisabled());
+
+ BOOL bPopNestedHandlerExRecord = FALSE;
+ LFH found = LFH_NOT_FOUND; // Result of calling LookForHandler.
+ BOOL bRethrownException = FALSE;
+ BOOL bNestedException = FALSE;
+
+#if defined(USE_FEF)
+ BOOL bPopFaultingExceptionFrame = FALSE;
+ FrameWithCookie<FaultingExceptionFrame> faultingExceptionFrame;
+#endif // USE_FEF
+ ExInfo* pExInfo = &(pThread->GetExceptionState()->m_currentExInfo);
+
+ ThrowCallbackType tct;
+ tct.Init();
+
+ tct.pTopFrame = GetCurrFrame(pEstablisherFrame); // highest frame to search to
+
+#ifdef _DEBUG
+ tct.pCurrentExceptionRecord = pEstablisherFrame;
+ tct.pPrevExceptionRecord = GetPrevSEHRecord(pEstablisherFrame);
+#endif // _DEBUG
+
+ BOOL fIsManagedCode = pContext ? ExecutionManager::IsManagedCode(GetIP(pContext)) : FALSE;
+
+
+ // This establishes a marker so we can determine whether we are processing a nested exception.
+ // We don't want to use the current frame to limit the search, as it could have been unwound by
+ // the time we get to the nested handler (i.e. if we find an exception, unwind to the call point,
+ // resume in the catch and then get another exception), so make the nested handler
+ // have the same boundary as this one. If the nested handler can't find a handler, we won't
+ // end up searching this frame list twice, because the nested handler will set the search
+ // boundary in the thread, and so if we get back to this handler it will have a range that starts
+ // and ends at the same place.
+
+ NestedHandlerExRecord nestedHandlerExRecord;
+ nestedHandlerExRecord.Init((PEXCEPTION_ROUTINE)COMPlusNestedExceptionHandler, GetCurrFrame(pEstablisherFrame));
+
+ INSTALL_EXCEPTION_HANDLING_RECORD(&(nestedHandlerExRecord.m_ExReg));
+ bPopNestedHandlerExRecord = TRUE;
+
+#if defined(USE_FEF)
+ // Note: don't attempt to push a FEF for an exception in managed code if we weren't in cooperative mode when
+ // the exception was received. If preemptive GC was enabled when we received the exception, then it means the
+ // exception was rethrown from unmanaged code (including EE impl), and we shouldn't push a FEF.
+ if (fIsManagedCode &&
+ fPGCDisabledOnEntry &&
+ (pThread->m_pFrame == FRAME_TOP ||
+ pThread->m_pFrame->GetVTablePtr() != FaultingExceptionFrame::GetMethodFrameVPtr() ||
+ (size_t)pThread->m_pFrame > (size_t)pEstablisherFrame))
+ {
+ // Set up the interrupted frame so that a GC during calls to init won't collect the frames.
+ // We only need it for non-COM+ exceptions in managed code when we haven't already
+ // got one on the stack (we will have one already if we have called RtlUnwind, because
+ // the instantiation that called unwind would have installed one).
+ faultingExceptionFrame.InitAndLink(pContext);
+ bPopFaultingExceptionFrame = TRUE;
+ }
+#endif // USE_FEF
+
+ OBJECTREF e;
+ e = pThread->LastThrownObject();
+
+ STRESS_LOG7(LF_EH, LL_INFO10, "CPFH_RealFirstPassHandler: code:%X, LastThrownObject:%p, MT:%pT"
+ ", IP:%p, SP:%p, pContext:%p, pEstablisherFrame:%p\n",
+ exceptionCode, OBJECTREFToObject(e), (e!=0)?e->GetMethodTable():0,
+ pContext ? GetIP(pContext) : 0, pContext ? GetSP(pContext) : 0,
+ pContext, pEstablisherFrame);
+
+#ifdef LOGGING
+ // If it is a complus exception, and there is a thrown object, get its name, for better logging.
+ if (IsComPlusException(pExceptionRecord))
+ {
+ const char * eClsName = "!EXCEPTION_COMPLUS";
+ if (e != 0)
+ {
+ eClsName = e->GetTrueMethodTable()->GetDebugClassName();
+ }
+ LOG((LF_EH, LL_INFO100, "CPFH_RealFirstPassHandler: exception: 0x%08X, class: '%s', IP: 0x%p\n",
+ exceptionCode, eClsName, pContext ? GetIP(pContext) : NULL));
+ }
+#endif
+
+ EXCEPTION_POINTERS exceptionPointers = {pExceptionRecord, pContext};
+
+ STRESS_LOG4(LF_EH, LL_INFO10000, "CPFH_RealFirstPassHandler: setting boundaries: Exinfo: 0x%p, BottomMostHandler:0x%p, SearchBoundary:0x%p, TopFrame:0x%p\n",
+ pExInfo, pExInfo->m_pBottomMostHandler, pExInfo->m_pSearchBoundary, tct.pTopFrame);
+
+ // Here we are trying to decide if we are coming in as:
+ // 1) first handler in a brand new exception
+ // 2) a subsequent handler in an exception
+ // 3) a nested exception
+ // m_pBottomMostHandler is the registration structure (establisher frame) for the most recent (i.e. lowest in
+ // memory) non-nested handler that was installed, and pEstablisherFrame is what the current handler
+ // was registered with.
+ // The OS calls each registered handler in the chain, passing its establisher frame to it.
+ if (pExInfo->m_pBottomMostHandler != NULL && pEstablisherFrame > pExInfo->m_pBottomMostHandler)
+ {
+ STRESS_LOG3(LF_EH, LL_INFO10000, "CPFH_RealFirstPassHandler: detected subsequent handler. ExInfo:0x%p, BottomMost:0x%p SearchBoundary:0x%p\n",
+ pExInfo, pExInfo->m_pBottomMostHandler, pExInfo->m_pSearchBoundary);
+
+ // If the establisher frame of this handler is greater than the bottommost then it must have been
+ // installed earlier and therefore we are case 2
+ if (pThread->GetThrowable() == NULL)
+ {
+ // Bottommost didn't set up a throwable, so the exception is not for us
+ retval = ExceptionContinueSearch;
+ goto exit;
+ }
+
+ // setup search start point
+ tct.pBottomFrame = pExInfo->m_pSearchBoundary;
+
+ if (tct.pTopFrame == tct.pBottomFrame)
+ {
+ // this will happen if our nested handler already searched for us so we don't want
+ // to search again
+ retval = ExceptionContinueSearch;
+ goto exit;
+ }
+ }
+ else
+ { // we are either case 1 or case 3
+#if defined(_DEBUG_IMPL)
+ //@todo: merge frames, context, handlers
+ if (pThread->GetFrame() != FRAME_TOP)
+ pThread->GetFrame()->LogFrameChain(LF_EH, LL_INFO1000);
+#endif // _DEBUG_IMPL
+
+ // If the exception was rethrown, we'll create a new ExInfo, which will represent the rethrown exception.
+ // The original exception is not the rethrown one.
+ if (pExInfo->m_ExceptionFlags.IsRethrown() && pThread->LastThrownObject() != NULL)
+ {
+ pExInfo->m_ExceptionFlags.ResetIsRethrown();
+ bRethrownException = TRUE;
+
+#if defined(USE_FEF)
+ if (bPopFaultingExceptionFrame)
+ {
+ // If we added a FEF, it will refer to the frame at the point of the original exception, which is
+ // already unwound, so we don't want it.
+ // If we rethrew the exception we have already added a helper frame for the rethrow, so we don't
+ // need this one. If we didn't rethrow it (i.e. a rethrow from native), then the topmost frame will
+ // be a transition-to-native frame, in which case we don't need it either.
+ faultingExceptionFrame.Pop();
+ bPopFaultingExceptionFrame = FALSE;
+ }
+#endif
+ }
+
+ // If the establisher frame is less than the bottommost handler, then this is nested because the
+ // establisher frame was installed after the bottommost.
+ if (pEstablisherFrame < pExInfo->m_pBottomMostHandler
+ /* || IsComPlusNestedExceptionRecord(pEstablisherFrame) */ )
+ {
+ bNestedException = TRUE;
+
+ // case 3: this is a nested exception. Need to save and restore the thread info
+ STRESS_LOG3(LF_EH, LL_INFO10000, "CPFH_RealFirstPassHandler: ExInfo:0x%p detected nested exception 0x%p < 0x%p\n",
+ pExInfo, pEstablisherFrame, pExInfo->m_pBottomMostHandler);
+
+ EXCEPTION_REGISTRATION_RECORD* pNestedER = TryFindNestedEstablisherFrame(pEstablisherFrame);
+ ExInfo *pNestedExInfo;
+
+ if (!pNestedER || pNestedER >= pExInfo->m_pBottomMostHandler )
+ {
+ // RARE CASE. We've re-entered the EE from an unmanaged filter.
+ //
+ // OR
+ //
+ // We can be here if we don't find a nested exception handler. This is exemplified using
+ // call chain of scenario 2 explained further below.
+ //
+ // Assuming __try of NativeB throws an exception E1 and it gets caught in ManagedA2, then
+ // bottom-most handler (BMH) is going to be CPFH_A. The catch will trigger an unwind
+ // and invoke __finally in NativeB. Let the __finally throw a new exception E2.
+ //
+ // Assuming ManagedB2 has a catch block to catch E2, when we enter CPFH_B looking for a
+ // handler for E2, our establisher frame will be that of CPFH_B, which will be lower
+ // in the stack than the current BMH (which is CPFH_A). Thus, we will come here, correctly determining
+ // E2 to be a nested exception but not finding a nested exception handler.
+ void *limit = (void *) GetPrevSEHRecord(pExInfo->m_pBottomMostHandler);
+
+ pNestedExInfo = new (nothrow) ExInfo(); // Very rare failure here; need robust allocator.
+ if (pNestedExInfo == NULL)
+ { // if we can't allocate memory, we can't correctly continue.
+ #if defined(_DEBUG)
+ if (CLRConfig::GetConfigValue(CLRConfig::INTERNAL_NestedEhOom))
+ _ASSERTE(!"OOM in callback from unmanaged filter.");
+ #endif // _DEBUG
+
+ EEPOLICY_HANDLE_FATAL_ERROR(COR_E_OUTOFMEMORY);
+ }
+
+
+ pNestedExInfo->m_StackAddress = limit; // Note: this is also the flag that tells us this
+ // ExInfo was stack allocated.
+ }
+ else
+ {
+ pNestedExInfo = &((NestedHandlerExRecord*)pNestedER)->m_handlerInfo;
+ }
+
+ LOG((LF_EH, LL_INFO100, "CPFH_RealFirstPassHandler: PushExInfo() current: 0x%p previous: 0x%p\n",
+ pExInfo->m_StackAddress, pNestedExInfo->m_StackAddress));
+
+ _ASSERTE(pNestedExInfo);
+ pNestedExInfo->m_hThrowable = NULL; // pNestedExInfo may be stack allocated, and as such full of
+ // garbage. m_hThrowable must be sane, so set it to NULL. (We could
+ // zero the entire record, but this is cheaper.)
+
+ pNestedExInfo->CopyAndClearSource(pExInfo);
+
+ pExInfo->m_pPrevNestedInfo = pNestedExInfo; // Save at head of nested info chain
+
+#if 0
+/* the following code was introduced in Whidbey as part of the Faulting Exception Frame removal (12/03).
+ However it isn't correct. If any nested exceptions occur while processing a rethrow, we would
+ incorrectly consider the nested exception to be a rethrow. See VSWhidbey 349379 for an example.
+
+ Therefore I am disabling this code until we see a failure that explains why it was added in the first
+ place. cwb 9/04.
+*/
+ // If we're here as a result of a rethrown exception, set the rethrown flag on the new ExInfo.
+ if (bRethrownException)
+ {
+ pExInfo->m_ExceptionFlags.SetIsRethrown();
+ }
+#endif
+ }
+ else
+ {
+ // At this point, either:
+ //
+ // 1) the bottom-most handler is NULL, implying this is a new exception for which we are getting ready, OR
+ // 2) the bottom-most handler is not-NULL, implying that there is already an exception in progress.
+ //
+ // Scenario 1 is that of a new throw and is easy to understand. Scenario 2 is the interesting one.
+ //
+ // ManagedA1 -> ManagedA2 -> ManagedA3 -> NativeCodeA -> ManagedB1 -> ManagedB2 -> ManagedB3 -> NativeCodeB
+ //
+ // On x86, each block of managed code is protected by one COMPlusFrameHandler [CPFH] (CLR's exception handler
+ // for managed code), unlike 64bit where each frame has a personality routine attached to it. Thus,
+ // for the example above, assume CPFH_A protects ManagedA* blocks and is setup just before the call to
+ // ManagedA1. Likewise, CPFH_B protects ManagedB* blocks and is setup just before the call to ManagedB1.
+ //
+ // When ManagedB3 throws an exception, CPFH_B is invoked to look for a handler in all of the ManagedB* blocks.
+ // At this point, it is setup as the "bottom-most-handler" (BMH). If no handler is found and exception reaches
+ // ManagedA* blocks, CPFH_A is invoked to look for a handler and thus, becomes BMH.
+ //
+ // Thus, in the first pass on x86 for a given exception, a particular CPFH will be invoked only once when looking
+ // for a handler and thus, registered as BMH only once. Either the exception goes unhandled and the process will
+ // terminate or a handler will be found and second pass will commence.
+ //
+ // However, assume NativeCodeB had a __try/__finally and raised an exception [E1] within the __try. Let's assume
+ // it gets caught in ManagedB1 and thus, unwind is triggered. At this point, the active exception tracker
+ // has context about the exception thrown out of __try and CPFH_B is registered as BMH.
+ //
+ // If the __finally throws a new exception [E2], CPFH_B will be invoked again for first pass while looking for
+ // a handler for the thrown exception. Since BMH is already non-NULL, we will come here since EstablisherFrame will be
+ // the same as BMH (because EstablisherFrame will be that of CPFH_B). We will proceed to overwrite the "required" parts
+ // of the existing exception tracker with the details of E2 (see setting of exception record and context below), erasing
+ // any artifact of E1.
+ //
+ // This is unlike Scenario 1 when exception tracker is completely initialized to default values. This is also
+ // unlike 64bit which will detect that E1 and E2 are different exceptions and hence, will setup a new tracker
+ // to track E2, effectively behaving like Scenario 1 above. X86 cannot do this since there is no nested exception
+ // tracker setup that gets to see the new exception.
+ //
+ // Thus, if E1 was a CSE and E2 isn't, we will come here and treat E2 as a CSE as well since corruption severity
+ // is initialized as part of exception tracker initialization. Thus, E2 will start to be treated as CSE, which is
+ // incorrect. A similar argument applies to delivery of the first chance exception notification.
+ //
+ // <QUIP> Another example why we should unify EH systems :) </QUIP>
+ //
+ // To address this issue, we will need to reset exception tracker here, just like the overwriting of "required"
+ // parts of exception tracker.
+
+ // If the current establisher frame is the same as the bottom-most-handler and we are here
+ // in the first pass, assert that current exception and the one tracked by active exception tracker
+ // are indeed different exceptions. In such a case, we must reset the exception tracker so that it can be
+ // setup correctly further down when CEHelper::SetupCorruptionSeverityForActiveException is invoked.
+
+ if ((pExInfo->m_pBottomMostHandler != NULL) &&
+ (pEstablisherFrame == pExInfo->m_pBottomMostHandler))
+ {
+ // Current exception should be different from the one exception tracker is already tracking.
+ _ASSERTE(pExceptionRecord != pExInfo->m_pExceptionRecord);
+
+ // This cannot be nested exceptions - they are handled earlier (see above).
+ _ASSERTE(!bNestedException);
+
+ LOG((LF_EH, LL_INFO100, "CPFH_RealFirstPassHandler: Bottom-most handler (0x%p) is the same as EstablisherFrame.\n",
+ pExInfo->m_pBottomMostHandler));
+ LOG((LF_EH, LL_INFO100, "CPFH_RealFirstPassHandler: Exception record in exception tracker is 0x%p, while that of new exception is 0x%p.\n",
+ pExInfo->m_pExceptionRecord, pExceptionRecord));
+ LOG((LF_EH, LL_INFO100, "CPFH_RealFirstPassHandler: Resetting exception tracker (0x%p).\n", pExInfo));
+
+ // This will reset the exception tracker state, including the corruption severity.
+ pExInfo->Init();
+ }
+ }
+
+ // If we are handling a fault from managed code, we need to set the Thread->ExInfo->pContext to
+ // the current fault context, which is used in the stack walk to get back into the managed
+ // stack with the correct registers. (Previously, this was done by linking in a FaultingExceptionFrame
+ // record.)
+ // We are about to create the managed exception object, which may trigger a GC, so set this up now.
+
+ pExInfo->m_pExceptionRecord = pExceptionRecord;
+ pExInfo->m_pContext = pContext;
+ if (pContext && ShouldHandleManagedFault(pExceptionRecord, pContext, pEstablisherFrame, pThread))
+ { // If this was a fault in managed code, rather than create a Frame for stackwalking,
+ // we can use this exinfo (after all, it has all the register info.)
+ pExInfo->m_ExceptionFlags.SetUseExInfoForStackwalk();
+ }
+
+ // It should now be safe for a GC to happen.
+
+ // case 1 & 3: this is the first time through of a new, nested, or rethrown exception, so see if we can
+ // find a handler. Only setup throwable if are bottommost handler
+ if (IsComPlusException(pExceptionRecord) && (!bAsynchronousThreadStop))
+ {
+
+ // Update the throwable from the last thrown object. Note: this may cause OOM, in which case we replace
+ // both throwables with the preallocated OOM exception.
+ pThread->SafeSetThrowables(pThread->LastThrownObject());
+
+ // now we've got a COM+ exception, fall through to see if we handle it
+
+ STRESS_LOG3(LF_EH, LL_INFO10000, "CPFH_RealFirstPassHandler: fall through ExInfo:0x%p setting m_pBottomMostHandler to 0x%p from 0x%p\n",
+ pExInfo, pEstablisherFrame, pExInfo->m_pBottomMostHandler);
+ pExInfo->m_pBottomMostHandler = pEstablisherFrame;
+ }
+ else if (bRethrownException)
+ {
+ // If it was rethrown and not COM+, it will still be the last one thrown. Either we threw it last and
+ // stashed it here or someone else caught it and rethrew it, in which case it will still have been
+ // originally stashed here.
+
+ // Update the throwable from the last thrown object. Note: this may cause OOM, in which case we replace
+ // both throwables with the preallocated OOM exception.
+ pThread->SafeSetThrowables(pThread->LastThrownObject());
+ STRESS_LOG3(LF_EH, LL_INFO10000, "CPFH_RealFirstPassHandler: rethrow non-COM+ ExInfo:0x%p setting m_pBottomMostHandler to 0x%p from 0x%p\n",
+ pExInfo, pEstablisherFrame, pExInfo->m_pBottomMostHandler);
+ pExInfo->m_pBottomMostHandler = pEstablisherFrame;
+ }
+ else
+ {
+ if (!fIsManagedCode)
+ {
+ tct.bDontCatch = false;
+ }
+
+ if (exceptionCode == STATUS_BREAKPOINT)
+ {
+ // don't catch int 3
+ retval = ExceptionContinueSearch;
+ goto exit;
+ }
+
+ // We need to set m_pBottomMostHandler here so that Thread::IsExceptionInProgress returns 1.
+ // This is a necessary part of suppressing thread abort exceptions in the constructor
+ // of any exception object we might create.
+ STRESS_LOG3(LF_EH, LL_INFO10000, "CPFH_RealFirstPassHandler: setting ExInfo:0x%p m_pBottomMostHandler for IsExceptionInProgress to 0x%p from 0x%p\n",
+ pExInfo, pEstablisherFrame, pExInfo->m_pBottomMostHandler);
+ pExInfo->m_pBottomMostHandler = pEstablisherFrame;
+
+ // Create the managed exception object.
+ OBJECTREF throwable = CreateCOMPlusExceptionObject(pThread, pExceptionRecord, bAsynchronousThreadStop);
+
+ // Set the throwables on the thread to the newly created object. If this fails, it will return a
+ // preallocated exception object instead. This also updates the last thrown exception, for rethrows.
+ throwable = pThread->SafeSetThrowables(throwable);
+
+ // Set the exception code and pointers. We set these after setting the throwables on the thread,
+ // because if the proper exception is replaced by an OOM exception, we still want the exception code
+ // and pointers set in the OOM exception.
+ EXCEPTIONREF exceptionRef = (EXCEPTIONREF)throwable;
+ exceptionRef->SetXCode(pExceptionRecord->ExceptionCode);
+ exceptionRef->SetXPtrs(&exceptionPointers);
+ }
+
+ tct.pBottomFrame = NULL;
+
+ EEToProfilerExceptionInterfaceWrapper::ExceptionThrown(pThread);
+
+ CPFH_UpdatePerformanceCounters();
+ } // End of case-1-or-3
+
+ {
+ // Allocate storage for the stack trace.
+ OBJECTREF throwable = NULL;
+ GCPROTECT_BEGIN(throwable);
+ throwable = pThread->GetThrowable();
+
+#ifdef FEATURE_CORRUPTING_EXCEPTIONS
+ {
+ BEGIN_SO_INTOLERANT_CODE(GetThread());
+ // Setup the state in current exception tracker indicating the corruption severity
+ // of the active exception.
+ CEHelper::SetupCorruptionSeverityForActiveException(bRethrownException, bNestedException,
+ CEHelper::ShouldTreatActiveExceptionAsNonCorrupting());
+ END_SO_INTOLERANT_CODE;
+ }
+#endif // FEATURE_CORRUPTING_EXCEPTIONS
+
+#ifdef FEATURE_CORECLR
+ // Check if we are dealing with AV or not and if we are,
+ // ensure that this is a real AV and not managed AV exception
+ BOOL fIsThrownExceptionAV = FALSE;
+ if ((pExceptionRecord->ExceptionCode == STATUS_ACCESS_VIOLATION) &&
+ (MscorlibBinder::GetException(kAccessViolationException) == throwable->GetMethodTable()))
+ {
+ // It's an AV - set the flag
+ fIsThrownExceptionAV = TRUE;
+ }
+
+ // Did we get an AV?
+ if (fIsThrownExceptionAV == TRUE)
+ {
+ // Get the escalation policy action for handling AV
+ EPolicyAction actionAV = GetEEPolicy()->GetActionOnFailure(FAIL_AccessViolation);
+
+ // Valid actions are: eNoAction (default behaviour) or eRudeExitProcess
+ _ASSERTE(((actionAV == eNoAction) || (actionAV == eRudeExitProcess)));
+ if (actionAV == eRudeExitProcess)
+ {
+ LOG((LF_EH, LL_INFO100, "CPFH_RealFirstPassHandler: AccessViolation handler found and doing RudeExitProcess due to escalation policy (eRudeExitProcess)\n"));
+
+ // EEPolicy::HandleFatalError will help us RudeExit the process.
+ // RudeExitProcess due to AV is to prevent a security risk - we are ripping
+ // at the boundary, without looking for the handlers.
+ EEPOLICY_HANDLE_FATAL_ERROR(COR_E_SECURITY);
+ }
+ }
+#endif // FEATURE_CORECLR
+
+ // If we're out of memory, then we figure there's probably not memory to maintain a stack trace, so we skip it.
+ // If we've got a stack overflow, then we figure the stack will be so huge as to make tracking the stack trace
+ // impractical, so we skip it.
+ if ((throwable == CLRException::GetPreallocatedOutOfMemoryException()) ||
+ (throwable == CLRException::GetPreallocatedStackOverflowException()))
+ {
+ tct.bAllowAllocMem = FALSE;
+ }
+ else
+ {
+ pExInfo->m_StackTraceInfo.AllocateStackTrace();
+ }
+
+ GCPROTECT_END();
+ }
+
+ // Set up information for GetExceptionPointers()/GetExceptionCode() callback.
+ pExInfo->SetExceptionCode(pExceptionRecord);
+
+ pExInfo->m_pExceptionPointers = &exceptionPointers;
+
+ if (bRethrownException || bNestedException)
+ {
+ _ASSERTE(pExInfo->m_pPrevNestedInfo != NULL);
+
+ BEGIN_SO_INTOLERANT_CODE(GetThread());
+ SetStateForWatsonBucketing(bRethrownException, pExInfo->GetPreviousExceptionTracker()->GetThrowableAsHandle());
+ END_SO_INTOLERANT_CODE;
+ }
+
+#ifdef DEBUGGING_SUPPORTED
+ //
+ // At this point the exception is still fresh to us, so assert that
+ // there should be nothing from the debugger on it.
+ //
+ _ASSERTE(!pExInfo->m_ExceptionFlags.DebuggerInterceptInfo());
+#endif
+
+ if (pThread->IsRudeAbort())
+ {
+ OBJECTREF rudeAbortThrowable = CLRException::GetPreallocatedRudeThreadAbortException();
+
+ if (pThread->GetThrowable() != rudeAbortThrowable)
+ {
+ // Neither of these sets will throw because the throwable that we're setting is a preallocated
+ // exception. This also updates the last thrown exception, for rethrows.
+ pThread->SafeSetThrowables(rudeAbortThrowable);
+ }
+
+ if (!pThread->IsRudeAbortInitiated())
+ {
+ pThread->PreWorkForThreadAbort();
+ }
+ }
+
+ LOG((LF_EH, LL_INFO100, "CPFH_RealFirstPassHandler: looking for handler bottom %x, top %x\n",
+ tct.pBottomFrame, tct.pTopFrame));
+ tct.bReplaceStack = pExInfo->m_pBottomMostHandler == pEstablisherFrame && !bRethrownException;
+ tct.bSkipLastElement = bRethrownException && bNestedException;
+ found = LookForHandler(&exceptionPointers,
+ pThread,
+ &tct);
+
+ // We have searched this far.
+ pExInfo->m_pSearchBoundary = tct.pTopFrame;
+ LOG((LF_EH, LL_INFO1000, "CPFH_RealFirstPassHandler: set pSearchBoundary to 0x%p\n", pExInfo->m_pSearchBoundary));
+
+ if ((found == LFH_NOT_FOUND)
+#ifdef DEBUGGING_SUPPORTED
+ && !pExInfo->m_ExceptionFlags.DebuggerInterceptInfo()
+#endif
+ )
+ {
+ LOG((LF_EH, LL_INFO100, "CPFH_RealFirstPassHandler: NOT_FOUND\n"));
+
+ if (tct.pTopFrame == FRAME_TOP)
+ {
+ LOG((LF_EH, LL_INFO100, "CPFH_RealFirstPassHandler: NOT_FOUND at FRAME_TOP\n"));
+ }
+
+ retval = ExceptionContinueSearch;
+ goto exit;
+ }
+ else
+ {
+ // so we are going to handle the exception
+
+ // Remove the nested exception record -- before calling RtlUnwind.
+ // The second-pass callback for a NestedExceptionRecord assumes that if it's
+ // being unwound, it should pop one exception from the pExInfo chain. This is
+ // true for any older NestedRecords that might be unwound -- but not for the
+ // new one we're about to add. To avoid this, we remove the new record
+ // before calling Unwind.
+ //
+ // <TODO>@NICE: This can probably be a little cleaner -- the nested record currently
+ // is also used to guard the running of the filter code. When we clean up the
+ // behaviour of exceptions within filters, we should be able to get rid of this
+ // PUSH/POP/PUSH behaviour.</TODO>
+ _ASSERTE(bPopNestedHandlerExRecord);
+
+ UNINSTALL_EXCEPTION_HANDLING_RECORD(&(nestedHandlerExRecord.m_ExReg));
+
+ // Since we are going to handle the exception we switch into preemptive mode
+ GCX_PREEMP_NO_DTOR();
+
+#ifdef DEBUGGING_SUPPORTED
+ //
+ // Check if the debugger wants to intercept this frame at a different point than where we are.
+ //
+ if (pExInfo->m_ExceptionFlags.DebuggerInterceptInfo())
+ {
+ ClrDebuggerDoUnwindAndIntercept(pEstablisherFrame, pExceptionRecord);
+
+ //
+ // If this returns, then the debugger couldn't do its stuff and we default to the found handler.
+ //
+ if (found == LFH_NOT_FOUND)
+ {
+ retval = ExceptionContinueSearch;
+ // we need to be sure to switch back into Cooperative mode since we are going to
+ // jump to the exit: label and follow the normal return path (it is expected that
+ // CPFH_RealFirstPassHandler returns in COOP).
+ GCX_PREEMP_NO_DTOR_END();
+ goto exit;
+ }
+ }
+#endif
+
+ LOG((LF_EH, LL_INFO100, "CPFH_RealFirstPassHandler: handler found: %s\n", tct.pFunc->m_pszDebugMethodName));
+
+ CallRtlUnwindSafe(pEstablisherFrame, RtlUnwindCallback, pExceptionRecord, 0);
+ // on x86 at least, RtlUnwind always returns
+
+ // Note: we've completed the unwind pass up to the establisher frame, and we're headed off to finish our
+ // cleanup and end up back in jitted code. Any more FS0 handlers pushed from this point on out will _not_ be
+ // unwound.
+ // Note: we are still in Preemptive mode here and that is correct, COMPlusAfterUnwind will switch us back
+ // into Cooperative mode.
+ return COMPlusAfterUnwind(pExceptionRecord, pEstablisherFrame, tct);
+ }
+
+exit:
+ {
+ // We need to be in COOP if we get here
+ GCX_ASSERT_COOP();
+ }
+
+ // If we got as far as saving pExInfo, save the context pointer so it's available for the unwind.
+ if (pExInfo)
+ {
+ pExInfo->m_pContext = pContext;
+ // pExInfo->m_pExceptionPointers points to a local structure, which is now going out of scope.
+ pExInfo->m_pExceptionPointers = NULL;
+ }
+
+#if defined(USE_FEF)
+ if (bPopFaultingExceptionFrame)
+ {
+ faultingExceptionFrame.Pop();
+ }
+#endif // USE_FEF
+
+ if (bPopNestedHandlerExRecord)
+ {
+ UNINSTALL_EXCEPTION_HANDLING_RECORD(&(nestedHandlerExRecord.m_ExReg));
+ }
+ return retval;
+} // CPFH_RealFirstPassHandler()
+
+
+//******************************************************************************
+//
+void InitializeExceptionHandling()
+{
+ WRAPPER_NO_CONTRACT;
+
+ InitSavedExceptionInfo();
+
+ CLRAddVectoredHandlers();
+
+ // Initialize the lock used for synchronizing access to the stacktrace in the exception object
+ g_StackTraceArrayLock.Init(LOCK_TYPE_DEFAULT, TRUE);
+}
+
+//******************************************************************************
+static inline EXCEPTION_DISPOSITION __cdecl
+CPFH_FirstPassHandler(EXCEPTION_RECORD *pExceptionRecord,
+ EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame,
+ CONTEXT *pContext,
+ DISPATCHER_CONTEXT *pDispatcherContext)
+{
+ WRAPPER_NO_CONTRACT;
+ EXCEPTION_DISPOSITION retval;
+
+ _ASSERTE (!(pExceptionRecord->ExceptionFlags & (EXCEPTION_UNWINDING | EXCEPTION_EXIT_UNWIND)));
+
+ DWORD exceptionCode = pExceptionRecord->ExceptionCode;
+
+ Thread *pThread = GetThread();
+
+ STRESS_LOG4(LF_EH, LL_INFO100,
+ "CPFH_FirstPassHandler: pEstablisherFrame = %x EH code = %x EIP = %x with ESP = %x\n",
+ pEstablisherFrame, exceptionCode, pContext ? GetIP(pContext) : 0, pContext ? GetSP(pContext) : 0);
+
+ EXCEPTION_POINTERS ptrs = { pExceptionRecord, pContext };
+
+ // Call to the vectored handler to give other parts of the Runtime a chance to jump in and take over an
+ // exception before we do too much with it. The most important point in the vectored handler is not to toggle
+ // the GC mode.
+ DWORD filter = CLRVectoredExceptionHandler(&ptrs);
+
+ if (filter == (DWORD) EXCEPTION_CONTINUE_EXECUTION)
+ {
+ return ExceptionContinueExecution;
+ }
+ else if (filter == EXCEPTION_CONTINUE_SEARCH)
+ {
+ return ExceptionContinueSearch;
+ }
+
+#if defined(STRESS_HEAP)
+ //
+ // Check to see if this exception is due to GCStress. Since the GCStress mechanism only injects these faults
+ // into managed code, we only need to check for them in CPFH_FirstPassHandler.
+ //
+ if (IsGcMarker(exceptionCode, pContext))
+ {
+ return ExceptionContinueExecution;
+ }
+#endif // STRESS_HEAP
+
+ // We always want to be in co-operative mode when we run this function, and whenever we return
+ // from it we want to go to pre-emptive mode because we are returning to the OS.
+ BOOL disabled = pThread->PreemptiveGCDisabled();
+ GCX_COOP_NO_DTOR();
+
+ BOOL bAsynchronousThreadStop = IsThreadHijackedForThreadStop(pThread, pExceptionRecord);
+
+ if (bAsynchronousThreadStop)
+ {
+ // If we ever get here in preemptive mode, we're in trouble. We've
+ // changed the thread's IP to point at a little function that throws ... if
+ // the thread were to be in preemptive mode and a GC occurred, the stack
+ // crawl would have been all messed up (because we have no frame that points
+ // us back to the right place in managed code).
+ _ASSERTE(disabled);
+
+ AdjustContextForThreadStop(pThread, pContext);
+ LOG((LF_EH, LL_INFO100, "CPFH_FirstPassHandler is Asynchronous Thread Stop or Abort\n"));
+ }
+
+ pThread->ResetThrowControlForThread();
+
+ CPFH_VerifyThreadIsInValidState(pThread, exceptionCode, pEstablisherFrame);
+
+ // If we were in cooperative mode when we came in here, then it's okay to see if we should do HandleManagedFault
+ // and push a FaultingExceptionFrame. If we weren't in coop mode coming in here, then it means that there's no
+ // way the exception could really be from managed code. It might look like it was from managed code, but in
+ // reality it's a rethrow from unmanaged code, either unmanaged user code or the unmanaged EE implementation.
+ if (disabled && ShouldHandleManagedFault(pExceptionRecord, pContext, pEstablisherFrame, pThread))
+ {
+#if defined(USE_FEF)
+ HandleManagedFault(pExceptionRecord, pContext, pEstablisherFrame, pThread);
+ retval = ExceptionContinueExecution;
+ goto exit;
+#else // USE_FEF
+ // Save the context pointer in the Thread's EXInfo, so that a stack crawl can recover the
+ // register values from the fault.
+
+ //@todo: I haven't yet found any case where we need to do anything here. If there are none, eliminate
+ // this entire if () {} block.
+#endif // USE_FEF
+ }
+
+ // OK. We're finally ready to start the real work. Nobody else grabbed the exception in front of us. Now we can
+ // get started.
+ retval = CPFH_RealFirstPassHandler(pExceptionRecord,
+ pEstablisherFrame,
+ pContext,
+ pDispatcherContext,
+ bAsynchronousThreadStop,
+ disabled);
+
+#if defined(USE_FEF) // This label is only used in the HandleManagedFault() case above.
+exit:
+#endif
+ if (retval != ExceptionContinueExecution || !disabled)
+ {
+ GCX_PREEMP_NO_DTOR();
+ }
+
+ STRESS_LOG1(LF_EH, LL_INFO100, "CPFH_FirstPassHandler: exiting with retval %d\n", retval);
+ return retval;
+} // CPFH_FirstPassHandler()
+
+//******************************************************************************
+inline void
+CPFH_UnwindFrames1(Thread* pThread, EXCEPTION_REGISTRATION_RECORD* pEstablisherFrame, DWORD exceptionCode)
+{
+ WRAPPER_NO_CONTRACT;
+
+ ExInfo* pExInfo = &(pThread->GetExceptionState()->m_currentExInfo);
+
+ // Ready to unwind the stack...
+ ThrowCallbackType tct;
+ tct.Init();
+ tct.bIsUnwind = TRUE;
+ tct.pTopFrame = GetCurrFrame(pEstablisherFrame); // highest frame to search to
+ tct.pBottomFrame = NULL;
+
+ // Set the flag indicating if the current exception represents a longjmp.
+ // See comment in COMPlusUnwindCallback for details.
+ CORRUPTING_EXCEPTIONS_ONLY(tct.m_fIsLongJump = (exceptionCode == STATUS_LONGJUMP);)
+
+ #ifdef _DEBUG
+ tct.pCurrentExceptionRecord = pEstablisherFrame;
+ tct.pPrevExceptionRecord = GetPrevSEHRecord(pEstablisherFrame);
+ #endif
+
+ #ifdef DEBUGGING_SUPPORTED
+ EXCEPTION_REGISTRATION_RECORD *pInterceptEstablisherFrame = NULL;
+
+ // If the exception is intercepted, use information stored in the DebuggerExState to unwind the stack.
+ if (pExInfo->m_ExceptionFlags.DebuggerInterceptInfo())
+ {
+ pExInfo->m_DebuggerExState.GetDebuggerInterceptInfo(&pInterceptEstablisherFrame,
+ NULL, // MethodDesc **ppFunc,
+ NULL, // int *pdHandler,
+ NULL, // BYTE **ppStack
+ NULL, // ULONG_PTR *pNativeOffset,
+ NULL // Frame **ppFrame)
+ );
+ LOG((LF_EH, LL_INFO1000, "CPFH_UnwindFrames1: frames are Est 0x%X, Intercept 0x%X\n",
+ pEstablisherFrame, pInterceptEstablisherFrame));
+
+ //
+ // When we set up for the interception we store off the CPFH or CPNEH that we
+ // *know* will handle unwinding the destination of the intercept.
+ //
+ // However, a CPNEH with the same limiting Capital-F-rame could do the work
+ // and unwind us, so...
+ //
+ // If this is the exact frame handler we are supposed to search for, or
+ // if this frame handler services the same Capital-F-rame as the frame handler
+ // we are looking for (i.e. this frame handler may do the work that we would
+ // expect our frame handler to do),
+ // then
+ // we need to pass the interception destination during this unwind.
+ //
+ _ASSERTE(IsUnmanagedToManagedSEHHandler(pEstablisherFrame));
+
+ if ((pEstablisherFrame == pInterceptEstablisherFrame) ||
+ (GetCurrFrame(pEstablisherFrame) == GetCurrFrame(pInterceptEstablisherFrame)))
+ {
+ pExInfo->m_DebuggerExState.GetDebuggerInterceptInfo(NULL,
+ &(tct.pFunc),
+ &(tct.dHandler),
+ &(tct.pStack),
+ NULL,
+ &(tct.pBottomFrame)
+ );
+
+ LOG((LF_EH, LL_INFO1000, "CPFH_UnwindFrames1: going to: pFunc:%#X, pStack:%#X\n",
+ tct.pFunc, tct.pStack));
+
+ }
+
+ }
+ #endif
+
+ UnwindFrames(pThread, &tct);
+
+ LOG((LF_EH, LL_INFO1000, "CPFH_UnwindFrames1: after unwind ec:%#x, tct.pTopFrame:0x%p, pSearchBndry:0x%p\n"
+ " pEstFrame:0x%p, IsC+NestExRec:%d, !Nest||Active:%d\n",
+ exceptionCode, tct.pTopFrame, pExInfo->m_pSearchBoundary, pEstablisherFrame,
+ IsComPlusNestedExceptionRecord(pEstablisherFrame),
+ (!IsComPlusNestedExceptionRecord(pEstablisherFrame) || reinterpret_cast<NestedHandlerExRecord*>(pEstablisherFrame)->m_ActiveForUnwind)));
+
+ if (tct.pTopFrame >= pExInfo->m_pSearchBoundary &&
+ (!IsComPlusNestedExceptionRecord(pEstablisherFrame) ||
+ reinterpret_cast<NestedHandlerExRecord*>(pEstablisherFrame)->m_ActiveForUnwind) )
+ {
+ // If this is the search boundary, and we're not a nested handler, then
+ // this is the last time we'll see this exception. Time to unwind our
+ // exinfo.
+ STRESS_LOG0(LF_EH, LL_INFO100, "CPFH_UnwindFrames1: Exception unwind -- unmanaged catcher detected\n");
+ pExInfo->UnwindExInfo((VOID*)pEstablisherFrame);
+ }
+} // CPFH_UnwindFrames1()
+
+//******************************************************************************
+inline EXCEPTION_DISPOSITION __cdecl
+CPFH_UnwindHandler(EXCEPTION_RECORD *pExceptionRecord,
+ EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame,
+ CONTEXT *pContext,
+ void *pDispatcherContext)
+{
+ WRAPPER_NO_CONTRACT;
+ _ASSERTE (pExceptionRecord->ExceptionFlags & (EXCEPTION_UNWINDING | EXCEPTION_EXIT_UNWIND));
+
+ #ifdef _DEBUG
+ // Note: you might be inclined to write "static int breakOnSecondPass = CLRConfig::GetConfigValue(...);", but
+ // you can't do that here. That causes C++ EH to be generated under the covers for this function, and this
+ // function isn't allowed to have any C++ EH in it because it's never going to return.
+ static int breakOnSecondPass; // = 0
+ static BOOL breakOnSecondPassSetup; // = FALSE
+ if (!breakOnSecondPassSetup)
+ {
+ breakOnSecondPass = CLRConfig::GetConfigValue(CLRConfig::INTERNAL_BreakOnSecondPass);
+ breakOnSecondPassSetup = TRUE;
+ }
+ if (breakOnSecondPass != 0)
+ {
+ _ASSERTE(!"Unwind handler");
+ }
+ #endif
+
+ DWORD exceptionCode = pExceptionRecord->ExceptionCode;
+ Thread *pThread = GetThread();
+
+ ExInfo* pExInfo = &(pThread->GetExceptionState()->m_currentExInfo);
+
+ STRESS_LOG4(LF_EH, LL_INFO100, "In CPFH_UnwindHandler EHCode = %x EIP = %x with ESP = %x, pEstablisherFrame = 0x%p\n", exceptionCode,
+ pContext ? GetIP(pContext) : 0, pContext ? GetSP(pContext) : 0, pEstablisherFrame);
+
+ // We always want to be in cooperative mode when we run this function. Whenever we return
+ // from it, we want to go to preemptive mode because we are returning to the OS.
+
+ {
+ // needs to be in its own scope to avoid polluting the namespace, since
+ // we don't do a _END then we don't revert the state
+ GCX_COOP_NO_DTOR();
+ }
+
+ CPFH_VerifyThreadIsInValidState(pThread, exceptionCode, pEstablisherFrame);
+
+ if (IsComPlusNestedExceptionRecord(pEstablisherFrame))
+ {
+ NestedHandlerExRecord *pHandler = reinterpret_cast<NestedHandlerExRecord*>(pEstablisherFrame);
+ if (pHandler->m_pCurrentExInfo != NULL)
+ {
+ // See the comment at the end of COMPlusNestedExceptionHandler about nested exception.
+ // OS is going to skip the EstablisherFrame before our NestedHandler.
+ if (pHandler->m_pCurrentExInfo->m_pBottomMostHandler <= pHandler->m_pCurrentHandler)
+ {
+ // We're unwinding -- the bottom most handler is potentially off top-of-stack now. If
+ // it is, change it to the next COM+ frame. (This one is not good, as it's about to
+ // disappear.)
+ EXCEPTION_REGISTRATION_RECORD *pNextBottomMost = GetNextCOMPlusSEHRecord(pHandler->m_pCurrentHandler);
+
+ STRESS_LOG3(LF_EH, LL_INFO10000, "COMPlusNestedExceptionHandler: setting ExInfo:0x%p m_pBottomMostHandler from 0x%p to 0x%p\n",
+ pHandler->m_pCurrentExInfo, pHandler->m_pCurrentExInfo->m_pBottomMostHandler, pNextBottomMost);
+
+ pHandler->m_pCurrentExInfo->m_pBottomMostHandler = pNextBottomMost;
+ }
+ }
+ }
+
+ // This establishes a marker so we can determine if we are processing a nested exception.
+ // We don't want to use the current frame to limit the search, as it could have been unwound by
+ // the time we get to the nested handler (i.e. if we find an exception, unwind to the call point,
+ // resume in the catch, and then get another exception), so make the nested handler
+ // have the same boundary as this one. If the nested handler can't find a handler, we won't
+ // end up searching this frame list twice, because the nested handler will set the search
+ // boundary in the thread, so if we get back to this handler it will have a range that starts
+ // and ends at the same place.
+ NestedHandlerExRecord nestedHandlerExRecord;
+ nestedHandlerExRecord.Init((PEXCEPTION_ROUTINE)COMPlusNestedExceptionHandler, GetCurrFrame(pEstablisherFrame));
+
+ nestedHandlerExRecord.m_ActiveForUnwind = TRUE;
+ nestedHandlerExRecord.m_pCurrentExInfo = pExInfo;
+ nestedHandlerExRecord.m_pCurrentHandler = pEstablisherFrame;
+
+ INSTALL_EXCEPTION_HANDLING_RECORD(&(nestedHandlerExRecord.m_ExReg));
+
+ // Unwind the stack. The establisher frame sets the boundary.
+ CPFH_UnwindFrames1(pThread, pEstablisherFrame, exceptionCode);
+
+ // We're unwinding -- the bottom most handler is potentially off top-of-stack now. If
+ // it is, change it to the next COM+ frame. (This one is not good, as it's about to
+ // disappear.)
+ if (pExInfo->m_pBottomMostHandler &&
+ pExInfo->m_pBottomMostHandler <= pEstablisherFrame)
+ {
+ EXCEPTION_REGISTRATION_RECORD *pNextBottomMost = GetNextCOMPlusSEHRecord(pEstablisherFrame);
+
+ // If there is no previous COM+ SEH handler, GetNextCOMPlusSEHRecord() will return -1. Much later, we will dereference that and AV.
+ _ASSERTE (pNextBottomMost != EXCEPTION_CHAIN_END);
+
+ STRESS_LOG3(LF_EH, LL_INFO10000, "CPFH_UnwindHandler: setting ExInfo:0x%p m_pBottomMostHandler from 0x%p to 0x%p\n",
+ pExInfo, pExInfo->m_pBottomMostHandler, pNextBottomMost);
+
+ pExInfo->m_pBottomMostHandler = pNextBottomMost;
+ }
+
+ {
+ // needs to be in its own scope to avoid polluting the namespace, since
+ // we don't do a _END then we don't revert the state
+ GCX_PREEMP_NO_DTOR();
+ }
+ UNINSTALL_EXCEPTION_HANDLING_RECORD(&(nestedHandlerExRecord.m_ExReg));
+
+ // If we are here, then the exception was not caught in managed code protected by this
+ // ComplusFrameHandler. Hence, reset the thread abort state if this is the last personality
+ // routine for managed code on the stack.
+ ResetThreadAbortState(pThread, pEstablisherFrame);
+
+ STRESS_LOG0(LF_EH, LL_INFO100, "CPFH_UnwindHandler: Leaving with ExceptionContinueSearch\n");
+ return ExceptionContinueSearch;
+} // CPFH_UnwindHandler()
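+
+#if 0
+// Illustrative sketch only, excluded from the build: the conceptual effect of the
+// INSTALL/UNINSTALL_EXCEPTION_HANDLING_RECORD macros used above. On x86, SEH
+// registration records form a singly linked list whose head lives at fs:[0].
+// The Sketch* names below are placeholders for this sketch and are not the
+// actual macro bodies.
+static void SketchInstallSEHRecord(EXCEPTION_REGISTRATION_RECORD *pRec, PEXCEPTION_ROUTINE pHandler)
+{
+ pRec->Handler = pHandler; // routine the OS calls during dispatch and unwind
+ pRec->Next = (EXCEPTION_REGISTRATION_RECORD*)(size_t)__readfsdword(0); // current chain head
+ __writefsdword(0, (DWORD)(size_t)pRec); // make this record the new head
+}
+
+static void SketchUninstallSEHRecord(EXCEPTION_REGISTRATION_RECORD *pRec)
+{
+ // Valid only while pRec is still the head of the chain.
+ __writefsdword(0, (DWORD)(size_t)pRec->Next);
+}
+#endif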
+
+//******************************************************************************
+// This is the first handler that is called in the context of managed code
+// It is the first level of defense and tries to find a handler in the user
+// code to handle the exception
+//-------------------------------------------------------------------------
+// EXCEPTION_DISPOSITION __cdecl COMPlusFrameHandler(
+// EXCEPTION_RECORD *pExceptionRecord,
+// _EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame,
+// CONTEXT *pContext,
+// DISPATCHER_CONTEXT *pDispatcherContext)
+//
+// See http://www.microsoft.com/msj/0197/exception/exception.aspx for a background piece on Windows
+// unmanaged structured exception handling.
+EXCEPTION_HANDLER_IMPL(COMPlusFrameHandler)
+{
+ WRAPPER_NO_CONTRACT;
+ _ASSERTE(!DebugIsEECxxException(pExceptionRecord) && "EE C++ Exception leaked into managed code!");
+
+ STRESS_LOG5(LF_EH, LL_INFO100, "In COMPlusFrameHandler EH code = %x flag = %x EIP = %x with ESP = %x, pEstablisherFrame = 0x%p\n",
+ pExceptionRecord->ExceptionCode, pExceptionRecord->ExceptionFlags,
+ pContext ? GetIP(pContext) : 0, pContext ? GetSP(pContext) : 0, pEstablisherFrame);
+
+ _ASSERTE((pContext == NULL) || ((pContext->ContextFlags & CONTEXT_CONTROL) == CONTEXT_CONTROL));
+
+ if (g_fNoExceptions)
+ return ExceptionContinueSearch; // No EH during EE shutdown.
+
+ // Check if the exception represents a GCStress marker. If it does,
+ // we shouldn't record its entry in the TLS, as such exceptions are
+ // continuable and can confuse the VM into treating them as a CSE,
+ // since they are implemented using the illegal instruction exception.
+
+ bool fIsGCMarker = false;
+
+#ifdef HAVE_GCCOVER // This is a debug only macro
+ if (GCStress<cfg_instr_jit>::IsEnabled())
+ {
+ // UnsafeTlsGetValue trashes the last error. When Complus_GCStress=4, GC is invoked
+ // on every allowable JITed instruction by means of our exception handling mechanism,
+ // so it is very easy to trash the last error. For example, a p/invoke may have called a native
+ // method which sets the last error; before we get the last error in the IL stub, it would be trashed here.
+ DWORD dwLastError = GetLastError();
+ fIsGCMarker = IsGcMarker(pExceptionRecord->ExceptionCode, pContext);
+ if (!fIsGCMarker)
+ {
+ SaveCurrentExceptionInfo(pExceptionRecord, pContext);
+ }
+ SetLastError(dwLastError);
+ }
+ else
+#endif
+ {
+ // GCStress does not exist on retail builds (see IsGcMarker implementation for details).
+ SaveCurrentExceptionInfo(pExceptionRecord, pContext);
+ }
+
+ if (fIsGCMarker)
+ {
+ // If this was a GCStress marker exception, then return
+ // ExceptionContinueExecution to the OS.
+ return ExceptionContinueExecution;
+ }
+
+ EXCEPTION_DISPOSITION retVal = ExceptionContinueSearch;
+
+ Thread *pThread = GetThread();
+ if ((pExceptionRecord->ExceptionFlags & (EXCEPTION_UNWINDING | EXCEPTION_EXIT_UNWIND)) == 0)
+ {
+ if (IsSOExceptionCode(pExceptionRecord->ExceptionCode))
+ {
+ EEPolicy::HandleStackOverflow(SOD_ManagedFrameHandler, (void*)pEstablisherFrame);
+
+ // VC's unhandled exception filter plays with the stack. It VirtualAlloc's a new stack, and
+ // then launches Watson from the new stack. When Watson asks the CLR to save the required data, we
+ // are not able to walk the stack.
+ // Set the context in the ExInfo so that our Watson dump routine knows how to walk this stack.
+ ExInfo* pExInfo = &(pThread->GetExceptionState()->m_currentExInfo);
+ pExInfo->m_pContext = pContext;
+
+ // Save the reference to the topmost handler we see during first pass when an SO goes past us.
+ // When an unwind gets triggered for the exception, we will reset the frame chain when we reach
+ // the topmost handler we saw during the first pass.
+ //
+ // This unifies, behaviour-wise, 32bit with 64bit.
+ if ((pExInfo->m_pTopMostHandlerDuringSO == NULL) ||
+ (pEstablisherFrame > pExInfo->m_pTopMostHandlerDuringSO))
+ {
+ pExInfo->m_pTopMostHandlerDuringSO = pEstablisherFrame;
+ }
+
+ // Switch to preemptive mode since we are returning back to the OS.
+ // We will do the quick switch since we are short of stack.
+ FastInterlockAnd (&pThread->m_fPreemptiveGCDisabled, 0);
+
+ return ExceptionContinueSearch;
+ }
+ else
+ {
+#ifdef FEATURE_STACK_PROBE
+ if (GetEEPolicy()->GetActionOnFailure(FAIL_StackOverflow) == eRudeUnloadAppDomain)
+ {
+ RetailStackProbe(static_cast<unsigned int>(ADJUST_PROBE(BACKOUT_CODE_STACK_LIMIT)), pThread);
+ }
+#endif
+ }
+ }
+ else
+ {
+ DWORD exceptionCode = pExceptionRecord->ExceptionCode;
+
+ if (exceptionCode == STATUS_UNWIND)
+ {
+ // If exceptionCode is STATUS_UNWIND, RtlUnwind was called with a NULL ExceptionRecord,
+ // so the OS uses a fake ExceptionRecord with the STATUS_UNWIND code. In that case we need
+ // to look at our saved exception code.
+ exceptionCode = GetCurrentExceptionCode();
+ }
+
+ if (IsSOExceptionCode(exceptionCode))
+ {
+ // We saved the context during the first pass in case the stack overflow exception is
+ // unhandled and Watson dump code needs it. Now we are in the second pass, therefore
+ // either the exception is handled by user code, or we have finished unhandled exception
+ // filter process, and the OS is unwinding the stack. Either way, we don't need the
+ // context any more. It is very important to reset the context so that our code does not
+ // accidentally walk the frame using the dangling context in ExInfoWalker::WalkToPosition.
+ ExInfo* pExInfo = &(pThread->GetExceptionState()->m_currentExInfo);
+ pExInfo->m_pContext = NULL;
+
+ // We should have the reference to the topmost handler seen during the first pass of SO
+ _ASSERTE(pExInfo->m_pTopMostHandlerDuringSO != NULL);
+
+ // Reset the frame chain till we reach the topmost establisher frame we saw in the first pass.
+ // This will ensure that if any intermediary frame calls back into managed code (e.g. a native frame
+ // containing a __finally that reverse-pinvokes into managed code), then we have the correct
+ // explicit frame on the stack. Resetting the frame chain only when we reach the topmost
+ // personality routine seen in the first pass may not result in the expected behaviour,
+ // especially during stack walks when the crawl frame needs to be initialized from the
+ // explicit frame.
+ if (pEstablisherFrame <= pExInfo->m_pTopMostHandlerDuringSO)
+ {
+ GCX_COOP_NO_DTOR();
+
+ if (pThread->GetFrame() < GetCurrFrame(pEstablisherFrame))
+ {
+ // We are very short of stack. We avoid calling UnwindFrame which may
+ // run unknown code here.
+ pThread->SetFrame(GetCurrFrame(pEstablisherFrame));
+ }
+ }
+
+ // Switch to preemptive mode since we are returning back to the OS.
+ // We will do the quick switch since we are short of stack.
+ FastInterlockAnd(&pThread->m_fPreemptiveGCDisabled, 0);
+
+ return ExceptionContinueSearch;
+ }
+ }
+
+ // <TODO> . We need to probe here, but can't introduce destructors etc. </TODO>
+ BEGIN_CONTRACT_VIOLATION(SOToleranceViolation);
+
+ if (pExceptionRecord->ExceptionFlags & (EXCEPTION_UNWINDING | EXCEPTION_EXIT_UNWIND))
+ {
+ retVal = CPFH_UnwindHandler(pExceptionRecord,
+ pEstablisherFrame,
+ pContext,
+ pDispatcherContext);
+ }
+ else
+ {
+
+ /* Make no assumptions about the current machine state.
+ <TODO>@PERF: Only needs to be called by the very first handler invoked by SEH </TODO>*/
+ ResetCurrentContext();
+
+ retVal = CPFH_FirstPassHandler(pExceptionRecord,
+ pEstablisherFrame,
+ pContext,
+ pDispatcherContext);
+
+ }
+
+ END_CONTRACT_VIOLATION;
+
+ return retVal;
+} // COMPlusFrameHandler()
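+
+#if 0
+// Illustrative sketch only, excluded from the build: the two-pass shape that
+// COMPlusFrameHandler follows above. The OS calls a frame handler once during
+// dispatch (first pass, no unwind flags set) and again during unwind (second
+// pass, EXCEPTION_UNWINDING or EXCEPTION_EXIT_UNWIND set). The Sketch* name and
+// the empty branch bodies below are placeholders, not the real dispatch logic.
+EXCEPTION_DISPOSITION __cdecl SketchFrameHandler(EXCEPTION_RECORD *pExceptionRecord,
+ EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame,
+ CONTEXT *pContext,
+ void *pDispatcherContext)
+{
+ if (pExceptionRecord->ExceptionFlags & (EXCEPTION_UNWINDING | EXCEPTION_EXIT_UNWIND))
+ {
+ // Second pass: run any finally/fault clauses for this frame, then let the unwind continue.
+ return ExceptionContinueSearch;
+ }
+
+ // First pass: search this frame for a catch handler; if none wants the exception,
+ // let the OS keep walking the fs:[0] chain.
+ return ExceptionContinueSearch;
+}
+#endif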
+
+
+//-------------------------------------------------------------------------
+// This is called by the EE to restore the stack pointer if necessary.
+//-------------------------------------------------------------------------
+
+// This can't be inlined into the caller to avoid introducing EH frame
+NOINLINE LPVOID COMPlusEndCatchWorker(Thread * pThread)
+{
+ STATIC_CONTRACT_THROWS;
+ STATIC_CONTRACT_GC_TRIGGERS;
+ STATIC_CONTRACT_MODE_COOPERATIVE;
+ STATIC_CONTRACT_SO_INTOLERANT;
+
+ LOG((LF_EH, LL_INFO1000, "COMPlusPEndCatch:called with "
+ "pThread:0x%x\n",pThread));
+
+ // indicate that we are out of the managed clause as early as possible
+ ExInfo* pExInfo = &(pThread->GetExceptionState()->m_currentExInfo);
+ pExInfo->m_EHClauseInfo.SetManagedCodeEntered(FALSE);
+
+ void* esp = NULL;
+
+ // @todo . We need to probe in the EH code, but can't introduce destructors etc.
+ BEGIN_CONTRACT_VIOLATION(SOToleranceViolation);
+
+ // Notify the profiler that the catcher has finished running
+ // IL stubs don't contain catch blocks, so the inability to perform this check does not matter.
+ // if (!pFunc->IsILStub())
+ EEToProfilerExceptionInterfaceWrapper::ExceptionCatcherLeave();
+
+ // no need to set pExInfo->m_ClauseType = (DWORD)COR_PRF_CLAUSE_NONE now that the
+ // notification is done, because the ExInfo record is about to be popped off anyway
+
+ LOG((LF_EH, LL_INFO1000, "COMPlusPEndCatch:pThread:0x%x\n",pThread));
+
+#ifdef _DEBUG
+ gLastResumedExceptionFunc = NULL;
+ gLastResumedExceptionHandler = 0;
+#endif
+ // Set the thrown object to NULL as no longer needed. This also sets the last thrown object to NULL.
+ pThread->SafeSetThrowables(NULL);
+
+ // reset the stashed exception info
+ pExInfo->m_pExceptionRecord = NULL;
+ pExInfo->m_pContext = NULL;
+ pExInfo->m_pExceptionPointers = NULL;
+
+ if (pExInfo->m_pShadowSP)
+ {
+ *pExInfo->m_pShadowSP = 0; // Reset the shadow SP
+ }
+
+ // pExInfo->m_dEsp was set in ResumeAtJITEH(). It is the Esp of the
+ // handler nesting level which catches the exception.
+ esp = (void*)(size_t)pExInfo->m_dEsp;
+
+ pExInfo->UnwindExInfo(esp);
+
+ // Prepare to sync managed exception state
+ //
+ // In a case when we're nested inside another catch block, the domain in which we're executing may not be the
+ // same as the domain of the throwable that was just made the current throwable above. Therefore, we
+ // make a special effort to preserve the domain of the throwable as we update the last thrown object.
+ //
+ // This function (COMPlusEndCatch) can also be called by the in-proc debugger helper thread on x86 when
+ // an attempt to SetIP takes place to set the IP outside the catch clause. In such a case, the managed thread
+ // object will not be available. Thus, we should reset the severity only if it's not such a thread.
+ //
+ // This behaviour (of the debugger doing SetIP) is not allowed on 64bit since the catch clauses are implemented
+ // as a separate funclet and it's just not allowed to set the IP across EH scopes, such as from inside a catch
+ // clause to outside of the catch clause.
+ bool fIsDebuggerHelperThread = (g_pDebugInterface == NULL) ? false : g_pDebugInterface->ThisIsHelperThread();
+
+ // Sync managed exception state, for the managed thread, based upon any active exception tracker
+ pThread->SyncManagedExceptionState(fIsDebuggerHelperThread);
+
+ LOG((LF_EH, LL_INFO1000, "COMPlusPEndCatch: esp=%p\n", esp));
+
+ END_CONTRACT_VIOLATION;
+
+ return esp;
+}
+
+//
+// This function works in conjunction with JIT_EndCatch. On input, the parameters are set as follows:
+// ebp, ebx, edi, esi: the values of these registers at the end of the catch block
+// *pRetAddress: the next instruction after the call to JIT_EndCatch
+//
+// On output, *pRetAddress is the instruction at which to resume execution. This may be user code,
+// or it may be ThrowControlForThread (which will re-raise a pending ThreadAbortException).
+//
+// Returns the esp to set before resuming at *pRetAddress.
+//
+LPVOID STDCALL COMPlusEndCatch(LPVOID ebp, DWORD ebx, DWORD edi, DWORD esi, LPVOID* pRetAddress)
+{
+ //
+ // PopNestedExceptionRecords directly manipulates fs:[0] chain. This method can't have any EH!
+ //
+ STATIC_CONTRACT_THROWS;
+ STATIC_CONTRACT_GC_TRIGGERS;
+ STATIC_CONTRACT_MODE_COOPERATIVE;
+ STATIC_CONTRACT_SO_INTOLERANT;
+
+ ETW::ExceptionLog::ExceptionCatchEnd();
+ ETW::ExceptionLog::ExceptionThrownEnd();
+
+ void* esp = COMPlusEndCatchWorker(GetThread());
+
+ // We are going to resume at a handler nesting level whose esp is dEsp. Pop off any SEH records below it. This
+ // would be the COMPlusNestedExceptionHandler we had inserted.
+ PopNestedExceptionRecords(esp);
+
+ //
+ // Set up m_OSContext for the call to COMPlusCheckForAbort
+ //
+ Thread* pThread = GetThread();
+ _ASSERTE(pThread != NULL);
+
+ SetIP(pThread->m_OSContext, (PCODE)*pRetAddress);
+ SetSP(pThread->m_OSContext, (TADDR)esp);
+ SetFP(pThread->m_OSContext, (TADDR)ebp);
+ pThread->m_OSContext->Ebx = ebx;
+ pThread->m_OSContext->Edi = edi;
+ pThread->m_OSContext->Esi = esi;
+
+ LPVOID throwControl = COMPlusCheckForAbort((UINT_PTR)*pRetAddress);
+ if (throwControl)
+ *pRetAddress = throwControl;
+
+ return esp;
+}
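+
+#if 0
+// Illustrative sketch only, excluded from the build: how the assembly caller
+// (JIT_EndCatch) consumes the contract documented above, written as pseudo-C.
+// The Sketch* name is a placeholder; the real caller is assembly code that
+// switches ESP and jumps rather than returning.
+static void SketchEndCatchCaller(LPVOID ebp, DWORD ebx, DWORD edi, DWORD esi, LPVOID retAddrAfterEndCatch)
+{
+ LPVOID resumeAddress = retAddrAfterEndCatch;
+ LPVOID newEsp = COMPlusEndCatch(ebp, ebx, edi, esi, &resumeAddress);
+
+ // COMPlusEndCatch may have redirected resumeAddress to ThrowControlForThread
+ // if a thread abort is pending. The real caller now does, in assembly:
+ //     mov esp, newEsp
+ //     jmp resumeAddress
+ (void)newEsp;
+}
+#endif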
+
+#endif // !DACCESS_COMPILE
+
+PTR_CONTEXT GetCONTEXTFromRedirectedStubStackFrame(CONTEXT * pContext)
+{
+ LIMITED_METHOD_DAC_CONTRACT;
+
+ UINT_PTR stackSlot = pContext->Ebp + REDIRECTSTUB_EBP_OFFSET_CONTEXT;
+ PTR_PTR_CONTEXT ppContext = dac_cast<PTR_PTR_CONTEXT>((TADDR)stackSlot);
+ return *ppContext;
+}
+
+#if !defined(DACCESS_COMPILE)
+
+PEXCEPTION_REGISTRATION_RECORD GetCurrentSEHRecord()
+{
+ WRAPPER_NO_CONTRACT;
+
+ LPVOID fs0 = (LPVOID)__readfsdword(0);
+
+#if 0 // This walk is too expensive considering we hit it every time we hit a CONTRACT(NOTHROW)
+#ifdef _DEBUG
+ EXCEPTION_REGISTRATION_RECORD *pEHR = (EXCEPTION_REGISTRATION_RECORD *)fs0;
+ LPVOID spVal;
+ __asm {
+ mov spVal, esp
+ }
+
+ // check that all the EH frames are greater than the current stack value. If not, the
+ // stack has been updated somehow w/o unwinding the SEH chain.
+
+ // LOG((LF_EH, LL_INFO1000000, "ER Chain:\n"));
+ while (pEHR != NULL && pEHR != EXCEPTION_CHAIN_END) {
+ // LOG((LF_EH, LL_INFO1000000, "\tp: prev:p handler:%x\n", pEHR, pEHR->Next, pEHR->Handler));
+ if (pEHR < spVal) {
+ if (gLastResumedExceptionFunc != 0)
+ _ASSERTE(!"Stack is greater than start of SEH chain - possible missing leave in handler. See gLastResumedExceptionHandler & gLastResumedExceptionFunc for info");
+ else
+ _ASSERTE(!"Stack is greater than start of SEH chain (FS:0)");
+ }
+ if (pEHR->Handler == (void *)-1)
+ _ASSERTE(!"Handler value has been corrupted");
+
+ _ASSERTE(pEHR < pEHR->Next);
+
+ pEHR = pEHR->Next;
+ }
+#endif
+#endif
+
+ return (EXCEPTION_REGISTRATION_RECORD*) fs0;
+}
+
+PEXCEPTION_REGISTRATION_RECORD GetFirstCOMPlusSEHRecord(Thread *pThread) {
+ WRAPPER_NO_CONTRACT;
+ EXCEPTION_REGISTRATION_RECORD *pEHR = *(pThread->GetExceptionListPtr());
+ if (pEHR == EXCEPTION_CHAIN_END || IsUnmanagedToManagedSEHHandler(pEHR)) {
+ return pEHR;
+ } else {
+ return GetNextCOMPlusSEHRecord(pEHR);
+ }
+}
+
+
+PEXCEPTION_REGISTRATION_RECORD GetPrevSEHRecord(EXCEPTION_REGISTRATION_RECORD *next)
+{
+ WRAPPER_NO_CONTRACT;
+ _ASSERTE(IsUnmanagedToManagedSEHHandler(next));
+
+ EXCEPTION_REGISTRATION_RECORD *pEHR = GetCurrentSEHRecord();
+ _ASSERTE(pEHR != 0 && pEHR != EXCEPTION_CHAIN_END);
+
+ EXCEPTION_REGISTRATION_RECORD *pBest = 0;
+ while (pEHR != next) {
+ if (IsUnmanagedToManagedSEHHandler(pEHR))
+ pBest = pEHR;
+ pEHR = pEHR->Next;
+ _ASSERTE(pEHR != 0 && pEHR != EXCEPTION_CHAIN_END);
+ }
+
+ return pBest;
+}
+
+VOID SetCurrentSEHRecord(EXCEPTION_REGISTRATION_RECORD *pSEH)
+{
+ WRAPPER_NO_CONTRACT;
+ *GetThread()->GetExceptionListPtr() = pSEH;
+}
+
+
+//
+// Unwind pExinfo, pops FS:[0] handlers until the interception context SP, and
+// resumes at interception context.
+//
+VOID UnwindExceptionTrackerAndResumeInInterceptionFrame(ExInfo* pExInfo, EHContext* context)
+{
+ STATIC_CONTRACT_NOTHROW;
+ STATIC_CONTRACT_GC_NOTRIGGER;
+ STATIC_CONTRACT_MODE_COOPERATIVE;
+ STATIC_CONTRACT_SO_TOLERANT;
+
+ _ASSERTE(pExInfo && context);
+
+ pExInfo->UnwindExInfo((LPVOID)(size_t)context->Esp);
+ PopNestedExceptionRecords((LPVOID)(size_t)context->Esp);
+
+ STRESS_LOG3(LF_EH|LF_CORDB, LL_INFO100, "UnwindExceptionTrackerAndResumeInInterceptionFrame: completing intercept at EIP = %p ESP = %p EBP = %p\n", context->Eip, context->Esp, context->Ebp);
+
+ ResumeAtJitEHHelper(context);
+ UNREACHABLE_MSG("Should never return from ResumeAtJitEHHelper!");
+}
+
+//
+// Pop SEH records below the given target ESP. This is only used to pop nested exception records.
+// If bCheckForUnknownHandlers is set, it only checks for unknown FS:[0] handlers.
+//
+BOOL PopNestedExceptionRecords(LPVOID pTargetSP, BOOL bCheckForUnknownHandlers)
+{
+ // No CONTRACT here, because we can't run the risk of it pushing any SEH into the current method.
+ STATIC_CONTRACT_NOTHROW;
+ STATIC_CONTRACT_GC_NOTRIGGER;
+ STATIC_CONTRACT_SO_TOLERANT;
+
+ PEXCEPTION_REGISTRATION_RECORD pEHR = GetCurrentSEHRecord();
+
+ while ((LPVOID)pEHR < pTargetSP)
+ {
+ //
+ // The only handler type we're allowed to have below the limit on the FS:0 chain in these cases is a nested
+ // exception record, so we verify that here.
+ //
+ // There is a special case, of course: for an unhandled exception, when the default handler does the exit
+ // unwind, we may have an exception that escapes a finally clause, thus replacing the original unhandled
+ // exception. If we find a catcher for that new exception, then we'll go ahead and do our own unwind, then
+ // jump to the catch. When we are called here, just before jumping to the catch, we'll pop off our nested
+ // handlers, then we'll pop off one more handler: the handler that ntdll!ExecuteHandler2 pushed before
+ // calling our nested handler. We go ahead and pop off that handler, too. It's okay; it's only there to catch
+ // exceptions from handlers and turn them into collided unwind status codes... there's no cleanup in the
+ // handler that we're removing, and that's the important point. The handler that ExecuteHandler2 pushes
+ // isn't a public export from ntdll, but it's named "UnwindHandler" and is physically shortly after
+ // ExecuteHandler2 in ntdll.
+ //
+ static HINSTANCE ExecuteHandler2Module = 0;
+ static BOOL ExecuteHandler2ModuleInited = FALSE;
+
+ // Cache the handle to the dll with the handler pushed by ExecuteHandler2.
+ if (!ExecuteHandler2ModuleInited)
+ {
+ ExecuteHandler2Module = WszGetModuleHandle(W("ntdll.dll"));
+ ExecuteHandler2ModuleInited = TRUE;
+ }
+
+ if (bCheckForUnknownHandlers)
+ {
+ if (!IsComPlusNestedExceptionRecord(pEHR) ||
+ !((ExecuteHandler2Module != NULL) && IsIPInModule(ExecuteHandler2Module, (PCODE)pEHR->Handler)))
+ {
+ return TRUE;
+ }
+ }
+#ifdef _DEBUG
+ else
+ {
+ // Note: if we can't find the module containing ExecuteHandler2, we'll just be really strict and require
+ // that we're only popping nested handlers.
+ _ASSERTE(IsComPlusNestedExceptionRecord(pEHR) ||
+ ((ExecuteHandler2Module != NULL) && IsIPInModule(ExecuteHandler2Module, (PCODE)pEHR->Handler)));
+ }
+#endif // _DEBUG
+
+ pEHR = pEHR->Next;
+ }
+
+ if (!bCheckForUnknownHandlers)
+ {
+ SetCurrentSEHRecord(pEHR);
+ }
+ return FALSE;
+}
+
+//
+// This is implemented differently from the PopNestedExceptionRecords above because it's called in the context of
+// the DebuggerRCThread to operate on the stack of another thread.
+//
+VOID PopNestedExceptionRecords(LPVOID pTargetSP, CONTEXT *pCtx, void *pSEH)
+{
+ // No CONTRACT here, because we can't run the risk of it pushing any SEH into the current method.
+ STATIC_CONTRACT_NOTHROW;
+ STATIC_CONTRACT_GC_NOTRIGGER;
+
+#ifdef _DEBUG
+ LOG((LF_CORDB,LL_INFO1000, "\nPrintSEHRecords:\n"));
+
+ EXCEPTION_REGISTRATION_RECORD *pEHR = (EXCEPTION_REGISTRATION_RECORD *)(size_t)*(DWORD *)pSEH;
+
+ // check that all the EH frames are greater than the current stack value. If not, the
+ // stack has been updated somehow w/o unwinding the SEH chain.
+ while (pEHR != NULL && pEHR != EXCEPTION_CHAIN_END)
+ {
+ LOG((LF_EH, LL_INFO1000000, "\t%08x: next:%08x handler:%x\n", pEHR, pEHR->Next, pEHR->Handler));
+ pEHR = pEHR->Next;
+ }
+#endif
+
+ DWORD dwCur = *(DWORD*)pSEH; // 'EAX' in the original routine
+ DWORD dwPrev = (DWORD)(size_t)pSEH;
+
+ while (dwCur < (DWORD)(size_t)pTargetSP)
+ {
+ // Watch for the OS handler
+ // for nested exceptions, or any C++ handlers for destructors in our call
+ // stack, or anything else.
+ if (dwCur < (DWORD)GetSP(pCtx))
+ dwPrev = dwCur;
+
+ dwCur = *(DWORD *)(size_t)dwCur;
+
+ LOG((LF_CORDB,LL_INFO10000, "dwCur: 0x%x dwPrev:0x%x pTargetSP:0x%x\n",
+ dwCur, dwPrev, pTargetSP));
+ }
+
+ *(DWORD *)(size_t)dwPrev = dwCur;
+
+#ifdef _DEBUG
+ pEHR = (EXCEPTION_REGISTRATION_RECORD *)(size_t)*(DWORD *)pSEH;
+ // check that all the EH frames are greater than the current stack value. If not, the
+ // stack has been updated somehow w/o unwinding the SEH chain.
+
+ LOG((LF_CORDB,LL_INFO1000, "\nPopSEHRecords:\n"));
+ while (pEHR != NULL && pEHR != (void *)-1)
+ {
+ LOG((LF_EH, LL_INFO1000000, "\t%08x: next:%08x handler:%x\n", pEHR, pEHR->Next, pEHR->Handler));
+ pEHR = pEHR->Next;
+ }
+#endif
+}
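+
+#if 0
+// Illustrative sketch only, excluded from the build: the pointer splice performed
+// by the dwCur/dwPrev loop above, written against an ordinary singly linked list.
+// SketchNode stands in for an SEH record, pHead stands for the location holding the
+// chain head (mirroring how pSEH is treated above, since Next is the record's first
+// field), pContextSP plays the role of GetSP(pCtx), and pTargetSP matches the
+// parameter above. Records at or above the interrupted SP but below the target SP
+// are unlinked from the chain; everything else is preserved.
+struct SketchNode { SketchNode *Next; };
+
+static void SketchSpliceBelow(SketchNode *pHead, void *pContextSP, void *pTargetSP)
+{
+ SketchNode *pPrev = pHead; // last record (or the head slot) that must survive
+ SketchNode *pCur = pHead->Next; // first record on the chain
+
+ while ((size_t)pCur < (size_t)pTargetSP)
+ {
+ if ((size_t)pCur < (size_t)pContextSP)
+ pPrev = pCur; // records below the interrupted SP are kept
+
+ pCur = pCur->Next; // advance toward pTargetSP
+ }
+
+ pPrev->Next = pCur; // drop records in [pContextSP, pTargetSP) from the chain
+}
+#endif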
+
+//==========================================================================
+// COMPlusThrowCallback
+//
+//==========================================================================
+
+/*
+ *
+ * COMPlusThrowCallbackHelper
+ *
+ * This function is a simple helper function for COMPlusThrowCallback. It is needed
+ * because of the EX_TRY macro. This macro does an alloca(), which allocates space
+ * on the stack without freeing it. Thus, doing an EX_TRY in a loop can easily result
+ * in a stack overflow error. By factoring out the EX_TRY into a separate function,
+ * we recover that stack space.
+ *
+ * Parameters:
+ * pJitManager - The JIT manager that will filter the EH.
+ * pCf - The frame to crawl.
+ * EHClausePtr
+ * nestingLevel
+ * pThread - Used to determine if the thread is throwable or not.
+ *
+ * Return:
+ * Exception status.
+ *
+ */
+int COMPlusThrowCallbackHelper(IJitManager *pJitManager,
+ CrawlFrame *pCf,
+ ThrowCallbackType* pData,
+ EE_ILEXCEPTION_CLAUSE *EHClausePtr,
+ DWORD nestingLevel,
+ OBJECTREF throwable,
+ Thread *pThread
+ )
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_TRIGGERS;
+ MODE_COOPERATIVE;
+ }
+ CONTRACTL_END;
+
+ int iFilt = 0;
+ BOOL impersonating = FALSE;
+
+ EX_TRY
+ {
+ GCPROTECT_BEGIN (throwable);
+ if (pData->hCallerToken != NULL)
+ {
+ STRESS_LOG1(LF_EH, LL_INFO100, "In COMPlusThrowCallbackHelper hCallerToken = %d\n",pData->hCallerToken);
+ // CLR_ImpersonateLoggedOnUser fails fast on error
+ COMPrincipal::CLR_ImpersonateLoggedOnUser(pData->hCallerToken);
+ impersonating = TRUE;
+ }
+
+ // We want to call filters even if the thread is aborting, so suppress abort
+ // checks while the filter runs.
+ ThreadPreventAsyncHolder preventAbort;
+
+ BYTE* startAddress = (BYTE*)pCf->GetCodeInfo()->GetStartAddress();
+ iFilt = ::CallJitEHFilter(pCf, startAddress, EHClausePtr, nestingLevel, throwable);
+
+ if (impersonating)
+ {
+ STRESS_LOG1(LF_EH, LL_INFO100, "In COMPlusThrowCallbackHelper hImpersonationToken = %d\n",pData->hImpersonationToken);
+ // CLR_ImpersonateLoggedOnUser fails fast on error
+ COMPrincipal::CLR_ImpersonateLoggedOnUser(pData->hImpersonationToken);
+ impersonating = FALSE;
+ }
+ GCPROTECT_END();
+ }
+ EX_CATCH
+ {
+ if (impersonating)
+ {
+ STRESS_LOG1(LF_EH, LL_INFO100, "In COMPlusThrowCallbackHelper EX_CATCH hImpersonationToken = %d\n",pData->hImpersonationToken);
+ // CLR_ImpersonateLoggedOnUser fails fast on error
+ COMPrincipal::CLR_ImpersonateLoggedOnUser(pData->hImpersonationToken);
+ impersonating = FALSE;
+ }
+
+ // We had an exception in filter invocation that remained unhandled.
+ // Sync managed exception state, for the managed thread, based upon the active exception tracker.
+ pThread->SyncManagedExceptionState(false);
+
+ //
+ // Swallow exception. Treat as exception continue search.
+ //
+ iFilt = EXCEPTION_CONTINUE_SEARCH;
+
+ }
+ EX_END_CATCH(SwallowAllExceptions)
+
+ return iFilt;
+}
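+
+#if 0
+// Illustrative sketch only, excluded from the build: why the EX_TRY above lives in
+// this helper instead of directly in the stack walk loop. Stack space obtained with
+// _alloca() is only released when the owning frame returns, so alloca-ing on every
+// iteration of a loop grows the stack for the life of the enclosing function, while
+// doing it inside a callee releases the space on each return. The Sketch* names and
+// sizes are placeholders for this sketch.
+static void SketchPerIterationWork(size_t cb)
+{
+ void* p = _alloca(cb); // released when this helper returns
+ (void)p;
+}
+
+static void SketchLoop(size_t iterations, size_t cb)
+{
+ for (size_t i = 0; i < iterations; i++)
+ {
+ // Stack usage stays bounded: each call's _alloca is unwound on return.
+ SketchPerIterationWork(cb);
+
+ // By contrast, calling _alloca(cb) directly here would accumulate
+ // cb bytes per iteration until SketchLoop itself returned.
+ }
+}
+#endif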
+
+//******************************************************************************
+// The stack walk callback for exception handling on x86.
+// Returns one of:
+// SWA_CONTINUE = 0, // continue walking
+// SWA_ABORT = 1, // stop walking, early out in "failure case"
+// SWA_FAILED = 2 // couldn't walk stack
+StackWalkAction COMPlusThrowCallback( // SWA value
+ CrawlFrame *pCf, // Data from StackWalkFramesEx
+ ThrowCallbackType *pData) // Context data passed through from CPFH
+{
+ // We don't want to use a runtime contract here since this codepath is used during
+ // the processing of a hard SO. Contracts use a significant amount of stack
+ // which we can't afford for those cases.
+ STATIC_CONTRACT_THROWS;
+ STATIC_CONTRACT_GC_TRIGGERS;
+ STATIC_CONTRACT_MODE_COOPERATIVE;
+
+ Frame *pFrame = pCf->GetFrame();
+ MethodDesc *pFunc = pCf->GetFunction();
+
+ #if defined(_DEBUG)
+ #define METHODNAME(pFunc) (pFunc?pFunc->m_pszDebugMethodName:"<n/a>")
+ #else
+ #define METHODNAME(pFunc) "<n/a>"
+ #endif
+ STRESS_LOG4(LF_EH, LL_INFO100, "COMPlusThrowCallback: STACKCRAWL method:%pM ('%s'), Frame:%p, FrameVtable = %pV\n",
+ pFunc, METHODNAME(pFunc), pFrame, pCf->IsFrameless()?0:(*(void**)pFrame));
+ #undef METHODNAME
+
+ Thread *pThread = GetThread();
+
+ if (pFrame && pData->pTopFrame == pFrame)
+ /* Don't look past limiting frame if there is one */
+ return SWA_ABORT;
+
+ if (!pFunc)
+ return SWA_CONTINUE;
+
+ if (pThread->IsRudeAbortInitiated() && !pThread->IsWithinCer(pCf))
+ {
+ return SWA_CONTINUE;
+ }
+
+ ExInfo* pExInfo = &(pThread->GetExceptionState()->m_currentExInfo);
+
+ _ASSERTE(!pData->bIsUnwind);
+#ifdef _DEBUG
+ // It SHOULD be the case that any frames we consider live between this exception
+ // record and the previous one.
+ if (!pExInfo->m_pPrevNestedInfo) {
+ if (pData->pCurrentExceptionRecord) {
+ if (pFrame) _ASSERTE(pData->pCurrentExceptionRecord > pFrame);
+ if (pCf->IsFrameless()) _ASSERTE((ULONG_PTR)pData->pCurrentExceptionRecord >= GetRegdisplaySP(pCf->GetRegisterSet()));
+ }
+ if (pData->pPrevExceptionRecord) {
+ // FCALLs have an extra SEH record in debug builds because of the destructor
+ // associated with ForbidGC checking. This is benign, so just ignore it.
+ if (pFrame) _ASSERTE(pData->pPrevExceptionRecord < pFrame || pFrame->GetVTablePtr() == HelperMethodFrame::GetMethodFrameVPtr());
+ if (pCf->IsFrameless()) _ASSERTE((ULONG_PTR)pData->pPrevExceptionRecord <= GetRegdisplaySP(pCf->GetRegisterSet()));
+ }
+ }
+#endif
+
+ UINT_PTR currentIP = 0;
+ UINT_PTR currentSP = 0;
+
+ if (pCf->IsFrameless())
+ {
+ currentIP = (UINT_PTR)GetControlPC(pCf->GetRegisterSet());
+ currentSP = (UINT_PTR)GetRegdisplaySP(pCf->GetRegisterSet());
+ }
+ else if (InlinedCallFrame::FrameHasActiveCall(pFrame))
+ {
+ // don't have the IP, SP for native code
+ currentIP = 0;
+ currentSP = 0;
+ }
+ else
+ {
+ currentIP = (UINT_PTR)(pCf->GetFrame()->GetIP());
+ currentSP = 0; //Don't have an SP to get.
+ }
+
+ if (!pFunc->IsILStub())
+ {
+ // Append the current frame to the stack trace and save the stack trace to the managed Exception object.
+ pExInfo->m_StackTraceInfo.AppendElement(pData->bAllowAllocMem, currentIP, currentSP, pFunc, pCf);
+
+ pExInfo->m_StackTraceInfo.SaveStackTrace(pData->bAllowAllocMem,
+ pThread->GetThrowableAsHandle(),
+ pData->bReplaceStack,
+ pData->bSkipLastElement);
+ }
+ else
+ {
+ LOG((LF_EH, LL_INFO1000, "COMPlusThrowCallback: Skipping AppendElement/SaveStackTrace for IL stub MD %p\n", pFunc));
+ }
+
+ // Fire an exception thrown ETW event when an exception occurs
+ ETW::ExceptionLog::ExceptionThrown(pCf, pData->bSkipLastElement, pData->bReplaceStack);
+
+ // Reset the flags. These flags are set only once before each stack walk done by LookForHandler(), and
+ // they apply only to the first frame we append to the stack trace. Subsequent frames are always appended.
+ if (pData->bReplaceStack)
+ {
+ pData->bReplaceStack = FALSE;
+ }
+ if (pData->bSkipLastElement)
+ {
+ pData->bSkipLastElement = FALSE;
+ }
+
+ // Check for any impersonation on the frame and save that for use during EH filter callbacks
+ OBJECTREF* pRefSecDesc = pCf->GetAddrOfSecurityObject();
+ if (pRefSecDesc != NULL && *pRefSecDesc != NULL)
+ {
+ FRAMESECDESCREF fsdRef = (FRAMESECDESCREF)*pRefSecDesc;
+ if (fsdRef->GetCallerToken() != NULL)
+ {
+ // Impersonation info present on the Frame
+ pData->hCallerToken = fsdRef->GetCallerToken();
+ STRESS_LOG1(LF_EH, LL_INFO100, "In COMPlusThrowCallback. Found non-NULL callertoken on FSD:%d\n",pData->hCallerToken);
+ if (!pData->bImpersonationTokenSet)
+ {
+ pData->hImpersonationToken = fsdRef->GetImpersonationToken();
+ STRESS_LOG1(LF_EH, LL_INFO100, "In COMPlusThrowCallback. Found non-NULL impersonationtoken on FSD:%d\n",pData->hImpersonationToken);
+ pData->bImpersonationTokenSet = TRUE;
+ }
+ }
+ }
+
+ // now we've got the stack trace, if we aren't allowed to catch this and we're first pass, return
+ if (pData->bDontCatch)
+ return SWA_CONTINUE;
+
+ if (!pCf->IsFrameless())
+ {
+ // @todo - remove this once SIS is fully enabled.
+ extern bool g_EnableSIS;
+ if (g_EnableSIS)
+ {
+ // For debugger, we may want to notify 1st chance exceptions if they're coming out of a stub.
+ // We recognize stubs as Frames with a M2U transition type. The debugger's stackwalker also
+ // recognizes these frames and publishes ICorDebugInternalFrames in the stackwalk. It's
+ // important to use pFrame as the stack address so that the Exception callback matches up
+ // w/ the ICorDebugInternalFrame stack range.
+ if (CORDebuggerAttached())
+ {
+ Frame * pFrameStub = pCf->GetFrame();
+ Frame::ETransitionType t = pFrameStub->GetTransitionType();
+ if (t == Frame::TT_M2U)
+ {
+ // Use address of the frame as the stack address.
+ currentSP = (SIZE_T) ((void*) pFrameStub);
+ currentIP = 0; // no IP.
+ EEToDebuggerExceptionInterfaceWrapper::FirstChanceManagedException(pThread, (SIZE_T)currentIP, (SIZE_T)currentSP);
+#ifdef FEATURE_EXCEPTION_NOTIFICATIONS
+ // Deliver the FirstChanceNotification after the debugger, if not already delivered.
+ if (!pExInfo->DeliveredFirstChanceNotification())
+ {
+ ExceptionNotifications::DeliverFirstChanceNotification();
+ }
+#endif // FEATURE_EXCEPTION_NOTIFICATIONS
+ }
+ }
+ }
+ return SWA_CONTINUE;
+ }
+
+ bool fIsILStub = pFunc->IsILStub();
+ bool fGiveDebuggerAndProfilerNotification = !fIsILStub;
+ BOOL fMethodCanHandleException = TRUE;
+
+ MethodDesc * pUserMDForILStub = NULL;
+ Frame * pILStubFrame = NULL;
+ if (fIsILStub)
+ pUserMDForILStub = GetUserMethodForILStub(pThread, currentSP, pFunc, &pILStubFrame);
+
+#ifdef FEATURE_CORRUPTING_EXCEPTIONS
+ CorruptionSeverity currentSeverity = pThread->GetExceptionState()->GetCurrentExceptionTracker()->GetCorruptionSeverity();
+ {
+ // We must defer to the MethodDesc of the user method instead of the IL stub
+ // itself because the user can specify the policy on a per-method basis and
+ // that won't be reflected via the IL stub's MethodDesc.
+ MethodDesc * pMDWithCEAttribute = fIsILStub ? pUserMDForILStub : pFunc;
+
+ // Check if the exception can be delivered to the method? It will check if the exception
+ // is a CE or not. If it is, it will check if the method can process it or not.
+ fMethodCanHandleException = CEHelper::CanMethodHandleException(currentSeverity, pMDWithCEAttribute);
+ }
+#endif // FEATURE_CORRUPTING_EXCEPTIONS
+
+ // Let the profiler know that we are searching for a handler within this function instance
+ if (fGiveDebuggerAndProfilerNotification)
+ EEToProfilerExceptionInterfaceWrapper::ExceptionSearchFunctionEnter(pFunc);
+
+ // The following debugger notification and AppDomain::FirstChanceNotification should be scoped together
+ // since the AD notification *must* follow immediately after the debugger's notification.
+ {
+#ifdef DEBUGGING_SUPPORTED
+ //
+ // Go ahead and notify any debugger of this exception.
+ //
+ EEToDebuggerExceptionInterfaceWrapper::FirstChanceManagedException(pThread, (SIZE_T)currentIP, (SIZE_T)currentSP);
+
+ if (CORDebuggerAttached() && pExInfo->m_ExceptionFlags.DebuggerInterceptInfo())
+ {
+ return SWA_ABORT;
+ }
+#endif // DEBUGGING_SUPPORTED
+
+#ifdef FEATURE_EXCEPTION_NOTIFICATIONS
+ // Attempt to deliver the first chance notification to the AD only *AFTER* the debugger
+ // has done that, provided we have not already done that.
+ if (!pExInfo->DeliveredFirstChanceNotification())
+ {
+ ExceptionNotifications::DeliverFirstChanceNotification();
+ }
+#endif // FEATURE_EXCEPTION_NOTIFICATIONS
+ }
+ IJitManager* pJitManager = pCf->GetJitManager();
+ _ASSERTE(pJitManager);
+ EH_CLAUSE_ENUMERATOR pEnumState;
+ unsigned EHCount = 0;
+
+#ifdef FEATURE_CORRUPTING_EXCEPTIONS
+ // If the exception cannot be handled, then just bail out. We shouldn't examine the EH clauses
+ // in such a method.
+ if (!fMethodCanHandleException)
+ {
+ LOG((LF_EH, LL_INFO100, "COMPlusThrowCallback - CEHelper decided not to look for exception handlers in the method(MD:%p).\n", pFunc));
+
+ // Set the flag to skip this frame since the CE cannot be delivered
+ _ASSERTE(currentSeverity == ProcessCorrupting);
+
+ // Ensure EHClause count is zero
+ EHCount = 0;
+ }
+ else
+#endif // FEATURE_CORRUPTING_EXCEPTIONS
+ {
+ EHCount = pJitManager->InitializeEHEnumeration(pCf->GetMethodToken(), &pEnumState);
+ }
+
+ if (EHCount == 0)
+ {
+ // Inform the profiler that we're leaving, and what pass we're on
+ if (fGiveDebuggerAndProfilerNotification)
+ EEToProfilerExceptionInterfaceWrapper::ExceptionSearchFunctionLeave(pFunc);
+ return SWA_CONTINUE;
+ }
+
+ TypeHandle thrownType = TypeHandle();
+ // if we are being called on an unwind for an exception that we did not try to catch, eg.
+ // an internal EE exception, then pThread->GetThrowable will be null
+ {
+ OBJECTREF throwable = pThread->GetThrowable();
+ if (throwable != NULL)
+ {
+ throwable = PossiblyUnwrapThrowable(throwable, pCf->GetAssembly());
+ thrownType = TypeHandle(throwable->GetTrueMethodTable());
+ }
+ }
+
+ PREGDISPLAY regs = pCf->GetRegisterSet();
+ BYTE *pStack = (BYTE *) GetRegdisplaySP(regs);
+#ifdef DEBUGGING_SUPPORTED
+ BYTE *pHandlerEBP = (BYTE *) GetRegdisplayFP(regs);
+#endif
+
+ DWORD offs = (DWORD)pCf->GetRelOffset(); //= (BYTE*) (*regs->pPC) - (BYTE*) pCf->GetStartAddress();
+ STRESS_LOG1(LF_EH, LL_INFO10000, "COMPlusThrowCallback: offset is %d\n", offs);
+
+ EE_ILEXCEPTION_CLAUSE EHClause;
+ unsigned start_adjust, end_adjust;
+
+ start_adjust = !(pCf->HasFaulted() || pCf->IsIPadjusted());
+ end_adjust = pCf->IsActiveFunc();
+
+ for(ULONG i=0; i < EHCount; i++)
+ {
+ pJitManager->GetNextEHClause(&pEnumState, &EHClause);
+ _ASSERTE(IsValidClause(&EHClause));
+
+ STRESS_LOG4(LF_EH, LL_INFO100, "COMPlusThrowCallback: considering '%s' clause [%d,%d], ofs:%d\n",
+ (IsFault(&EHClause) ? "fault" : (
+ IsFinally(&EHClause) ? "finally" : (
+ IsFilterHandler(&EHClause) ? "filter" : (
+ IsTypedHandler(&EHClause) ? "typed" : "unknown")))),
+ EHClause.TryStartPC,
+ EHClause.TryEndPC,
+ offs
+ );
+
+ // Checking the exception range is a bit tricky because
+ // on CPU faults (null pointer access, div 0, ..., the IP points
+ // to the faulting instruction, but on calls, the IP points
+ // to the next instruction.
+ // This means that we should not include the start point on calls
+ // as this would be a call just preceding the try block.
+ // Also, we should include the end point on calls, but not faults.
+
+ // If we're in the FILTER part of a filter clause, then we
+ // want to stop crawling. It's going to be caught in an
+ // EX_CATCH just above us. If not, the exception
+ if ( IsFilterHandler(&EHClause)
+ && ( offs > EHClause.FilterOffset
+ || offs == EHClause.FilterOffset && !start_adjust)
+ && ( offs < EHClause.HandlerStartPC
+ || offs == EHClause.HandlerStartPC && !end_adjust)) {
+
+ STRESS_LOG4(LF_EH, LL_INFO100, "COMPlusThrowCallback: Fault inside filter [%d,%d] startAdj %d endAdj %d\n",
+ EHClause.FilterOffset, EHClause.HandlerStartPC, start_adjust, end_adjust);
+
+ if (fGiveDebuggerAndProfilerNotification)
+ EEToProfilerExceptionInterfaceWrapper::ExceptionSearchFunctionLeave(pFunc);
+ return SWA_ABORT;
+ }
+
+ if ( (offs < EHClause.TryStartPC) ||
+ (offs > EHClause.TryEndPC) ||
+ (offs == EHClause.TryStartPC && start_adjust) ||
+ (offs == EHClause.TryEndPC && end_adjust))
+ continue;
+
+ BOOL typeMatch = FALSE;
+ BOOL isTypedHandler = IsTypedHandler(&EHClause);
+
+ if (isTypedHandler && !thrownType.IsNull())
+ {
+ if (EHClause.TypeHandle == (void*)(size_t)mdTypeRefNil)
+ {
+ // this is a catch(...)
+ typeMatch = TRUE;
+ }
+ else
+ {
+ TypeHandle exnType = pJitManager->ResolveEHClause(&EHClause,pCf);
+
+ // if it doesn't have a cached class, then the class wasn't loaded, so it couldn't have been thrown
+ typeMatch = !exnType.IsNull() && ExceptionIsOfRightType(exnType, thrownType);
+ }
+ }
+
+ // <TODO>@PERF: Is this too expensive? Consider storing the nesting level
+ // instead of the HandlerEndPC.</TODO>
+
+ // Determine the nesting level of EHClause. Just walk the table
+ // again, and find out how many handlers enclose it
+ DWORD nestingLevel = 0;
+
+ if (IsFaultOrFinally(&EHClause))
+ continue;
+ if (isTypedHandler)
+ {
+ LOG((LF_EH, LL_INFO100, "COMPlusThrowCallback: %s match for typed handler.\n", typeMatch?"Found":"Did not find"));
+ if (!typeMatch)
+ {
+ continue;
+ }
+ }
+ else
+ {
+ // Must be an exception filter (__except() part of __try{}__except(){}).
+ nestingLevel = ComputeEnclosingHandlerNestingLevel(pJitManager,
+ pCf->GetMethodToken(),
+ EHClause.HandlerStartPC);
+
+ // We just need *any* address within the method. This will let the debugger
+ // resolve the EnC version of the method.
+ PCODE pMethodAddr = GetControlPC(regs);
+ if (fGiveDebuggerAndProfilerNotification)
+ EEToDebuggerExceptionInterfaceWrapper::ExceptionFilter(pFunc, pMethodAddr, EHClause.FilterOffset, pHandlerEBP);
+
+ UINT_PTR uStartAddress = (UINT_PTR)pCf->GetCodeInfo()->GetStartAddress();
+
+ // save clause information in the exinfo
+ pExInfo->m_EHClauseInfo.SetInfo(COR_PRF_CLAUSE_FILTER,
+ uStartAddress + EHClause.FilterOffset,
+ StackFrame((UINT_PTR)pHandlerEBP));
+
+ // Let the profiler know we are entering a filter
+ if (fGiveDebuggerAndProfilerNotification)
+ EEToProfilerExceptionInterfaceWrapper::ExceptionSearchFilterEnter(pFunc);
+
+ COUNTER_ONLY(GetPerfCounters().m_Excep.cFiltersExecuted++);
+
+ STRESS_LOG3(LF_EH, LL_INFO10, "COMPlusThrowCallback: calling filter code, EHClausePtr:%08x, Start:%08x, End:%08x\n",
+ &EHClause, EHClause.HandlerStartPC, EHClause.HandlerEndPC);
+
+ OBJECTREF throwable = PossiblyUnwrapThrowable(pThread->GetThrowable(), pCf->GetAssembly());
+
+ pExInfo->m_EHClauseInfo.SetManagedCodeEntered(TRUE);
+
+ int iFilt = COMPlusThrowCallbackHelper(pJitManager,
+ pCf,
+ pData,
+ &EHClause,
+ nestingLevel,
+ throwable,
+ pThread);
+
+ pExInfo->m_EHClauseInfo.SetManagedCodeEntered(FALSE);
+
+ // Let the profiler know we are leaving a filter
+ if (fGiveDebuggerAndProfilerNotification)
+ EEToProfilerExceptionInterfaceWrapper::ExceptionSearchFilterLeave();
+
+ pExInfo->m_EHClauseInfo.ResetInfo();
+
+ if (pThread->IsRudeAbortInitiated() && !pThread->IsWithinCer(pCf))
+ {
+ if (fGiveDebuggerAndProfilerNotification)
+ EEToProfilerExceptionInterfaceWrapper::ExceptionSearchFunctionLeave(pFunc);
+ return SWA_CONTINUE;
+ }
+
+ // If this filter didn't want the exception, keep looking.
+ if (EXCEPTION_EXECUTE_HANDLER != iFilt)
+ continue;
+ }
+
+ // Record this location, to stop the unwind phase, later.
+ pData->pFunc = pFunc;
+ pData->dHandler = i;
+ pData->pStack = pStack;
+
+ // Notify the profiler that a catcher has been found
+ if (fGiveDebuggerAndProfilerNotification)
+ {
+ EEToProfilerExceptionInterfaceWrapper::ExceptionSearchCatcherFound(pFunc);
+ EEToProfilerExceptionInterfaceWrapper::ExceptionSearchFunctionLeave(pFunc);
+ }
+
+#ifdef DEBUGGING_SUPPORTED
+ //
+ // Notify debugger that a catcher has been found.
+ //
+ if (fIsILStub)
+ {
+ EEToDebuggerExceptionInterfaceWrapper::NotifyOfCHFFilter(pExInfo->m_pExceptionPointers, pILStubFrame);
+ }
+ else
+ if (fGiveDebuggerAndProfilerNotification &&
+ CORDebuggerAttached() && !pExInfo->m_ExceptionFlags.DebuggerInterceptInfo())
+ {
+ _ASSERTE(pData);
+ // We just need *any* address within the method. This will let the debugger
+ // resolve the EnC version of the method.
+ PCODE pMethodAddr = GetControlPC(regs);
+
+ EEToDebuggerExceptionInterfaceWrapper::FirstChanceManagedExceptionCatcherFound(pThread,
+ pData->pFunc, pMethodAddr,
+ (SIZE_T)pData->pStack,
+ &EHClause);
+ }
+#endif // DEBUGGING_SUPPORTED
+
+ return SWA_ABORT;
+ }
+ if (fGiveDebuggerAndProfilerNotification)
+ EEToProfilerExceptionInterfaceWrapper::ExceptionSearchFunctionLeave(pFunc);
+ return SWA_CONTINUE;
+} // StackWalkAction COMPlusThrowCallback()
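+
+#if 0
+// Illustrative sketch only, excluded from the build: the try-range test shared by
+// COMPlusThrowCallback above and COMPlusUnwindCallback below. On CPU faults the IP
+// points at the faulting instruction, while after calls it points at the next
+// instruction, so the start of the try range is excluded when the IP is a return
+// address (start_adjust) and the end is excluded for the active, faulting frame
+// (end_adjust). The Sketch* name is a placeholder for this sketch.
+static bool SketchOffsetInTryRange(DWORD offs, DWORD tryStartPC, DWORD tryEndPC,
+ bool start_adjust, bool end_adjust)
+{
+ if (offs < tryStartPC || offs > tryEndPC)
+ return false; // clearly outside the try range
+
+ if (offs == tryStartPC && start_adjust)
+ return false; // a call just preceding the try block
+
+ if (offs == tryEndPC && end_adjust)
+ return false; // the boundary instruction of the active (faulting) frame
+
+ return true;
+}
+#endif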
+
+
+//==========================================================================
+// COMPlusUnwindCallback
+//==========================================================================
+
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning (disable : 4740) // There is inline asm code in this function, which disables
+ // global optimizations.
+#pragma warning (disable : 4731)
+#endif
+StackWalkAction COMPlusUnwindCallback (CrawlFrame *pCf, ThrowCallbackType *pData)
+{
+ STATIC_CONTRACT_THROWS;
+ STATIC_CONTRACT_GC_NOTRIGGER;
+ STATIC_CONTRACT_MODE_COOPERATIVE;
+
+ _ASSERTE(pData->bIsUnwind);
+
+ Frame *pFrame = pCf->GetFrame();
+ MethodDesc *pFunc = pCf->GetFunction();
+
+ #if defined(_DEBUG)
+ #define METHODNAME(pFunc) (pFunc?pFunc->m_pszDebugMethodName:"<n/a>")
+ #else
+ #define METHODNAME(pFunc) "<n/a>"
+ #endif
+ STRESS_LOG4(LF_EH, LL_INFO100, "COMPlusUnwindCallback: STACKCRAWL method:%pM ('%s'), Frame:%p, FrameVtable = %pV\n",
+ pFunc, METHODNAME(pFunc), pFrame, pCf->IsFrameless()?0:(*(void**)pFrame));
+ #undef METHODNAME
+
+ if (pFrame && pData->pTopFrame == pFrame)
+ /* Don't look past limiting frame if there is one */
+ return SWA_ABORT;
+
+ if (!pFunc)
+ return SWA_CONTINUE;
+
+ if (!pCf->IsFrameless())
+ return SWA_CONTINUE;
+
+ Thread *pThread = GetThread();
+
+ // If the thread is being RudeAbort, we will not run any finally
+ if (pThread->IsRudeAbortInitiated() && !pThread->IsWithinCer(pCf))
+ {
+ return SWA_CONTINUE;
+ }
+
+ IJitManager* pJitManager = pCf->GetJitManager();
+ _ASSERTE(pJitManager);
+
+ ExInfo *pExInfo = &(pThread->GetExceptionState()->m_currentExInfo);
+
+ PREGDISPLAY regs = pCf->GetRegisterSet();
+ BYTE *pStack = (BYTE *) GetRegdisplaySP(regs);
+
+ TypeHandle thrownType = TypeHandle();
+
+ BOOL fCanMethodHandleException = TRUE;
+#ifdef FEATURE_CORRUPTING_EXCEPTIONS
+ // A MethodDesc's security information (i.e. whether it is critical or transparent) is calculated lazily.
+ // If this method's security information was not precalculated, then it would already have been computed in the
+ // first pass using Security::IsMethodCritical, which could have taken us down a path which is GC_TRIGGERS.
+ //
+ // However, this unwind callback (for X86) is GC_NOTRIGGER, and at this point the security information would have been
+ // calculated already. Hence, we wouldn't end up in the GC_TRIGGERS path. Thus, to keep SCAN.EXE (the static contract analyzer) happy,
+ // we will pass FALSE to the CanMethodHandleException call, indicating we don't need to calculate security information (and thus
+ // not go down the GC_TRIGGERS path).
+ //
+ // Check if the exception can be delivered to the method? It will check if the exception
+ // is a CE or not. If it is, it will check if the method can process it or not.
+ CorruptionSeverity currentSeverity = pThread->GetExceptionState()->GetCurrentExceptionTracker()->GetCorruptionSeverity();
+
+ // We have to do this check for x86 since, unlike 64bit, which will set up a new exception tracker for longjmp,
+ // x86 only sets up new trackers in the first pass (and longjmp is a 2nd-pass-only exception). Hence, we pass
+ // this information in the callback structure without affecting any existing exception tracker (in case longjmp was
+ // a nested exception).
+ if (pData->m_fIsLongJump)
+ {
+ // Longjump is not a CSE. With a CSE in progress, this can be invoked by either:
+ //
+ // 1) Managed code (e.g. finally/fault/catch), OR
+ // 2) By native code
+ //
+ // In scenario (1), managed code can invoke it only if it was attributed with the HPCSE attribute. Thus,
+ // longjmp is no different than managed code doing a "throw new Exception();".
+ //
+ // In scenario (2), longjmp is no different than any other non-CSE native exception raised.
+ //
+ // In both these case, longjmp should be treated as non-CSE. Since x86 does not setup a tracker for
+ // it (see comment above), we pass this information (of whether the current exception is a longjmp or not)
+ // to this callback (from UnwindFrames) to setup the correct corruption severity.
+ //
+ // http://www.nynaeve.net/?p=105 has a brief description of how exception-safe setjmp/longjmp works.
+ currentSeverity = NotCorrupting;
+ }
+ {
+ MethodDesc * pFuncWithCEAttribute = pFunc;
+ Frame * pILStubFrame = NULL;
+ if (pFunc->IsILStub())
+ {
+ // We must defer to the MethodDesc of the user method instead of the IL stub
+ // itself because the user can specify the policy on a per-method basis and
+ // that won't be reflected via the IL stub's MethodDesc.
+ pFuncWithCEAttribute = GetUserMethodForILStub(pThread, (UINT_PTR)pStack, pFunc, &pILStubFrame);
+ }
+ fCanMethodHandleException = CEHelper::CanMethodHandleException(currentSeverity, pFuncWithCEAttribute, FALSE);
+ }
+#endif // FEATURE_CORRUPTING_EXCEPTIONS
+
+#ifdef DEBUGGING_SUPPORTED
+ LOG((LF_EH, LL_INFO1000, "COMPlusUnwindCallback: Intercept %d, pData->pFunc 0x%X, pFunc 0x%X, pData->pStack 0x%X, pStack 0x%X\n",
+ pExInfo->m_ExceptionFlags.DebuggerInterceptInfo(),
+ pData->pFunc,
+ pFunc,
+ pData->pStack,
+ pStack));
+
+ //
+ // If the debugger wants to intercept this exception here, go do that.
+ //
+ if (pExInfo->m_ExceptionFlags.DebuggerInterceptInfo() && (pData->pFunc == pFunc) && (pData->pStack == pStack))
+ {
+ goto LDoDebuggerIntercept;
+ }
+#endif
+
+ bool fGiveDebuggerAndProfilerNotification;
+ fGiveDebuggerAndProfilerNotification = !pFunc->IsILStub();
+
+ // Notify the profiler of the function we're dealing with in the unwind phase
+ if (fGiveDebuggerAndProfilerNotification)
+ EEToProfilerExceptionInterfaceWrapper::ExceptionUnwindFunctionEnter(pFunc);
+
+ EH_CLAUSE_ENUMERATOR pEnumState;
+ unsigned EHCount;
+
+#ifdef FEATURE_CORRUPTING_EXCEPTIONS
+ if (!fCanMethodHandleException)
+ {
+ LOG((LF_EH, LL_INFO100, "COMPlusUnwindCallback - CEHelper decided not to look for exception handlers in the method(MD:%p).\n", pFunc));
+
+ // Set the flag to skip this frame since the CE cannot be delivered
+ _ASSERTE(currentSeverity == ProcessCorrupting);
+
+ // Force EHClause count to be zero
+ EHCount = 0;
+ }
+ else
+#endif // FEATURE_CORRUPTING_EXCEPTIONS
+ {
+ EHCount = pJitManager->InitializeEHEnumeration(pCf->GetMethodToken(), &pEnumState);
+ }
+
+ if (EHCount == 0)
+ {
+ // Inform the profiler that we're leaving, and what pass we're on
+ if (fGiveDebuggerAndProfilerNotification)
+ EEToProfilerExceptionInterfaceWrapper::ExceptionUnwindFunctionLeave(pFunc);
+
+ return SWA_CONTINUE;
+ }
+
+ // if we are being called on an unwind for an exception that we did not try to catch, eg.
+ // an internal EE exception, then pThread->GetThrowable will be null
+ {
+ OBJECTREF throwable = pThread->GetThrowable();
+ if (throwable != NULL)
+ {
+ throwable = PossiblyUnwrapThrowable(throwable, pCf->GetAssembly());
+ thrownType = TypeHandle(throwable->GetTrueMethodTable());
+ }
+ }
+#ifdef DEBUGGING_SUPPORTED
+ BYTE *pHandlerEBP;
+ pHandlerEBP = (BYTE *) GetRegdisplayFP(regs);
+#endif
+
+ DWORD offs;
+ offs = (DWORD)pCf->GetRelOffset(); //= (BYTE*) (*regs->pPC) - (BYTE*) pCf->GetStartAddress();
+
+ LOG((LF_EH, LL_INFO100, "COMPlusUnwindCallback: current EIP offset in method 0x%x, \n", offs));
+
+ EE_ILEXCEPTION_CLAUSE EHClause;
+ unsigned start_adjust, end_adjust;
+
+ start_adjust = !(pCf->HasFaulted() || pCf->IsIPadjusted());
+ end_adjust = pCf->IsActiveFunc();
+
+ for(ULONG i=0; i < EHCount; i++)
+ {
+ pJitManager->GetNextEHClause(&pEnumState, &EHClause);
+ _ASSERTE(IsValidClause(&EHClause));
+
+ STRESS_LOG4(LF_EH, LL_INFO100, "COMPlusUnwindCallback: considering '%s' clause [%d,%d], offs:%d\n",
+ (IsFault(&EHClause) ? "fault" : (
+ IsFinally(&EHClause) ? "finally" : (
+ IsFilterHandler(&EHClause) ? "filter" : (
+ IsTypedHandler(&EHClause) ? "typed" : "unknown")))),
+ EHClause.TryStartPC,
+ EHClause.TryEndPC,
+ offs
+ );
+
+ // Checking the exception range is a bit tricky because
+ // on CPU faults (null pointer access, div 0, ..., the IP points
+ // to the faulting instruction, but on calls, the IP points
+ // to the next instruction.
+ // This means that we should not include the start point on calls
+ // as this would be a call just preceding the try block.
+ // Also, we should include the end point on calls, but not faults.
+
+ if ( IsFilterHandler(&EHClause)
+ && ( offs > EHClause.FilterOffset
+ || offs == EHClause.FilterOffset && !start_adjust)
+ && ( offs < EHClause.HandlerStartPC
+ || offs == EHClause.HandlerStartPC && !end_adjust)
+ ) {
+ STRESS_LOG4(LF_EH, LL_INFO100, "COMPlusUnwindCallback: Fault inside filter [%d,%d] startAdj %d endAdj %d\n",
+ EHClause.FilterOffset, EHClause.HandlerStartPC, start_adjust, end_adjust);
+
+ // Mark the filter as done. See the comment in CallJitEHFilter
+ // on why we have to do it here.
+ Frame* pFilterFrame = pThread->GetFrame();
+ _ASSERTE(pFilterFrame->GetVTablePtr() == ExceptionFilterFrame::GetMethodFrameVPtr());
+ ((ExceptionFilterFrame*)pFilterFrame)->SetFilterDone();
+
+ // Inform the profiler that we're leaving, and what pass we're on
+ if (fGiveDebuggerAndProfilerNotification)
+ EEToProfilerExceptionInterfaceWrapper::ExceptionUnwindFunctionLeave(pFunc);
+
+ return SWA_ABORT;
+ }
+
+ if ( (offs < EHClause.TryStartPC) ||
+ (offs > EHClause.TryEndPC) ||
+ (offs == EHClause.TryStartPC && start_adjust) ||
+ (offs == EHClause.TryEndPC && end_adjust))
+ continue;
+
+ // <TODO>@PERF : Is this too expensive? Consider storing the nesting level
+ // instead of the HandlerEndPC.</TODO>
+
+ // Determine the nesting level of EHClause. Just walk the table
+ // again, and find out how many handlers enclose it
+
+ DWORD nestingLevel = ComputeEnclosingHandlerNestingLevel(pJitManager,
+ pCf->GetMethodToken(),
+ EHClause.HandlerStartPC);
+
+ // We just need *any* address within the method. This will let the debugger
+ // resolve the EnC version of the method.
+ PCODE pMethodAddr = GetControlPC(regs);
+
+ UINT_PTR uStartAddress = (UINT_PTR)pCf->GetCodeInfo()->GetStartAddress();
+
+ if (IsFaultOrFinally(&EHClause))
+ {
+ COUNTER_ONLY(GetPerfCounters().m_Excep.cFinallysExecuted++);
+
+ if (fGiveDebuggerAndProfilerNotification)
+ EEToDebuggerExceptionInterfaceWrapper::ExceptionHandle(pFunc, pMethodAddr, EHClause.HandlerStartPC, pHandlerEBP);
+
+ pExInfo->m_EHClauseInfo.SetInfo(COR_PRF_CLAUSE_FINALLY,
+ uStartAddress + EHClause.HandlerStartPC,
+ StackFrame((UINT_PTR)pHandlerEBP));
+
+ // Notify the profiler that we are about to execute the finally code
+ if (fGiveDebuggerAndProfilerNotification)
+ EEToProfilerExceptionInterfaceWrapper::ExceptionUnwindFinallyEnter(pFunc);
+
+ LOG((LF_EH, LL_INFO100, "COMPlusUnwindCallback: finally clause [%d,%d] - call\n", EHClause.TryStartPC, EHClause.TryEndPC));
+
+ pExInfo->m_EHClauseInfo.SetManagedCodeEntered(TRUE);
+
+ ::CallJitEHFinally(pCf, (BYTE *)uStartAddress, &EHClause, nestingLevel);
+
+ pExInfo->m_EHClauseInfo.SetManagedCodeEntered(FALSE);
+
+ LOG((LF_EH, LL_INFO100, "COMPlusUnwindCallback: finally - returned\n"));
+
+ // Notify the profiler that we are done with the finally code
+ if (fGiveDebuggerAndProfilerNotification)
+ EEToProfilerExceptionInterfaceWrapper::ExceptionUnwindFinallyLeave();
+
+ pExInfo->m_EHClauseInfo.ResetInfo();
+
+ continue;
+ }
+
+ // Current is not a finally, check if it's the catching handler (or filter).
+ if (pData->pFunc != pFunc || (ULONG)(pData->dHandler) != i || pData->pStack != pStack)
+ {
+ continue;
+ }
+
+#ifdef _DEBUG
+ gLastResumedExceptionFunc = pCf->GetFunction();
+ gLastResumedExceptionHandler = i;
+#endif
+
+ // save clause information in the exinfo
+ pExInfo->m_EHClauseInfo.SetInfo(COR_PRF_CLAUSE_CATCH,
+ uStartAddress + EHClause.HandlerStartPC,
+ StackFrame((UINT_PTR)pHandlerEBP));
+
+ // Notify the profiler that we are about to resume at the catcher.
+ if (fGiveDebuggerAndProfilerNotification)
+ {
+ DACNotify::DoExceptionCatcherEnterNotification(pFunc, EHClause.HandlerStartPC);
+
+ EEToProfilerExceptionInterfaceWrapper::ExceptionCatcherEnter(pThread, pFunc);
+
+ EEToDebuggerExceptionInterfaceWrapper::ExceptionHandle(pFunc, pMethodAddr, EHClause.HandlerStartPC, pHandlerEBP);
+ }
+
+ STRESS_LOG4(LF_EH, LL_INFO100, "COMPlusUnwindCallback: offset 0x%x matches clause [0x%x, 0x%x) matches in method %pM\n",
+ offs, EHClause.TryStartPC, EHClause.TryEndPC, pFunc);
+
+ // ResumeAtJitEH will set pExInfo->m_EHClauseInfo.m_fManagedCodeEntered = TRUE; at the appropriate time
+ ::ResumeAtJitEH(pCf, (BYTE *)uStartAddress, &EHClause, nestingLevel, pThread, pData->bUnwindStack);
+ //UNREACHABLE_MSG("ResumeAtJitEH shouldn't have returned!");
+
+ // we do not set pExInfo->m_EHClauseInfo.m_fManagedCodeEntered = FALSE here,
+ // that happens when the catch clause calls back to COMPlusEndCatch
+
+ }
+
+ STRESS_LOG1(LF_EH, LL_INFO100, "COMPlusUnwindCallback: no handler found in method %pM\n", pFunc);
+ if (fGiveDebuggerAndProfilerNotification)
+ EEToProfilerExceptionInterfaceWrapper::ExceptionUnwindFunctionLeave(pFunc);
+
+ return SWA_CONTINUE;
+
+
+#ifdef DEBUGGING_SUPPORTED
+LDoDebuggerIntercept:
+
+ STRESS_LOG1(LF_EH|LF_CORDB, LL_INFO100, "COMPlusUnwindCallback: Intercepting in method %pM\n", pFunc);
+
+ //
+ // Setup up the easy parts of the context to restart at.
+ //
+ EHContext context;
+
+ //
+ // Note: EAX ECX EDX are scratch
+ //
+ context.Esp = (DWORD)(size_t)(GetRegdisplaySP(regs));
+ context.Ebx = *regs->pEbx;
+ context.Esi = *regs->pEsi;
+ context.Edi = *regs->pEdi;
+ context.Ebp = *regs->pEbp;
+
+ //
+ // Set scratch registers to 0 to avoid reporting incorrect values to GC in case of debugger changing the IP
+ // in the middle of a scratch register lifetime (see Dev10 754922)
+ //
+ context.Eax = 0;
+ context.Ecx = 0;
+ context.Edx = 0;
+
+ //
+ // Ok, now set the target Eip to the address the debugger requested.
+ //
+ ULONG_PTR nativeOffset;
+ pExInfo->m_DebuggerExState.GetDebuggerInterceptInfo(NULL, NULL, NULL, NULL, &nativeOffset, NULL);
+ context.Eip = GetControlPC(regs) - (pCf->GetRelOffset() - nativeOffset);
+
+ //
+ // Finally we need to get the correct Esp for this nested level
+ //
+
+ context.Esp = pCf->GetCodeManager()->GetAmbientSP(regs,
+ pCf->GetCodeInfo(),
+ nativeOffset,
+ pData->dHandler,
+ pCf->GetCodeManState()
+ );
+ //
+ // In case we see unknown FS:[0] handlers, we delay the interception point until we reach the handler that protects the interception point.
+ // This way we have both FS:[0] handlers being popped by RtlUnwind and managed capital-F Frames being unwound by the managed stack walker.
+ //
+ BOOL fCheckForUnknownHandler = TRUE;
+ if (PopNestedExceptionRecords((LPVOID)(size_t)context.Esp, fCheckForUnknownHandler))
+ {
+ // Let the RtlUnwind in ClrDebuggerDoUnwindAndIntercept continue to unwind frames until we reach the handler protected by COMPlusNestedExceptionHandler.
+ pExInfo->m_InterceptionContext = context;
+ pExInfo->m_ValidInterceptionContext = TRUE;
+ STRESS_LOG0(LF_EH|LF_CORDB, LL_INFO100, "COMPlusUnwindCallback: Skip interception until unwinding reaches the actual handler protected by COMPlusNestedExceptionHandler\n");
+ }
+ else
+ {
+ //
+ // Pop off all the Exception information up to this point in the stack
+ //
+ UnwindExceptionTrackerAndResumeInInterceptionFrame(pExInfo, &context);
+ }
+ return SWA_ABORT;
+#endif // DEBUGGING_SUPPORTED
+} // StackWalkAction COMPlusUnwindCallback ()
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
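For reference, the clause-range test applied in COMPlusUnwindCallback above can be restated as a small C++ helper. This is a hedged sketch only: the name OffsetInsideTry and the free-standing form are editorial assumptions; the real code inlines the comparisons against EHClause.TryStartPC/TryEndPC.

    // Hedged sketch of the try-range test; the comparisons mirror the code above.
    static bool OffsetInsideTry(unsigned offs, unsigned tryStart, unsigned tryEnd,
                                bool startAdjust, bool endAdjust)
    {
        if (offs < tryStart || offs > tryEnd)
            return false;                        // clearly outside [tryStart, tryEnd]
        if (offs == tryStart && startAdjust)
            return false;                        // a call just preceding the try block
        if (offs == tryEnd && endAdjust)
            return false;                        // the end point is excluded for the active frame
        return true;
    }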
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning (disable : 4740) // There is inline asm code in this function, which disables
+ // global optimizations.
+#pragma warning (disable : 4731)
+#endif
+void ResumeAtJitEH(CrawlFrame* pCf,
+ BYTE* startPC,
+ EE_ILEXCEPTION_CLAUSE *EHClausePtr,
+ DWORD nestingLevel,
+ Thread *pThread,
+ BOOL unwindStack)
+{
+ // No dynamic contract here because this function doesn't return and destructors wouldn't be executed
+ WRAPPER_NO_CONTRACT;
+
+ EHContext context;
+
+ context.Setup(PCODE(startPC + EHClausePtr->HandlerStartPC), pCf->GetRegisterSet());
+
+ size_t * pShadowSP = NULL; // Write Esp to *pShadowSP before jumping to handler
+ size_t * pHandlerEnd = NULL;
+
+ OBJECTREF throwable = PossiblyUnwrapThrowable(pThread->GetThrowable(), pCf->GetAssembly());
+
+ pCf->GetCodeManager()->FixContext(ICodeManager::CATCH_CONTEXT,
+ &context,
+ pCf->GetCodeInfo(),
+ EHClausePtr->HandlerStartPC,
+ nestingLevel,
+ throwable,
+ pCf->GetCodeManState(),
+ &pShadowSP,
+ &pHandlerEnd);
+
+ if (pHandlerEnd)
+ {
+ *pHandlerEnd = EHClausePtr->HandlerEndPC;
+ }
+
+ // save esp so that endcatch can restore it (it always restores, so want correct value)
+ ExInfo* pExInfo = &(pThread->GetExceptionState()->m_currentExInfo);
+ pExInfo->m_dEsp = (LPVOID)context.GetSP();
+ LOG((LF_EH, LL_INFO1000, "ResumeAtJitEH: current m_dEsp set to %p\n", context.GetSP()));
+
+ PVOID dEsp = GetCurrentSP();
+
+ if (!unwindStack)
+ {
+ // If we don't want to unwind the stack, then the guard page had better not be gone!
+ _ASSERTE(pThread->DetermineIfGuardPagePresent());
+
+ // so down below won't really update esp
+ context.SetSP(dEsp);
+ pExInfo->m_pShadowSP = pShadowSP; // so that endcatch can zero it back
+
+ if (pShadowSP)
+ {
+ *pShadowSP = (size_t)dEsp;
+ }
+ }
+ else
+ {
+ // so shadow SP has the real SP as we are going to unwind the stack
+ dEsp = (LPVOID)context.GetSP();
+
+ // BEGIN: pExInfo->UnwindExInfo(dEsp);
+ ExInfo *pPrevNestedInfo = pExInfo->m_pPrevNestedInfo;
+
+ while (pPrevNestedInfo && pPrevNestedInfo->m_StackAddress < dEsp)
+ {
+ LOG((LF_EH, LL_INFO1000, "ResumeAtJitEH: popping nested ExInfo at 0x%p\n", pPrevNestedInfo->m_StackAddress));
+
+ pPrevNestedInfo->DestroyExceptionHandle();
+ pPrevNestedInfo->m_StackTraceInfo.FreeStackTrace();
+
+#ifdef DEBUGGING_SUPPORTED
+ if (g_pDebugInterface != NULL)
+ {
+ g_pDebugInterface->DeleteInterceptContext(pPrevNestedInfo->m_DebuggerExState.GetDebuggerInterceptContext());
+ }
+#endif // DEBUGGING_SUPPORTED
+
+ pPrevNestedInfo = pPrevNestedInfo->m_pPrevNestedInfo;
+ }
+
+ pExInfo->m_pPrevNestedInfo = pPrevNestedInfo;
+
+ _ASSERTE(pExInfo->m_pPrevNestedInfo == 0 || pExInfo->m_pPrevNestedInfo->m_StackAddress >= dEsp);
+
+ // Before we unwind the SEH records, get the Frame from the top-most nested exception record.
+ Frame* pNestedFrame = GetCurrFrame(FindNestedEstablisherFrame(GetCurrentSEHRecord()));
+
+ PopNestedExceptionRecords((LPVOID)(size_t)dEsp);
+
+ EXCEPTION_REGISTRATION_RECORD* pNewBottomMostHandler = GetCurrentSEHRecord();
+
+ pExInfo->m_pShadowSP = pShadowSP;
+
+ // The context and exception record are no longer any good.
+ _ASSERTE(pExInfo->m_pContext < dEsp); // It must be off the top of the stack.
+ pExInfo->m_pContext = 0; // Whack it.
+ pExInfo->m_pExceptionRecord = 0;
+ pExInfo->m_pExceptionPointers = 0;
+
+ // We're going to put one nested record back on the stack before we resume. This is
+ // where it goes.
+ NestedHandlerExRecord *pNestedHandlerExRecord = (NestedHandlerExRecord*)((BYTE*)dEsp - ALIGN_UP(sizeof(NestedHandlerExRecord), STACK_ALIGN_SIZE));
+
+ // The point of no return. The next statement starts scribbling on the stack. It's
+ // deep enough that we won't hit our own locals. (That's important, 'cuz we're still
+ // using them.)
+ //
+ _ASSERTE(dEsp > &pCf);
+ pNestedHandlerExRecord->m_handlerInfo.m_hThrowable=NULL; // This is random memory. Handle
+ // must be initialized to null before
+ // calling Init(), as Init() will try
+ // to free any old handle.
+ pNestedHandlerExRecord->Init((PEXCEPTION_ROUTINE)COMPlusNestedExceptionHandler, pNestedFrame);
+
+ INSTALL_EXCEPTION_HANDLING_RECORD(&(pNestedHandlerExRecord->m_ExReg));
+
+ context.SetSP(pNestedHandlerExRecord);
+
+ // We might have moved the bottom-most handler. The nested record itself is never
+ // the bottom-most handler -- it's pushed after the fact. So we have to make the
+ // bottom-most handler the one BEFORE the nested record.
+ if (pExInfo->m_pBottomMostHandler < pNewBottomMostHandler)
+ {
+ STRESS_LOG3(LF_EH, LL_INFO10000, "ResumeAtJitEH: setting ExInfo:0x%p m_pBottomMostHandler from 0x%p to 0x%p\n",
+ pExInfo, pExInfo->m_pBottomMostHandler, pNewBottomMostHandler);
+ pExInfo->m_pBottomMostHandler = pNewBottomMostHandler;
+ }
+
+ if (pShadowSP)
+ {
+ *pShadowSP = context.GetSP();
+ }
+ }
+
+ STRESS_LOG3(LF_EH, LL_INFO100, "ResumeAtJitEH: resuming at EIP = %p ESP = %p EBP = %p\n",
+ context.Eip, context.GetSP(), context.GetFP());
+
+#ifdef STACK_GUARDS_DEBUG
+ // We are transitioning back to managed code, so ensure that we are in
+ // SO-tolerant mode before we do so.
+ RestoreSOToleranceState();
+#endif
+
+ // We want this to happen as late as possible, but certainly after the notification
+ // that the handle for the current ExInfo has been freed has been delivered.
+ pExInfo->m_EHClauseInfo.SetManagedCodeEntered(TRUE);
+
+ ETW::ExceptionLog::ExceptionCatchBegin(pCf->GetCodeInfo()->GetMethodDesc(), (PVOID)pCf->GetCodeInfo()->GetStartAddress());
+
+ ResumeAtJitEHHelper(&context);
+ UNREACHABLE_MSG("Should never return from ResumeAtJitEHHelper!");
+
+ // we do not set pExInfo->m_EHClauseInfo.m_fManagedCodeEntered = FALSE here,
+ // that happens when the catch clause calls back to COMPlusEndCatch
+ // we don't return to this point so it would be moot (see unreachable_msg above)
+
+}
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+// Must be in a separate function because INSTALL_COMPLUS_EXCEPTION_HANDLER has a filter
+int CallJitEHFilterWorker(size_t *pShadowSP, EHContext *pContext)
+{
+ STATIC_CONTRACT_THROWS;
+ STATIC_CONTRACT_GC_TRIGGERS;
+ STATIC_CONTRACT_MODE_COOPERATIVE;
+ STATIC_CONTRACT_SO_INTOLERANT;
+
+ int retVal = EXCEPTION_CONTINUE_SEARCH;
+
+ BEGIN_CALL_TO_MANAGED();
+
+ retVal = CallJitEHFilterHelper(pShadowSP, pContext);
+
+ END_CALL_TO_MANAGED();
+
+ return retVal;
+}
+
+int CallJitEHFilter(CrawlFrame* pCf, BYTE* startPC, EE_ILEXCEPTION_CLAUSE *EHClausePtr, DWORD nestingLevel, OBJECTREF thrownObj)
+{
+ STATIC_CONTRACT_THROWS;
+ STATIC_CONTRACT_GC_TRIGGERS;
+ STATIC_CONTRACT_MODE_COOPERATIVE;
+
+ int retVal = EXCEPTION_CONTINUE_SEARCH;
+ size_t * pShadowSP = NULL;
+ EHContext context;
+
+ context.Setup(PCODE(startPC + EHClausePtr->FilterOffset), pCf->GetRegisterSet());
+
+ size_t * pEndFilter = NULL; // Write
+ pCf->GetCodeManager()->FixContext(ICodeManager::FILTER_CONTEXT, &context, pCf->GetCodeInfo(),
+ EHClausePtr->FilterOffset, nestingLevel, thrownObj, pCf->GetCodeManState(),
+ &pShadowSP, &pEndFilter);
+
+ // End of the filter is the same as start of handler
+ if (pEndFilter)
+ {
+ *pEndFilter = EHClausePtr->HandlerStartPC;
+ }
+
+ // ExceptionFilterFrame serves two purposes:
+ //
+ // 1. It serves as a frame that stops the managed search for a handler
+ // if we fault in the filter. ThrowCallbackType.pTopFrame is going to point
+ // to this frame during the search for an exception handler inside the filter.
+ // The search for a handler needs a frame to stop at. If we had no frame here,
+ // exceptions in filters would not be swallowed correctly since we would
+ // walk past the EX_TRY/EX_CATCH block in COMPlusThrowCallbackHelper.
+ //
+ // 2. It allows setting of SHADOW_SP_FILTER_DONE flag in UnwindFrames()
+ // if we fault in the filter. We have to set this flag together with the unwinding
+ // of the filter frame. Using a regular C++ holder to clear this flag here would cause
+ // GC holes. The stack would be in an inconsistent state when we trigger a GC just before
+ // returning from UnwindFrames.
+
+ FrameWithCookie<ExceptionFilterFrame> exceptionFilterFrame(pShadowSP);
+
+ ETW::ExceptionLog::ExceptionFilterBegin(pCf->GetCodeInfo()->GetMethodDesc(), (PVOID)pCf->GetCodeInfo()->GetStartAddress());
+
+ retVal = CallJitEHFilterWorker(pShadowSP, &context);
+
+ ETW::ExceptionLog::ExceptionFilterEnd();
+
+ exceptionFilterFrame.Pop();
+
+ return retVal;
+}
+
+void CallJitEHFinally(CrawlFrame* pCf, BYTE* startPC, EE_ILEXCEPTION_CLAUSE *EHClausePtr, DWORD nestingLevel)
+{
+ WRAPPER_NO_CONTRACT;
+
+ EHContext context;
+ context.Setup(PCODE(startPC + EHClausePtr->HandlerStartPC), pCf->GetRegisterSet());
+
+ size_t * pShadowSP = NULL; // Write Esp to *pShadowSP before jumping to handler
+
+ size_t * pFinallyEnd = NULL;
+ pCf->GetCodeManager()->FixContext(
+ ICodeManager::FINALLY_CONTEXT, &context, pCf->GetCodeInfo(),
+ EHClausePtr->HandlerStartPC, nestingLevel, ObjectToOBJECTREF((Object *) NULL), pCf->GetCodeManState(),
+ &pShadowSP, &pFinallyEnd);
+
+ if (pFinallyEnd)
+ {
+ *pFinallyEnd = EHClausePtr->HandlerEndPC;
+ }
+
+ ETW::ExceptionLog::ExceptionFinallyBegin(pCf->GetCodeInfo()->GetMethodDesc(), (PVOID)pCf->GetCodeInfo()->GetStartAddress());
+
+ CallJitEHFinallyHelper(pShadowSP, &context);
+
+ ETW::ExceptionLog::ExceptionFinallyEnd();
+
+ //
+ // Update the registers using new context
+ //
+ // This is necessary to reflect GC pointer changes made in the middle of an unwind inside a
+ // finally clause, because:
+ // 1. the GC won't see the part of the stack inside the try (which has thrown an exception) that is
+ // already unwound, and thus won't update GC pointers for that portion of the stack, but rather
+ // only for the call stack in the finally.
+ // 2. upon return from the finally, the unwind process continues and unwinds the stack based on the
+ // part of the stack inside the try, and won't see the updated values in the finally.
+ // As a result, we need to manually update the context using register values upon return from the finally.
+ //
+ // Note that we only update the registers for finally clauses because
+ // 1. for filter handlers, the stack walker is able to see the whole stack (including the try part)
+ // with the help of ExceptionFilterFrame, as filter handlers are called in the first pass
+ // 2. for catch handlers, the current unwinding is already finished
+ //
+ context.UpdateFrame(pCf->GetRegisterSet());
+
+ // This does not need to be guarded by a holder because the frame is dead if an exception gets thrown. Filters are different
+ // since they are run in the first pass, so we must update the shadowSP reset in CallJitEHFilter.
+ if (pShadowSP) {
+ *pShadowSP = 0; // reset the shadowSP to 0
+ }
+}
+#if defined(_MSC_VER)
+#pragma warning (default : 4731)
+#endif
+
+//=====================================================================
+// *********************************************************************
+BOOL ComPlusFrameSEH(EXCEPTION_REGISTRATION_RECORD* pEHR)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ return ((LPVOID)pEHR->Handler == (LPVOID)COMPlusFrameHandler || (LPVOID)pEHR->Handler == (LPVOID)COMPlusNestedExceptionHandler);
+}
+
+
+//
+//-------------------------------------------------------------------------
+// This is installed when we call COMPlusFrameHandler to provide a bound to
+// determine when we are within a nested exception
+//-------------------------------------------------------------------------
+EXCEPTION_HANDLER_IMPL(COMPlusNestedExceptionHandler)
+{
+ WRAPPER_NO_CONTRACT;
+
+ if (pExceptionRecord->ExceptionFlags & (EXCEPTION_UNWINDING | EXCEPTION_EXIT_UNWIND))
+ {
+ LOG((LF_EH, LL_INFO100, " COMPlusNestedHandler(unwind) with %x at %x\n", pExceptionRecord->ExceptionCode,
+ pContext ? GetIP(pContext) : 0));
+
+
+ // We're unwinding past a nested exception record, which means that we've thrown
+ // a new exception out of a region in which we're handling a previous one. The
+ // previous exception is overridden -- and needs to be unwound.
+
+ // The preceding is ALMOST true. There is one more case, where we use setjmp/longjmp
+ // from withing a nested handler. We won't have a nested exception in that case -- just
+ // the unwind.
+
+ Thread* pThread = GetThread();
+ _ASSERTE(pThread);
+ ExInfo* pExInfo = &(pThread->GetExceptionState()->m_currentExInfo);
+ ExInfo* pPrevNestedInfo = pExInfo->m_pPrevNestedInfo;
+
+ if (pPrevNestedInfo == &((NestedHandlerExRecord*)pEstablisherFrame)->m_handlerInfo)
+ {
+ _ASSERTE(pPrevNestedInfo);
+
+ LOG((LF_EH, LL_INFO100, "COMPlusNestedExceptionHandler: PopExInfo(): popping nested ExInfo at 0x%p\n", pPrevNestedInfo));
+
+ pPrevNestedInfo->DestroyExceptionHandle();
+ pPrevNestedInfo->m_StackTraceInfo.FreeStackTrace();
+
+#ifdef DEBUGGING_SUPPORTED
+ if (g_pDebugInterface != NULL)
+ {
+ g_pDebugInterface->DeleteInterceptContext(pPrevNestedInfo->m_DebuggerExState.GetDebuggerInterceptContext());
+ }
+#endif // DEBUGGING_SUPPORTED
+
+ pExInfo->m_pPrevNestedInfo = pPrevNestedInfo->m_pPrevNestedInfo;
+
+ } else {
+ // The whacky setjmp/longjmp case. Nothing to do.
+ }
+
+ } else {
+ LOG((LF_EH, LL_INFO100, " InCOMPlusNestedHandler with %x at %x\n", pExceptionRecord->ExceptionCode,
+ pContext ? GetIP(pContext) : 0));
+ }
+
+
+ // There is a nasty "gotcha" in the way exception unwinding, finally's, and nested exceptions
+ // interact. Here's the scenario ... it involves two exceptions, one normal one, and one
+ // raised in a finally.
+ //
+ // The first exception occurs, and is caught by some handler way up the stack. That handler
+ // calls RtlUnwind -- and handlers that didn't catch this first exception are called again, with
+ // the UNWIND flag set. If, one of the handlers throws an exception during
+ // unwind (like, a throw from a finally) -- then that same handler is not called during
+ // the unwind pass of the second exception. [ASIDE: It is called on first-pass.]
+ //
+ // What that means is -- the COMPlusExceptionHandler, can't count on unwinding itself correctly
+ // if an exception is thrown from a finally. Instead, it relies on the NestedExceptionHandler
+ // that it pushes for this.
+ //
+
+ EXCEPTION_DISPOSITION retval = EXCEPTION_HANDLER_FWD(COMPlusFrameHandler);
+ LOG((LF_EH, LL_INFO100, "Leaving COMPlusNestedExceptionHandler with %d\n", retval));
+ return retval;
+}
+
+EXCEPTION_REGISTRATION_RECORD *FindNestedEstablisherFrame(EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ while (pEstablisherFrame->Handler != (PEXCEPTION_ROUTINE)COMPlusNestedExceptionHandler) {
+ pEstablisherFrame = pEstablisherFrame->Next;
+ _ASSERTE(pEstablisherFrame != EXCEPTION_CHAIN_END); // should always find one
+ }
+ return pEstablisherFrame;
+}
+
+EXCEPTION_HANDLER_IMPL(FastNExportExceptHandler)
+{
+ WRAPPER_NO_CONTRACT;
+
+ // Most of our logic is in common with COMPlusFrameHandler.
+ EXCEPTION_DISPOSITION retval = EXCEPTION_HANDLER_FWD(COMPlusFrameHandler);
+
+#ifdef _DEBUG
+ // If the exception is escaping the last CLR personality routine on the stack,
+ // then set a flag on the thread to indicate so.
+ if (retval == ExceptionContinueSearch)
+ {
+ SetReversePInvokeEscapingUnhandledExceptionStatus(IS_UNWINDING(pExceptionRecord->ExceptionFlags), pEstablisherFrame);
+ }
+#endif // _DEBUG
+
+ return retval;
+}
+
+
+// Just like a regular NExport handler -- except it pops an extra frame on unwind. A handler
+// like this is needed by the COMMethodStubProlog code. It first pushes a frame -- and then
+// pushes a handler. When we unwind, we need to pop the extra frame to avoid corrupting the
+// frame chain in the event of an unmanaged catcher.
+//
+EXCEPTION_HANDLER_IMPL(UMThunkPrestubHandler)
+{
+ // @todo: we'd like to have a dynamic contract here, but there's a problem (Bug 129180). Entering the CRST used
+ // in HandleManagedFault leaves the no-trigger count incremented. The destructor of this contract will restore
+ // it to zero, and then when we leave the CRST in LinkFrameAndThrow, we assert because we're trying to decrement the
+ // gc-trigger count down past zero. The solution is to fix what we're doing with this CRST.
+ STATIC_CONTRACT_THROWS; // COMPlusFrameHandler throws
+ STATIC_CONTRACT_GC_TRIGGERS;
+ STATIC_CONTRACT_MODE_ANY;
+
+ EXCEPTION_DISPOSITION retval = ExceptionContinueSearch;
+
+ BEGIN_CONTRACT_VIOLATION(SOToleranceViolation);
+
+ // We must forward to the COMPlusFrameHandler. This will unwind the Frame Chain up to here, and also leave the
+ // preemptive GC mode set correctly.
+ retval = EXCEPTION_HANDLER_FWD(COMPlusFrameHandler);
+
+#ifdef _DEBUG
+ // If the exception is escaping the last CLR personality routine on the stack,
+ // then set a flag on the thread to indicate so.
+ if (retval == ExceptionContinueSearch)
+ {
+ SetReversePInvokeEscapingUnhandledExceptionStatus(IS_UNWINDING(pExceptionRecord->ExceptionFlags), pEstablisherFrame);
+ }
+#endif // _DEBUG
+
+ if (IS_UNWINDING(pExceptionRecord->ExceptionFlags))
+ {
+ // Pops an extra frame on unwind.
+
+ GCX_COOP(); // Must be cooperative to modify frame chain.
+
+ Thread *pThread = GetThread();
+ _ASSERTE(pThread);
+ Frame *pFrame = pThread->GetFrame();
+ pFrame->ExceptionUnwind();
+ pFrame->Pop(pThread);
+ }
+
+ END_CONTRACT_VIOLATION;
+
+ return retval;
+}
+
+LONG CLRNoCatchHandler(EXCEPTION_POINTERS* pExceptionInfo, PVOID pv)
+{
+ WRAPPER_NO_CONTRACT;
+ STATIC_CONTRACT_ENTRY_POINT;
+
+ LONG result = EXCEPTION_CONTINUE_SEARCH;
+
+ // This function can be called during the handling of a SO
+ //BEGIN_ENTRYPOINT_VOIDRET;
+
+ result = CLRVectoredExceptionHandler(pExceptionInfo);
+
+ if (EXCEPTION_EXECUTE_HANDLER == result)
+ {
+ result = EXCEPTION_CONTINUE_SEARCH;
+ }
+
+ //END_ENTRYPOINT_VOIDRET;
+
+ return result;
+}
+
+#ifdef FEATURE_COMINTEROP
+// The reverse COM interop path needs to be sure to pop the ComMethodFrame that is pushed, but we do not want
+// to have an additional FS:0 handler between the COM callsite and the call into managed. So we push this
+// FS:0 handler, which will defer to the usual COMPlusFrameHandler and then perform the cleanup of the
+// ComMethodFrame, if needed.
+EXCEPTION_HANDLER_IMPL(COMPlusFrameHandlerRevCom)
+{
+ STATIC_CONTRACT_THROWS;
+ STATIC_CONTRACT_GC_TRIGGERS;
+ STATIC_CONTRACT_MODE_ANY;
+
+ // Defer to COMPlusFrameHandler
+ EXCEPTION_DISPOSITION result = EXCEPTION_HANDLER_FWD(COMPlusFrameHandler);
+
+ if (pExceptionRecord->ExceptionFlags & (EXCEPTION_UNWINDING | EXCEPTION_EXIT_UNWIND))
+ {
+ // Do cleanup as needed
+ ComMethodFrame::DoSecondPassHandlerCleanup(GetCurrFrame(pEstablisherFrame));
+ }
+
+ return result;
+}
+#endif // FEATURE_COMINTEROP
+
+
+// Returns TRUE if caller should resume execution.
+BOOL
+AdjustContextForVirtualStub(
+ EXCEPTION_RECORD *pExceptionRecord,
+ CONTEXT *pContext)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ Thread * pThread = GetThread();
+
+ // We may not have a managed thread object. Example is an AV on the helper thread.
+ // (perhaps during StubManager::IsStub)
+ if (pThread == NULL)
+ {
+ return FALSE;
+ }
+
+ PCODE f_IP = GetIP(pContext);
+
+ VirtualCallStubManager::StubKind sk;
+ /* VirtualCallStubManager *pMgr = */ VirtualCallStubManager::FindStubManager(f_IP, &sk);
+
+ if (sk == VirtualCallStubManager::SK_DISPATCH)
+ {
+ if (*PTR_WORD(f_IP) != X86_INSTR_CMP_IND_ECX_IMM32)
+ {
+ _ASSERTE(!"AV in DispatchStub at unknown instruction");
+ return FALSE;
+ }
+ }
+ else
+ if (sk == VirtualCallStubManager::SK_RESOLVE)
+ {
+ if (*PTR_WORD(f_IP) != X86_INSTR_MOV_EAX_ECX_IND)
+ {
+ _ASSERTE(!"AV in ResolveStub at unknown instruction");
+ return FALSE;
+ }
+
+ SetSP(pContext, dac_cast<PCODE>(dac_cast<PTR_BYTE>(GetSP(pContext)) + sizeof(void*))); // rollback push eax
+ }
+ else
+ {
+ return FALSE;
+ }
+
+ PCODE callsite = GetAdjustedCallAddress(*dac_cast<PTR_PCODE>(GetSP(pContext)));
+ pExceptionRecord->ExceptionAddress = (PVOID)callsite;
+ SetIP(pContext, callsite);
+
+ // put ESP back to what it was before the call.
+ SetSP(pContext, dac_cast<PCODE>(dac_cast<PTR_BYTE>(GetSP(pContext)) + sizeof(void*)));
+
+ return TRUE;
+}
+
+#endif // !DACCESS_COMPILE
diff --git a/src/vm/i386/fptext.asm b/src/vm/i386/fptext.asm
new file mode 100644
index 0000000000..2190d18519
--- /dev/null
+++ b/src/vm/i386/fptext.asm
@@ -0,0 +1,277 @@
+; Licensed to the .NET Foundation under one or more agreements.
+; The .NET Foundation licenses this file to you under the MIT license.
+; See the LICENSE file in the project root for more information.
+
+; ==++==
+;
+
+;
+; ==--==
+ .386
+ .model flat
+
+ option casemap:none
+ public _DoubleToNumber,_NumberToDouble
+
+; NUMBER structure
+
+nPrecision equ (dword ptr 0)
+nScale equ (dword ptr 4)
+nSign equ (dword ptr 8)
+nDigits equ (word ptr 12)
+
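For readability, the offsets above correspond to a layout along these lines. This is a hedged C++ view only: the runtime's actual NUMBER definition lives elsewhere, and the digit-buffer length shown here is illustrative.

    struct NUMBER {            // hedged sketch of the layout implied by the offsets above
        int     precision;     // offset 0  (nPrecision)
        int     scale;         // offset 4  (nScale)
        int     sign;          // offset 8  (nSign), non-zero for negative values
        wchar_t digits[21];    // offset 12 (nDigits), NUL-terminated digit characters
    };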
+ .code
+
+; Powers of 10 from 1.0E1 to 1.0E15 increasing by 1
+
+Pow10By1 label tbyte
+
+ dt 1.0E1
+ dt 1.0E2
+ dt 1.0E3
+ dt 1.0E4
+ dt 1.0E5
+ dt 1.0E6
+ dt 1.0E7
+ dt 1.0E8
+ dt 1.0E9
+ dt 1.0E10
+ dt 1.0E11
+ dt 1.0E12
+ dt 1.0E13
+ dt 1.0E14
+ dt 1.0E15
+
+; Powers of 10 from 1.0E16 to 1.0E336 increasing by 16
+
+Pow10By16 label tbyte
+
+ dt 1.0E16
+ dt 1.0E32
+ dt 1.0E48
+ dt 1.0E64
+ dt 1.0E80
+ dt 1.0E96
+ dt 1.0E112
+ dt 1.0E128
+ dt 1.0E144
+ dt 1.0E160
+ dt 1.0E176
+ dt 1.0E192
+ dt 1.0E208
+ dt 1.0E224
+ dt 1.0E240
+ dt 1.0E256
+ dt 1.0E272
+ dt 1.0E288
+ dt 1.0E304
+ dt 1.0E320
+ dt 1.0E336
+
+; Single precision constants
+
+Single10 dd 10.0
+SingleINF dd 7F800000H
+
+g_CwStd dw 137fH ;Mask all errors, 64-bit, round near
+
+; void _cdecl DoubleToNumber(double value, int precision, NUMBER* number)
+
+_DoubleToNumber proc
+
+value equ (qword ptr [ebp+8])
+precision equ (dword ptr [ebp+16])
+number equ (dword ptr [ebp+20])
+paramSize = 16
+
+cwsave equ (word ptr [ebp-24])
+digits equ (tbyte ptr [ebp-20])
+temp equ (tbyte ptr [ebp-10])
+localSize = 24
+
+ push ebp
+ mov ebp,esp
+ sub esp,localSize
+ push edi
+ push ebx
+ fnstcw cwsave
+ fldcw g_CwStd
+ fld value
+ fstp temp
+ mov edi,number
+ mov eax,precision
+ mov nPrecision[edi],eax
+ movzx eax,word ptr temp[8]
+ mov edx,eax
+ shr edx,15
+ mov nSign[edi],edx
+ and eax,7FFFH
+ je DN1
+ cmp eax,7FFFH
+ jne DN10
+ mov eax,80000000H
+ cmp dword ptr temp[4],eax
+ jne DN1
+ cmp dword ptr temp[0],0
+ jne DN1
+ dec eax
+DN1: mov nScale[edi],eax
+ mov nDigits[edi],0
+ jmp DN30
+DN10: fld value
+ sub eax,16382+58 ;Remove bias and 58 bits
+ imul eax,19728 ;log10(2) * 2^16 = .30103 * 65536
+ add eax,0FFFFH ;Round up
+ sar eax,16 ;Only use high half
+ lea edx,[eax+18]
+ mov nScale[edi],edx
+ neg eax
+ call ScaleByPow10
+ fbstp digits
+ xor eax,eax
+ xor ebx,ebx
+ mov ecx,precision
+ inc ecx
+ mov edx,8
+ mov al,byte ptr digits[8]
+ test al,0F0H
+ jne DN11
+ dec nScale[edi]
+ jmp DN12
+DN11: shr al,4
+ dec ecx
+ je DN20
+ add al,'0'
+ mov nDigits[edi+ebx*2],ax
+ inc ebx
+ mov al,byte ptr digits[edx]
+DN12: and al,0FH
+ dec ecx
+ je DN20
+ add al,'0'
+ mov nDigits[edi+ebx*2],ax
+ inc ebx
+ dec edx
+ jl DN22 ; We've run out of digits & don't have a rounding digit, so we'll skip the rounding step.
+ mov al,byte ptr digits[edx]
+ jmp DN11
+DN20: cmp al,5
+ jb DN22
+DN21: dec ebx
+ inc nDigits[edi+ebx*2]
+ cmp nDigits[edi+ebx*2],'9'
+ jbe DN23
+ or ebx,ebx
+ jne DN21
+ mov nDigits[edi+ebx*2],'1'
+ inc nScale[edi]
+ jmp DN23
+DN22: dec ebx
+ cmp nDigits[edi+ebx*2],'0'
+ je DN22
+DN23: mov nDigits[edi+ebx*2+2],0
+DN30:
+ fldcw cwsave ;;Restore original CW
+ pop ebx
+ pop edi
+ mov esp,ebp
+ pop ebp
+ ret ;made _cdecl for WinCE paramSize
+
+_DoubleToNumber endp
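The exponent estimate at DN10 is a small piece of fixed-point arithmetic; here is a hedged C++ restatement. EstimateDecimalExponent is an editorial name, and the signed right shift stands in for the sar used above.

    // Hedged restatement of the DN10 exponent estimate: strip the x87 bias plus 58
    // extra bits, multiply by log10(2) in 16.16 fixed point (19728 ~= 0.30103 * 65536),
    // and round up, approximating the decimal exponent of the value.
    static int EstimateDecimalExponent(int binExp)   // binExp: raw 80-bit exponent field
    {
        return ((binExp - (16382 + 58)) * 19728 + 0xFFFF) >> 16;
    }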
+
+; void _cdecl NumberToDouble(NUMBER* number, double* value)
+_NumberToDouble proc
+
+number equ (dword ptr [ebp+8])
+value equ (dword ptr [ebp+12])
+paramSize = 8
+
+cwsave equ (word ptr [ebp-8])
+temp equ (dword ptr [ebp-4])
+localSize = 8
+
+ push ebp
+ mov ebp,esp ; Save the stack ptr
+ sub esp,localSize ;
+ fnstcw cwsave
+ fldcw g_CwStd
+ fldz ; zero the register
+ mov ecx,number ; load the NUMBER pointer into ecx
+ xor edx,edx ; clear edx
+ cmp dx,nDigits[ecx] ; if the first digit is 0 goto SignResult
+ je SignResult
+ mov eax,nScale[ecx] ; store the scale in eax
+ cmp eax,-330 ; if the scale is less than or equal to -330 goto Cleanup
+ jle Cleanup
+ cmp eax,310 ; if the scale is less than 310, goto ParseDigits
+ jl ParseDigits
+ fstp st(0) ; pop the 0.0 off the floating point stack
+ fld SingleINF ; Load infinity
+ jmp SignResult ; Goto SignResult
+ParseDigits:
+ movzx eax,nDigits[ecx+edx*2]; load the character at nDigits[edx];
+ sub eax,'0' ; subtract '0'
+ jc ScaleResult ; jump to ScaleResult if the character was below '0' (e.g. the terminator)
+ mov temp,eax ; store the digit value in temp
+ fmul Single10 ; Multiply by 10
+ fiadd temp ; Add the digit which we just found
+ inc edx ; increment the counter
+ cmp edx,18 ; if (edx < 18) goto ParseDigits
+ jb ParseDigits
+ScaleResult:
+ mov eax,nScale[ecx] ; eax = scale
+ sub eax,edx ; scale -= (number of digits)
+ call ScaleByPow10 ; multiply the result by 10^scale
+SignResult:
+ cmp nSign[ecx],0 ; If the sign is 0 already go to Cleanup, otherwise change the sign.
+ je Cleanup
+ fchs
+Cleanup:
+ mov edx,value ; load the output pointer into edx
+ fstp qword ptr [edx] ; store the result from the fp stack into *value
+ fldcw cwsave ; Restore original CW
+ mov esp,ebp ; restore the stack frame & exit.
+ pop ebp
+ ret ;Made _cdecl for WinCE paramSize
+
+_NumberToDouble endp
+
+; Scale st(0) by 10^eax
+
+ScaleByPow10 proc
+ test eax,eax
+ je SP2
+ jl SP3
+ mov edx,eax
+ and edx,0FH
+ je SP1
+ lea edx,[edx+edx*4]
+ fld Pow10By1[edx*2-10]
+ fmul
+SP1: mov edx,eax
+ shr edx,4
+ test edx, edx ; remove partial flag stall caused by shr
+ je SP2
+ lea edx,[edx+edx*4]
+ fld Pow10By16[edx*2-10]
+ fmul
+SP2: ret
+SP3: neg eax
+ mov edx,eax
+ and edx,0FH
+ je SP4
+ lea edx,[edx+edx*4]
+ fld Pow10By1[edx*2-10]
+ fdiv
+SP4: mov edx,eax
+ shr edx,4
+ test edx, edx ; remove partial flag stall caused by shr
+ je SP5
+ lea edx,[edx+edx*4]
+ fld Pow10By16[edx*2-10]
+ fdiv
+SP5: ret
+ScaleByPow10 endp
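The same scaling scheme, restated as a hedged C++ sketch: the function name is editorial, std::pow stands in for the Pow10By1/Pow10By16 table lookups, and long double stands in for the 80-bit x87 stack.

    // Hedged sketch of ScaleByPow10: the exponent is split into a low nibble
    // (Pow10By1, steps of 1) and a high nibble (Pow10By16, steps of 16), so at
    // most two multiplies or divides are needed; negative exponents divide.
    #include <cmath>

    static long double ScaleByPow10Sketch(long double x, int n)
    {
        bool        negative = n < 0;
        unsigned    e        = negative ? static_cast<unsigned>(-n) : static_cast<unsigned>(n);
        long double factor   = 1.0L;
        if (e & 0xF)                      // Pow10By1[(e & 0xF) - 1]
            factor *= std::pow(10.0L, static_cast<long double>(e & 0xF));
        if (e >> 4)                       // Pow10By16[(e >> 4) - 1]
            factor *= std::pow(10.0L, 16.0L * static_cast<long double>(e >> 4));
        return negative ? x / factor : x * factor;
    }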
+
+ end
diff --git a/src/vm/i386/gmsasm.asm b/src/vm/i386/gmsasm.asm
new file mode 100644
index 0000000000..6b6044b50d
--- /dev/null
+++ b/src/vm/i386/gmsasm.asm
@@ -0,0 +1,37 @@
+; Licensed to the .NET Foundation under one or more agreements.
+; The .NET Foundation licenses this file to you under the MIT license.
+; See the LICENSE file in the project root for more information.
+
+; ==++==
+;
+
+;
+; ==--==
+;
+; *** NOTE: If you make changes to this file, propagate the changes to
+; gmsasm.s in this directory
+
+ .586
+ .model flat
+
+include asmconstants.inc
+
+ option casemap:none
+ .code
+
+; int __fastcall LazyMachStateCaptureState(struct LazyMachState *pState);
+@LazyMachStateCaptureState@4 proc public
+ mov [ecx+MachState__pRetAddr], 0 ; marks that this is not yet valid
+ mov [ecx+MachState__edi], edi ; remember register values
+ mov [ecx+MachState__esi], esi
+ mov [ecx+MachState__ebx], ebx
+ mov [ecx+LazyMachState_captureEbp], ebp
+ mov [ecx+LazyMachState_captureEsp], esp
+
+ mov eax, [esp] ; capture return address
+ mov [ecx+LazyMachState_captureEip], eax
+ xor eax, eax
+ retn
+@LazyMachStateCaptureState@4 endp
+
+end
diff --git a/src/vm/i386/gmscpu.h b/src/vm/i386/gmscpu.h
new file mode 100644
index 0000000000..0aecefac21
--- /dev/null
+++ b/src/vm/i386/gmscpu.h
@@ -0,0 +1,140 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/**************************************************************/
+/* gmscpu.h */
+/**************************************************************/
+/* HelperFrame defines the 'GET_STATE(machState)' macro, which
+ figures out what the state of the machine will be when the
+ current method returns. It then stores the state in the
+ JIT_machState structure. */
+
+/**************************************************************/
+
+#ifndef __gmsx86_h__
+#define __gmsx86_h__
+
+#define __gmsx86_h__
+
+#ifdef _DEBUG
+class HelperMethodFrame;
+struct MachState;
+EXTERN_C MachState* STDCALL HelperMethodFrameConfirmState(HelperMethodFrame* frame, void* esiVal, void* ediVal, void* ebxVal, void* ebpVal);
+#endif
+
+ // A MachState indicates the register state of the processor at some point in time (usually
+ // just before or after a call is made). It can be made one of two ways. Either explicitly
+ // (when you for some reason know the values of all the registers), or implicitly using the
+ // GET_STATE macros.
+
+typedef DPTR(struct MachState) PTR_MachState;
+struct MachState {
+
+ MachState()
+ {
+ LIMITED_METHOD_DAC_CONTRACT;
+ INDEBUG(memset(this, 0xCC, sizeof(MachState));)
+ }
+
+ bool isValid() { LIMITED_METHOD_DAC_CONTRACT; _ASSERTE(dac_cast<TADDR>(_pRetAddr) != INVALID_POINTER_CC); return(_pRetAddr != 0); }
+ TADDR* pEdi() { LIMITED_METHOD_DAC_CONTRACT; _ASSERTE(dac_cast<TADDR>(_pEdi) != INVALID_POINTER_CC); return(_pEdi); }
+ TADDR* pEsi() { LIMITED_METHOD_DAC_CONTRACT; _ASSERTE(dac_cast<TADDR>(_pEsi) != INVALID_POINTER_CC); return(_pEsi); }
+ TADDR* pEbx() { LIMITED_METHOD_DAC_CONTRACT; _ASSERTE(dac_cast<TADDR>(_pEbx) != INVALID_POINTER_CC); return(_pEbx); }
+ TADDR* pEbp() { LIMITED_METHOD_DAC_CONTRACT; _ASSERTE(dac_cast<TADDR>(_pEbp) != INVALID_POINTER_CC); return(_pEbp); }
+ TADDR esp() { LIMITED_METHOD_DAC_CONTRACT; _ASSERTE(isValid()); return(_esp); }
+ PTR_TADDR pRetAddr() { LIMITED_METHOD_DAC_CONTRACT; _ASSERTE(isValid()); return(_pRetAddr); }
+ TADDR GetRetAddr() { LIMITED_METHOD_DAC_CONTRACT; _ASSERTE(isValid()); return *_pRetAddr; }
+#ifndef DACCESS_COMPILE
+ void SetRetAddr(TADDR* addr) { LIMITED_METHOD_CONTRACT; _ASSERTE(isValid()); _pRetAddr = addr; }
+#endif
+
+ friend class HelperMethodFrame;
+ friend class CheckAsmOffsets;
+ friend struct LazyMachState;
+#ifdef _DEBUG
+ friend MachState* STDCALL HelperMethodFrameConfirmState(HelperMethodFrame* frame, void* esiVal, void* ediVal, void* ebxVal, void* ebpVal);
+#endif
+
+
+protected:
+ // Note that the fields are laid out to make generating a
+ // MachState structure from assembly code very easy.
+
+ // The state of all the callee-saved registers.
+ // If a register has been spilled to the stack, p<REG>
+ // points at that stack location; otherwise it points
+ // at the <REG> field itself.
+ PTR_TADDR _pEdi;
+ TADDR _edi;
+ PTR_TADDR _pEsi;
+ TADDR _esi;
+ PTR_TADDR _pEbx;
+ TADDR _ebx;
+ PTR_TADDR _pEbp;
+ TADDR _ebp;
+
+ TADDR _esp; // stack pointer after the function returns
+ PTR_TADDR _pRetAddr; // The address of the stored IP address (points into the stack)
+};
+
+/********************************************************************/
+/* This allows you to defer the computation of the Machine state
+ until later. Note that we don't reuse slots, because we want
+ this to be threadsafe without locks */
+
+struct LazyMachState;
+typedef DPTR(LazyMachState) PTR_LazyMachState;
+struct LazyMachState : public MachState {
+ // Compute the machine state of the processor as it will exist just
+ // after the return, after at most 'funCallDepth' functions have returned.
+ // If 'testFtn' is non-NULL, the return address is tested at each
+ // return instruction encountered. If this test returns non-NULL,
+ // then stack walking stops (thus you can walk up to the point where the
+ // return address matches some criteria).
+
+ // Normally this is called with funCallDepth=1 and testFtn = 0 so that
+ // it returns the state of the processor after the function that called 'captureState()'
+ void setLazyStateFromUnwind(MachState* copy);
+ static void unwindLazyState(LazyMachState* baseState,
+ MachState* lazyState,
+ DWORD threadId,
+ int funCallDepth = 1,
+ HostCallPreference hostCallPreference = AllowHostCalls);
+
+ friend class HelperMethodFrame;
+ friend class CheckAsmOffsets;
+private:
+ TADDR captureEbp; // Ebp at the time of capture
+ TADDR captureEsp; // Esp at the time of capture
+ TADDR captureEip; // Eip at the time of capture
+};
+
+inline void LazyMachState::setLazyStateFromUnwind(MachState* copy)
+{
+ // _pRetAddr has to be the last thing updated when we make the copy (because it
+ // is the _pRetAddr becoming non-zero that flips this from invalid to valid).
+ // We assert that it is the last field in the struct.
+ static_assert_no_msg(offsetof(MachState, _pRetAddr) + sizeof(_pRetAddr) == sizeof(MachState));
+
+ memcpy(this, copy, offsetof(MachState, _pRetAddr));
+
+ // this has to be last
+ VolatileStore((TADDR*)&_pRetAddr, dac_cast<TADDR>(copy->_pRetAddr));
+}
+
+// Do the initial capture of the machine state. This is meant to be
+// as lightweight as possible, as we may never need the state that
+// we capture. To complete the process you need to call
+// 'getMachState()', which finishes the job.
+EXTERN_C int __fastcall LazyMachStateCaptureState(struct LazyMachState *pState);
+
+// CAPTURE_STATE captures just enough register state so that the state of the
+// processor can be determined just after the routine that has CAPTURE_STATE in
+// it returns.
+
+// Note that the return is never taken; it is there for epilog walking.
+#define CAPTURE_STATE(machState, ret) \
+ if (LazyMachStateCaptureState(machState)) ret
+
+#endif
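A hedged usage sketch of the capture/complete split described above. DoRareOperation is a hypothetical caller invented for illustration; real callers go through the HelperMethodFrame machinery rather than calling the macro in isolation.

    // Hedged usage sketch; DoRareOperation is hypothetical. CAPTURE_STATE drops a
    // cheap register snapshot at the top of the helper, and the (never taken) return
    // gives the epilog walker a short path to simulate if the full MachState is
    // ever needed via unwindLazyState.
    void DoRareOperation(LazyMachState* pState)
    {
        CAPTURE_STATE(pState, return);   // expands to: if (LazyMachStateCaptureState(pState)) return;
        // ... fast-path work; the full register state is only reconstructed on demand ...
    }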
diff --git a/src/vm/i386/gmsx86.cpp b/src/vm/i386/gmsx86.cpp
new file mode 100644
index 0000000000..e7e16b70ab
--- /dev/null
+++ b/src/vm/i386/gmsx86.cpp
@@ -0,0 +1,1245 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/**************************************************************/
+/* gmsx86.cpp */
+/**************************************************************/
+
+#include "common.h"
+#include "gmscpu.h"
+
+/***************************************************************/
+/* setMachState figures out what the state of the CPU will be
+ when the function that calls 'setMachState' returns. It stores
+ this information in 'frame'
+
+ setMachState works by simulating the execution of the
+ instructions starting at the instruction following the
+ call to 'setMachState' and continuing until a return instruction
+ is simulated. To avoid having to process arbitrary code, the
+ call to 'setMachState' should be made as follows
+
+ if (machState.setMachState != 0) return;
+
+ setMachState is guaranteed to return 0 (so the return
+ statement will never be executed), but the expression above
+ ensures that there is a 'quick' path to the epilog
+ of the function. This ensures that setMachState will only
+ have to parse a limited number of X86 instructions. */
+
+
+/***************************************************************/
+#ifndef POISONC
+#define POISONC ((sizeof(int *) == 4)?0xCCCCCCCCU:UI64(0xCCCCCCCCCCCCCCCC))
+#endif
+
+/***************************************************************/
+/* The 'zeroFtn' and 'recursiveFtn' functions are only here to determine
+ if mscorwks itself has been instrumented by a profiler
+ that intercepts calls or epilogs of functions (see the
+ callsInstrumented and epilogInstrumented functions). */
+
+#if !defined(DACCESS_COMPILE)
+
+#pragma optimize("gsy", on ) // optimize to insure that code generation does not have junk in it
+#pragma warning(disable:4717)
+
+static int __stdcall zeroFtn() {
+ return 0;
+}
+
+static int __stdcall recursiveFtn() {
+ return recursiveFtn()+1;
+}
+
+#pragma optimize("", on )
+
+
+/* Has mscorwks been instrumented so that calls are morphed into push XXXX call <helper> */
+static bool callsInstrumented() {
+ // Does the recursive function begin with push XXXX call <helper>
+ PTR_BYTE ptr = PTR_BYTE(recursiveFtn);
+
+ return (ptr[0] == 0x68 && ptr[5] == 0xe8); // PUSH XXXX, call <helper>
+}
+
+/* Has mscorwks been instrumented so that function prologs and epilogs are replaced with
+ jmp [XXXX] */
+
+static bool epilogInstrumented() {
+
+ PTR_BYTE ptr = PTR_BYTE(zeroFtn);
+ if (ptr[0] == 0xe8) // call <helper> (prolog instrumentation)
+ ptr += 5;
+ if (ptr[0] == 0x33 && ptr[1] == 0xc0) // xor eax eax
+ ptr += 2;
+ return (ptr[0] == 0xeb || ptr[0] == 0xe9); // jmp <XXXX>
+}
+
+#else
+
+ // Note that we have the callsInstrumented and epilogInstrumented
+ // functions so that the looser heuristics used for instrumented code
+ // can't foul up an instrumented mscorwks. For simplicity's sake we
+ // don't bother with this in the DAC, which means that the DAC could
+ // be misled more frequently than mscorwks itself, but I still think
+ // it will not be misled in any real scenario
+static bool callsInstrumented() { LIMITED_METHOD_DAC_CONTRACT; return true; }
+static bool epilogInstrumented() { LIMITED_METHOD_DAC_CONTRACT; return true; }
+
+#endif // !defined(DACCESS_COMPILE)
+
+/***************************************************************/
+/* Returns true if a call to 'ip' should be entered by the
+ epilog walker. Basically we are looking for things that look
+ like __SEH_epilog. In particular, we look for code that
+ pops a register before doing a push. If we see something
+ that we don't recognise, we don't consider it an epilog helper
+ and return false.
+*/
+
+static bool shouldEnterCall(PTR_BYTE ip) {
+ SUPPORTS_DAC;
+
+ int datasize; // helper variable for decoding of address modes
+ int mod; // helper variable for decoding of mod r/m
+ int rm; // helper variable for decoding of mod r/m
+
+ int pushes = 0;
+
+ // We should see unbalanced pops within 48 instructions; if not, it is not a special epilog function.
+ // The only reason we need as many instructions as we have below is that coreclr
+ // gets instrumented for profiling, code coverage, BBT etc., and we want these things to
+ // just work.
+ for (int i = 0; i < 48; i++) {
+ switch(*ip) {
+ case 0xF2: // repne
+ case 0xF3: // repe
+ ip++;
+ break;
+
+ case 0x68: // push 0xXXXXXXXX
+ ip += 5;
+
+ // For office profiler. They morph tail calls into push TARGET; jmp helper
+ // so if you see
+ //
+ // push XXXX
+ // jmp xxxx
+ //
+ // and we notice that coreclr has been instrumented and
+ // xxxx starts with a JMP [] then do what you would do for jmp XXXX
+ if (*ip == 0xE9 && callsInstrumented()) { // jmp helper
+ PTR_BYTE tmpIp = ip + 5;
+ PTR_BYTE target = tmpIp + (__int32)*((PTR_TADDR)(PTR_TO_TADDR(tmpIp) - 4));
+ if (target[0] == 0xFF && target[1] == 0x25) { // jmp [xxxx] (to external dll)
+ ip = PTR_BYTE(*((PTR_TADDR)(PTR_TO_TADDR(ip) - 4)));
+ }
+ }
+ else {
+ pushes++;
+ }
+ break;
+
+ case 0x50: // push EAX
+ case 0x51: // push ECX
+ case 0x52: // push EDX
+ case 0x53: // push EBX
+ case 0x55: // push EBP
+ case 0x56: // push ESI
+ case 0x57: // push EDI
+ pushes++;
+ ip++;
+ break;
+
+ case 0xE8: // call <disp32>
+ ip += 5;
+ pushes = 0; // This assumes that all of the previous pushes are arguments to this call
+ break;
+
+ case 0xFF:
+ if (ip[1] != 0x15) // call [XXXX] is OK (prolog of epilog helper is instrumented)
+ return false; // but everything else is not OK.
+ ip += 6;
+ pushes = 0; // This assumes that all of the previous pushes are arguments to this call
+ break;
+
+ case 0x9C: // pushfd
+ case 0x9D: // popfd
+ // a pushfd can never be an argument, so we model a pair of
+ // these instructions as not changing the stack so that a call
+ // that occurs between them does not consume the value of pushfd
+ ip++;
+ break;
+
+ case 0x5D: // pop EBP
+ case 0x5E: // pop ESI
+ case 0x5F: // pop EDI
+ case 0x5B: // pop EBX
+ case 0x58: // pop EAX
+ case 0x59: // pop ECX
+ case 0x5A: // pop EDX
+ if (pushes <= 0) {
+ // We now have more pops than pushes. This is our indication
+ // that we are in an EH_epilog function so we return true.
+ // This is the only way to exit this method with a retval of true.
+ return true;
+ }
+ --pushes;
+ ip++;
+ break;
+
+ case 0xA1: // MOV EAX, [XXXX]
+ ip += 5;
+ break;
+
+ case 0xC6: // MOV r/m8, imm8
+ datasize = 1;
+ goto decodeRM;
+
+ case 0x89: // MOV r/m, reg
+ if (ip[1] == 0xE5) // MOV EBP, ESP
+ return false;
+ if (ip[1] == 0xEC) // MOV ESP, EBP
+ return false;
+ goto move;
+
+ case 0x8B: // MOV reg, r/m
+ if (ip[1] == 0xE5) // MOV ESP, EBP
+ return false;
+ if (ip[1] == 0xEC) // MOV EBP, ESP
+ return false;
+ goto move;
+
+ case 0x88: // MOV reg, r/m (BYTE)
+ case 0x8A: // MOV r/m, reg (BYTE)
+
+ case 0x31: // XOR
+ case 0x32: // XOR
+ case 0x33: // XOR
+
+ move:
+ datasize = 0;
+
+ decodeRM:
+ // Note that we don't want to read from ip[] after
+ // we do ANY incrementing of ip
+
+ mod = (ip[1] & 0xC0) >> 6;
+ if (mod != 3) {
+ rm = (ip[1] & 0x07);
+ if (mod == 0) { // (mod == 0)
+ if (rm == 5)
+ ip += 4; // disp32
+ else if (rm == 4)
+ ip += 1; // [reg*K+reg]
+ // otherwise [reg]
+
+ }
+ else if (mod == 1) { // (mod == 1)
+ ip += 1; // for disp8
+ if (rm == 4)
+ ip += 1; // [reg*K+reg+disp8]
+ // otherwise [reg+disp8]
+ }
+ else { // (mod == 2)
+ ip += 4; // for disp32
+ if (rm == 4)
+ ip += 1; // [reg*K+reg+disp32]
+ // otherwise [reg+disp32]
+ }
+ }
+
+ ip += 2;
+ ip += datasize;
+ break;
+
+ case 0x64: // FS: prefix
+ ip++;
+ break;
+
+ case 0xEB: // jmp <disp8>
+ ip += (signed __int8) ip[1] + 2;
+ break;
+
+ case 0xE9: // jmp <disp32>
+ ip += (__int32)*PTR_DWORD(PTR_TO_TADDR(ip) + 1) + 5;
+ break;
+
+ case 0xF7: // test r/m32, imm32
+ // Magellan code coverage build
+ if ( (ip[1] & 0x38) == 0x00)
+ {
+ datasize = 4;
+ goto decodeRM;
+ }
+ else
+ {
+ return false;
+ }
+ break;
+
+ case 0x75: // jnz <target>
+ // Magellan code coverage build
+ // We always follow forward jump to avoid possible looping.
+ {
+ PTR_BYTE tmpIp = ip + (TADDR)(signed __int8) ip[1] + 2;
+ if (tmpIp > ip) {
+ ip = tmpIp; // follow forwards jump
+ }
+ else {
+ return false; // backwards jump implies not EH_epilog function
+ }
+ }
+ break;
+
+ case 0xC2: // ret
+ case 0xC3: // ret n
+ default:
+ return false;
+ }
+ }
+
+ return false;
+}
+
+
+/***************************************************************/
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable:21000) // Suppress PREFast warning about overly large function
+#endif
+
+/***************************************************************/
+// A fundamental requirement of managed code is that we need to be able to enumerate all GC references on the
+// stack at GC time. To do this we need to be able to 'crawl' the stack. We know how to do this in JIT
+// compiled code (it generates additional information like the frame size etc), but we don't know how to do
+// this for unmanaged code. For PINVOKE calls, we leave a pointer to the transition boundary between managed
+// and unmanaged code and we simply ignore the lower part of the stack. However setting up this transition is
+// a bit expensive (1-2 dozen instructions), and while that is acceptable for PINVOKE, it is not acceptable
+// for high-volume calls, like NEW, CAST, WriteBarrier, Stack field fetch and others.
+//
+// To get around this, for transitions into the runtime (which we call FCALLS), we DEFER setting up the
+// boundary variables (what we call the transition frame), until we actually need it (we will do an operation
+// that might cause a GC). This allows us to handle the common case (where we might find the thing in a cache,
+// or service the 'new' from an allocation quantum), and only pay the cost of setting up the transition
+// frame when it will actually be used.
+//
+// The problem is that in order to set up a transition frame we need to be able to find ALL REGISTERS AT THE
+// TIME THE TRANSITION TO UNMANAGED CODE WAS MADE (because we might need to update them if they have GC
+// references). Because we have executed ordinary C++ code (which might spill the registers to the stack at
+// any time), we have a problem. LazyMachState is our 'solution' to this problem. We take advantage of the
+// fact that the C++ code MUST RESTORE the registers before returning. Thus we simulate the execution from the
+// current location to the return and 'watch' where the registers got restored from. This is what
+// unwindLazyState does (determine what the registers would be IF you had never executed any unmanaged C++
+// code).
+//
+// By design, this code does not handle all X86 instructions, but only those instructions needed in an
+// epilog. If you get a failure because of a missing instruction, it MAY simply be because the compiler
+// changed and now emits a new instruction in the epilog, but it MAY also be because the unwinder is
+// 'confused' and is trying to follow a code path that is NOT AN EPILOG, and in this case adding
+// instructions to 'fix' it is inappropriate.
+//
+void LazyMachState::unwindLazyState(LazyMachState* baseState,
+ MachState* lazyState,
+ DWORD threadId,
+ int funCallDepth /* = 1 */,
+ HostCallPreference hostCallPreference /* = (HostCallPreference)(-1) */)
+{
+ CONTRACTL {
+ NOTHROW;
+ GC_NOTRIGGER;
+ SO_TOLERANT;
+ SUPPORTS_DAC;
+ } CONTRACTL_END;
+
+ lazyState->_edi = baseState->_edi;
+ lazyState->_esi = baseState->_esi;
+ lazyState->_ebx = baseState->_ebx;
+ lazyState->_ebp = baseState->captureEbp;
+#ifndef DACCESS_COMPILE
+ lazyState->_pEdi = &baseState->_edi;
+ lazyState->_pEsi = &baseState->_esi;
+ lazyState->_pEbx = &baseState->_ebx;
+ lazyState->_pEbp = &baseState->_ebp;
+#endif
+
+ // We have captured the state of the registers as they exist in 'captureState';
+ // we need to simulate execution from the return address captured in 'captureState'
+ // until we return from the caller of captureState.
+
+ PTR_BYTE ip = PTR_BYTE(baseState->captureEip);
+ PTR_TADDR ESP = PTR_TADDR(baseState->captureEsp);
+ ESP++; // pop captureState's return address
+
+
+ // VC now has small helper calls that it uses in epilogs. We need to walk into these
+ // helpers if we are to decode the stack properly. After we walk the helper we need
+ // to return and continue walking the epilog. This variable remembers where to return to.
+ PTR_BYTE epilogCallRet = PTR_BYTE((TADDR)0);
+
+ // The very first conditional jump that we are going to encounter is
+ // the one testing for the return value of LazyMachStateCaptureState.
+ // The non-zero path is the one directly leading to a return statement.
+ // This variable keeps track of whether we are still looking for that
+ // first conditional jump.
+ BOOL bFirstCondJmp = TRUE;
+
+ // The general strategy is that we always try to plough forward:
+ // we follow a conditional jump if and only if it is a forward jump.
+ // However, in fcall functions that set up a HELPER_METHOD_FRAME in
+ // more than one place, gcc will have both of them share the same
+ // epilog - and the second one may actually be a backward jump.
+ // This can lead us to loop in a destructor code loop. To protect
+ // against this, we remember the ip of the last conditional jump
+ // we followed, and if we encounter it again, we take the other branch.
+ PTR_BYTE lastCondJmpIp = PTR_BYTE((TADDR)0);
+
+ int datasize; // helper variable for decoding of address modes
+ int mod; // helper variable for decoding of mod r/m
+ int rm; // helper variable for decoding of mod r/m
+
+#ifdef _DEBUG
+ int count = 0;
+ const DWORD cInstructions = 1000;
+ PTR_BYTE *instructionBytes = (PTR_BYTE*)alloca(cInstructions * sizeof(PTR_BYTE));
+ memset(instructionBytes, 0, cInstructions * sizeof(PTR_BYTE));
+#endif
+ bool bset16bit=false;
+ bool b16bit=false;
+ for(;;)
+ {
+ _ASSERTE(count++ < 1000); // we should never walk more than 1000 instructions!
+ b16bit=bset16bit;
+ bset16bit=false;
+
+#ifndef DACCESS_COMPILE
+ again:
+#endif
+#ifdef _DEBUG
+ instructionBytes[count-1] = ip;
+#endif
+ switch(*ip)
+ {
+
+ case 0x64: // FS: prefix
+ bset16bit=b16bit; // In case we have just seen a 0x66 prefix
+ goto incIp1;
+
+ case 0x66:
+ bset16bit=true; // Remember that we saw the 0x66 prefix [16-bit datasize override]
+ goto incIp1;
+
+ case 0x50: // push EAX
+ case 0x51: // push ECX
+ case 0x52: // push EDX
+ case 0x53: // push EBX
+ case 0x55: // push EBP
+ case 0x56: // push ESI
+ case 0x57: // push EDI
+ case 0x9C: // pushfd
+ --ESP;
+ case 0x40: // inc EAX
+ case 0x41: // inc ECX
+ case 0x42: // inc EDX
+ case 0x43: // inc EBX
+ case 0x46: // inc ESI
+ case 0x47: // inc EDI
+ goto incIp1;
+
+ case 0x58: // pop EAX
+ case 0x59: // pop ECX
+ case 0x5A: // pop EDX
+ case 0x9D: // popfd
+ ESP++;
+ // FALL THROUGH
+
+ case 0x90: // nop
+ incIp1:
+ ip++;
+ break;
+
+ case 0x5B: // pop EBX
+ lazyState->_pEbx = ESP;
+ lazyState->_ebx = *ESP++;
+ goto incIp1;
+ case 0x5D: // pop EBP
+ lazyState->_pEbp = ESP;
+ lazyState->_ebp = *ESP++;
+ goto incIp1;
+ case 0x5E: // pop ESI
+ lazyState->_pEsi = ESP;
+ lazyState->_esi = *ESP++;
+ goto incIp1;
+ case 0x5F: // pop EDI
+ lazyState->_pEdi = ESP;
+ lazyState->_edi = *ESP++;
+ goto incIp1;
+
+ case 0xEB: // jmp <disp8>
+ ip += (signed __int8) ip[1] + 2;
+ break;
+
+ case 0x72: // jb <disp8> for gcc.
+ {
+ PTR_BYTE tmpIp = ip + (int)(signed __int8)ip[1] + 2;
+ if (tmpIp > ip)
+ ip = tmpIp;
+ else
+ ip += 2;
+ }
+ break;
+
+ case 0xE8: // call <disp32>
+ ip += 5;
+ if (epilogCallRet == 0)
+ {
+ PTR_BYTE target = ip + (__int32)*PTR_DWORD(PTR_TO_TADDR(ip) - 4); // calculate target
+
+ if (shouldEnterCall(target))
+ {
+ epilogCallRet = ip; // remember our return address
+ --ESP; // simulate pushing the return address
+ ip = target;
+ }
+ }
+ break;
+
+ case 0xE9: // jmp <disp32>
+ {
+ PTR_BYTE tmpIp = ip
+ + ((__int32)*dac_cast<PTR_DWORD>(ip + 1) + 5);
+ ip = tmpIp;
+ }
+ break;
+
+ case 0x0f: // follow non-zero jumps:
+ if (ip[1] >= 0x90 && ip[1] <= 0x9f) {
+ if ((ip[2] & 0xC0) != 0xC0) // set<cc> reg
+ goto badOpcode;
+ ip += 3;
+ break;
+ }
+ else if ((ip[1] & 0xf0) == 0x40) { //cmov mod/rm
+ ++ip;
+ datasize = 0;
+ goto decodeRM;
+ }
+ else if (ip[1] >= 0x10 && ip[1] <= 0x17) { // movups, movlps, movhps, unpcklpd, unpckhpd
+ ++ip;
+ datasize = 0;
+ goto decodeRM;
+ }
+ else if (ip[1] == 0x1f) { // nop (multi-byte)
+ ++ip;
+ datasize = 0;
+ goto decodeRM;
+ }
+ else if (ip[1] == 0x57) { // xorps
+ ++ip;
+ datasize = 0;
+ goto decodeRM;
+ }
+ else if (ip[1] == 0xb6 || ip[1] == 0xb7) { //movzx reg, r/m8
+ ++ip;
+ datasize = 0;
+ goto decodeRM;
+ }
+ else if (ip[1] == 0xbf) { //movsx reg, r/m16
+ ++ip;
+ datasize = 0;
+ goto decodeRM;
+ }
+ else if (ip[1] == 0xd6 || ip[1] == 0x7e) { // movq
+ ++ip;
+ datasize = 0;
+ goto decodeRM;
+ }
+ else if (bFirstCondJmp) {
+ bFirstCondJmp = FALSE;
+ if (ip[1] == 0x85) // jne <disp32>
+ ip += (__int32)*dac_cast<PTR_DWORD>(ip + 2) + 6;
+ else if (ip[1] >= 0x80 && ip[1] <= 0x8F) // jcc <disp32>
+ ip += 6;
+ else
+ goto badOpcode;
+ }
+ else {
+ if ((ip[1] >= 0x80) && (ip[1] <= 0x8F)) {
+ PTR_BYTE tmpIp = ip + (__int32)*dac_cast<PTR_DWORD>(ip + 2) + 6;
+
+ if ((tmpIp > ip) == (lastCondJmpIp != ip)) {
+ lastCondJmpIp = ip;
+ ip = tmpIp;
+ }
+ else {
+ lastCondJmpIp = ip;
+ ip += 6;
+ }
+ }
+ else
+ goto badOpcode;
+ }
+ break;
+
+ // This is here because VC seems to not always optimize
+ // away a test for a literal constant
+ case 0x6A: // push 0xXX
+ ip += 2;
+ --ESP;
+ break;
+
+ case 0x68: // push 0xXXXXXXXX
+ if ((ip[5] == 0xFF) && (ip[6] == 0x15)) {
+                ip += 11; // skip the push imm32 (5 bytes) and the call [disp32] (6 bytes)
+ }
+ else {
+ ip += 5;
+
+                // For the Office profiler. It morphs calls into "push TARGET; call helper",
+                // so if we see
+                //
+                //     push XXXX
+                //     call xxxx
+                //
+                // and we notice that mscorwks has been instrumented and that
+                // xxxx starts with a JMP [], then do what we would do for "call XXXX"
+ if ((*ip & 0xFE) == 0xE8 && callsInstrumented()) { // It is a call or a jump (E8 or E9)
+ PTR_BYTE tmpIp = ip + 5;
+ PTR_BYTE target = tmpIp + (__int32)*PTR_DWORD(PTR_TO_TADDR(tmpIp) - 4);
+ if (target[0] == 0xFF && target[1] == 0x25) { // jmp [xxxx] (to external dll)
+ target = PTR_BYTE(*PTR_TADDR(PTR_TO_TADDR(ip) - 4));
+ if (*ip == 0xE9) { // Do logic for jmp
+ ip = target;
+ }
+ else if (shouldEnterCall(target)) { // Do logic for calls
+ epilogCallRet = ip; // remember our return address
+ --ESP; // simulate pushing the return address
+ ip = target;
+ }
+ }
+ }
+ }
+ break;
+
+ case 0x74: // jz <target>
+ if (bFirstCondJmp) {
+ bFirstCondJmp = FALSE;
+ ip += 2; // follow the non-zero path
+ break;
+ }
+ goto condJumpDisp8;
+
+ case 0x75: // jnz <target>
+            // Except for the first jump, we always follow forward jumps to avoid possible looping.
+            //
+ if (bFirstCondJmp) {
+ bFirstCondJmp = FALSE;
+ ip += (signed __int8) ip[1] + 2; // follow the non-zero path
+ break;
+ }
+ goto condJumpDisp8;
+
+ case 0x77: // ja <target>
+ case 0x78: // js <target>
+ case 0x79: // jns <target>
+ case 0x7d: // jge <target>
+ case 0x7c: // jl <target>
+ goto condJumpDisp8;
+
+ condJumpDisp8:
+ {
+ PTR_BYTE tmpIp = ip + (TADDR)(signed __int8) ip[1] + 2;
+ if ((tmpIp > ip) == (lastCondJmpIp != ip)) {
+ lastCondJmpIp = ip;
+ ip = tmpIp;
+ }
+ else {
+ lastCondJmpIp = ip;
+ ip += 2;
+ }
+ }
+ break;
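+            // A hedged restatement of the rule above (illustrative only;
+            // 'followCondJump' is a hypothetical name, not a helper in this file):
+            //
+            //     static bool followCondJump(PTR_BYTE ip, PTR_BYTE target, PTR_BYTE lastCondJmpIp)
+            //     {
+            //         // On the first visit to a conditional jump, follow it only if it goes
+            //         // forward; if the walk comes back to the same jump, do the opposite,
+            //         // so the simulation can never cycle on a single branch.
+            //         return (target > ip) == (lastCondJmpIp != ip);
+            //     }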
+
+ case 0x84:
+ case 0x85:
+ mod = (ip[1] & 0xC0) >> 6;
+ if (mod != 3) // test reg1, reg2
+ goto badOpcode;
+ ip += 2;
+ break;
+
+ case 0x31:
+ case 0x32:
+ case 0x33:
+#ifdef __GNUC__
+            // There are lots of special workarounds for XOR under MSVC. For GCC,
+            // just do the normal Mod R/M decoding.
+ datasize = 0;
+ goto decodeRM;
+#else
+ mod = (ip[1] & 0xC0) >> 6;
+ if (mod == 3)
+ {
+ // XOR reg1, reg2
+
+ // VC generates this sequence in some code:
+ // xor reg, reg
+ // test reg reg
+ // je <target>
+ // This is just an unconditional branch, so jump to it
+ if ((ip[1] & 7) == ((ip[1] >> 3) & 7)) { // reg1 == reg2?
+ if (ip[2] == 0x85 && ip[3] == ip[1]) { // TEST reg, reg
+ if (ip[4] == 0x74) {
+ ip += (signed __int8) ip[5] + 6; // follow the non-zero path
+ break;
+ }
+ _ASSERTE(ip[4] != 0x0f || ((ip[5] & 0xF0)!=0x80)); // If this goes off, we need the big jumps
+ }
+ else
+ {
+ if (ip[2]==0x74)
+ {
+ ip += (signed __int8) ip[3] + 4;
+ break;
+ }
+ _ASSERTE(ip[2] != 0x0f || ((ip[3] & 0xF0)!=0x80)); // If this goes off, we need the big jumps
+ }
+ }
+ ip += 2;
+ }
+ else if (mod == 1)
+ {
+ // XOR reg1, [reg+offs8]
+ // Used by the /GS flag for call to __security_check_cookie()
+ // Should only be XOR ECX,[EBP+4]
+ _ASSERTE((((ip[1] >> 3) & 0x7) == 0x1) && ((ip[1] & 0x7) == 0x5) && (ip[2] == 4));
+ ip += 3;
+ }
+ else if (mod == 2)
+ {
+ // XOR reg1, [reg+offs32]
+                // Not expected in practice, but could occur with __security_check_cookie()
+ _ASSERTE(!"Unexpected XOR reg1, [reg+offs32]");
+ ip += 6;
+ }
+ else // (mod == 0)
+ {
+ // XOR reg1, [reg]
+ goto badOpcode;
+ }
+ break;
+#endif
+
+ case 0x05:
+ // added to handle gcc 3.3 generated code
+ // add %reg, constant
+ ip += 5;
+ break;
+
+ case 0xFF:
+ if ( (ip[1] & 0x38) == 0x30)
+ {
+ // opcode generated by Vulcan/BBT instrumentation
+ // search for push dword ptr[esp]; push imm32; call disp32 and if found ignore it
+ if ((ip[1] == 0x34) && (ip[2] == 0x24) && // push dword ptr[esp] (length 3 bytes)
+ (ip[3] == 0x68) && // push imm32 (length 5 bytes)
+ (ip[8] == 0xe8)) // call disp32 (length 5 bytes)
+ {
+ // found the magic seq emitted by Vulcan instrumentation
+ ip += 13; // (3+5+5)
+ break;
+ }
+
+ --ESP; // push r/m
+ datasize = 0;
+ goto decodeRM;
+ }
+ else if ( (ip[1] & 0x38) == 0x10)
+ {
+ // added to handle gcc 3.3 generated code
+ // This is a call *(%eax) generated by gcc for destructor calls.
+ // We can safely skip over the call
+ datasize = 0;
+ goto decodeRM;
+ }
+ else if (ip[1] == 0xe0)
+ {
+ goto badOpcode;
+#if 0
+ // Handles jmp *%eax from gcc
+ datasize = 0;
+ goto decodeRM;
+#endif
+ }
+ else if (ip[1] == 0x25 && epilogInstrumented()) // is it jmp [XXXX]
+ {
+                // this is an Office profiler epilog (this jmp is acting as a return instruction)
+ PTR_BYTE epilogHelper = PTR_BYTE(*PTR_TADDR(*PTR_TADDR(PTR_TO_TADDR(ip) + 2)));
+
+ ip = PTR_BYTE(*ESP);
+ lazyState->_pRetAddr = ESP++;
+
+ if (epilogHelper[0] != 0x6A) // push <number of dwords to pop>
+ goto badOpcode;
+ unsigned disp = *PTR_BYTE(PTR_TO_TADDR(epilogHelper) + 1) * 4;
+ ESP = PTR_TADDR(PTR_TO_TADDR(ESP) + disp); // pop args
+ goto ret_with_epilogHelperCheck;
+
+ }
+ else
+ {
+ goto badOpcode;
+ }
+ break;
+
+        case 0x39: // cmp r/m, reg
+        case 0x3B: // cmp reg, r/m
+ datasize = 0;
+ goto decodeRM;
+
+ case 0xA1: // MOV EAX, [XXXX]
+ ip += 5;
+ break;
+
+ case 0x89: // MOV r/m, reg
+ if (ip[1] == 0xEC) // MOV ESP, EBP
+ goto mov_esp_ebp;
+ // FALL THROUGH
+
+ case 0x18: // SBB r/m8, r8
+ case 0x19: // SBB r/m[16|32], r[16|32]
+ case 0x1A: // SBB r8, r/m8
+ case 0x1B: // SBB r[16|32], r/m[16|32]
+
+ case 0x88: // MOV reg, r/m (BYTE)
+ case 0x8A: // MOV r/m, reg (BYTE)
+
+ move:
+ datasize = 0;
+
+ decodeRM:
+ // Note that we don't want to read from ip[]
+ // after we do ANY incrementing of ip
+
+ mod = (ip[1] & 0xC0) >> 6;
+ if (mod != 3) {
+ rm = (ip[1] & 0x07);
+ if (mod == 0) { // (mod == 0)
+ if (rm == 5) // has disp32?
+ ip += 4; // [disp32]
+ else if (rm == 4) // has SIB byte?
+ ip += 1; // [reg*K+reg]
+ }
+ else if (mod == 1) { // (mod == 1)
+ if (rm == 4) // has SIB byte?
+ ip += 1; // [reg*K+reg+disp8]
+ ip += 1; // for disp8
+ }
+ else { // (mod == 2)
+ if (rm == 4) // has SIB byte?
+ ip += 1; // [reg*K+reg+disp32]
+ ip += 4; // for disp32
+ }
+ }
+ ip += 2; // opcode and Mod R/M byte
+ ip += datasize;
+ break;
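+            // A hedged reminder of the encoding decoded above: the ModR/M byte is
+            // [mod:2][reg:3][rm:3]. mod==0 with rm==5 is a bare [disp32]; rm==4 in any
+            // memory mode means a SIB byte follows; mod==1 adds a disp8 and mod==2 a
+            // disp32; mod==3 is register-to-register and needs no extra bytes.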
+
+ case 0x80: // OP r/m8, <imm8>
+ datasize = 1;
+ goto decodeRM;
+
+ case 0x81: // OP r/m32, <imm32>
+ if (!b16bit && ip[1] == 0xC4) { // ADD ESP, <imm32>
+ ESP = dac_cast<PTR_TADDR>(dac_cast<TADDR>(ESP) +
+ (__int32)*dac_cast<PTR_DWORD>(ip + 2));
+ ip += 6;
+ break;
+ } else if (!b16bit && ip[1] == 0xC5) { // ADD EBP, <imm32>
+ lazyState->_ebp += (__int32)*dac_cast<PTR_DWORD>(ip + 2);
+ ip += 6;
+ break;
+ }
+
+ datasize = b16bit?2:4;
+ goto decodeRM;
+
+ case 0x01: // ADD mod/rm
+ case 0x03:
+ case 0x29: // SUB mod/rm
+ case 0x2B:
+ datasize = 0;
+ goto decodeRM;
+ case 0x83: // OP r/m32, <imm8>
+ if (ip[1] == 0xC4) { // ADD ESP, <imm8>
+ ESP = dac_cast<PTR_TADDR>(dac_cast<TADDR>(ESP) + (signed __int8)ip[2]);
+ ip += 3;
+ break;
+ }
+ if (ip[1] == 0xec) { // SUB ESP, <imm8>
+ ESP = PTR_TADDR(PTR_TO_TADDR(ESP) - (signed __int8)ip[2]);
+ ip += 3;
+ break;
+ }
+ if (ip[1] == 0xe4) { // AND ESP, <imm8>
+ ESP = PTR_TADDR(PTR_TO_TADDR(ESP) & (signed __int8)ip[2]);
+ ip += 3;
+ break;
+ }
+ if (ip[1] == 0xc5) { // ADD EBP, <imm8>
+ lazyState->_ebp += (signed __int8)ip[2];
+ ip += 3;
+ break;
+ }
+
+ datasize = 1;
+ goto decodeRM;
+
+ case 0x8B: // MOV reg, r/m
+ if (ip[1] == 0xE5) { // MOV ESP, EBP
+ mov_esp_ebp:
+ ESP = PTR_TADDR(lazyState->_ebp);
+ ip += 2;
+ break;
+ }
+
+ if ((ip[1] & 0xc7) == 0x4 && ip[2] == 0x24) // move reg, [esp]
+ {
+ if ( ip[1] == 0x1C ) { // MOV EBX, [ESP]
+ lazyState->_pEbx = ESP;
+ lazyState->_ebx = *lazyState->_pEbx;
+ }
+ else if ( ip[1] == 0x34 ) { // MOV ESI, [ESP]
+ lazyState->_pEsi = ESP;
+ lazyState->_esi = *lazyState->_pEsi;
+ }
+ else if ( ip[1] == 0x3C ) { // MOV EDI, [ESP]
+ lazyState->_pEdi = ESP;
+ lazyState->_edi = *lazyState->_pEdi;
+ }
+ else if ( ip[1] == 0x24 /*ESP*/ || ip[1] == 0x2C /*EBP*/)
+ goto badOpcode;
+
+ ip += 3;
+ break;
+ }
+
+ if ((ip[1] & 0xc7) == 0x44 && ip[2] == 0x24) // move reg, [esp+imm8]
+ {
+ if ( ip[1] == 0x5C ) { // MOV EBX, [ESP+XX]
+ lazyState->_pEbx = PTR_TADDR(PTR_TO_TADDR(ESP) + (signed __int8)ip[3]);
+ lazyState->_ebx = *lazyState->_pEbx ;
+ }
+ else if ( ip[1] == 0x74 ) { // MOV ESI, [ESP+XX]
+ lazyState->_pEsi = PTR_TADDR(PTR_TO_TADDR(ESP) + (signed __int8)ip[3]);
+ lazyState->_esi = *lazyState->_pEsi;
+ }
+ else if ( ip[1] == 0x7C ) { // MOV EDI, [ESP+XX]
+ lazyState->_pEdi = PTR_TADDR(PTR_TO_TADDR(ESP) + (signed __int8)ip[3]);
+ lazyState->_edi = *lazyState->_pEdi;
+ }
+ else if ( ip[1] == 0x64 /*ESP*/ || ip[1] == 0x6C /*EBP*/)
+ goto badOpcode;
+
+ ip += 4;
+ break;
+ }
+
+ if ((ip[1] & 0xC7) == 0x45) { // MOV reg, [EBP + imm8]
+ // gcc sometimes restores callee-preserved registers
+ // via 'mov reg, [ebp-xx]' instead of 'pop reg'
+ if ( ip[1] == 0x5D ) { // MOV EBX, [EBP+XX]
+ lazyState->_pEbx = PTR_TADDR(lazyState->_ebp + (signed __int8)ip[2]);
+ lazyState->_ebx = *lazyState->_pEbx ;
+ }
+ else if ( ip[1] == 0x75 ) { // MOV ESI, [EBP+XX]
+ lazyState->_pEsi = PTR_TADDR(lazyState->_ebp + (signed __int8)ip[2]);
+ lazyState->_esi = *lazyState->_pEsi;
+ }
+ else if ( ip[1] == 0x7D ) { // MOV EDI, [EBP+XX]
+ lazyState->_pEdi = PTR_TADDR(lazyState->_ebp + (signed __int8)ip[2]);
+ lazyState->_edi = *lazyState->_pEdi;
+ }
+ else if ( ip[1] == 0x65 /*ESP*/ || ip[1] == 0x6D /*EBP*/)
+ goto badOpcode;
+
+ // We don't track the values of EAX,ECX,EDX
+
+ ip += 3; // MOV reg, [reg + imm8]
+ break;
+ }
+
+ if ((ip[1] & 0xC7) == 0x85) { // MOV reg, [EBP+imm32]
+ // gcc sometimes restores callee-preserved registers
+ // via 'mov reg, [ebp-xx]' instead of 'pop reg'
+ if ( ip[1] == 0xDD ) { // MOV EBX, [EBP+XXXXXXXX]
+ lazyState->_pEbx = PTR_TADDR(lazyState->_ebp + (__int32)*dac_cast<PTR_DWORD>(ip + 2));
+ lazyState->_ebx = *lazyState->_pEbx ;
+ }
+ else if ( ip[1] == 0xF5 ) { // MOV ESI, [EBP+XXXXXXXX]
+ lazyState->_pEsi = PTR_TADDR(lazyState->_ebp + (__int32)*dac_cast<PTR_DWORD>(ip + 2));
+ lazyState->_esi = *lazyState->_pEsi;
+ }
+ else if ( ip[1] == 0xFD ) { // MOV EDI, [EBP+XXXXXXXX]
+ lazyState->_pEdi = PTR_TADDR(lazyState->_ebp + (__int32)*dac_cast<PTR_DWORD>(ip + 2));
+ lazyState->_edi = *lazyState->_pEdi;
+ }
+ else if ( ip[1] == 0xE5 /*ESP*/ || ip[1] == 0xED /*EBP*/)
+ goto badOpcode; // Add more registers
+
+ // We don't track the values of EAX,ECX,EDX
+
+ ip += 6; // MOV reg, [reg + imm32]
+ break;
+ }
+ goto move;
+
+ case 0x8D: // LEA
+ if ((ip[1] & 0x38) == 0x20) { // Don't allow ESP to be updated
+ if (ip[1] == 0xA5) // LEA ESP, [EBP+XXXX]
+ ESP = PTR_TADDR(lazyState->_ebp + (__int32)*dac_cast<PTR_DWORD>(ip + 2));
+ else if (ip[1] == 0x65) // LEA ESP, [EBP+XX]
+ ESP = PTR_TADDR(lazyState->_ebp + (signed __int8) ip[2]);
+ else if (ip[1] == 0x24 && ip[2] == 0x24) // LEA ESP, [ESP]
+ ;
+ else if (ip[1] == 0xa4 && ip[2] == 0x24 && *((DWORD *)(&ip[3])) == 0) // Another form of: LEA ESP, [ESP]
+ ;
+ else if (ip[1] == 0x64 && ip[2] == 0x24 && ip[3] == 0) // Yet another form of: LEA ESP, [ESP] (8 bit offset)
+ ;
+ else
+ {
+ goto badOpcode;
+ }
+ }
+
+ datasize = 0;
+ goto decodeRM;
+
+ case 0xB0: // MOV AL, imm8
+ ip += 2;
+ break;
+ case 0xB8: // MOV EAX, imm32
+ case 0xB9: // MOV ECX, imm32
+ case 0xBA: // MOV EDX, imm32
+ case 0xBB: // MOV EBX, imm32
+ case 0xBE: // MOV ESI, imm32
+ case 0xBF: // MOV EDI, imm32
+ if(b16bit)
+ ip += 3;
+ else
+ ip += 5;
+ break;
+
+ case 0xC2: // ret N
+ {
+ unsigned __int16 disp = *dac_cast<PTR_WORD>(ip + 1);
+ ip = PTR_BYTE(*ESP);
+ lazyState->_pRetAddr = ESP++;
+ _ASSERTE(disp < 64); // sanity check (although strictly speaking not impossible)
+ ESP = dac_cast<PTR_TADDR>(dac_cast<TADDR>(ESP) + disp); // pop args
+ goto ret;
+ }
+ case 0xC3: // ret
+ ip = PTR_BYTE(*ESP);
+ lazyState->_pRetAddr = ESP++;
+
+ ret_with_epilogHelperCheck:
+ if (epilogCallRet != 0) { // we are returning from a special epilog helper
+ ip = epilogCallRet;
+ epilogCallRet = 0;
+ break; // this does not count toward funCallDepth
+ }
+ ret:
+ if (funCallDepth > 0)
+ {
+ --funCallDepth;
+ if (funCallDepth == 0)
+ goto done;
+ }
+ else
+ {
+ // Determine whether given IP resides in JITted code. (It returns nonzero in that case.)
+ // Use it now to see if we've unwound to managed code yet.
+ BOOL fFailedReaderLock = FALSE;
+ BOOL fIsManagedCode = ExecutionManager::IsManagedCode(*lazyState->pRetAddr(), hostCallPreference, &fFailedReaderLock);
+ if (fFailedReaderLock)
+ {
+ // We don't know if we would have been able to find a JIT
+ // manager, because we couldn't enter the reader lock without
+ // yielding (and our caller doesn't want us to yield). So abort
+ // now.
+
+ // Invalidate the lazyState we're returning, so the caller knows
+ // we aborted before we could fully unwind
+ lazyState->_pRetAddr = NULL;
+ return;
+ }
+
+ if (fIsManagedCode)
+ goto done;
+ }
+
+ bFirstCondJmp = TRUE;
+ break;
+
+ case 0xC6: // MOV r/m8, imm8
+ datasize = 1;
+ goto decodeRM;
+
+ case 0xC7: // MOV r/m32, imm32
+ datasize = b16bit?2:4;
+ goto decodeRM;
+
+ case 0xC9: // leave
+ ESP = PTR_TADDR(lazyState->_ebp);
+ lazyState->_pEbp = ESP;
+ lazyState->_ebp = *ESP++;
+ ip++;
+ break;
+
+#ifndef DACCESS_COMPILE
+ case 0xCC:
+ if (IsDebuggerPresent())
+ {
+ OutputDebugStringA("CLR: Invalid breakpoint in a helpermethod frame epilog\n");
+ DebugBreak();
+ goto again;
+ }
+#ifndef _PREFIX_
+        *((int*) 0) = 1; // If you get this error, it is because you
+                         // set a breakpoint in a helpermethod frame epilog.
+                         // You can't do that, unfortunately. Just move the
+                         // breakpoint into the interior of the method to fix it.
+#endif // !_PREFIX_
+ goto done;
+#endif //!DACCESS_COMPILE
+
+ case 0xD0: // shl REG16, 1
+ case 0xD1: // shl REG32, 1
+ if (0xE4 == ip[1] || 0xE5 == ip[1]) // shl, ESP, 1 or shl EBP, 1
+ goto badOpcode; // Doesn't look like valid code
+ ip += 2;
+ break;
+
+ case 0xC1: // shl REG32, imm8
+ if (0xE4 == ip[1] || 0xE5 == ip[1]) // shl, ESP, imm8 or shl EBP, imm8
+ goto badOpcode; // Doesn't look like valid code
+ ip += 3;
+ break;
+
+ case 0xD9: // single prefix
+ if (0xEE == ip[1])
+ {
+ ip += 2; // FLDZ
+ break;
+ }
+ //
+ // INTENTIONAL FALL THRU
+ //
+ case 0xDD: // double prefix
+ if ((ip[1] & 0xC0) != 0xC0)
+ {
+ datasize = 0; // floatop r/m
+ goto decodeRM;
+ }
+ else
+ {
+ goto badOpcode;
+ }
+ break;
+
+ case 0xf2: // repne prefix
+ case 0xF3: // rep prefix
+ ip += 1;
+ break;
+
+ case 0xA4: // MOVS byte
+ case 0xA5: // MOVS word/dword
+ ip += 1;
+ break;
+
+ case 0xA8: //test AL, imm8
+ ip += 2;
+ break;
+ case 0xA9: //test EAX, imm32
+ ip += 5;
+ break;
+ case 0xF6:
+ if ( (ip[1] & 0x38) == 0x00) // TEST r/m8, imm8
+ {
+ datasize = 1;
+ goto decodeRM;
+ }
+ else
+ {
+ goto badOpcode;
+ }
+ break;
+
+ case 0xF7:
+ if ( (ip[1] & 0x38) == 0x00) // TEST r/m32, imm32
+ {
+ datasize = b16bit?2:4;
+ goto decodeRM;
+ }
+ else if ((ip[1] & 0xC8) == 0xC8) //neg reg
+ {
+ ip += 2;
+ break;
+ }
+ else if ((ip[1] & 0x30) == 0x30) //div eax by mod/rm
+ {
+ datasize = 0;
+ goto decodeRM;
+ }
+ else
+ {
+ goto badOpcode;
+ }
+ break;
+
+#ifdef __GNUC__
+ case 0x2e:
+ // Group 2 instruction prefix.
+ if (ip[1] == 0x0f && ip[2] == 0x1f)
+ {
+ // Although not the recommended multi-byte sequence for 9-byte
+ // nops (the suggestion is to use 0x66 as the prefix), this shows
+ // up in GCC-optimized code.
+ ip += 2;
+ datasize = 0;
+ goto decodeRM;
+ }
+ else
+ {
+ goto badOpcode;
+ }
+ break;
+#endif // __GNUC__
+
+ default:
+ badOpcode:
+ _ASSERTE(!"Bad opcode");
+ // FIX what to do here?
+#ifndef DACCESS_COMPILE
+#ifndef _PREFIX_
+ *((unsigned __int8**) 0) = ip; // cause an access violation (Free Build assert)
+#endif // !_PREFIX_
+#else
+ DacNotImpl();
+#endif
+ goto done;
+ }
+ }
+done:
+ _ASSERTE(epilogCallRet == 0);
+
+    // At this point the fields in 'frame' correspond exactly to the register
+    // state when the helper returns to its caller.
+ lazyState->_esp = dac_cast<TADDR>(ESP);
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
diff --git a/src/vm/i386/jithelp.asm b/src/vm/i386/jithelp.asm
new file mode 100644
index 0000000000..ac767287ee
--- /dev/null
+++ b/src/vm/i386/jithelp.asm
@@ -0,0 +1,2574 @@
+; Licensed to the .NET Foundation under one or more agreements.
+; The .NET Foundation licenses this file to you under the MIT license.
+; See the LICENSE file in the project root for more information.
+
+; ==++==
+;
+
+;
+; ==--==
+; ***********************************************************************
+; File: JIThelp.asm
+;
+; ***********************************************************************
+;
+; *** NOTE: If you make changes to this file, propagate the changes to
+; jithelp.s in this directory
+
+; This contains JITinterface routines that are 100% x86 assembly
+
+ .586
+ .model flat
+
+ include asmconstants.inc
+
+ option casemap:none
+ .code
+;
+; <TODO>@TODO Switch to g_ephemeral_low and g_ephemeral_high
+; @TODO instead of g_lowest_address, g_highest address</TODO>
+;
+
+ARGUMENT_REG1 equ ecx
+ARGUMENT_REG2 equ edx
+g_ephemeral_low TEXTEQU <_g_ephemeral_low>
+g_ephemeral_high TEXTEQU <_g_ephemeral_high>
+g_lowest_address TEXTEQU <_g_lowest_address>
+g_highest_address TEXTEQU <_g_highest_address>
+g_card_table TEXTEQU <_g_card_table>
+WriteBarrierAssert TEXTEQU <_WriteBarrierAssert@8>
+JIT_LLsh TEXTEQU <_JIT_LLsh@0>
+JIT_LRsh TEXTEQU <_JIT_LRsh@0>
+JIT_LRsz TEXTEQU <_JIT_LRsz@0>
+JIT_LMul TEXTEQU <@JIT_LMul@16>
+JIT_Dbl2LngOvf TEXTEQU <@JIT_Dbl2LngOvf@8>
+JIT_Dbl2Lng TEXTEQU <@JIT_Dbl2Lng@8>
+JIT_Dbl2IntSSE2 TEXTEQU <@JIT_Dbl2IntSSE2@8>
+JIT_Dbl2LngP4x87 TEXTEQU <@JIT_Dbl2LngP4x87@8>
+JIT_Dbl2LngSSE3 TEXTEQU <@JIT_Dbl2LngSSE3@8>
+JIT_InternalThrowFromHelper TEXTEQU <@JIT_InternalThrowFromHelper@4>
+JIT_WriteBarrierReg_PreGrow TEXTEQU <_JIT_WriteBarrierReg_PreGrow@0>
+JIT_WriteBarrierReg_PostGrow TEXTEQU <_JIT_WriteBarrierReg_PostGrow@0>
+JIT_TailCall TEXTEQU <_JIT_TailCall@0>
+JIT_TailCallLeave TEXTEQU <_JIT_TailCallLeave@0>
+JIT_TailCallVSDLeave TEXTEQU <_JIT_TailCallVSDLeave@0>
+JIT_TailCallHelper TEXTEQU <_JIT_TailCallHelper@4>
+JIT_TailCallReturnFromVSD TEXTEQU <_JIT_TailCallReturnFromVSD@0>
+
+EXTERN g_ephemeral_low:DWORD
+EXTERN g_ephemeral_high:DWORD
+EXTERN g_lowest_address:DWORD
+EXTERN g_highest_address:DWORD
+EXTERN g_card_table:DWORD
+ifdef _DEBUG
+EXTERN WriteBarrierAssert:PROC
+endif ; _DEBUG
+EXTERN JIT_InternalThrowFromHelper:PROC
+ifdef FEATURE_HIJACK
+EXTERN JIT_TailCallHelper:PROC
+endif
+EXTERN _g_TailCallFrameVptr:DWORD
+EXTERN @JIT_FailFast@0:PROC
+EXTERN _s_gsCookie:DWORD
+EXTERN @JITutil_IsInstanceOfInterface@8:PROC
+EXTERN @JITutil_ChkCastInterface@8:PROC
+EXTERN @JITutil_IsInstanceOfAny@8:PROC
+EXTERN @JITutil_ChkCastAny@8:PROC
+ifdef FEATURE_IMPLICIT_TLS
+EXTERN _GetThread@0:PROC
+endif
+
+ifdef WRITE_BARRIER_CHECK
+; Those global variables are always defined, but should be 0 for Server GC
+g_GCShadow TEXTEQU <?g_GCShadow@@3PAEA>
+g_GCShadowEnd TEXTEQU <?g_GCShadowEnd@@3PAEA>
+EXTERN g_GCShadow:DWORD
+EXTERN g_GCShadowEnd:DWORD
+INVALIDGCVALUE equ 0CCCCCCCDh
+endif
+
+ifdef FEATURE_REMOTING
+EXTERN _TransparentProxyStub_CrossContext@0:PROC
+EXTERN _InContextTPQuickDispatchAsmStub@0:PROC
+endif
+
+.686P
+.XMM
+; The following macro is needed because of a MASM issue with the
+; movsd mnemonic
+;
+$movsd MACRO op1, op2
+ LOCAL begin_movsd, end_movsd
+begin_movsd:
+ movupd op1, op2
+end_movsd:
+ org begin_movsd
+ db 0F2h
+ org end_movsd
+ENDM
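+; A note on the byte patching above (hedged): MOVUPD xmm, xmm/m128 assembles to
+; 66 0F 10 /r, while MOVSD xmm, xmm/m64 assembles to F2 0F 10 /r. Re-originating
+; to begin_movsd and emitting db 0F2h overwrites the 66h prefix byte, so the bytes
+; in the image encode MOVSD even though MASM assembled MOVUPD.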
+.586
+
+; The following macro is used to match the JIT's
+; multi-byte NOP sequence
+$nop3 MACRO
+ db 090h
+ db 090h
+ db 090h
+ENDM
+
+
+
+;***
+;JIT_WriteBarrier* - GC write barrier helper
+;
+;Purpose:
+; Helper called in order to assign an object reference to a field
+; and enable the book-keeping of the GC.
+;
+;Entry:
+; EDX - address of ref-field (assigned to)
+; the resp. other reg - RHS of assignment
+;
+;Exit:
+;
+;Uses:
+; EDX is destroyed.
+;
+;Exceptions:
+;
+;*******************************************************************************
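+; Roughly, in C-like pseudocode (an illustrative sketch only; 'dst' stands for EDX
+; and 'ref' for the register named by the macro argument):
+;
+;     *dst = ref;                                                  // the actual store
+;     if (dst >= g_lowest_address && dst < g_highest_address &&    // dst in the GC heap
+;         ref >= g_ephemeral_low  && ref < g_ephemeral_high)       // ref in the ephemeral generation
+;     {
+;         BYTE* card = g_card_table + ((size_t)dst >> 10);         // one card byte per 1KB of heap
+;         if (*card != 0xFF)
+;             *card = 0xFF;                                        // mark the card dirty
+;     }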
+
+; The code here is tightly coupled with AdjustContextForWriteBarrier, if you change
+; anything here, you might need to change AdjustContextForWriteBarrier as well
+WriteBarrierHelper MACRO rg
+ ALIGN 4
+
+    ;; The entry point is the fully 'safe' one in which we check if EDX (the REF
+    ;; being updated) is actually in the GC heap
+
+PUBLIC _JIT_CheckedWriteBarrier&rg&@0
+_JIT_CheckedWriteBarrier&rg&@0 PROC
+    ;; check if the REF being updated is in the GC heap
+ cmp edx, g_lowest_address
+ jb WriteBarrier_NotInHeap_&rg
+ cmp edx, g_highest_address
+ jae WriteBarrier_NotInHeap_&rg
+
+ ;; fall through to unchecked routine
+ ;; note that its entry point also happens to be aligned
+
+ifdef WRITE_BARRIER_CHECK
+ ;; This entry point is used when you know the REF pointer being updated
+ ;; is in the GC heap
+PUBLIC _JIT_DebugWriteBarrier&rg&@0
+_JIT_DebugWriteBarrier&rg&@0:
+endif
+
+ifdef _DEBUG
+ push edx
+ push ecx
+ push eax
+
+ push rg
+ push edx
+ call WriteBarrierAssert
+
+ pop eax
+ pop ecx
+ pop edx
+endif ;_DEBUG
+
+ ; in the !WRITE_BARRIER_CHECK case this will be the move for all
+ ; addresses in the GCHeap, addresses outside the GCHeap will get
+ ; taken care of below at WriteBarrier_NotInHeap_&rg
+
+ifndef WRITE_BARRIER_CHECK
+ mov DWORD PTR [edx], rg
+endif
+
+ifdef WRITE_BARRIER_CHECK
+    ; Test dest here so that if it is bad, the AV happens before we change register/stack
+    ; state. This makes the job of AdjustContextForWriteBarrier easier.
+ cmp [edx], 0
+ ;; ALSO update the shadow GC heap if that is enabled
+ ; Make ebp into the temporary src register. We need to do this so that we can use ecx
+ ; in the calculation of the shadow GC address, but still have access to the src register
+ push ecx
+ push ebp
+ mov ebp, rg
+
+ ; if g_GCShadow is 0, don't perform the check
+ cmp g_GCShadow, 0
+ je WriteBarrier_NoShadow_&rg
+
+ mov ecx, edx
+ sub ecx, g_lowest_address ; U/V
+ jb WriteBarrier_NoShadow_&rg
+ add ecx, [g_GCShadow]
+ cmp ecx, [g_GCShadowEnd]
+ ja WriteBarrier_NoShadow_&rg
+
+ ; TODO: In Orcas timeframe if we move to P4+ only on X86 we should enable
+ ; mfence barriers on either side of these two writes to make sure that
+ ; they stay as close together as possible
+
+ ; edx contains address in GC
+ ; ecx contains address in ShadowGC
+    ; ebp temporarily becomes the src register
+
+ ;; When we're writing to the shadow GC heap we want to be careful to minimize
+ ;; the risk of a race that can occur here where the GC and ShadowGC don't match
+ mov DWORD PTR [edx], ebp
+ mov DWORD PTR [ecx], ebp
+
+ ;; We need a scratch register to verify the shadow heap. We also need to
+ ;; construct a memory barrier so that the write to the shadow heap happens
+ ;; before the read from the GC heap. We can do both by using SUB/XCHG
+ ;; rather than PUSH.
+ ;;
+ ;; TODO: Should be changed to a push if the mfence described above is added.
+ ;;
+ sub esp, 4
+ xchg [esp], eax
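+    ;; (Hedged note) XCHG with a memory operand carries an implicit LOCK, so the
+    ;; SUB/XCHG pair above both spills EAX and acts as a full memory barrier,
+    ;; which an ordinary PUSH would not provide.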
+
+ ;; As part of our race avoidance (see above) we will now check whether the values
+ ;; in the GC and ShadowGC match. There is a possibility that we're wrong here but
+ ;; being overaggressive means we might mask a case where someone updates GC refs
+    ;; without going to a write barrier, but by its nature it will be indeterminate
+    ;; and we will find real bugs, whereas the current implementation is indeterminate
+    ;; but only leads to investigations that find that this code is fundamentally flawed
+ mov eax, [edx]
+ cmp [ecx], eax
+ je WriteBarrier_CleanupShadowCheck_&rg
+ mov [ecx], INVALIDGCVALUE
+
+WriteBarrier_CleanupShadowCheck_&rg:
+ pop eax
+
+ jmp WriteBarrier_ShadowCheckEnd_&rg
+
+WriteBarrier_NoShadow_&rg:
+ ; If we come here then we haven't written the value to the GC and need to.
+ ; ebp contains rg
+ ; We restore ebp/ecx immediately after this, and if either of them is the src
+ ; register it will regain its value as the src register.
+ mov DWORD PTR [edx], ebp
+WriteBarrier_ShadowCheckEnd_&rg:
+ pop ebp
+ pop ecx
+endif
+ cmp rg, g_ephemeral_low
+ jb WriteBarrier_NotInEphemeral_&rg
+ cmp rg, g_ephemeral_high
+ jae WriteBarrier_NotInEphemeral_&rg
+
+ shr edx, 10
+ add edx, [g_card_table]
+ cmp BYTE PTR [edx], 0FFh
+ jne WriteBarrier_UpdateCardTable_&rg
+ ret
+
+WriteBarrier_UpdateCardTable_&rg:
+ mov BYTE PTR [edx], 0FFh
+ ret
+
+WriteBarrier_NotInHeap_&rg:
+ ; If it wasn't in the heap then we haven't updated the dst in memory yet
+ mov DWORD PTR [edx], rg
+WriteBarrier_NotInEphemeral_&rg:
+ ; If it is in the GC Heap but isn't in the ephemeral range we've already
+ ; updated the Heap with the Object*.
+ ret
+_JIT_CheckedWriteBarrier&rg&@0 ENDP
+
+ENDM
+
+
+;***
+;JIT_ByRefWriteBarrier* - GC write barrier helper
+;
+;Purpose:
+; Helper called in order to assign an object reference to a byref field
+; and enable the book-keeping of the GC.
+;
+;Entry:
+; EDI - address of ref-field (assigned to)
+; ESI - address of the data (source)
+; ECX can be trashed
+;
+;Exit:
+;
+;Uses:
+; EDI and ESI are incremented by a DWORD
+;
+;Exceptions:
+;
+;*******************************************************************************
+
+; The code here is tightly coupled with AdjustContextForWriteBarrier, if you change
+; anything here, you might need to change AdjustContextForWriteBarrier as well
+
+ByRefWriteBarrierHelper MACRO
+ ALIGN 4
+PUBLIC _JIT_ByRefWriteBarrier@0
+_JIT_ByRefWriteBarrier@0 PROC
+ ;;test for dest in range
+ mov ecx, [esi]
+ cmp edi, g_lowest_address
+ jb ByRefWriteBarrier_NotInHeap
+ cmp edi, g_highest_address
+ jae ByRefWriteBarrier_NotInHeap
+
+ifndef WRITE_BARRIER_CHECK
+ ;;write barrier
+ mov [edi],ecx
+endif
+
+ifdef WRITE_BARRIER_CHECK
+    ; Test dest here so that if it is bad, the AV happens before we change register/stack
+    ; state. This makes the job of AdjustContextForWriteBarrier easier.
+ cmp [edi], 0
+
+ ;; ALSO update the shadow GC heap if that is enabled
+
+ ; use edx for address in GC Shadow,
+ push edx
+
+ ;if g_GCShadow is 0, don't do the update
+ cmp g_GCShadow, 0
+ je ByRefWriteBarrier_NoShadow
+
+ mov edx, edi
+ sub edx, g_lowest_address ; U/V
+ jb ByRefWriteBarrier_NoShadow
+ add edx, [g_GCShadow]
+ cmp edx, [g_GCShadowEnd]
+ ja ByRefWriteBarrier_NoShadow
+
+ ; TODO: In Orcas timeframe if we move to P4+ only on X86 we should enable
+ ; mfence barriers on either side of these two writes to make sure that
+ ; they stay as close together as possible
+
+ ; edi contains address in GC
+ ; edx contains address in ShadowGC
+ ; ecx is the value to assign
+
+ ;; When we're writing to the shadow GC heap we want to be careful to minimize
+ ;; the risk of a race that can occur here where the GC and ShadowGC don't match
+ mov DWORD PTR [edi], ecx
+ mov DWORD PTR [edx], ecx
+
+ ;; We need a scratch register to verify the shadow heap. We also need to
+ ;; construct a memory barrier so that the write to the shadow heap happens
+ ;; before the read from the GC heap. We can do both by using SUB/XCHG
+ ;; rather than PUSH.
+ ;;
+ ;; TODO: Should be changed to a push if the mfence described above is added.
+ ;;
+ sub esp, 4
+ xchg [esp], eax
+
+ ;; As part of our race avoidance (see above) we will now check whether the values
+ ;; in the GC and ShadowGC match. There is a possibility that we're wrong here but
+ ;; being overaggressive means we might mask a case where someone updates GC refs
+    ;; without going to a write barrier, but by its nature it will be indeterminate
+    ;; and we will find real bugs, whereas the current implementation is indeterminate
+    ;; but only leads to investigations that find that this code is fundamentally flawed
+
+ mov eax, [edi]
+ cmp [edx], eax
+ je ByRefWriteBarrier_CleanupShadowCheck
+ mov [edx], INVALIDGCVALUE
+ByRefWriteBarrier_CleanupShadowCheck:
+ pop eax
+ jmp ByRefWriteBarrier_ShadowCheckEnd
+
+ByRefWriteBarrier_NoShadow:
+ ; If we come here then we haven't written the value to the GC and need to.
+ mov DWORD PTR [edi], ecx
+
+ByRefWriteBarrier_ShadowCheckEnd:
+ pop edx
+endif
+    ;;test for *src in ephemeral segment
+ cmp ecx, g_ephemeral_low
+ jb ByRefWriteBarrier_NotInEphemeral
+ cmp ecx, g_ephemeral_high
+ jae ByRefWriteBarrier_NotInEphemeral
+
+ mov ecx, edi
+ add esi,4
+ add edi,4
+
+ shr ecx, 10
+ add ecx, [g_card_table]
+ cmp byte ptr [ecx], 0FFh
+ jne ByRefWriteBarrier_UpdateCardTable
+ ret
+ByRefWriteBarrier_UpdateCardTable:
+ mov byte ptr [ecx], 0FFh
+ ret
+
+ByRefWriteBarrier_NotInHeap:
+ ; If it wasn't in the heap then we haven't updated the dst in memory yet
+ mov [edi],ecx
+ByRefWriteBarrier_NotInEphemeral:
+ ; If it is in the GC Heap but isn't in the ephemeral range we've already
+ ; updated the Heap with the Object*.
+ add esi,4
+ add edi,4
+ ret
+_JIT_ByRefWriteBarrier@0 ENDP
+ENDM
+
+;*******************************************************************************
+; Write barrier wrappers with fcall calling convention
+;
+UniversalWriteBarrierHelper MACRO name
+ ALIGN 4
+PUBLIC @JIT_&name&@8
+@JIT_&name&@8 PROC
+ mov eax,edx
+ mov edx,ecx
+ jmp _JIT_&name&EAX@0
+@JIT_&name&@8 ENDP
+ENDM
+
+; WriteBarrierStart and WriteBarrierEnd are used to determine the bounds of the
+; WriteBarrier functions, so we can determine whether an AV occurred inside them.
+;
+PUBLIC _JIT_WriteBarrierStart@0
+_JIT_WriteBarrierStart@0 PROC
+ret
+_JIT_WriteBarrierStart@0 ENDP
+
+ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS
+; Only define these if we're using the ASM GC write barriers; if this flag is not defined,
+; we'll use C++ versions of these write barriers.
+UniversalWriteBarrierHelper <CheckedWriteBarrier>
+UniversalWriteBarrierHelper <WriteBarrier>
+endif
+
+WriteBarrierHelper <EAX>
+WriteBarrierHelper <EBX>
+WriteBarrierHelper <ECX>
+WriteBarrierHelper <ESI>
+WriteBarrierHelper <EDI>
+WriteBarrierHelper <EBP>
+
+ByRefWriteBarrierHelper
+
+PUBLIC _JIT_WriteBarrierLast@0
+_JIT_WriteBarrierLast@0 PROC
+ret
+_JIT_WriteBarrierLast@0 ENDP
+
+; This is the first function outside the "keep together range". Used by BBT scripts.
+PUBLIC _JIT_WriteBarrierEnd@0
+_JIT_WriteBarrierEnd@0 PROC
+ret
+_JIT_WriteBarrierEnd@0 ENDP
+
+;*********************************************************************/
+; In cases where we support it we have an optimized GC Poll callback. Normally (when we're not trying to
+; suspend for GC), the CORINFO_HELP_POLL_GC helper points to this nop routine. When we're ready to suspend
+; for GC, we whack the JIT helper table entry to point to the real helper. When we're done with GC we
+; whack it back.
+PUBLIC @JIT_PollGC_Nop@0
+@JIT_PollGC_Nop@0 PROC
+ret
+@JIT_PollGC_Nop@0 ENDP
+
+;*********************************************************************/
+;llshl - long shift left
+;
+;Purpose:
+; Does a Long Shift Left (signed and unsigned are identical)
+; Shifts a long left any number of bits.
+;
+; NOTE: This routine has been adapted from the Microsoft CRTs.
+;
+;Entry:
+; EDX:EAX - long value to be shifted
+; ECX - number of bits to shift by
+;
+;Exit:
+; EDX:EAX - shifted value
+;
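+; Worked example for the >=32 path below (hedged, for illustration): with
+; EDX:EAX = 00000000:00000001h (the value 1) and ECX = 33, EAX is moved into EDX
+; and shifted by CL and 31 = 1, giving EDX:EAX = 00000002:00000000h, i.e. 2^33.
+;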
+ ALIGN 16
+PUBLIC JIT_LLsh
+JIT_LLsh PROC
+; Handle shifts of between bits 0 and 31
+ cmp ecx, 32
+ jae short LLshMORE32
+ shld edx,eax,cl
+ shl eax,cl
+ ret
+; Handle shifts of between bits 32 and 63
+LLshMORE32:
+ ; The x86 shift instructions only use the lower 5 bits.
+ mov edx,eax
+ xor eax,eax
+ shl edx,cl
+ ret
+JIT_LLsh ENDP
+
+
+;*********************************************************************/
+;LRsh - long shift right
+;
+;Purpose:
+; Does a signed Long Shift Right
+; Shifts a long right any number of bits.
+;
+; NOTE: This routine has been adapted from the Microsoft CRTs.
+;
+;Entry:
+; EDX:EAX - long value to be shifted
+; ECX - number of bits to shift by
+;
+;Exit:
+; EDX:EAX - shifted value
+;
+ ALIGN 16
+PUBLIC JIT_LRsh
+JIT_LRsh PROC
+; Handle shifts of between bits 0 and 31
+ cmp ecx, 32
+ jae short LRshMORE32
+ shrd eax,edx,cl
+ sar edx,cl
+ ret
+; Handle shifts of between bits 32 and 63
+LRshMORE32:
+ ; The x86 shift instructions only use the lower 5 bits.
+ mov eax,edx
+ sar edx, 31
+ sar eax,cl
+ ret
+JIT_LRsh ENDP
+
+
+;*********************************************************************/
+; LRsz:
+;Purpose:
+; Does an unsigned Long Shift Right
+; Shifts a long right any number of bits.
+;
+; NOTE: This routine has been adapted from the Microsoft CRTs.
+;
+;Entry:
+; EDX:EAX - long value to be shifted
+; ECX - number of bits to shift by
+;
+;Exit:
+; EDX:EAX - shifted value
+;
+ ALIGN 16
+PUBLIC JIT_LRsz
+JIT_LRsz PROC
+; Handle shifts of between bits 0 and 31
+ cmp ecx, 32
+ jae short LRszMORE32
+ shrd eax,edx,cl
+ shr edx,cl
+ ret
+; Handle shifts of between bits 32 and 63
+LRszMORE32:
+ ; The x86 shift instructions only use the lower 5 bits.
+ mov eax,edx
+ xor edx,edx
+ shr eax,cl
+ ret
+JIT_LRsz ENDP
+
+;*********************************************************************/
+; LMul:
+;Purpose:
+; Does a long multiply (same for signed/unsigned)
+;
+; NOTE: This routine has been adapted from the Microsoft CRTs.
+;
+;Entry:
+; Parameters are passed on the stack:
+; 1st pushed: multiplier (QWORD)
+; 2nd pushed: multiplicand (QWORD)
+;
+;Exit:
+; EDX:EAX - product of multiplier and multiplicand
+;
+ ALIGN 16
+PUBLIC JIT_LMul
+JIT_LMul PROC
+
+; AHI, BHI : upper 32 bits of A and B
+; ALO, BLO : lower 32 bits of A and B
+;
+; ALO * BLO
+; ALO * BHI
+; + BLO * AHI
+; ---------------------
+
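+; In other words (a hedged restatement of the diagram above), with
+; A = AHI*2^32 + ALO and B = BHI*2^32 + BLO:
+;
+;   A*B mod 2^64 = ALO*BLO + ((ALO*BHI + AHI*BLO) mod 2^32) * 2^32
+;
+; so only ALO*BLO needs a widening multiply; the cross terms are simply added into
+; the high dword, and AHI*BHI never contributes to the low 64 bits.
+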
+ mov eax,[esp + 8] ; AHI
+ mov ecx,[esp + 16] ; BHI
+ or ecx,eax ;test for both hiwords zero.
+ mov ecx,[esp + 12] ; BLO
+        jnz     LMul_hard       ;if either hiword is nonzero take the long path;
+                                ;otherwise both are zero, so just mult ALO and BLO
+
+ mov eax,[esp + 4]
+ mul ecx
+
+ ret 16 ; callee restores the stack
+
+LMul_hard:
+ push ebx
+
+ mul ecx ;eax has AHI, ecx has BLO, so AHI * BLO
+ mov ebx,eax ;save result
+
+ mov eax,[esp + 8] ; ALO
+ mul dword ptr [esp + 20] ;ALO * BHI
+ add ebx,eax ;ebx = ((ALO * BHI) + (AHI * BLO))
+
+ mov eax,[esp + 8] ; ALO ;ecx = BLO
+ mul ecx ;so edx:eax = ALO*BLO
+ add edx,ebx ;now edx has all the LO*HI stuff
+
+ pop ebx
+
+ ret 16 ; callee restores the stack
+
+JIT_LMul ENDP
+
+;*********************************************************************/
+; JIT_Dbl2LngOvf
+
+;Purpose:
+; converts a double to a long truncating toward zero (C semantics)
+; with check for overflow
+;
+; uses stdcall calling conventions
+;
+PUBLIC JIT_Dbl2LngOvf
+JIT_Dbl2LngOvf PROC
+ fnclex
+ fld qword ptr [esp+4]
+ push ecx
+ push ecx
+ fstp qword ptr [esp]
+ call JIT_Dbl2Lng
+ mov ecx,eax
+ fnstsw ax
+ test ax,01h
+ jnz Dbl2LngOvf_throw
+ mov eax,ecx
+ ret 8
+
+Dbl2LngOvf_throw:
+ mov ECX, CORINFO_OverflowException_ASM
+ call JIT_InternalThrowFromHelper
+ ret 8
+JIT_Dbl2LngOvf ENDP
+
+;*********************************************************************/
+; JIT_Dbl2Lng
+
+;Purpose:
+; converts a double to a long truncating toward zero (C semantics)
+;
+; uses stdcall calling conventions
+;
+; note that changing the rounding mode is very expensive. This
+; routine basically does the truncation semantics without changing
+; the rounding mode, resulting in a win.
+;
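+; (Hedged sketch of the idea) FISTP rounds to nearest, so the code below converts
+; with FISTP and then compares the argument against its rounded value: for a
+; positive d, if d - round(d) is negative the rounding went up and the integer is
+; decremented; the symmetric adjustment is made for negative d. For example,
+; d = 1.7 converts to 2, the difference -0.3 is negative, and the result is
+; corrected to 1 = trunc(1.7).
+;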
+PUBLIC JIT_Dbl2Lng
+JIT_Dbl2Lng PROC
+ fld qword ptr[ESP+4] ; fetch arg
+ lea ecx,[esp-8]
+ sub esp,16 ; allocate frame
+ and ecx,-8 ; align pointer on boundary of 8
+        fld     st(0)           ; duplicate top of stack
+ fistp qword ptr[ecx] ; leave arg on stack, also save in temp
+ fild qword ptr[ecx] ; arg, round(arg) now on stack
+ mov edx,[ecx+4] ; high dword of integer
+ mov eax,[ecx] ; low dword of integer
+ test eax,eax
+ je integer_QNaN_or_zero
+
+arg_is_not_integer_QNaN:
+ fsubp st(1),st ; TOS=d-round(d),
+ ; { st(1)=st(1)-st & pop ST }
+ test edx,edx ; what's sign of integer
+ jns positive
+ ; number is negative
+ ; dead cycle
+ ; dead cycle
+ fstp dword ptr[ecx] ; result of subtraction
+ mov ecx,[ecx] ; dword of difference(single precision)
+ add esp,16
+ xor ecx,80000000h
+ add ecx,7fffffffh ; if difference>0 then increment integer
+ adc eax,0 ; inc eax (add CARRY flag)
+ adc edx,0 ; propagate carry flag to upper bits
+ ret 8
+
+positive:
+ fstp dword ptr[ecx] ;17-18 ; result of subtraction
+ mov ecx,[ecx] ; dword of difference (single precision)
+ add esp,16
+ add ecx,7fffffffh ; if difference<0 then decrement integer
+ sbb eax,0 ; dec eax (subtract CARRY flag)
+ sbb edx,0 ; propagate carry flag to upper bits
+ ret 8
+
+integer_QNaN_or_zero:
+ test edx,7fffffffh
+ jnz arg_is_not_integer_QNaN
+ fstp st(0) ;; pop round(arg)
+ fstp st(0) ;; arg
+ add esp,16
+ ret 8
+JIT_Dbl2Lng ENDP
+
+;*********************************************************************/
+; JIT_Dbl2LngP4x87
+
+;Purpose:
+; converts a double to a long truncating toward zero (C semantics)
+;
+; uses stdcall calling conventions
+;
+; This code is faster on a P4 than the Dbl2Lng code above, but is
+; slower on a PIII. Hence we choose this code when on a P4 or above.
+;
+PUBLIC JIT_Dbl2LngP4x87
+JIT_Dbl2LngP4x87 PROC
+arg1 equ <[esp+0Ch]>
+
+ sub esp, 8 ; get some local space
+
+ fld qword ptr arg1 ; fetch arg
+ fnstcw word ptr arg1 ; store FPCW
+ movzx eax, word ptr arg1 ; zero extend - wide
+    or      ah, 0Ch                 ; set the FPCW rounding control bits (10-11) to 11b = truncate
+ mov dword ptr [esp], eax ; store new FPCW bits
+ fldcw word ptr [esp] ; reload FPCW with new bits
+ fistp qword ptr [esp] ; convert
+ mov eax, dword ptr [esp] ; reload FP result
+ mov edx, dword ptr [esp+4] ;
+ fldcw word ptr arg1 ; reload original FPCW value
+
+ add esp, 8 ; restore stack
+
+ ret 8
+JIT_Dbl2LngP4x87 ENDP
+
+;*********************************************************************/
+; JIT_Dbl2LngSSE3
+
+;Purpose:
+; converts a double to a long truncating toward zero (C semantics)
+;
+; uses stdcall calling conventions
+;
+; This code is faster than the P4 x87 code above on Intel processors
+; (Core2, Atom and later) that have SSE3 support
+;
+.686P
+.XMM
+PUBLIC JIT_Dbl2LngSSE3
+JIT_Dbl2LngSSE3 PROC
+arg1 equ <[esp+0Ch]>
+
+ sub esp, 8 ; get some local space
+
+ fld qword ptr arg1 ; fetch arg
+ fisttp qword ptr [esp] ; convert
+ mov eax, dword ptr [esp] ; reload FP result
+ mov edx, dword ptr [esp+4]
+
+ add esp, 8 ; restore stack
+
+ ret 8
+JIT_Dbl2LngSSE3 ENDP
+.586
+
+;*********************************************************************/
+; JIT_Dbl2IntSSE2
+
+;Purpose:
+; converts a double to a long truncating toward zero (C semantics)
+;
+; uses stdcall calling conventions
+;
+; This code is even faster than the P4 x87 code in JIT_Dbl2LngP4x87,
+; but only returns a 32 bit value (only good for int).
+;
+.686P
+.XMM
+PUBLIC JIT_Dbl2IntSSE2
+JIT_Dbl2IntSSE2 PROC
+ $movsd xmm0, [esp+4]
+ cvttsd2si eax, xmm0
+ ret 8
+JIT_Dbl2IntSSE2 ENDP
+.586
+
+
+;*********************************************************************/
+; This is the small write barrier thunk we use when we know the
+; ephemeral generation is higher in memory than older generations.
+; The 0x0F0F0F0F values are bashed by the two functions above.
+; This is the generic version - wherever the code says ECX,
+; the specific register is patched later into a copy
+; Note: do not replace ECX by EAX - there is a smaller encoding for
+; the compares just for EAX, which won't work for other registers.
+;
+; READ THIS!!!!!!
+; it is imperative that the addresses of the values that we overwrite
+; (card table, ephemeral region ranges, etc) are naturally aligned since
+; there are codepaths that will overwrite these values while the EE is running.
+;
+PUBLIC JIT_WriteBarrierReg_PreGrow
+JIT_WriteBarrierReg_PreGrow PROC
+ mov DWORD PTR [edx], ecx
+ cmp ecx, 0F0F0F0F0h
+ jb NoWriteBarrierPre
+
+ shr edx, 10
+ nop ; padding for alignment of constant
+ cmp byte ptr [edx+0F0F0F0F0h], 0FFh
+ jne WriteBarrierPre
+NoWriteBarrierPre:
+ ret
+ nop ; padding for alignment of constant
+ nop ; padding for alignment of constant
+WriteBarrierPre:
+ mov byte ptr [edx+0F0F0F0F0h], 0FFh
+ ret
+JIT_WriteBarrierReg_PreGrow ENDP
+
+;*********************************************************************/
+; This is the larger write barrier thunk we use when we know that older
+; generations may be higher in memory than the ephemeral generation
+; The 0x0F0F0F0F values are bashed by the two functions above.
+; This is the generic version - wherever the code says ECX,
+; the specific register is patched later into a copy
+; Note: do not replace ECX by EAX - there is a smaller encoding for
+; the compares just for EAX, which won't work for other registers.
+; NOTE: we need this aligned for our validation to work properly
+ ALIGN 4
+PUBLIC JIT_WriteBarrierReg_PostGrow
+JIT_WriteBarrierReg_PostGrow PROC
+ mov DWORD PTR [edx], ecx
+ cmp ecx, 0F0F0F0F0h
+ jb NoWriteBarrierPost
+ cmp ecx, 0F0F0F0F0h
+ jae NoWriteBarrierPost
+
+ shr edx, 10
+ nop ; padding for alignment of constant
+ cmp byte ptr [edx+0F0F0F0F0h], 0FFh
+ jne WriteBarrierPost
+NoWriteBarrierPost:
+ ret
+ nop ; padding for alignment of constant
+ nop ; padding for alignment of constant
+WriteBarrierPost:
+ mov byte ptr [edx+0F0F0F0F0h], 0FFh
+ ret
+JIT_WriteBarrierReg_PostGrow ENDP
+
+;*********************************************************************/
+;
+
+ ; a fake virtual stub dispatch register indirect callsite
+ $nop3
+ call dword ptr [eax]
+
+
+PUBLIC JIT_TailCallReturnFromVSD
+JIT_TailCallReturnFromVSD:
+ifdef _DEBUG
+ nop ; blessed callsite
+endif
+ call VSDHelperLabel ; keep call-ret count balanced.
+VSDHelperLabel:
+
+; Stack at this point :
+; ...
+; m_ReturnAddress
+; m_regs
+; m_CallerAddress
+; m_pThread
+; vtbl
+; GSCookie
+; &VSDHelperLabel
+OffsetOfTailCallFrame = 8
+
+; ebx = pThread
+
+ifdef _DEBUG
+ mov esi, _s_gsCookie ; GetProcessGSCookie()
+ cmp dword ptr [esp+OffsetOfTailCallFrame-SIZEOF_GSCookie], esi
+ je TailCallFrameGSCookieIsValid
+ call @JIT_FailFast@0
+ TailCallFrameGSCookieIsValid:
+endif
+ ; remove the padding frame from the chain
+ mov esi, dword ptr [esp+OffsetOfTailCallFrame+4] ; esi = TailCallFrame::m_Next
+ mov dword ptr [ebx + Thread_m_pFrame], esi
+
+ ; skip the frame
+ add esp, 20 ; &VSDHelperLabel, GSCookie, vtbl, m_Next, m_CallerAddress
+
+ pop edi ; restore callee saved registers
+ pop esi
+ pop ebx
+ pop ebp
+
+ ret ; return to m_ReturnAddress
+
+;------------------------------------------------------------------------------
+;
+
+PUBLIC JIT_TailCall
+JIT_TailCall PROC
+
+; the stack layout at this point is:
+;
+; ebp+8+4*nOldStackArgs <- end of argument destination
+; ... ...
+; ebp+8+ old args (size is nOldStackArgs)
+; ... ...
+; ebp+8 <- start of argument destination
+; ebp+4 ret addr
+; ebp+0 saved ebp
+; ebp-c saved ebx, esi, edi (if have callee saved regs = 1)
+;
+; other stuff (local vars) in the jitted callers' frame
+;
+; esp+20+4*nNewStackArgs <- end of argument source
+; ... ...
+; esp+20+ new args (size is nNewStackArgs) to be passed to the target of the tail-call
+; ... ...
+; esp+20 <- start of argument source
+; esp+16 nOldStackArgs
+; esp+12 nNewStackArgs
+; esp+8 flags (1 = have callee saved regs, 2 = virtual stub dispatch)
+; esp+4 target addr
+; esp+0 retaddr
+;
+; If you change this function, make sure you update code:TailCallStubManager as well.
+
+RetAddr equ 0
+TargetAddr equ 4
+nNewStackArgs equ 12
+nOldStackArgs equ 16
+NewArgs equ 20
+
+; extra space is incremented as we push things on the stack along the way
+ExtraSpace = 0
+
+ call _GetThread@0; eax = Thread*
+ push eax ; Thread*
+
+ ; save ArgumentRegisters
+ push ecx
+ push edx
+
+ExtraSpace = 12 ; pThread, ecx, edx
+
+ifdef FEATURE_HIJACK
+ ; Make sure that the EE does have the return address patched. So we can move it around.
+ test dword ptr [eax+Thread_m_State], TS_Hijacked_ASM
+ jz NoHijack
+
+ ; JIT_TailCallHelper(Thread *)
+ push eax
+ call JIT_TailCallHelper ; this is __stdcall
+
+NoHijack:
+endif
+
+ mov edx, dword ptr [esp+ExtraSpace+JIT_TailCall_StackOffsetToFlags] ; edx = flags
+
+ mov eax, dword ptr [esp+ExtraSpace+nOldStackArgs] ; eax = nOldStackArgs
+ mov ecx, dword ptr [esp+ExtraSpace+nNewStackArgs] ; ecx = nNewStackArgs
+
+ ; restore callee saved registers
+    ; <TODO>@TODO : esp based - doesn't work with localloc</TODO>
+ test edx, 1
+ jz NoCalleeSaveRegisters
+
+ mov edi, dword ptr [ebp-4] ; restore edi
+ mov esi, dword ptr [ebp-8] ; restore esi
+ mov ebx, dword ptr [ebp-12] ; restore ebx
+
+NoCalleeSaveRegisters:
+
+ push dword ptr [ebp+4] ; save the original return address for later
+ push edi
+ push esi
+
+ExtraSpace = 24 ; pThread, ecx, edx, orig retaddr, edi, esi
+CallersEsi = 0
+CallersEdi = 4
+OrigRetAddr = 8
+pThread = 20
+
+ lea edi, [ebp+8+4*eax] ; edi = the end of argument destination
+ lea esi, [esp+ExtraSpace+NewArgs+4*ecx] ; esi = the end of argument source
+
+ mov ebp, dword ptr [ebp] ; restore ebp (do not use ebp as scratch register to get a good stack trace in debugger)
+
+ test edx, 2
+ jnz VSDTailCall
+
+ ; copy the arguments to the final destination
+ test ecx, ecx
+ jz ArgumentsCopied
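+        ; (Hedged note) The loop below copies from the end of the block toward the
+        ; start: the destination (the caller's incoming argument area above EBP) can
+        ; overlap the source (the new arguments near ESP) and lies at higher
+        ; addresses, so a backward copy never overwrites a source word before it is read.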
+ArgumentCopyLoop:
+ ; At this point, this is the value of the registers :
+ ; edi = end of argument dest
+ ; esi = end of argument source
+ ; ecx = nNewStackArgs
+ mov eax, dword ptr [esi-4]
+ sub edi, 4
+ sub esi, 4
+ mov dword ptr [edi], eax
+ dec ecx
+ jnz ArgumentCopyLoop
+ArgumentsCopied:
+
+ ; edi = the start of argument destination
+
+ mov eax, dword ptr [esp+4+4] ; return address
+ mov ecx, dword ptr [esp+ExtraSpace+TargetAddr] ; target address
+
+ mov dword ptr [edi-4], eax ; return address
+ mov dword ptr [edi-8], ecx ; target address
+
+ lea eax, [edi-8] ; new value for esp
+
+ pop esi
+ pop edi
+ pop ecx ; skip original return address
+ pop edx
+ pop ecx
+
+ mov esp, eax
+
+PUBLIC JIT_TailCallLeave ; add a label here so that TailCallStubManager can access it
+JIT_TailCallLeave:
+ retn ; Will branch to targetAddr. This matches the
+ ; "call" done by JITted code, keeping the
+ ; call-ret count balanced.
+
+ ;----------------------------------------------------------------------
+VSDTailCall:
+ ;----------------------------------------------------------------------
+
+ ; For the Virtual Stub Dispatch, we create a fake callsite to fool
+ ; the callsite probes. In order to create the call site, we need to insert TailCallFrame
+ ; if we do not have one already.
+ ;
+ ; ecx = nNewStackArgs
+ ; esi = the end of argument source
+ ; edi = the end of argument destination
+ ;
+ ; The stub has pushed the following onto the stack at this point :
+ ; pThread, ecx, edx, orig retaddr, edi, esi
+
+
+ cmp dword ptr [esp+OrigRetAddr], JIT_TailCallReturnFromVSD
+        jz      VSDTailCallFrameInserted_DoSlideUpArgs ; There is an existing TailCallFrame that can be reused
+
+ ; try to allocate space for the frame / check whether there is enough space
+ ; If there is sufficient space, we will setup the frame and then slide
+ ; the arguments up the stack. Else, we first need to slide the arguments
+ ; down the stack to make space for the TailCallFrame
+ sub edi, (SIZEOF_GSCookie + SIZEOF_TailCallFrame)
+ cmp edi, esi
+ jae VSDSpaceForFrameChecked
+
+ ; There is not sufficient space to wedge in the TailCallFrame without
+ ; overwriting the new arguments.
+ ; We need to allocate the extra space on the stack,
+ ; and slide down the new arguments
+
+ mov eax, esi
+ sub eax, edi
+ sub esp, eax
+
+ mov eax, ecx ; to subtract the size of arguments
+ mov edx, ecx ; for counter
+
+ neg eax
+
+ ; copy down the arguments to the final destination, need to copy all temporary storage as well
+ add edx, (ExtraSpace+NewArgs)/4
+
+ lea esi, [esi+4*eax-(ExtraSpace+NewArgs)]
+ lea edi, [edi+4*eax-(ExtraSpace+NewArgs)]
+
+VSDAllocFrameCopyLoop:
+ mov eax, dword ptr [esi]
+ mov dword ptr [edi], eax
+ add esi, 4
+ add edi, 4
+ dec edx
+ jnz VSDAllocFrameCopyLoop
+
+        ; the argument source and destination are the same now
+ mov esi, edi
+
+VSDSpaceForFrameChecked:
+
+ ; At this point, we have enough space on the stack for the TailCallFrame,
+        ; and we may already have slid down the arguments
+
+ mov eax, _s_gsCookie ; GetProcessGSCookie()
+ mov dword ptr [edi], eax ; set GSCookie
+ mov eax, _g_TailCallFrameVptr ; vptr
+ mov edx, dword ptr [esp+OrigRetAddr] ; orig return address
+ mov dword ptr [edi+SIZEOF_GSCookie], eax ; TailCallFrame::vptr
+ mov dword ptr [edi+SIZEOF_GSCookie+28], edx ; TailCallFrame::m_ReturnAddress
+
+ mov eax, dword ptr [esp+CallersEdi] ; restored edi
+ mov edx, dword ptr [esp+CallersEsi] ; restored esi
+ mov dword ptr [edi+SIZEOF_GSCookie+12], eax ; TailCallFrame::m_regs::edi
+ mov dword ptr [edi+SIZEOF_GSCookie+16], edx ; TailCallFrame::m_regs::esi
+ mov dword ptr [edi+SIZEOF_GSCookie+20], ebx ; TailCallFrame::m_regs::ebx
+ mov dword ptr [edi+SIZEOF_GSCookie+24], ebp ; TailCallFrame::m_regs::ebp
+
+ mov ebx, dword ptr [esp+pThread] ; ebx = pThread
+
+ mov eax, dword ptr [ebx+Thread_m_pFrame]
+ lea edx, [edi+SIZEOF_GSCookie]
+ mov dword ptr [edi+SIZEOF_GSCookie+4], eax ; TailCallFrame::m_pNext
+ mov dword ptr [ebx+Thread_m_pFrame], edx ; hook the new frame into the chain
+
+ ; setup ebp chain
+ lea ebp, [edi+SIZEOF_GSCookie+24] ; TailCallFrame::m_regs::ebp
+
+ ; Do not copy arguments again if they are in place already
+ ; Otherwise, we will need to slide the new arguments up the stack
+ cmp esi, edi
+ jne VSDTailCallFrameInserted_DoSlideUpArgs
+
+        ; At this point, we must have already slid down the new arguments,
+ ; or the TailCallFrame is a perfect fit
+ ; set the caller address
+ mov edx, dword ptr [esp+ExtraSpace+RetAddr] ; caller address
+ mov dword ptr [edi+SIZEOF_GSCookie+8], edx ; TailCallFrame::m_CallerAddress
+
+ ; adjust edi as it would by copying
+ neg ecx
+ lea edi, [edi+4*ecx]
+
+ jmp VSDArgumentsCopied
+
+VSDTailCallFrameInserted_DoSlideUpArgs:
+ ; set the caller address
+ mov edx, dword ptr [esp+ExtraSpace+RetAddr] ; caller address
+ mov dword ptr [edi+SIZEOF_GSCookie+8], edx ; TailCallFrame::m_CallerAddress
+
+ ; copy the arguments to the final destination
+ test ecx, ecx
+ jz VSDArgumentsCopied
+VSDArgumentCopyLoop:
+ mov eax, dword ptr [esi-4]
+ sub edi, 4
+ sub esi, 4
+ mov dword ptr [edi], eax
+ dec ecx
+ jnz VSDArgumentCopyLoop
+VSDArgumentsCopied:
+
+ ; edi = the start of argument destination
+
+ mov ecx, dword ptr [esp+ExtraSpace+TargetAddr] ; target address
+
+ mov dword ptr [edi-4], JIT_TailCallReturnFromVSD ; return address
+ mov dword ptr [edi-12], ecx ; address of indirection cell
+ mov ecx, [ecx]
+ mov dword ptr [edi-8], ecx ; target address
+
+ ; skip original return address and saved esi, edi
+ add esp, 12
+
+ pop edx
+ pop ecx
+
+ lea esp, [edi-12] ; new value for esp
+ pop eax
+
+PUBLIC JIT_TailCallVSDLeave ; add a label here so that TailCallStubManager can access it
+JIT_TailCallVSDLeave:
+ retn ; Will branch to targetAddr. This matches the
+ ; "call" done by JITted code, keeping the
+ ; call-ret count balanced.
+
+JIT_TailCall ENDP
+
+
+;------------------------------------------------------------------------------
+
+; HCIMPL2_VV(float, JIT_FltRem, float dividend, float divisor)
+@JIT_FltRem@8 proc public
+ fld dword ptr [esp+4] ; divisor
+ fld dword ptr [esp+8] ; dividend
+fremloop:
+ fprem
+ fstsw ax
+ fwait
+ sahf
+ jp fremloop ; Continue while the FPU status bit C2 is set
+ fxch ; swap, so divisor is on top and result is in st(1)
+ fstp ST(0) ; Pop the divisor from the FP stack
+ retn 8 ; Return value is in st(0)
+@JIT_FltRem@8 endp
+
+; HCIMPL2_VV(float, JIT_DblRem, float dividend, float divisor)
+@JIT_DblRem@16 proc public
+ fld qword ptr [esp+4] ; divisor
+ fld qword ptr [esp+12] ; dividend
+fremloopd:
+ fprem
+ fstsw ax
+ fwait
+ sahf
+ jp fremloopd ; Continue while the FPU status bit C2 is set
+ fxch ; swap, so divisor is on top and result is in st(1)
+ fstp ST(0) ; Pop the divisor from the FP stack
+ retn 16 ; Return value is in st(0)
+@JIT_DblRem@16 endp
+
+;------------------------------------------------------------------------------
+
+g_SystemInfo TEXTEQU <?g_SystemInfo@@3U_SYSTEM_INFO@@A>
+g_SpinConstants TEXTEQU <?g_SpinConstants@@3USpinConstants@@A>
+g_pSyncTable TEXTEQU <?g_pSyncTable@@3PAVSyncTableEntry@@A>
+JITutil_MonEnterWorker TEXTEQU <@JITutil_MonEnterWorker@4>
+JITutil_MonReliableEnter TEXTEQU <@JITutil_MonReliableEnter@8>
+JITutil_MonTryEnter TEXTEQU <@JITutil_MonTryEnter@12>
+JITutil_MonExitWorker TEXTEQU <@JITutil_MonExitWorker@4>
+JITutil_MonContention TEXTEQU <@JITutil_MonContention@4>
+JITutil_MonReliableContention TEXTEQU <@JITutil_MonReliableContention@8>
+JITutil_MonSignal TEXTEQU <@JITutil_MonSignal@4>
+JIT_InternalThrow TEXTEQU <@JIT_InternalThrow@4>
+EXTRN g_SystemInfo:BYTE
+EXTRN g_SpinConstants:BYTE
+EXTRN g_pSyncTable:DWORD
+EXTRN JITutil_MonEnterWorker:PROC
+EXTRN JITutil_MonReliableEnter:PROC
+EXTRN JITutil_MonTryEnter:PROC
+EXTRN JITutil_MonExitWorker:PROC
+EXTRN JITutil_MonContention:PROC
+EXTRN JITutil_MonReliableContention:PROC
+EXTRN JITutil_MonSignal:PROC
+EXTRN JIT_InternalThrow:PROC
+
+ifdef MON_DEBUG
+ifdef TRACK_SYNC
+EnterSyncHelper TEXTEQU <_EnterSyncHelper@8>
+LeaveSyncHelper TEXTEQU <_LeaveSyncHelper@8>
+EXTRN EnterSyncHelper:PROC
+EXTRN LeaveSyncHelper:PROC
+endif ;TRACK_SYNC
+endif ;MON_DEBUG
+
+; The following macro is needed because MASM returns
+; "instruction prefix not allowed" error message for
+; rep nop mnemonic
+$repnop MACRO
+ db 0F3h
+ db 090h
+ENDM
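+; (Hedged note) The byte sequence F3 90 ("rep nop") is the PAUSE instruction on
+; SSE2-capable processors and a plain NOP on older ones, so it is safe to emit
+; unconditionally in the spin-wait loops below.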
+
+; Safe ThreadAbort does not abort a thread if it is running finally or has lock counts.
+; At the time we call Monitor.Enter, we initiate the abort if we can.
+; We do not need to do the same for Monitor.Leave, since most of time, Monitor.Leave is called
+; during finally.
+
+;**********************************************************************
+; This is a frameless helper for entering a monitor on an object.
+; The object is in ARGUMENT_REG1. This tries the normal case (no
+; blocking or object allocation) in line and calls a framed helper
+; for the other cases.
+; ***** NOTE: if you make any changes to this routine, build with MON_DEBUG undefined
+; to make sure you don't break the non-debug build. This is very fragile code.
+; Also, propagate the changes to jithelp.s which contains the same helper and assembly code
+; (in AT&T syntax) for gnu assembler.
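+;
+; (Hedged, illustrative sketch - 'pHeader', 'threadId' and the unsuffixed constant
+; names are descriptive stand-ins for the _ASM constants used below, not identifiers
+; from this code base.) The thin-lock fast path is roughly:
+;
+;     DWORD* pHeader = (DWORD*)((BYTE*)obj - SyncBlockIndexOffset);   // object header word
+;     DWORD  header  = *pHeader;
+;     if ((header & SBLK_COMBINED_MASK) == 0 && threadId <= SBLK_MASK_LOCK_THREADID)
+;     {
+;         // try to stamp our thread id into the low bits of the header
+;         if (InterlockedCompareExchange((LONG volatile*)pHeader, (LONG)(header | threadId), (LONG)header) == (LONG)header)
+;         {
+;             pThread->m_dwLockCount++;
+;             return;                                  // lock acquired without a sync block
+;         }
+;     }
+;     // otherwise: recursion check, spin with exponential backoff, or the framed helper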
+@JIT_MonEnterWorker@4 proc public
+ ; Initialize delay value for retry with exponential backoff
+ push ebx
+ mov ebx, dword ptr g_SpinConstants+SpinConstants_dwInitialDuration
+
+ ; We need yet another register to avoid refetching the thread object
+ push esi
+
+ ; Check if the instance is NULL.
+ test ARGUMENT_REG1, ARGUMENT_REG1
+ jz MonEnterFramedLockHelper
+
+ call _GetThread@0
+ mov esi,eax
+
+ ; Check if we can abort here
+ mov eax, [esi+Thread_m_State]
+ and eax, TS_CatchAtSafePoint_ASM
+ jz MonEnterRetryThinLock
+ ; go through the slow code path to initiate ThreadAbort.
+ jmp MonEnterFramedLockHelper
+
+MonEnterRetryThinLock:
+ ; Fetch the object header dword
+ mov eax, [ARGUMENT_REG1-SyncBlockIndexOffset_ASM]
+
+ ; Check whether we have the "thin lock" layout, the lock is free and the spin lock bit not set
+ ; SBLK_COMBINED_MASK_ASM = BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX + BIT_SBLK_SPIN_LOCK + SBLK_MASK_LOCK_THREADID + SBLK_MASK_LOCK_RECLEVEL
+ test eax, SBLK_COMBINED_MASK_ASM
+ jnz MonEnterNeedMoreTests
+
+ ; Everything is fine - get the thread id to store in the lock
+ mov edx, [esi+Thread_m_ThreadId]
+
+ ; If the thread id is too large, we need a syncblock for sure
+ cmp edx, SBLK_MASK_LOCK_THREADID_ASM
+ ja MonEnterFramedLockHelper
+
+ ; We want to store a new value with the current thread id set in the low 10 bits
+ or edx,eax
+ lock cmpxchg dword ptr [ARGUMENT_REG1-SyncBlockIndexOffset_ASM], edx
+ jnz MonEnterPrepareToWaitThinLock
+
+ ; Everything went fine and we're done
+ add [esi+Thread_m_dwLockCount],1
+ pop esi
+ pop ebx
+ ret
+
+MonEnterNeedMoreTests:
+ ; Ok, it's not the simple case - find out which case it is
+ test eax, BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX_ASM
+ jnz MonEnterHaveHashOrSyncBlockIndex
+
+    ; The header is transitioning (the spin lock bit is set) - treat this as if the lock was taken
+ test eax, BIT_SBLK_SPIN_LOCK_ASM
+ jnz MonEnterPrepareToWaitThinLock
+
+ ; Here we know we have the "thin lock" layout, but the lock is not free.
+ ; It could still be the recursion case - compare the thread id to check
+ mov edx,eax
+ and edx, SBLK_MASK_LOCK_THREADID_ASM
+ cmp edx, [esi+Thread_m_ThreadId]
+ jne MonEnterPrepareToWaitThinLock
+
+ ; Ok, the thread id matches, it's the recursion case.
+ ; Bump up the recursion level and check for overflow
+ lea edx, [eax+SBLK_LOCK_RECLEVEL_INC_ASM]
+ test edx, SBLK_MASK_LOCK_RECLEVEL_ASM
+ jz MonEnterFramedLockHelper
+
+ ; Try to put the new recursion level back. If the header was changed in the meantime,
+ ; we need a full retry, because the layout could have changed.
+ lock cmpxchg [ARGUMENT_REG1-SyncBlockIndexOffset_ASM], edx
+ jnz MonEnterRetryHelperThinLock
+
+ ; Everything went fine and we're done
+ pop esi
+ pop ebx
+ ret
+
+MonEnterPrepareToWaitThinLock:
+ ; If we are on an MP system, we try spinning for a certain number of iterations
+ cmp dword ptr g_SystemInfo+SYSTEM_INFO_dwNumberOfProcessors,1
+ jle MonEnterFramedLockHelper
+
+ ; exponential backoff: delay by approximately 2*ebx clock cycles (on a PIII)
+ mov eax, ebx
+MonEnterdelayLoopThinLock:
+ $repnop ; indicate to the CPU that we are spin waiting (useful for some Intel P4 multiprocs)
+ dec eax
+ jnz MonEnterdelayLoopThinLock
+
+ ; next time, wait a factor longer
+ imul ebx, dword ptr g_SpinConstants+SpinConstants_dwBackoffFactor
+
+ cmp ebx, dword ptr g_SpinConstants+SpinConstants_dwMaximumDuration
+ jle MonEnterRetryHelperThinLock
+
+ jmp MonEnterFramedLockHelper
+
+MonEnterRetryHelperThinLock:
+ jmp MonEnterRetryThinLock
+
+MonEnterHaveHashOrSyncBlockIndex:
+ ; If we have a hash code already, we need to create a sync block
+ test eax, BIT_SBLK_IS_HASHCODE_ASM
+ jnz MonEnterFramedLockHelper
+
+ ; Ok, we have a sync block index - just and out the top bits and grab the syncblock index
+ and eax, MASK_SYNCBLOCKINDEX_ASM
+
+ ; Get the sync block pointer.
+ mov ARGUMENT_REG2, dword ptr g_pSyncTable
+ mov ARGUMENT_REG2, [ARGUMENT_REG2+eax*SizeOfSyncTableEntry_ASM+SyncTableEntry_m_SyncBlock]
+
+ ; Check if the sync block has been allocated.
+ test ARGUMENT_REG2, ARGUMENT_REG2
+ jz MonEnterFramedLockHelper
+
+ ; Get a pointer to the lock object.
+ lea ARGUMENT_REG2, [ARGUMENT_REG2+SyncBlock_m_Monitor]
+
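+ ; Note (inferred from this helper and the exit path): AwareLock_m_MonitorHeld
+ ; appears to keep the "lock owned" state in bit 0 and the waiter count in the
+ ; remaining bits, so a nonzero value means held and/or contended, and a
+ ; cmpxchg from 0 to 1 acquires an uncontended lock.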
+ ; Attempt to acquire the lock.
+MonEnterRetrySyncBlock:
+ mov eax, [ARGUMENT_REG2+AwareLock_m_MonitorHeld]
+ test eax,eax
+ jne MonEnterHaveWaiters
+
+ ; Common case, lock isn't held and there are no waiters. Attempt to
+ ; gain ownership ourselves.
+ mov ARGUMENT_REG1,1
+ lock cmpxchg [ARGUMENT_REG2+AwareLock_m_MonitorHeld], ARGUMENT_REG1
+ jnz MonEnterRetryHelperSyncBlock
+
+ ; Success. Save the thread object in the lock and increment the use count.
+ mov dword ptr [ARGUMENT_REG2+AwareLock_m_HoldingThread],esi
+ inc dword ptr [esi+Thread_m_dwLockCount]
+ inc dword ptr [ARGUMENT_REG2+AwareLock_m_Recursion]
+
+ifdef MON_DEBUG
+ifdef TRACK_SYNC
+ push ARGUMENT_REG2 ; AwareLock
+ push [esp+4] ; return address
+ call EnterSyncHelper
+endif ;TRACK_SYNC
+endif ;MON_DEBUG
+ pop esi
+ pop ebx
+ ret
+
+ ; It's possible to get here with waiters but no lock held, but in this
+ ; case a signal is about to be fired which will wake up a waiter. So
+ ; for fairness sake we should wait too.
+ ; Check first for recursive lock attempts on the same thread.
+MonEnterHaveWaiters:
+ ; Is mutex already owned by current thread?
+ cmp [ARGUMENT_REG2+AwareLock_m_HoldingThread],esi
+ jne MonEnterPrepareToWait
+
+ ; Yes, bump our use count.
+ inc dword ptr [ARGUMENT_REG2+AwareLock_m_Recursion]
+ifdef MON_DEBUG
+ifdef TRACK_SYNC
+ push ARGUMENT_REG2 ; AwareLock
+ push [esp+4] ; return address
+ call EnterSyncHelper
+endif ;TRACK_SYNC
+endif ;MON_DEBUG
+ pop esi
+ pop ebx
+ ret
+
+MonEnterPrepareToWait:
+ ; If we are on an MP system, we try spinning for a certain number of iterations
+ cmp dword ptr g_SystemInfo+SYSTEM_INFO_dwNumberOfProcessors,1
+ jle MonEnterHaveWaiters1
+
+ ; exponential backoff: delay by approximately 2*ebx clock cycles (on a PIII)
+ mov eax,ebx
+MonEnterdelayLoop:
+ $repnop ; indicate to the CPU that we are spin waiting (useful for some Intel P4 multiprocs)
+ dec eax
+ jnz MonEnterdelayLoop
+
+ ; next time, wait a factor longer
+ imul ebx, dword ptr g_SpinConstants+SpinConstants_dwBackoffFactor
+
+ cmp ebx, dword ptr g_SpinConstants+SpinConstants_dwMaximumDuration
+ jle MonEnterRetrySyncBlock
+
+MonEnterHaveWaiters1:
+
+ pop esi
+ pop ebx
+
+ ; Place AwareLock in arg1 then call contention helper.
+ mov ARGUMENT_REG1, ARGUMENT_REG2
+ jmp JITutil_MonContention
+
+MonEnterRetryHelperSyncBlock:
+ jmp MonEnterRetrySyncBlock
+
+ ; ECX has the object to synchronize on
+MonEnterFramedLockHelper:
+ pop esi
+ pop ebx
+ jmp JITutil_MonEnterWorker
+
+@JIT_MonEnterWorker@4 endp
+
+;**********************************************************************
+; This is a frameless helper for entering a monitor on an object, and
+; setting a flag to indicate that the lock was taken.
+; The object is in ARGUMENT_REG1. The flag is in ARGUMENT_REG2.
+; This tries the normal case (no blocking or object allocation) in line
+; and calls a framed helper for the other cases.
+; ***** NOTE: if you make any changes to this routine, build with MON_DEBUG undefined
+; to make sure you don't break the non-debug build. This is very fragile code.
+; Also, propagate the changes to jithelp.s which contains the same helper and assembly code
+; (in AT&T syntax) for gnu assembler.
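+; Note: the lock-taken flag is written only after the lock has actually been
+; acquired (see the "Set *pbLockTaken=true" stores below), so the flag never
+; claims a lock that was not taken.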
+@JIT_MonReliableEnter@8 proc public
+ ; Initialize delay value for retry with exponential backoff
+ push ebx
+ mov ebx, dword ptr g_SpinConstants+SpinConstants_dwInitialDuration
+
+ ; Put pbLockTaken in edi
+ push edi
+ mov edi, ARGUMENT_REG2
+
+ ; We need yet another register to avoid refetching the thread object
+ push esi
+
+ ; Check if the instance is NULL.
+ test ARGUMENT_REG1, ARGUMENT_REG1
+ jz MonReliableEnterFramedLockHelper
+
+ call _GetThread@0
+ mov esi,eax
+
+ ; Check if we can abort here
+ mov eax, [esi+Thread_m_State]
+ and eax, TS_CatchAtSafePoint_ASM
+ jz MonReliableEnterRetryThinLock
+ ; go through the slow code path to initiate ThreadAbort.
+ jmp MonReliableEnterFramedLockHelper
+
+MonReliableEnterRetryThinLock:
+ ; Fetch the object header dword
+ mov eax, [ARGUMENT_REG1-SyncBlockIndexOffset_ASM]
+
+ ; Check whether we have the "thin lock" layout, the lock is free and the spin lock bit not set
+ ; SBLK_COMBINED_MASK_ASM = BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX + BIT_SBLK_SPIN_LOCK + SBLK_MASK_LOCK_THREADID + SBLK_MASK_LOCK_RECLEVEL
+ test eax, SBLK_COMBINED_MASK_ASM
+ jnz MonReliableEnterNeedMoreTests
+
+ ; Everything is fine - get the thread id to store in the lock
+ mov edx, [esi+Thread_m_ThreadId]
+
+ ; If the thread id is too large, we need a syncblock for sure
+ cmp edx, SBLK_MASK_LOCK_THREADID_ASM
+ ja MonReliableEnterFramedLockHelper
+
+ ; We want to store a new value with the current thread id set in the low 10 bits
+ or edx,eax
+ lock cmpxchg dword ptr [ARGUMENT_REG1-SyncBlockIndexOffset_ASM], edx
+ jnz MonReliableEnterPrepareToWaitThinLock
+
+ ; Everything went fine and we're done
+ add [esi+Thread_m_dwLockCount],1
+ ; Set *pbLockTaken=true
+ mov byte ptr [edi],1
+ pop esi
+ pop edi
+ pop ebx
+ ret
+
+MonReliableEnterNeedMoreTests:
+ ; Ok, it's not the simple case - find out which case it is
+ test eax, BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX_ASM
+ jnz MonReliableEnterHaveHashOrSyncBlockIndex
+
+ ; The header is transitioning or the lock is taken - treat this as if the lock was taken
+ test eax, BIT_SBLK_SPIN_LOCK_ASM
+ jnz MonReliableEnterPrepareToWaitThinLock
+
+ ; Here we know we have the "thin lock" layout, but the lock is not free.
+ ; It could still be the recursion case - compare the thread id to check
+ mov edx,eax
+ and edx, SBLK_MASK_LOCK_THREADID_ASM
+ cmp edx, [esi+Thread_m_ThreadId]
+ jne MonReliableEnterPrepareToWaitThinLock
+
+ ; Ok, the thread id matches, it's the recursion case.
+ ; Bump up the recursion level and check for overflow
+ lea edx, [eax+SBLK_LOCK_RECLEVEL_INC_ASM]
+ test edx, SBLK_MASK_LOCK_RECLEVEL_ASM
+ jz MonReliableEnterFramedLockHelper
+
+ ; Try to put the new recursion level back. If the header was changed in the meantime,
+ ; we need a full retry, because the layout could have changed.
+ lock cmpxchg [ARGUMENT_REG1-SyncBlockIndexOffset_ASM], edx
+ jnz MonReliableEnterRetryHelperThinLock
+
+ ; Everything went fine and we're done
+ ; Set *pbLockTaken=true
+ mov byte ptr [edi],1
+ pop esi
+ pop edi
+ pop ebx
+ ret
+
+MonReliableEnterPrepareToWaitThinLock:
+ ; If we are on an MP system, we try spinning for a certain number of iterations
+ cmp dword ptr g_SystemInfo+SYSTEM_INFO_dwNumberOfProcessors,1
+ jle MonReliableEnterFramedLockHelper
+
+ ; exponential backoff: delay by approximately 2*ebx clock cycles (on a PIII)
+ mov eax, ebx
+MonReliableEnterdelayLoopThinLock:
+ $repnop ; indicate to the CPU that we are spin waiting (useful for some Intel P4 multiprocs)
+ dec eax
+ jnz MonReliableEnterdelayLoopThinLock
+
+ ; next time, wait a factor longer
+ imul ebx, dword ptr g_SpinConstants+SpinConstants_dwBackoffFactor
+
+ cmp ebx, dword ptr g_SpinConstants+SpinConstants_dwMaximumDuration
+ jle MonReliableEnterRetryHelperThinLock
+
+ jmp MonReliableEnterFramedLockHelper
+
+MonReliableEnterRetryHelperThinLock:
+ jmp MonReliableEnterRetryThinLock
+
+MonReliableEnterHaveHashOrSyncBlockIndex:
+ ; If we have a hash code already, we need to create a sync block
+ test eax, BIT_SBLK_IS_HASHCODE_ASM
+ jnz MonReliableEnterFramedLockHelper
+
+ ; Ok, we have a sync block index - just and out the top bits and grab the syncblock index
+ and eax, MASK_SYNCBLOCKINDEX_ASM
+
+ ; Get the sync block pointer.
+ mov ARGUMENT_REG2, dword ptr g_pSyncTable
+ mov ARGUMENT_REG2, [ARGUMENT_REG2+eax*SizeOfSyncTableEntry_ASM+SyncTableEntry_m_SyncBlock]
+
+ ; Check if the sync block has been allocated.
+ test ARGUMENT_REG2, ARGUMENT_REG2
+ jz MonReliableEnterFramedLockHelper
+
+ ; Get a pointer to the lock object.
+ lea ARGUMENT_REG2, [ARGUMENT_REG2+SyncBlock_m_Monitor]
+
+ ; Attempt to acquire the lock.
+MonReliableEnterRetrySyncBlock:
+ mov eax, [ARGUMENT_REG2+AwareLock_m_MonitorHeld]
+ test eax,eax
+ jne MonReliableEnterHaveWaiters
+
+ ; Common case, lock isn't held and there are no waiters. Attempt to
+ ; gain ownership ourselves.
+ mov ARGUMENT_REG1,1
+ lock cmpxchg [ARGUMENT_REG2+AwareLock_m_MonitorHeld], ARGUMENT_REG1
+ jnz MonReliableEnterRetryHelperSyncBlock
+
+ ; Success. Save the thread object in the lock and increment the use count.
+ mov dword ptr [ARGUMENT_REG2+AwareLock_m_HoldingThread],esi
+ inc dword ptr [esi+Thread_m_dwLockCount]
+ inc dword ptr [ARGUMENT_REG2+AwareLock_m_Recursion]
+ ; Set *pbLockTaken=true
+ mov byte ptr [edi],1
+
+ifdef MON_DEBUG
+ifdef TRACK_SYNC
+ push ARGUMENT_REG2 ; AwareLock
+ push [esp+4] ; return address
+ call EnterSyncHelper
+endif ;TRACK_SYNC
+endif ;MON_DEBUG
+ pop esi
+ pop edi
+ pop ebx
+ ret
+
+ ; It's possible to get here with waiters but no lock held, but in this
+ ; case a signal is about to be fired which will wake up a waiter. So
+ ; for fairness sake we should wait too.
+ ; Check first for recursive lock attempts on the same thread.
+MonReliableEnterHaveWaiters:
+ ; Is mutex already owned by current thread?
+ cmp [ARGUMENT_REG2+AwareLock_m_HoldingThread],esi
+ jne MonReliableEnterPrepareToWait
+
+ ; Yes, bump our use count.
+ inc dword ptr [ARGUMENT_REG2+AwareLock_m_Recursion]
+ ; Set *pbLockTaken=true
+ mov byte ptr [edi],1
+ifdef MON_DEBUG
+ifdef TRACK_SYNC
+ push ARGUMENT_REG2 ; AwareLock
+ push [esp+4] ; return address
+ call EnterSyncHelper
+endif ;TRACK_SYNC
+endif ;MON_DEBUG
+ pop esi
+ pop edi
+ pop ebx
+ ret
+
+MonReliableEnterPrepareToWait:
+ ; If we are on an MP system, we try spinning for a certain number of iterations
+ cmp dword ptr g_SystemInfo+SYSTEM_INFO_dwNumberOfProcessors,1
+ jle MonReliableEnterHaveWaiters1
+
+ ; exponential backoff: delay by approximately 2*ebx clock cycles (on a PIII)
+ mov eax,ebx
+MonReliableEnterdelayLoop:
+ $repnop ; indicate to the CPU that we are spin waiting (useful for some Intel P4 multiprocs)
+ dec eax
+ jnz MonReliableEnterdelayLoop
+
+ ; next time, wait a factor longer
+ imul ebx, dword ptr g_SpinConstants+SpinConstants_dwBackoffFactor
+
+ cmp ebx, dword ptr g_SpinConstants+SpinConstants_dwMaximumDuration
+ jle MonReliableEnterRetrySyncBlock
+
+MonReliableEnterHaveWaiters1:
+
+ ; Place AwareLock in arg1, pbLockTaken in arg2, then call contention helper.
+ mov ARGUMENT_REG1, ARGUMENT_REG2
+ mov ARGUMENT_REG2, edi
+
+ pop esi
+ pop edi
+ pop ebx
+
+ jmp JITutil_MonReliableContention
+
+MonReliableEnterRetryHelperSyncBlock:
+ jmp MonReliableEnterRetrySyncBlock
+
+ ; ECX has the object to synchronize on
+MonReliableEnterFramedLockHelper:
+ mov ARGUMENT_REG2, edi
+ pop esi
+ pop edi
+ pop ebx
+ jmp JITutil_MonReliableEnter
+
+@JIT_MonReliableEnter@8 endp
+
+;************************************************************************
+; This is a frameless helper for trying to enter a monitor on an object.
+; The object is in ARGUMENT_REG1 and a timeout in ARGUMENT_REG2. This tries the
+; normal case (no object allocation) in line and calls a framed helper for the
+; other cases.
+; ***** NOTE: if you make any changes to this routine, build with MON_DEBUG undefined
+; to make sure you don't break the non-debug build. This is very fragile code.
+; Also, propagate the changes to jithelp.s which contains the same helper and assembly code
+; (in AT&T syntax) for gnu assembler.
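+; Calling convention note (inferred from the @12 decoration and the epilogs
+; below): the object arrives in ECX, the timeout in EDX, and a third argument -
+; the address of the BOOL result - is passed on the stack; it is what the code
+; reads via [esp+4] after the pops and what "ret 4" cleans up.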
+@JIT_MonTryEnter@12 proc public
+ ; Save the timeout parameter.
+ push ARGUMENT_REG2
+
+ ; Initialize delay value for retry with exponential backoff
+ push ebx
+ mov ebx, dword ptr g_SpinConstants+SpinConstants_dwInitialDuration
+
+ ; The thin lock logic needs another register to store the thread
+ push esi
+
+ ; Check if the instance is NULL.
+ test ARGUMENT_REG1, ARGUMENT_REG1
+ jz MonTryEnterFramedLockHelper
+
+ ; Check if the timeout looks valid
+ cmp ARGUMENT_REG2,-1
+ jl MonTryEnterFramedLockHelper
+
+ ; Get the thread right away, we'll need it in any case
+ call _GetThread@0
+ mov esi,eax
+
+ ; Check if we can abort here
+ mov eax, [esi+Thread_m_State]
+ and eax, TS_CatchAtSafePoint_ASM
+ jz MonTryEnterRetryThinLock
+ ; go through the slow code path to initiate ThreadAbort.
+ jmp MonTryEnterFramedLockHelper
+
+MonTryEnterRetryThinLock:
+ ; Get the header dword and check its layout
+ mov eax, [ARGUMENT_REG1-SyncBlockIndexOffset_ASM]
+
+ ; Check whether we have the "thin lock" layout, the lock is free and the spin lock bit not set
+ ; SBLK_COMBINED_MASK_ASM = BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX + BIT_SBLK_SPIN_LOCK + SBLK_MASK_LOCK_THREADID + SBLK_MASK_LOCK_RECLEVEL
+ test eax, SBLK_COMBINED_MASK_ASM
+ jnz MonTryEnterNeedMoreTests
+
+ ; Ok, everything is fine. Fetch the thread id and make sure it's small enough for thin locks
+ mov edx, [esi+Thread_m_ThreadId]
+ cmp edx, SBLK_MASK_LOCK_THREADID_ASM
+ ja MonTryEnterFramedLockHelper
+
+ ; Try to put our thread id in there
+ or edx,eax
+ lock cmpxchg [ARGUMENT_REG1-SyncBlockIndexOffset_ASM],edx
+ jnz MonTryEnterRetryHelperThinLock
+
+ ; Got the lock - everything is fine
+ add [esi+Thread_m_dwLockCount],1
+ pop esi
+
+ ; Delay value no longer needed
+ pop ebx
+
+ ; Timeout parameter not needed, ditch it from the stack.
+ add esp,4
+
+ mov eax, [esp+4]
+ mov byte ptr [eax], 1
+ ret 4
+
+MonTryEnterNeedMoreTests:
+ ; Ok, it's not the simple case - find out which case it is
+ test eax, BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX_ASM
+ jnz MonTryEnterHaveSyncBlockIndexOrHash
+
+ ; The header is transitioning or the lock is taken
+ test eax, BIT_SBLK_SPIN_LOCK_ASM
+ jnz MonTryEnterRetryHelperThinLock
+
+ mov edx, eax
+ and edx, SBLK_MASK_LOCK_THREADID_ASM
+ cmp edx, [esi+Thread_m_ThreadId]
+ jne MonTryEnterPrepareToWaitThinLock
+
+ ; Ok, the thread id matches, it's the recursion case.
+ ; Bump up the recursion level and check for overflow
+ lea edx, [eax+SBLK_LOCK_RECLEVEL_INC_ASM]
+ test edx, SBLK_MASK_LOCK_RECLEVEL_ASM
+ jz MonTryEnterFramedLockHelper
+
+ ; Try to put the new recursion level back. If the header was changed in the meantime,
+ ; we need a full retry, because the layout could have changed.
+ lock cmpxchg [ARGUMENT_REG1-SyncBlockIndexOffset_ASM],edx
+ jnz MonTryEnterRetryHelperThinLock
+
+ ; Everything went fine and we're done
+ pop esi
+ pop ebx
+
+ ; Timeout parameter not needed, ditch it from the stack.
+ add esp, 4
+ mov eax, [esp+4]
+ mov byte ptr [eax], 1
+ ret 4
+
+MonTryEnterPrepareToWaitThinLock:
+ ; If we are on an MP system, we try spinning for a certain number of iterations
+ cmp dword ptr g_SystemInfo+SYSTEM_INFO_dwNumberOfProcessors,1
+ jle MonTryEnterFramedLockHelper
+
+ ; exponential backoff: delay by approximately 2*ebx clock cycles (on a PIII)
+ mov eax, ebx
+MonTryEnterdelayLoopThinLock:
+ $repnop ; indicate to the CPU that we are spin waiting (useful for some Intel P4 multiprocs)
+ dec eax
+ jnz MonTryEnterdelayLoopThinLock
+
+ ; next time, wait a factor longer
+ imul ebx, dword ptr g_SpinConstants+SpinConstants_dwBackoffFactor
+
+ cmp ebx, dword ptr g_SpinConstants+SpinConstants_dwMaximumDuration
+ jle MonTryEnterRetryHelperThinLock
+
+ jmp MonTryEnterWouldBlock
+
+MonTryEnterRetryHelperThinLock:
+ jmp MonTryEnterRetryThinLock
+
+
+MonTryEnterHaveSyncBlockIndexOrHash:
+ ; If we have a hash code already, we need to create a sync block
+ test eax, BIT_SBLK_IS_HASHCODE_ASM
+ jnz MonTryEnterFramedLockHelper
+
+ ; Just and out the top bits and grab the syncblock index
+ and eax, MASK_SYNCBLOCKINDEX_ASM
+
+ ; Get the sync block pointer.
+ mov ARGUMENT_REG2, dword ptr g_pSyncTable
+ mov ARGUMENT_REG2, [ARGUMENT_REG2+eax*SizeOfSyncTableEntry_ASM+SyncTableEntry_m_SyncBlock]
+
+ ; Check if the sync block has been allocated.
+ test ARGUMENT_REG2, ARGUMENT_REG2
+ jz MonTryEnterFramedLockHelper
+
+ ; Get a pointer to the lock object.
+ lea ARGUMENT_REG2, [ARGUMENT_REG2+SyncBlock_m_Monitor]
+
+MonTryEnterRetrySyncBlock:
+ ; Attempt to acquire the lock.
+ mov eax, [ARGUMENT_REG2+AwareLock_m_MonitorHeld]
+ test eax,eax
+ jne MonTryEnterHaveWaiters
+
+ ; We need another scratch register for what follows, so save EBX now so
+ ; we can use it for that purpose.
+ push ebx
+
+ ; Common case, lock isn't held and there are no waiters. Attempt to
+ ; gain ownership ourselves.
+ mov ebx,1
+ lock cmpxchg [ARGUMENT_REG2+AwareLock_m_MonitorHeld],ebx
+
+ pop ebx
+
+ jnz MonTryEnterRetryHelperSyncBlock
+
+ ; Success. Save the thread object in the lock and increment the use count.
+ mov dword ptr [ARGUMENT_REG2+AwareLock_m_HoldingThread],esi
+ inc dword ptr [ARGUMENT_REG2+AwareLock_m_Recursion]
+ inc dword ptr [esi+Thread_m_dwLockCount]
+
+ifdef MON_DEBUG
+ifdef TRACK_SYNC
+ push ARGUMENT_REG2 ; AwareLock
+ push [esp+4] ; return address
+ call EnterSyncHelper
+endif ;TRACK_SYNC
+endif ;MON_DEBUG
+
+ pop esi
+ pop ebx
+
+ ; Timeout parameter not needed, ditch it from the stack.
+ add esp,4
+
+ mov eax, [esp+4]
+ mov byte ptr [eax], 1
+ ret 4
+
+ ; It's possible to get here with waiters but no lock held, but in this
+ ; case a signal is about to be fired which will wake up a waiter. So
+ ; for fairness sake we should wait too.
+ ; Check first for recursive lock attempts on the same thread.
+MonTryEnterHaveWaiters:
+ ; Is mutex already owned by current thread?
+ cmp [ARGUMENT_REG2+AwareLock_m_HoldingThread],esi
+ jne MonTryEnterPrepareToWait
+
+ ; Yes, bump our use count.
+ inc dword ptr [ARGUMENT_REG2+AwareLock_m_Recursion]
+ifdef MON_DEBUG
+ifdef TRACK_SYNC
+ push ARGUMENT_REG2 ; AwareLock
+ push [esp+4] ; return address
+ call EnterSyncHelper
+endif ;TRACK_SYNC
+endif ;MON_DEBUG
+ pop esi
+ pop ebx
+
+ ; Timeout parameter not needed, ditch it from the stack.
+ add esp,4
+
+ mov eax, [esp+4]
+ mov byte ptr [eax], 1
+ ret 4
+
+MonTryEnterPrepareToWait:
+ ; If we are on an MP system, we try spinning for a certain number of iterations
+ cmp dword ptr g_SystemInfo+SYSTEM_INFO_dwNumberOfProcessors,1
+ jle MonTryEnterWouldBlock
+
+ ; exponential backoff: delay by approximately 2*ebx clock cycles (on a PIII)
+ mov eax, ebx
+MonTryEnterdelayLoop:
+ $repnop ; indicate to the CPU that we are spin waiting (useful for some Intel P4 multiprocs)
+ dec eax
+ jnz MonTryEnterdelayLoop
+
+ ; next time, wait a factor longer
+ imul ebx, dword ptr g_SpinConstants+SpinConstants_dwBackoffFactor
+
+ cmp ebx, dword ptr g_SpinConstants+SpinConstants_dwMaximumDuration
+ jle MonTryEnterRetrySyncBlock
+
+ ; We would need to block to enter the section. Return failure if
+ ; timeout is zero, else call the framed helper to do the blocking
+ ; form of TryEnter.
+MonTryEnterWouldBlock:
+ pop esi
+ pop ebx
+ pop ARGUMENT_REG2
+ test ARGUMENT_REG2, ARGUMENT_REG2
+ jnz MonTryEnterBlock
+ mov eax, [esp+4]
+ mov byte ptr [eax], 0
+ ret 4
+
+MonTryEnterRetryHelperSyncBlock:
+ jmp MonTryEnterRetrySyncBlock
+
+MonTryEnterFramedLockHelper:
+ ; ARGUMENT_REG1 has the object to synchronize on, must retrieve the
+ ; timeout parameter from the stack.
+ pop esi
+ pop ebx
+ pop ARGUMENT_REG2
+MonTryEnterBlock:
+ jmp JITutil_MonTryEnter
+
+@JIT_MonTryEnter@12 endp
+
+;**********************************************************************
+; This is a frameless helper for exiting a monitor on an object.
+; The object is in ARGUMENT_REG1. This tries the normal case (no
+; blocking or object allocation) in line and calls a framed helper
+; for the other cases.
+; ***** NOTE: if you make any changes to this routine, build with MON_DEBUG undefined
+; to make sure you don't break the non-debug build. This is very fragile code.
+; Also, propagate the changes to jithelp.s which contains the same helper and assembly code
+; (in AT&T syntax) for gnu assembler.
+@JIT_MonExitWorker@4 proc public
+ ; The thin lock logic needs an additional register to hold the thread, unfortunately
+ push esi
+
+ ; Check if the instance is NULL.
+ test ARGUMENT_REG1, ARGUMENT_REG1
+ jz MonExitFramedLockHelper
+
+ call _GetThread@0
+ mov esi,eax
+
+MonExitRetryThinLock:
+ ; Fetch the header dword and check its layout and the spin lock bit
+ mov eax, [ARGUMENT_REG1-SyncBlockIndexOffset_ASM]
+ ;BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX_SPIN_LOCK_ASM = BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX + BIT_SBLK_SPIN_LOCK
+ test eax, BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX_SPIN_LOCK_ASM
+ jnz MonExitNeedMoreTests
+
+ ; Ok, we have a "thin lock" layout - check whether the thread id matches
+ mov edx,eax
+ and edx, SBLK_MASK_LOCK_THREADID_ASM
+ cmp edx, [esi+Thread_m_ThreadId]
+ jne MonExitFramedLockHelper
+
+ ; Check the recursion level
+ test eax, SBLK_MASK_LOCK_RECLEVEL_ASM
+ jne MonExitDecRecursionLevel
+
+ ; It's zero - we're leaving the lock.
+ ; So try to put back a zero thread id.
+ ; edx and eax match in the thread id bits, and edx is zero elsewhere, so the xor is sufficient
+ xor edx,eax
+ lock cmpxchg [ARGUMENT_REG1-SyncBlockIndexOffset_ASM],edx
+ jnz MonExitRetryHelperThinLock
+
+ ; We're done
+ sub [esi+Thread_m_dwLockCount],1
+ pop esi
+ ret
+
+MonExitDecRecursionLevel:
+ lea edx, [eax-SBLK_LOCK_RECLEVEL_INC_ASM]
+ lock cmpxchg [ARGUMENT_REG1-SyncBlockIndexOffset_ASM],edx
+ jnz MonExitRetryHelperThinLock
+
+ ; We're done
+ pop esi
+ ret
+
+MonExitNeedMoreTests:
+ ;Forward all special cases to the slow helper
+ ;BIT_SBLK_IS_HASHCODE_OR_SPIN_LOCK_ASM = BIT_SBLK_IS_HASHCODE + BIT_SBLK_SPIN_LOCK
+ test eax, BIT_SBLK_IS_HASHCODE_OR_SPIN_LOCK_ASM
+ jnz MonExitFramedLockHelper
+
+ ; Get the sync block index and use it to compute the sync block pointer
+ mov ARGUMENT_REG2, dword ptr g_pSyncTable
+ and eax, MASK_SYNCBLOCKINDEX_ASM
+ mov ARGUMENT_REG2, [ARGUMENT_REG2+eax*SizeOfSyncTableEntry_ASM+SyncTableEntry_m_SyncBlock]
+
+ ; was there a sync block?
+ test ARGUMENT_REG2, ARGUMENT_REG2
+ jz MonExitFramedLockHelper
+
+ ; Get a pointer to the lock object.
+ lea ARGUMENT_REG2, [ARGUMENT_REG2+SyncBlock_m_Monitor]
+
+ ; Check if lock is held.
+ cmp [ARGUMENT_REG2+AwareLock_m_HoldingThread],esi
+ jne MonExitFramedLockHelper
+
+ifdef MON_DEBUG
+ifdef TRACK_SYNC
+ push ARGUMENT_REG1 ; preserve regs
+ push ARGUMENT_REG2
+
+ push ARGUMENT_REG2 ; AwareLock
+ push [esp+8] ; return address
+ call LeaveSyncHelper
+
+ pop ARGUMENT_REG2 ; restore regs
+ pop ARGUMENT_REG1
+endif ;TRACK_SYNC
+endif ;MON_DEBUG
+ ; Reduce our recursion count.
+ dec dword ptr [ARGUMENT_REG2+AwareLock_m_Recursion]
+ jz MonExitLastRecursion
+
+ pop esi
+ ret
+
+MonExitRetryHelperThinLock:
+ jmp MonExitRetryThinLock
+
+MonExitFramedLockHelper:
+ pop esi
+ jmp JITutil_MonExitWorker
+
+ ; This is the last count we held on this lock, so release the lock.
+MonExitLastRecursion:
+ dec dword ptr [esi+Thread_m_dwLockCount]
+ mov dword ptr [ARGUMENT_REG2+AwareLock_m_HoldingThread],0
+
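+ ; The loop below releases the lock by decrementing m_MonitorHeld with a
+ ; cmpxchg; eax keeps the pre-release value, so the 0FFFFFFFEh test afterwards
+ ; checks whether any waiter bits were set, in which case JITutil_MonSignal is
+ ; called to wake a waiter (informational note, inferred from the code).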
+MonExitRetry:
+ mov eax, [ARGUMENT_REG2+AwareLock_m_MonitorHeld]
+ lea esi, [eax-1]
+ lock cmpxchg [ARGUMENT_REG2+AwareLock_m_MonitorHeld], esi
+ jne MonExitRetryHelper
+ pop esi
+ test eax,0FFFFFFFEh
+ jne MonExitMustSignal
+
+ ret
+
+MonExitMustSignal:
+ mov ARGUMENT_REG1, ARGUMENT_REG2
+ jmp JITutil_MonSignal
+
+MonExitRetryHelper:
+ jmp MonExitRetry
+
+@JIT_MonExitWorker@4 endp
+
+;**********************************************************************
+; This is a frameless helper for entering a static monitor on a class.
+; The methoddesc is in ARGUMENT_REG1. This tries the normal case (no
+; blocking or object allocation) in line and calls a framed helper
+; for the other cases.
+; Note we are changing the methoddesc parameter to a pointer to the
+; AwareLock.
+; ***** NOTE: if you make any changes to this routine, build with MON_DEBUG undefined
+; to make sure you don't break the non-debug build. This is very fragile code.
+; Also, propagate the changes to jithelp.s which contains the same helper and assembly code
+; (in AT&T syntax) for gnu assembler.
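+; Note (inferred): because the caller has already resolved the AwareLock,
+; there is no thin-lock path here - the helper goes straight to the
+; interlocked acquire of m_MonitorHeld and defers contention to
+; JITutil_MonContention.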
+@JIT_MonEnterStatic@4 proc public
+ ; We need another scratch register for what follows, so save EBX now so
+ ; we can use it for that purpose.
+ push ebx
+
+ ; Attempt to acquire the lock
+MonEnterStaticRetry:
+ mov eax, [ARGUMENT_REG1+AwareLock_m_MonitorHeld]
+ test eax,eax
+ jne MonEnterStaticHaveWaiters
+
+ ; Common case, lock isn't held and there are no waiters. Attempt to
+ ; gain ownership ourselves.
+ mov ebx,1
+ lock cmpxchg [ARGUMENT_REG1+AwareLock_m_MonitorHeld],ebx
+ jnz MonEnterStaticRetryHelper
+
+ pop ebx
+
+ ; Success. Save the thread object in the lock and increment the use count.
+ call _GetThread@0
+ mov [ARGUMENT_REG1+AwareLock_m_HoldingThread], eax
+ inc dword ptr [ARGUMENT_REG1+AwareLock_m_Recursion]
+ inc dword ptr [eax+Thread_m_dwLockCount]
+
+ifdef MON_DEBUG
+ifdef TRACK_SYNC
+ push ARGUMENT_REG1 ; AwareLock
+ push [esp+4] ; return address
+ call EnterSyncHelper
+endif ;TRACK_SYNC
+endif ;MON_DEBUG
+ ret
+
+ ; It's possible to get here with waiters but no lock held, but in this
+ ; case a signal is about to be fired which will wake up a waiter. So
+ ; for fairness sake we should wait too.
+ ; Check first for recursive lock attempts on the same thread.
+MonEnterStaticHaveWaiters:
+ ; Get thread but preserve EAX (contains cached contents of m_MonitorHeld).
+ push eax
+ call _GetThread@0
+ mov ebx,eax
+ pop eax
+
+ ; Is mutex already owned by current thread?
+ cmp [ARGUMENT_REG1+AwareLock_m_HoldingThread],ebx
+ jne MonEnterStaticPrepareToWait
+
+ ; Yes, bump our use count.
+ inc dword ptr [ARGUMENT_REG1+AwareLock_m_Recursion]
+ifdef MON_DEBUG
+ifdef TRACK_SYNC
+ push ARGUMENT_REG1 ; AwareLock
+ push [esp+4] ; return address
+ call EnterSyncHelper
+endif ;TRACK_SYNC
+endif ;MON_DEBUG
+ pop ebx
+ ret
+
+MonEnterStaticPrepareToWait:
+ pop ebx
+
+ ; ARGUMENT_REG1 should have AwareLock. Call contention helper.
+ jmp JITutil_MonContention
+
+MonEnterStaticRetryHelper:
+ jmp MonEnterStaticRetry
+@JIT_MonEnterStatic@4 endp
+
+;**********************************************************************
+; A frameless helper for exiting a static monitor on a class.
+; The methoddesc is in ARGUMENT_REG1. This tries the normal case (no
+; blocking or object allocation) in line and calls a framed helper
+; for the other cases.
+; Note we are changing the methoddesc parameter to a pointer to the
+; AwareLock.
+; ***** NOTE: if you make any changes to this routine, build with MON_DEBUG undefined
+; to make sure you don't break the non-debug build. This is very fragile code.
+; Also, propagate the changes to jithelp.s which contains the same helper and assembly code
+; (in AT&T syntax) for gnu assembler.
+@JIT_MonExitStatic@4 proc public
+
+ifdef MON_DEBUG
+ifdef TRACK_SYNC
+ push ARGUMENT_REG1 ; preserve regs
+
+ push ARGUMENT_REG1 ; AwareLock
+ push [esp+8] ; return address
+ call LeaveSyncHelper
+
+ pop ARGUMENT_REG1 ; restore regs
+endif ;TRACK_SYNC
+endif ;MON_DEBUG
+
+ ; Check if lock is held.
+ call _GetThread@0
+ cmp [ARGUMENT_REG1+AwareLock_m_HoldingThread],eax
+ jne MonExitStaticLockError
+
+ ; Reduce our recursion count.
+ dec dword ptr [ARGUMENT_REG1+AwareLock_m_Recursion]
+ jz MonExitStaticLastRecursion
+
+ ret
+
+ ; This is the last count we held on this lock, so release the lock.
+MonExitStaticLastRecursion:
+ ; eax must have the thread object
+ dec dword ptr [eax+Thread_m_dwLockCount]
+ mov dword ptr [ARGUMENT_REG1+AwareLock_m_HoldingThread],0
+ push ebx
+
+MonExitStaticRetry:
+ mov eax, [ARGUMENT_REG1+AwareLock_m_MonitorHeld]
+ lea ebx, [eax-1]
+ lock cmpxchg [ARGUMENT_REG1+AwareLock_m_MonitorHeld],ebx
+ jne MonExitStaticRetryHelper
+ pop ebx
+ test eax,0FFFFFFFEh
+ jne MonExitStaticMustSignal
+
+ ret
+
+MonExitStaticMustSignal:
+ jmp JITutil_MonSignal
+
+MonExitStaticRetryHelper:
+ jmp MonExitStaticRetry
+ ; Throw a synchronization lock exception.
+MonExitStaticLockError:
+ mov ARGUMENT_REG1, CORINFO_SynchronizationLockException_ASM
+ jmp JIT_InternalThrow
+
+@JIT_MonExitStatic@4 endp
+
+; PatchedCodeStart and PatchedCodeEnd are used to determine bounds of patched code.
+;
+
+_JIT_PatchedCodeStart@0 proc public
+ret
+_JIT_PatchedCodeStart@0 endp
+
+;
+; Optimized TLS getters
+;
+
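+; Note (inferred from the stubs below): each getter reserves
+; TLS_GETTER_MAX_SIZE_ASM bytes of int 3 padding (0CCh) so the runtime can
+; overwrite it in place with an optimized TLS access; until then the short
+; jmp falls through to _GetTLSDummy@0, which returns NULL.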
+ ALIGN 4
+
+ifndef FEATURE_IMPLICIT_TLS
+_GetThread@0 proc public
+ ; This will be overwritten at runtime with optimized GetThread implementation
+ jmp short _GetTLSDummy@0
+ ; Just allocate space that will be filled in at runtime
+ db (TLS_GETTER_MAX_SIZE_ASM - 2) DUP (0CCh)
+_GetThread@0 endp
+
+ ALIGN 4
+
+_GetAppDomain@0 proc public
+ ; This will be overwritten at runtime with optimized GetAppDomain implementation
+ jmp short _GetTLSDummy@0
+ ; Just allocate space that will be filled in at runtime
+ db (TLS_GETTER_MAX_SIZE_ASM - 2) DUP (0CCh)
+_GetAppDomain@0 endp
+
+_GetTLSDummy@0 proc public
+ xor eax,eax
+ ret
+_GetTLSDummy@0 endp
+
+ ALIGN 4
+
+_ClrFlsGetBlock@0 proc public
+ ; This will be overwritten at runtime with optimized ClrFlsGetBlock implementation
+ jmp short _GetTLSDummy@0
+ ; Just allocate space that will be filled in at runtime
+ db (TLS_GETTER_MAX_SIZE_ASM - 2) DUP (0CCh)
+_ClrFlsGetBlock@0 endp
+endif
+
+;**********************************************************************
+; Write barriers generated at runtime
+
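+; Note (inferred from the placeholders below): one 48-byte stub is reserved
+; per register that can hold the source object reference; the runtime fills
+; each in with a barrier presumably specialized for the register named in the
+; symbol, and the Start/Last markers bracket the patchable region.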
+PUBLIC _JIT_PatchedWriteBarrierStart@0
+_JIT_PatchedWriteBarrierStart@0 PROC
+ret
+_JIT_PatchedWriteBarrierStart@0 ENDP
+
+PatchedWriteBarrierHelper MACRO rg
+ ALIGN 8
+PUBLIC _JIT_WriteBarrier&rg&@0
+_JIT_WriteBarrier&rg&@0 PROC
+ ; Just allocate space that will be filled in at runtime
+ db (48) DUP (0CCh)
+_JIT_WriteBarrier&rg&@0 ENDP
+
+ENDM
+
+PatchedWriteBarrierHelper <EAX>
+PatchedWriteBarrierHelper <EBX>
+PatchedWriteBarrierHelper <ECX>
+PatchedWriteBarrierHelper <ESI>
+PatchedWriteBarrierHelper <EDI>
+PatchedWriteBarrierHelper <EBP>
+
+PUBLIC _JIT_PatchedWriteBarrierLast@0
+_JIT_PatchedWriteBarrierLast@0 PROC
+ret
+_JIT_PatchedWriteBarrierLast@0 ENDP
+
+;**********************************************************************
+; PrecodeRemotingThunk is patched at runtime to activate it
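+; Note (inferred from the comments below): until remoting is activated the
+; thunk is a single ret, so ordinary calls pay almost nothing; once patched,
+; the first instruction becomes "test ecx,ecx" and the 11111111h placeholder
+; becomes the value compared against the object's method table to detect a
+; transparent proxy.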
+ifdef FEATURE_REMOTING
+ ALIGN 16
+_PrecodeRemotingThunk@0 proc public
+
+ ret ; This is going to be patched to "test ecx,ecx"
+ nop
+
+ jz RemotingDone ; predicted not taken
+
+ cmp dword ptr [ecx],11111111h ; This is going to be patched to address of the transparent proxy
+ je RemotingCheck ; predicted not taken
+
+RemotingDone:
+ ret
+
+RemotingCheck:
+ push eax ; save method desc
+ mov eax, dword ptr [ecx + TransparentProxyObject___stubData]
+ call [ecx + TransparentProxyObject___stub]
+ test eax, eax
+ jnz RemotingCtxMismatch
+ mov eax, [esp]
+ mov ax, [eax + MethodDesc_m_wFlags]
+ and ax, MethodDesc_mdcClassification
+ cmp ax, MethodDesc_mcComInterop
+ je ComPlusCall
+ pop eax ; throw away method desc
+ jmp RemotingDone
+
+RemotingCtxMismatch:
+ pop eax ; restore method desc
+ add esp, 4 ; pop return address into the precode
+ jmp _TransparentProxyStub_CrossContext@0
+
+ComPlusCall:
+ pop eax ; restore method desc
+ mov [esp],eax ; replace return address into the precode with method desc (argument for TP stub)
+ jmp _InContextTPQuickDispatchAsmStub@0
+
+_PrecodeRemotingThunk@0 endp
+endif ; FEATURE_REMOTING
+
+_JIT_PatchedCodeLast@0 proc public
+ret
+_JIT_PatchedCodeLast@0 endp
+
+; This is the first function outside the "keep together range". Used by BBT scripts.
+_JIT_PatchedCodeEnd@0 proc public
+ret
+_JIT_PatchedCodeEnd@0 endp
+
+; This is the ASM portion of JIT_IsInstanceOfInterface. For all the bizarre cases, it quickly
+; fails and falls back on the JITutil_IsInstanceOfAny helper. So all failure cases take
+; the slow path, too.
+;
+; ARGUMENT_REG1 = array or interface to check for.
+; ARGUMENT_REG2 = instance to be cast.
+
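+; Implementation note (inferred from the code): the method table's interface
+; map is scanned linearly, comparing ARGUMENT_REG1 against each
+; InterfaceInfo_t entry; under FEATURE_PREJIT an entry may instead be a tagged
+; indirection (low bit set), which is why the out-of-line path compares
+; against [esi-1].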
+ ALIGN 16
+PUBLIC @JIT_IsInstanceOfInterface@8
+@JIT_IsInstanceOfInterface@8 PROC
+ test ARGUMENT_REG2, ARGUMENT_REG2
+ jz IsNullInst
+
+ mov eax, [ARGUMENT_REG2] ; get MethodTable
+
+ push ebx
+ push esi
+ movzx ebx, word ptr [eax+MethodTable_m_wNumInterfaces]
+
+ ; check if this MT implements any interfaces
+ test ebx, ebx
+ jz IsInstanceOfInterfaceDoBizarre
+
+ ; move Interface map ptr into eax
+ mov eax, [eax+MethodTable_m_pInterfaceMap]
+
+IsInstanceOfInterfaceTop:
+ ; eax -> current InterfaceInfo_t entry in interface map list
+ifdef FEATURE_PREJIT
+ mov esi, [eax]
+ test esi, 1
+ ; Move the dereference out of line so that this jump is correctly predicted for the case
+ ; when there is no indirection
+ jnz IsInstanceOfInterfaceIndir
+ cmp ARGUMENT_REG1, esi
+else
+ cmp ARGUMENT_REG1, [eax]
+endif
+ je IsInstanceOfInterfaceFound
+
+IsInstanceOfInterfaceNext:
+ add eax, SIZEOF_InterfaceInfo_t
+ dec ebx
+ jnz IsInstanceOfInterfaceTop
+
+ ; fall through to DoBizarre
+
+IsInstanceOfInterfaceDoBizarre:
+ pop esi
+ pop ebx
+ mov eax, [ARGUMENT_REG2] ; get MethodTable
+ test dword ptr [eax+MethodTable_m_dwFlags], NonTrivialInterfaceCastFlags
+ jnz IsInstanceOfInterfaceNonTrivialCast
+
+IsNullInst:
+ xor eax,eax
+ ret
+
+ifdef FEATURE_PREJIT
+IsInstanceOfInterfaceIndir:
+ cmp ARGUMENT_REG1,[esi-1]
+ jne IsInstanceOfInterfaceNext
+endif
+
+IsInstanceOfInterfaceFound:
+ pop esi
+ pop ebx
+ mov eax, ARGUMENT_REG2 ; the successful instance
+ ret
+
+IsInstanceOfInterfaceNonTrivialCast:
+ jmp @JITutil_IsInstanceOfInterface@8
+
+@JIT_IsInstanceOfInterface@8 endp
+
+; This is the ASM portion of JIT_ChkCastInterface. For all the bizarre cases, it quickly
+; fails and falls back on the JITutil_ChkCastAny helper. So all failure cases take
+; the slow path, too.
+;
+; ARGUMENT_REG1 = array or interface to check for.
+; ARGUMENT_REG2 = instance to be cast.
+
+ ALIGN 16
+PUBLIC @JIT_ChkCastInterface@8
+@JIT_ChkCastInterface@8 PROC
+ test ARGUMENT_REG2, ARGUMENT_REG2
+ jz ChkCastInterfaceIsNullInst
+
+ mov eax, [ARGUMENT_REG2] ; get MethodTable
+
+ push ebx
+ push esi
+ movzx ebx, word ptr [eax+MethodTable_m_wNumInterfaces]
+
+ ; speculatively move Interface map ptr into eax
+ mov eax, [eax+MethodTable_m_pInterfaceMap]
+
+ ; check if this MT implements any interfaces
+ test ebx, ebx
+ jz ChkCastInterfaceDoBizarre
+
+ChkCastInterfaceTop:
+ ; eax -> current InterfaceInfo_t entry in interface map list
+ifdef FEATURE_PREJIT
+ mov esi, [eax]
+ test esi, 1
+ ; Move the dereference out of line so that this jump is correctly predicted for the case
+ ; when there is no indirection
+ jnz ChkCastInterfaceIndir
+ cmp ARGUMENT_REG1, esi
+else
+ cmp ARGUMENT_REG1, [eax]
+endif
+ je ChkCastInterfaceFound
+
+ChkCastInterfaceNext:
+ add eax, SIZEOF_InterfaceInfo_t
+ dec ebx
+ jnz ChkCastInterfaceTop
+
+ ; fall through to DoBizarre
+
+ChkCastInterfaceDoBizarre:
+ pop esi
+ pop ebx
+ jmp @JITutil_ChkCastInterface@8
+
+ifdef FEATURE_PREJIT
+ChkCastInterfaceIndir:
+ cmp ARGUMENT_REG1,[esi-1]
+ jne ChkCastInterfaceNext
+endif
+
+ChkCastInterfaceFound:
+ pop esi
+ pop ebx
+
+ChkCastInterfaceIsNullInst:
+ mov eax, ARGUMENT_REG2 ; either null, or the successful instance
+ ret
+
+@JIT_ChkCastInterface@8 endp
+
+ end
diff --git a/src/vm/i386/jitinterfacex86.cpp b/src/vm/i386/jitinterfacex86.cpp
new file mode 100644
index 0000000000..949b115ce2
--- /dev/null
+++ b/src/vm/i386/jitinterfacex86.cpp
@@ -0,0 +1,1922 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+// ===========================================================================
+// File: JITinterfaceX86.CPP
+//
+// ===========================================================================
+
+// This contains JITinterface routines that are tailored for
+// X86 platforms. Non-X86 versions of these can be found in
+// JITinterfaceGen.cpp
+
+
+#include "common.h"
+#include "jitinterface.h"
+#include "eeconfig.h"
+#include "excep.h"
+#include "comdelegate.h"
+#ifdef FEATURE_REMOTING
+#include "remoting.h" // create context bound and remote class instances
+#endif
+#include "field.h"
+#include "ecall.h"
+#include "asmconstants.h"
+#include "virtualcallstub.h"
+#include "eventtrace.h"
+#include "threadsuspend.h"
+
+#if defined(_DEBUG) && !defined (WRITE_BARRIER_CHECK)
+#define WRITE_BARRIER_CHECK 1
+#endif
+
+// To test with MON_DEBUG off, comment out the following line. DO NOT simply define
+// to be 0 as the checks are for #ifdef not #if 0.
+//
+#ifdef _DEBUG
+#define MON_DEBUG 1
+#endif
+
+class generation;
+extern "C" generation generation_table[];
+
+extern "C" void STDCALL JIT_WriteBarrierReg_PreGrow();// JIThelp.asm/JIThelp.s
+extern "C" void STDCALL JIT_WriteBarrierReg_PostGrow();// JIThelp.asm/JIThelp.s
+
+#ifdef _DEBUG
+extern "C" void STDCALL WriteBarrierAssert(BYTE* ptr, Object* obj)
+{
+ STATIC_CONTRACT_SO_TOLERANT;
+ WRAPPER_NO_CONTRACT;
+
+ static BOOL fVerifyHeap = -1;
+
+ if (fVerifyHeap == -1)
+ fVerifyHeap = g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_GC;
+
+ if (fVerifyHeap)
+ {
+ obj->Validate(FALSE);
+ if(GCHeap::GetGCHeap()->IsHeapPointer(ptr))
+ {
+ Object* pObj = *(Object**)ptr;
+ _ASSERTE (pObj == NULL || GCHeap::GetGCHeap()->IsHeapPointer(pObj));
+ }
+ }
+ else
+ {
+ _ASSERTE((g_lowest_address <= ptr && ptr < g_highest_address) ||
+ ((size_t)ptr < MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT));
+ }
+}
+
+#endif // _DEBUG
+
+/****************************************************************************/
+/* assigns 'val' to 'array[idx]', after doing all the proper checks */
+
+/* note that we can do almost as well in portable code, but this
+ squeezes the last little bit of perf out */
+
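+// Fast-path summary (informational, inferred from the assembly below): null
+// and bounds checks come first (the index is compared against [ECX+4], the
+// array length), then a cheap exact match of the value's method table against
+// the array's element type handle, then the object[] special case, then
+// ObjIsInstanceOfNoGC, and only as a last resort ArrayStoreCheck under a
+// helper frame. Null stores skip the write barrier entirely.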
+__declspec(naked) void F_CALL_CONV JIT_Stelem_Ref(PtrArray* array, unsigned idx, Object* val)
+{
+ STATIC_CONTRACT_SO_TOLERANT;
+ STATIC_CONTRACT_THROWS;
+ STATIC_CONTRACT_GC_TRIGGERS;
+
+ enum { CanCast = TypeHandle::CanCast,
+#if CHECK_APP_DOMAIN_LEAKS
+ EEClassFlags = EEClass::AUXFLAG_APP_DOMAIN_AGILE |
+ EEClass::AUXFLAG_CHECK_APP_DOMAIN_AGILE,
+#endif // CHECK_APP_DOMAIN_LEAKS
+ };
+
+ __asm {
+ mov EAX, [ESP+4] // EAX = val
+
+ test ECX, ECX
+ je ThrowNullReferenceException
+
+ cmp EDX, [ECX+4] // test if in bounds
+ jae ThrowIndexOutOfRangeException
+
+ test EAX, EAX
+ jz Assigning0
+
+#if CHECK_APP_DOMAIN_LEAKS
+ mov EAX,[g_pConfig]
+ movzx EAX, [EAX]EEConfig.fAppDomainLeaks;
+ test EAX, EAX
+ jz NoCheck
+ // Check whether the element type is app domain agile or check agile
+ mov EAX, [ECX]
+ mov EAX, [EAX]MethodTable.m_ElementTypeHnd
+ test EAX, 2 // Check for non-MT
+ jnz NoCheck
+ // Check VMflags of element type
+ mov EAX, [EAX]MethodTable.m_pEEClass
+ mov EAX, dword ptr [EAX]EEClass.m_wAuxFlags
+ test EAX, EEClassFlags
+ jnz NeedFrame // Jump to the generic case so we can do an app domain check
+ NoCheck:
+ mov EAX, [ESP+4] // EAX = val
+#endif // CHECK_APP_DOMAIN_LEAKS
+
+ push EDX
+ mov EDX, [ECX]
+ mov EDX, [EDX]MethodTable.m_ElementTypeHnd
+
+ cmp EDX, [EAX] // do we have an exact match
+ jne NotExactMatch
+
+DoWrite2:
+ pop EDX
+ lea EDX, [ECX + 4*EDX + 8]
+ call JIT_WriteBarrierEAX
+ ret 4
+
+Assigning0:
+ // write barrier is not necessary for assignment of NULL references
+ mov [ECX + 4*EDX + 8], EAX
+ ret 4
+
+DoWrite:
+ mov EAX, [ESP+4] // EAX = val
+ lea EDX, [ECX + 4*EDX + 8]
+ call JIT_WriteBarrierEAX
+ ret 4
+
+NotExactMatch:
+ cmp EDX, [g_pObjectClass] // are we assigning to Array of objects
+ je DoWrite2
+
+ // push EDX // caller-save ECX and EDX
+ push ECX
+
+ push EDX // element type handle
+ push EAX // object
+
+ call ObjIsInstanceOfNoGC
+
+ pop ECX // caller-restore ECX and EDX
+ pop EDX
+
+ cmp EAX, CanCast
+ je DoWrite
+
+#if CHECK_APP_DOMAIN_LEAKS
+NeedFrame:
+#endif
+ // Call the helper that knows how to erect a frame
+ push EDX
+ push ECX
+
+ lea ECX, [ESP+8+4] // ECX = address of object being stored
+ lea EDX, [ESP] // EDX = address of array
+
+ call ArrayStoreCheck
+
+ pop ECX // these might have been updated!
+ pop EDX
+
+ cmp EAX, EAX // set zero flag
+ jnz Epilog // This jump never happens, it keeps the epilog walker happy
+
+ jmp DoWrite
+
+ThrowNullReferenceException:
+ mov ECX, CORINFO_NullReferenceException
+ jmp Throw
+
+ThrowIndexOutOfRangeException:
+ mov ECX, CORINFO_IndexOutOfRangeException
+
+Throw:
+ call JIT_InternalThrowFromHelper
+Epilog:
+ ret 4
+ }
+}
+
+extern "C" __declspec(naked) Object* F_CALL_CONV JIT_IsInstanceOfClass(MethodTable *pMT, Object *pObject)
+{
+ STATIC_CONTRACT_SO_TOLERANT;
+ STATIC_CONTRACT_THROWS;
+ STATIC_CONTRACT_GC_TRIGGERS;
+
+#if defined(FEATURE_TYPEEQUIVALENCE) || defined(FEATURE_REMOTING)
+ enum
+ {
+ MTEquivalenceFlags = MethodTable::public_enum_flag_HasTypeEquivalence,
+ };
+#endif
+
+ __asm
+ {
+ // Check if the instance is NULL
+ test ARGUMENT_REG2, ARGUMENT_REG2
+ je ReturnInst
+
+ // Get the method table for the instance.
+ mov eax, dword ptr [ARGUMENT_REG2]
+
+ // Check if they are the same.
+ cmp eax, ARGUMENT_REG1
+ jne CheckParent
+
+ ReturnInst:
+ // We matched the class.
+ mov eax, ARGUMENT_REG2
+ ret
+
+ // Check if the parent class matches.
+ CheckParent:
+ mov eax, dword ptr [eax]MethodTable.m_pParentMethodTable
+ cmp eax, ARGUMENT_REG1
+ je ReturnInst
+
+ // Check if we hit the top of the hierarchy.
+ test eax, eax
+ jne CheckParent
+
+ // Check if the instance is a proxy.
+#if defined(FEATURE_TYPEEQUIVALENCE) || defined(FEATURE_REMOTING)
+ mov eax, [ARGUMENT_REG2]
+ test dword ptr [eax]MethodTable.m_dwFlags, MTEquivalenceFlags
+ jne SlowPath
+#endif
+ // It didn't match and it isn't a proxy and it doesn't have type equivalence
+ xor eax, eax
+ ret
+
+ // Cast didn't match, so try the worker to check for the proxy/equivalence case.
+#if defined(FEATURE_TYPEEQUIVALENCE) || defined(FEATURE_REMOTING)
+ SlowPath:
+ jmp JITutil_IsInstanceOfAny
+#endif
+ }
+}
+
+extern "C" __declspec(naked) Object* F_CALL_CONV JIT_ChkCastClass(MethodTable *pMT, Object *pObject)
+{
+ STATIC_CONTRACT_SO_TOLERANT;
+ STATIC_CONTRACT_THROWS;
+ STATIC_CONTRACT_GC_TRIGGERS;
+
+ __asm
+ {
+ // Check if the instance is NULL
+ test ARGUMENT_REG2, ARGUMENT_REG2
+ je ReturnInst
+
+ // Get the method table for the instance.
+ mov eax, dword ptr [ARGUMENT_REG2]
+
+ // Check if they are the same.
+ cmp eax, ARGUMENT_REG1
+ jne CheckParent
+
+ ReturnInst:
+ // We matched the class.
+ mov eax, ARGUMENT_REG2
+ ret
+
+ // Check if the parent class matches.
+ CheckParent:
+ mov eax, dword ptr [eax]MethodTable.m_pParentMethodTable
+ cmp eax, ARGUMENT_REG1
+ je ReturnInst
+
+ // Check if we hit the top of the hierarchy.
+ test eax, eax
+ jne CheckParent
+
+ // Call out to JITutil_ChkCastAny to handle the proxy case and throw a rich
+ // InvalidCastException in case of failure.
+ jmp JITutil_ChkCastAny
+ }
+}
+
+extern "C" __declspec(naked) Object* F_CALL_CONV JIT_ChkCastClassSpecial(MethodTable *pMT, Object *pObject)
+{
+ STATIC_CONTRACT_SO_TOLERANT;
+ STATIC_CONTRACT_THROWS;
+ STATIC_CONTRACT_GC_TRIGGERS;
+
+ // Assumes that the check for the trivial cases has been inlined by the JIT.
+
+ __asm
+ {
+ // Get the method table for the instance.
+ mov eax, dword ptr [ARGUMENT_REG2]
+
+ // Check if the parent class matches.
+ CheckParent:
+ mov eax, dword ptr [eax]MethodTable.m_pParentMethodTable
+ cmp eax, ARGUMENT_REG1
+ jne CheckNull
+
+ // We matched the class.
+ mov eax, ARGUMENT_REG2
+ ret
+
+ CheckNull:
+ // Check if we hit the top of the hierarchy.
+ test eax, eax
+ jne CheckParent
+
+ // Call out to JITutil_ChkCastAny to handle the proxy case and throw a rich
+ // InvalidCastException in case of failure.
+ jmp JITutil_ChkCastAny
+ }
+}
+
+HCIMPL1_V(INT32, JIT_Dbl2IntOvf, double val)
+{
+ FCALL_CONTRACT;
+
+ INT64 ret = HCCALL1_V(JIT_Dbl2Lng, val);
+
+ if (ret != (INT32) ret)
+ goto THROW;
+
+ return (INT32) ret;
+
+THROW:
+ FCThrow(kOverflowException);
+}
+HCIMPLEND
+
+
+FCDECL1(Object*, JIT_New, CORINFO_CLASS_HANDLE typeHnd_);
+
+#ifdef FEATURE_REMOTING
+HCIMPL1(Object*, JIT_NewCrossContextHelper, CORINFO_CLASS_HANDLE typeHnd_)
+{
+ CONTRACTL
+ {
+ FCALL_CHECK;
+ }
+ CONTRACTL_END;
+
+ TypeHandle typeHnd(typeHnd_);
+
+ OBJECTREF newobj = NULL;
+ HELPER_METHOD_FRAME_BEGIN_RET_0(); // Set up a frame
+
+ _ASSERTE(!typeHnd.IsTypeDesc()); // we never use this helper for arrays
+ MethodTable *pMT = typeHnd.AsMethodTable();
+ pMT->CheckRestore();
+
+ // Remoting services determines if the current context is appropriate
+ // for activation. If the current context is OK then it creates an object
+ // else it creates a proxy.
+ // Note: 3/20/03 Added fIsNewObj flag to indicate that CreateProxyOrObject
+ // is being called from Jit_NewObj ... the fIsCom flag is FALSE by default -
+ // which used to be the case before this change as well.
+ newobj = CRemotingServices::CreateProxyOrObject(pMT,FALSE /*fIsCom*/,TRUE/*fIsNewObj*/);
+
+ HELPER_METHOD_FRAME_END();
+ return(OBJECTREFToObject(newobj));
+}
+HCIMPLEND
+#endif // FEATURE_REMOTING
+
+HCIMPL1(Object*, AllocObjectWrapper, MethodTable *pMT)
+{
+ CONTRACTL
+ {
+ FCALL_CHECK;
+ }
+ CONTRACTL_END;
+
+ OBJECTREF newObj = NULL;
+ HELPER_METHOD_FRAME_BEGIN_RET_0(); // Set up a frame
+ newObj = AllocateObject(pMT);
+ HELPER_METHOD_FRAME_END();
+ return OBJECTREFToObject(newObj);
+}
+HCIMPLEND
+
+/*********************************************************************/
+// This is a frameless helper for allocating an object whose type derives
+// from marshalbyref. We check quickly to see if it is configured to
+// have remote activation. If not, we use the superfast allocator to
+// allocate the object. Otherwise, we take the slow path of allocating
+// the object via remoting services.
+#ifdef FEATURE_REMOTING
+__declspec(naked) Object* F_CALL_CONV JIT_NewCrossContext(CORINFO_CLASS_HANDLE typeHnd_)
+{
+ STATIC_CONTRACT_SO_TOLERANT;
+ STATIC_CONTRACT_THROWS;
+ STATIC_CONTRACT_GC_TRIGGERS;
+
+ _asm
+ {
+ // Check if remoting has been configured
+ push ARGUMENT_REG1 // save registers
+ push ARGUMENT_REG1
+ call CRemotingServices::RequiresManagedActivation
+ test eax, eax
+ // Jump to the slow path
+ jne SpecialOrXCtxHelper
+#ifdef _DEBUG
+ push LL_INFO10
+ push LF_GCALLOC
+ call LoggingOn
+ test eax, eax
+ jne AllocWithLogHelper
+#endif // _DEBUG
+
+ // if the object doesn't have a finalizer and the size is small, jump to super fast asm helper
+ mov ARGUMENT_REG1, [esp]
+ call MethodTable::CannotUseSuperFastHelper
+ test eax, eax
+ jne FastHelper
+
+ pop ARGUMENT_REG1
+ // Jump to the super fast helper
+ jmp dword ptr [hlpDynamicFuncTable + DYNAMIC_CORINFO_HELP_NEWSFAST * SIZE VMHELPDEF]VMHELPDEF.pfnHelper
+
+FastHelper:
+ pop ARGUMENT_REG1
+ // Jump to the helper
+ jmp JIT_New
+
+SpecialOrXCtxHelper:
+#ifdef FEATURE_COMINTEROP
+ test eax, ComObjectType
+ jz XCtxHelper
+ pop ARGUMENT_REG1
+ // Jump to the helper
+ jmp JIT_New
+
+XCtxHelper:
+#endif // FEATURE_COMINTEROP
+
+ pop ARGUMENT_REG1
+ // Jump to the helper
+ jmp JIT_NewCrossContextHelper
+
+#ifdef _DEBUG
+AllocWithLogHelper:
+ pop ARGUMENT_REG1
+ // Jump to the helper
+ jmp AllocObjectWrapper
+#endif // _DEBUG
+ }
+}
+#endif // FEATURE_REMOTING
+
+
+/*********************************************************************/
+extern "C" void* g_TailCallFrameVptr;
+void* g_TailCallFrameVptr;
+
+#ifdef FEATURE_HIJACK
+extern "C" void STDCALL JIT_TailCallHelper(Thread * pThread);
+void STDCALL JIT_TailCallHelper(Thread * pThread)
+{
+ CONTRACTL {
+ NOTHROW;
+ GC_NOTRIGGER;
+ SO_TOLERANT;
+ } CONTRACTL_END;
+
+ pThread->UnhijackThread();
+}
+#endif // FEATURE_HIJACK
+
+#if CHECK_APP_DOMAIN_LEAKS
+HCIMPL1(void *, SetObjectAppDomain, Object *pObject)
+{
+ FCALL_CONTRACT;
+ DEBUG_ONLY_FUNCTION;
+
+ HELPER_METHOD_FRAME_BEGIN_RET_ATTRIB_NOPOLL(Frame::FRAME_ATTR_CAPTURE_DEPTH_2|Frame::FRAME_ATTR_EXACT_DEPTH|Frame::FRAME_ATTR_NO_THREAD_ABORT);
+ pObject->SetAppDomain();
+ HELPER_METHOD_FRAME_END();
+
+ return pObject;
+}
+HCIMPLEND
+#endif // CHECK_APP_DOMAIN_LEAKS
+
+ // emit code that adds MIN_OBJECT_SIZE to reg if reg is unaligned thus making it aligned
+void JIT_TrialAlloc::EmitAlignmentRoundup(CPUSTUBLINKER *psl, X86Reg testAlignReg, X86Reg adjReg, Flags flags)
+{
+ STANDARD_VM_CONTRACT;
+
+ _ASSERTE((MIN_OBJECT_SIZE & 7) == 4); // want to change alignment
+
+ CodeLabel *AlreadyAligned = psl->NewCodeLabel();
+
+ // test reg, 7
+ psl->Emit16(0xC0F7 | (static_cast<unsigned short>(testAlignReg) << 8));
+ psl->Emit32(0x7);
+
+ // jz alreadyAligned
+ if (flags & ALIGN8OBJ)
+ {
+ psl->X86EmitCondJump(AlreadyAligned, X86CondCode::kJNZ);
+ }
+ else
+ {
+ psl->X86EmitCondJump(AlreadyAligned, X86CondCode::kJZ);
+ }
+
+ psl->X86EmitAddReg(adjReg, MIN_OBJECT_SIZE);
+ // AlreadyAligned:
+ psl->EmitLabel(AlreadyAligned);
+}
+
+ // if 'reg' is unaligned, then set the dummy object at EAX and increment EAX past
+ // the dummy object
+void JIT_TrialAlloc::EmitDummyObject(CPUSTUBLINKER *psl, X86Reg alignTestReg, Flags flags)
+{
+ STANDARD_VM_CONTRACT;
+
+ CodeLabel *AlreadyAligned = psl->NewCodeLabel();
+
+ // test reg, 7
+ psl->Emit16(0xC0F7 | (static_cast<unsigned short>(alignTestReg) << 8));
+ psl->Emit32(0x7);
+
+ // jz alreadyAligned
+ if (flags & ALIGN8OBJ)
+ {
+ psl->X86EmitCondJump(AlreadyAligned, X86CondCode::kJNZ);
+ }
+ else
+ {
+ psl->X86EmitCondJump(AlreadyAligned, X86CondCode::kJZ);
+ }
+
+ // Make the fake object
+ // mov EDX, [g_pObjectClass]
+ psl->Emit16(0x158B);
+ psl->Emit32((int)(size_t)&g_pObjectClass);
+
+ // mov [EAX], EDX
+ psl->X86EmitOffsetModRM(0x89, kEDX, kEAX, 0);
+
+#if CHECK_APP_DOMAIN_LEAKS
+ EmitSetAppDomain(psl);
+#endif
+
+ // add EAX, MIN_OBJECT_SIZE
+ psl->X86EmitAddReg(kEAX, MIN_OBJECT_SIZE);
+
+ // AlreadyAligned:
+ psl->EmitLabel(AlreadyAligned);
+}
+
+void JIT_TrialAlloc::EmitCore(CPUSTUBLINKER *psl, CodeLabel *noLock, CodeLabel *noAlloc, Flags flags)
+{
+ STANDARD_VM_CONTRACT;
+
+ // Upon entry here, ecx contains the MethodTable of the object we are to try to allocate memory for
+ // Upon exit, eax contains the allocated memory, edx is trashed, and ecx undisturbed
+
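+ // Informational note (inferred from the emitted code): the MP path is a
+ // per-thread bump-pointer allocation - alloc_ptr + BaseSize is compared
+ // against alloc_limit, and on success alloc_ptr is advanced and the
+ // MethodTable pointer is written at offset 0 of the new object. The non-MP
+ // path does the same against generation_table while holding m_GCLock.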
+ if (flags & MP_ALLOCATOR)
+ {
+ if (flags & (ALIGN8 | SIZE_IN_EAX | ALIGN8OBJ))
+ {
+ if (flags & ALIGN8OBJ)
+ {
+ // mov eax, [ecx]MethodTable.m_BaseSize
+ psl->X86EmitIndexRegLoad(kEAX, kECX, offsetof(MethodTable, m_BaseSize));
+ }
+
+ psl->X86EmitPushReg(kEBX); // we need a spare register
+ }
+ else
+ {
+ // mov eax, [ecx]MethodTable.m_BaseSize
+ psl->X86EmitIndexRegLoad(kEAX, kECX, offsetof(MethodTable, m_BaseSize));
+ }
+
+ assert( ((flags & ALIGN8)==0 || // EAX loaded by else statement
+ (flags & SIZE_IN_EAX) || // EAX already comes filled out
+ (flags & ALIGN8OBJ) ) // EAX loaded in the if (flags & ALIGN8OBJ) statement
+ && "EAX should contain size for allocation and it doesn't!!!");
+
+ // Fetch current thread into EDX, preserving EAX and ECX
+ psl->X86EmitCurrentThreadFetch(kEDX, (1<<kEAX)|(1<<kECX));
+
+ // Try the allocation.
+
+
+ if (flags & (ALIGN8 | SIZE_IN_EAX | ALIGN8OBJ))
+ {
+ // MOV EBX, [edx]Thread.m_alloc_context.alloc_ptr
+ psl->X86EmitOffsetModRM(0x8B, kEBX, kEDX, offsetof(Thread, m_alloc_context) + offsetof(alloc_context, alloc_ptr));
+ // add EAX, EBX
+ psl->Emit16(0xC303);
+ if (flags & ALIGN8)
+ EmitAlignmentRoundup(psl, kEBX, kEAX, flags); // bump EAX up size by 12 if EBX unaligned (so that we are aligned)
+ }
+ else
+ {
+ // add eax, [edx]Thread.m_alloc_context.alloc_ptr
+ psl->X86EmitOffsetModRM(0x03, kEAX, kEDX, offsetof(Thread, m_alloc_context) + offsetof(alloc_context, alloc_ptr));
+ }
+
+ // cmp eax, [edx]Thread.m_alloc_context.alloc_limit
+ psl->X86EmitOffsetModRM(0x3b, kEAX, kEDX, offsetof(Thread, m_alloc_context) + offsetof(alloc_context, alloc_limit));
+
+ // ja noAlloc
+ psl->X86EmitCondJump(noAlloc, X86CondCode::kJA);
+
+ // Fill in the allocation and get out.
+
+ // mov [edx]Thread.m_alloc_context.alloc_ptr, eax
+ psl->X86EmitIndexRegStore(kEDX, offsetof(Thread, m_alloc_context) + offsetof(alloc_context, alloc_ptr), kEAX);
+
+ if (flags & (ALIGN8 | SIZE_IN_EAX | ALIGN8OBJ))
+ {
+ // mov EAX, EBX
+ psl->Emit16(0xC38B);
+ // pop EBX
+ psl->X86EmitPopReg(kEBX);
+
+ if (flags & ALIGN8)
+ EmitDummyObject(psl, kEAX, flags);
+ }
+ else
+ {
+ // sub eax, [ecx]MethodTable.m_BaseSize
+ psl->X86EmitOffsetModRM(0x2b, kEAX, kECX, offsetof(MethodTable, m_BaseSize));
+ }
+
+ // mov dword ptr [eax], ecx
+ psl->X86EmitIndexRegStore(kEAX, 0, kECX);
+ }
+ else
+ {
+ // Take the GC lock (there is no lock prefix required - we will use JIT_TrialAllocSFastMP on an MP System).
+ // inc dword ptr [m_GCLock]
+ psl->Emit16(0x05ff);
+ psl->Emit32((int)(size_t)&m_GCLock);
+
+ // jnz NoLock
+ psl->X86EmitCondJump(noLock, X86CondCode::kJNZ);
+
+ if (flags & SIZE_IN_EAX)
+ {
+ // mov edx, eax
+ psl->Emit16(0xd08b);
+ }
+ else
+ {
+ // mov edx, [ecx]MethodTable.m_BaseSize
+ psl->X86EmitIndexRegLoad(kEDX, kECX, offsetof(MethodTable, m_BaseSize));
+ }
+
+ // mov eax, dword ptr [generation_table]
+ psl->Emit8(0xA1);
+ psl->Emit32((int)(size_t)&generation_table);
+
+ // Try the allocation.
+ // add edx, eax
+ psl->Emit16(0xd003);
+
+ if (flags & (ALIGN8 | ALIGN8OBJ))
+ EmitAlignmentRoundup(psl, kEAX, kEDX, flags); // bump up EDX size by 12 if EAX unaligned (so that we are aligned)
+
+ // cmp edx, dword ptr [generation_table+4]
+ psl->Emit16(0x153b);
+ psl->Emit32((int)(size_t)&generation_table + 4);
+
+ // ja noAlloc
+ psl->X86EmitCondJump(noAlloc, X86CondCode::kJA);
+
+ // Fill in the allocation and get out.
+ // mov dword ptr [generation_table], edx
+ psl->Emit16(0x1589);
+ psl->Emit32((int)(size_t)&generation_table);
+
+ if (flags & (ALIGN8 | ALIGN8OBJ))
+ EmitDummyObject(psl, kEAX, flags);
+
+ // mov dword ptr [eax], ecx
+ psl->X86EmitIndexRegStore(kEAX, 0, kECX);
+
+ // mov dword ptr [m_GCLock], 0FFFFFFFFh
+ psl->Emit16(0x05C7);
+ psl->Emit32((int)(size_t)&m_GCLock);
+ psl->Emit32(0xFFFFFFFF);
+ }
+
+
+#ifdef INCREMENTAL_MEMCLR
+ // <TODO>We're planning to get rid of this anyhow according to Patrick</TODO>
+ _ASSERTE(!"NYI");
+#endif // INCREMENTAL_MEMCLR
+}
+
+#if CHECK_APP_DOMAIN_LEAKS
+void JIT_TrialAlloc::EmitSetAppDomain(CPUSTUBLINKER *psl)
+{
+ STANDARD_VM_CONTRACT;
+
+ if (!g_pConfig->AppDomainLeaks())
+ return;
+
+ // At both entry & exit, eax contains the allocated object.
+ // ecx is preserved, edx is not.
+
+ //
+ // Add in a call to SetAppDomain. (Note that this
+ // probably would have been easier to implement by just not using
+ // the generated helpers in a checked build, but we'd lose code
+ // coverage that way.)
+ //
+
+ // Save ECX over function call
+ psl->X86EmitPushReg(kECX);
+
+ // mov object to ECX
+ // mov ecx, eax
+ psl->Emit16(0xc88b);
+
+ // SetObjectAppDomain pops its arg & returns object in EAX
+ psl->X86EmitCall(psl->NewExternalCodeLabel((LPVOID)SetObjectAppDomain), 4);
+
+ psl->X86EmitPopReg(kECX);
+}
+
+#endif // CHECK_APP_DOMAIN_LEAKS
+
+
+void JIT_TrialAlloc::EmitNoAllocCode(CPUSTUBLINKER *psl, Flags flags)
+{
+ STANDARD_VM_CONTRACT;
+
+ if (flags & MP_ALLOCATOR)
+ {
+ if (flags & (ALIGN8|SIZE_IN_EAX))
+ psl->X86EmitPopReg(kEBX);
+ }
+ else
+ {
+ // mov dword ptr [m_GCLock], 0FFFFFFFFh
+ psl->Emit16(0x05c7);
+ psl->Emit32((int)(size_t)&m_GCLock);
+ psl->Emit32(0xFFFFFFFF);
+ }
+}
+
+void *JIT_TrialAlloc::GenAllocSFast(Flags flags)
+{
+ STANDARD_VM_CONTRACT;
+
+ CPUSTUBLINKER sl;
+
+ CodeLabel *noLock = sl.NewCodeLabel();
+ CodeLabel *noAlloc = sl.NewCodeLabel();
+
+ // Emit the main body of the trial allocator, be it SP or MP
+ EmitCore(&sl, noLock, noAlloc, flags);
+
+#if CHECK_APP_DOMAIN_LEAKS
+ EmitSetAppDomain(&sl);
+#endif
+
+ // Here we are at the end of the success case - just emit a ret
+ sl.X86EmitReturn(0);
+
+ // Come here in case of no space
+ sl.EmitLabel(noAlloc);
+
+ // Release the lock in the uniprocessor case
+ EmitNoAllocCode(&sl, flags);
+
+ // Come here in case of failure to get the lock
+ sl.EmitLabel(noLock);
+
+ // Jump to the framed helper
+ sl.X86EmitNearJump(sl.NewExternalCodeLabel((LPVOID)JIT_New));
+
+ Stub *pStub = sl.Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap());
+
+ return (void *)pStub->GetEntryPoint();
+}
+
+
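+// Informational note (inferred from the emitted code below): the generated
+// box helper allocates via EmitCore, then either copies the value inline four
+// bytes at a time when the method table reports no GC pointers, or calls
+// CopyValueClassUnchecked otherwise; types whose MethodTableWriteableData is
+// still marked unrestored are sent to the framed JIT_Box helper instead.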
+void *JIT_TrialAlloc::GenBox(Flags flags)
+{
+ STANDARD_VM_CONTRACT;
+
+ CPUSTUBLINKER sl;
+
+ CodeLabel *noLock = sl.NewCodeLabel();
+ CodeLabel *noAlloc = sl.NewCodeLabel();
+
+ // Save address of value to be boxed
+ sl.X86EmitPushReg(kEBX);
+ sl.Emit16(0xda8b);
+
+ // Save the MethodTable ptr
+ sl.X86EmitPushReg(kECX);
+
+ // mov ecx, [ecx]MethodTable.m_pWriteableData
+ sl.X86EmitOffsetModRM(0x8b, kECX, kECX, offsetof(MethodTable, m_pWriteableData));
+
+ // Check whether the class has not been initialized
+ // test [ecx]MethodTableWriteableData.m_dwFlags,MethodTableWriteableData::enum_flag_Unrestored
+ sl.X86EmitOffsetModRM(0xf7, (X86Reg)0x0, kECX, offsetof(MethodTableWriteableData, m_dwFlags));
+ sl.Emit32(MethodTableWriteableData::enum_flag_Unrestored);
+
+ // Restore the MethodTable ptr in ecx
+ sl.X86EmitPopReg(kECX);
+
+ // jne noAlloc
+ sl.X86EmitCondJump(noAlloc, X86CondCode::kJNE);
+
+ // Emit the main body of the trial allocator
+ EmitCore(&sl, noLock, noAlloc, flags);
+
+#if CHECK_APP_DOMAIN_LEAKS
+ EmitSetAppDomain(&sl);
+#endif
+
+ // Here we are at the end of the success case
+
+ // Check whether the object contains pointers
+ // test [ecx]MethodTable.m_dwFlags,MethodTable::enum_flag_ContainsPointers
+ sl.X86EmitOffsetModRM(0xf7, (X86Reg)0x0, kECX, offsetof(MethodTable, m_dwFlags));
+ sl.Emit32(MethodTable::enum_flag_ContainsPointers);
+
+ CodeLabel *pointerLabel = sl.NewCodeLabel();
+
+ // jne pointerLabel
+ sl.X86EmitCondJump(pointerLabel, X86CondCode::kJNE);
+
+ // We have no pointers - emit a simple inline copy loop
+
+ // mov ecx, [ecx]MethodTable.m_BaseSize
+ sl.X86EmitOffsetModRM(0x8b, kECX, kECX, offsetof(MethodTable, m_BaseSize));
+
+ // sub ecx,12
+ sl.X86EmitSubReg(kECX, 12);
+
+ CodeLabel *loopLabel = sl.NewCodeLabel();
+
+ sl.EmitLabel(loopLabel);
+
+ // mov edx,[ebx+ecx]
+ sl.X86EmitOp(0x8b, kEDX, kEBX, 0, kECX, 1);
+
+ // mov [eax+ecx+4],edx
+ sl.X86EmitOp(0x89, kEDX, kEAX, 4, kECX, 1);
+
+ // sub ecx,4
+ sl.X86EmitSubReg(kECX, 4);
+
+ // jge loopLabel
+ sl.X86EmitCondJump(loopLabel, X86CondCode::kJGE);
+
+ sl.X86EmitPopReg(kEBX);
+
+ sl.X86EmitReturn(0);
+
+ // Arrive at this label if there are pointers in the object
+ sl.EmitLabel(pointerLabel);
+
+ // Do call to CopyValueClassUnchecked(object, data, pMT)
+
+ // Pass pMT (still in ECX)
+ sl.X86EmitPushReg(kECX);
+
+ // Pass data (still in EBX)
+ sl.X86EmitPushReg(kEBX);
+
+ // Save the address of the object just allocated
+ // mov ebx,eax
+ sl.Emit16(0xD88B);
+
+
+ // Pass address of first user byte in the newly allocated object
+ sl.X86EmitAddReg(kEAX, 4);
+ sl.X86EmitPushReg(kEAX);
+
+ // call CopyValueClass
+ sl.X86EmitCall(sl.NewExternalCodeLabel((LPVOID) CopyValueClassUnchecked), 12);
+
+ // Restore the address of the newly allocated object and return it.
+ // mov eax,ebx
+ sl.Emit16(0xC38B);
+
+ sl.X86EmitPopReg(kEBX);
+
+ sl.X86EmitReturn(0);
+
+ // Come here in case of no space
+ sl.EmitLabel(noAlloc);
+
+ // Release the lock in the uniprocessor case
+ EmitNoAllocCode(&sl, flags);
+
+ // Come here in case of failure to get the lock
+ sl.EmitLabel(noLock);
+
+ // Restore the address of the value to be boxed
+ // mov edx,ebx
+ sl.Emit16(0xD38B);
+
+ // pop ebx
+ sl.X86EmitPopReg(kEBX);
+
+ // Jump to the slow version of JIT_Box
+ sl.X86EmitNearJump(sl.NewExternalCodeLabel((LPVOID) JIT_Box));
+
+ Stub *pStub = sl.Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap());
+
+ return (void *)pStub->GetEntryPoint();
+}
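+// Illustrative sketch (not part of the original source): for a value type with no GC pointers, the
+// inline copy loop emitted above ("mov edx,[ebx+ecx] / mov [eax+ecx+4],edx / sub ecx,4 / jge loop")
+// copies the payload one DWORD at a time, highest offset first. A hedged C++ equivalent, with
+// hypothetical names:
+static void Sketch_CopyBoxedPayload(BYTE *pNewObj, const BYTE *pValue, DWORD baseSize)
+{
+    // baseSize includes 8 bytes of object overhead, so the last payload DWORD of the source
+    // sits at offset baseSize - 12; the loop walks down to offset 0 inclusive.
+    for (int ofs = (int)baseSize - 12; ofs >= 0; ofs -= 4)
+    {
+        // the +4 on the destination skips the MethodTable pointer at the front of the new object
+        *(DWORD *)(pNewObj + ofs + 4) = *(const DWORD *)(pValue + ofs);
+    }
+}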
+
+
+HCIMPL2_RAW(Object*, UnframedAllocateObjectArray, /*TypeHandle*/PVOID ArrayType, DWORD cElements)
+{
+ // This isn't _really_ an FCALL and therefore shouldn't have the
+ // SO_TOLERANT part of the FCALL_CONTRACT b/c it is not entered
+ // from managed code.
+ CONTRACTL {
+ THROWS;
+ GC_TRIGGERS;
+ MODE_COOPERATIVE;
+ SO_INTOLERANT;
+ } CONTRACTL_END;
+
+ return OBJECTREFToObject(AllocateArrayEx(TypeHandle::FromPtr(ArrayType),
+ (INT32 *)(&cElements),
+ 1,
+ FALSE
+ DEBUG_ARG(FALSE)));
+}
+HCIMPLEND_RAW
+
+
+HCIMPL2_RAW(Object*, UnframedAllocatePrimitiveArray, CorElementType type, DWORD cElements)
+{
+ // This isn't _really_ an FCALL and therefore shouldn't have the
+ // SO_TOLERANT part of the FCALL_CONTRACT b/c it is not entered
+ // from managed code.
+ CONTRACTL {
+ THROWS;
+ GC_TRIGGERS;
+ MODE_COOPERATIVE;
+ SO_INTOLERANT;
+ } CONTRACTL_END;
+
+ return OBJECTREFToObject( AllocatePrimitiveArray(type, cElements, FALSE) );
+}
+HCIMPLEND_RAW
+
+
+void *JIT_TrialAlloc::GenAllocArray(Flags flags)
+{
+ STANDARD_VM_CONTRACT;
+
+ CPUSTUBLINKER sl;
+
+ CodeLabel *noLock = sl.NewCodeLabel();
+ CodeLabel *noAlloc = sl.NewCodeLabel();
+
+ // We were passed a type descriptor in ECX, which contains the (shared)
+ // array method table and the element type.
+
+ // If this is the allocator for use from unmanaged code, ECX contains the
+ // element type descriptor, or the CorElementType.
+
+ // We need to save ECX for later
+
+ // push ecx
+ sl.X86EmitPushReg(kECX);
+
+ // The element count is in EDX - we need to save it for later.
+
+ // push edx
+ sl.X86EmitPushReg(kEDX);
+
+ if (flags & NO_FRAME)
+ {
+ if (flags & OBJ_ARRAY)
+ {
+ // we need to load the true method table from the type desc
+ sl.X86EmitIndexRegLoad(kECX, kECX, offsetof(ArrayTypeDesc,m_TemplateMT)-2);
+ }
+ else
+ {
+ // mov ecx,[g_pPredefinedArrayTypes+ecx*4]
+ sl.Emit8(0x8b);
+ sl.Emit16(0x8d0c);
+ sl.Emit32((int)(size_t)&g_pPredefinedArrayTypes);
+
+ // test ecx,ecx
+ sl.Emit16(0xc985);
+
+ // je noLock
+ sl.X86EmitCondJump(noLock, X86CondCode::kJZ);
+
+ // we need to load the true method table from the type desc
+ sl.X86EmitIndexRegLoad(kECX, kECX, offsetof(ArrayTypeDesc,m_TemplateMT));
+ }
+ }
+ else
+ {
+ // we need to load the true method table from the type desc
+ sl.X86EmitIndexRegLoad(kECX, kECX, offsetof(ArrayTypeDesc,m_TemplateMT)-2);
+
+#ifdef FEATURE_PREJIT
+ CodeLabel *indir = sl.NewCodeLabel();
+
+ // test cl,1
+ sl.Emit16(0xC1F6);
+ sl.Emit8(0x01);
+
+ // je indir
+ sl.X86EmitCondJump(indir, X86CondCode::kJZ);
+
+ // mov ecx, [ecx-1]
+ sl.X86EmitIndexRegLoad(kECX, kECX, -1);
+
+ sl.EmitLabel(indir);
+#endif
+ }
+
+ // Do a conservative check here. This is to avoid doing overflow checks within this function. We'll
+ // still have to do a size check before running through the body of EmitCore. The way we do the check
+ // against the allocation quantum there requires that we not overflow when adding the size to the
+ // current allocation context pointer. There is exactly LARGE_OBJECT_SIZE of headroom there, so do that
+ // check before we EmitCore.
+ //
+ // For reference types, we can just pick the correct value of maxElems and skip the second check.
+ //
+ // By the way, we use 256 as a "slack" value to ensure that we don't overflow because of the size of the
+ // array header or alignment.
+ sl.Emit16(0xfa81);
+
+
+ // The large object heap is 8 byte aligned, so for double arrays we
+ // want to bias toward putting things in the large object heap
+ unsigned maxElems = 0xffff - 256;
+
+ if ((flags & ALIGN8) && g_pConfig->GetDoubleArrayToLargeObjectHeapThreshold() < maxElems)
+ maxElems = g_pConfig->GetDoubleArrayToLargeObjectHeapThreshold();
+ if (flags & OBJ_ARRAY)
+ {
+ // Since we know that the array elements are sizeof(OBJECTREF), set maxElems exactly here (use the
+ // same slack from above).
+ maxElems = min(maxElems, (LARGE_OBJECT_SIZE/sizeof(OBJECTREF)) - 256);
+ }
+ sl.Emit32(maxElems);
+
+
+ // jae noLock - seems tempting to jump to noAlloc, but we haven't taken the lock yet
+ sl.X86EmitCondJump(noLock, X86CondCode::kJAE);
+
+ if (flags & OBJ_ARRAY)
+ {
+ // In this case we know the element size is sizeof(void *), or 4 for x86
+ // This helps us in two ways - we can shift instead of multiplying, and
+ // there's no need to align the size either
+
+ _ASSERTE(sizeof(void *) == 4);
+
+ // mov eax, [ecx]MethodTable.m_BaseSize
+ sl.X86EmitIndexRegLoad(kEAX, kECX, offsetof(MethodTable, m_BaseSize));
+
+ // lea eax, [eax+edx*4]
+ sl.X86EmitOp(0x8d, kEAX, kEAX, 0, kEDX, 4);
+ }
+ else
+ {
+ // movzx eax, [ECX]MethodTable.m_dwFlags /* component size */
+ sl.Emit8(0x0f);
+ sl.X86EmitOffsetModRM(0xb7, kEAX, kECX, offsetof(MethodTable, m_dwFlags /* component size */));
+
+ // mul eax, edx
+ sl.Emit16(0xe2f7);
+
+ // add eax, [ecx]MethodTable.m_BaseSize
+ sl.X86EmitOffsetModRM(0x03, kEAX, kECX, offsetof(MethodTable, m_BaseSize));
+
+ // Since this is an array of value classes, we need an extra compare here to make sure we're still
+ // less than LARGE_OBJECT_SIZE. This is the last bit of arithmetic before we compare against the
+ // allocation context, so do it here.
+
+ // cmp eax, LARGE_OBJECT_SIZE
+ // ja noLock
+ sl.Emit8(0x3d);
+ sl.Emit32(LARGE_OBJECT_SIZE);
+ sl.X86EmitCondJump(noLock, X86CondCode::kJA);
+ }
+
+#if DATA_ALIGNMENT == 4
+ if (flags & OBJ_ARRAY)
+ {
+ // No need for rounding in this case - element size is 4, and m_BaseSize is guaranteed
+ // to be a multiple of 4.
+ }
+ else
+#endif // DATA_ALIGNMENT == 4
+ {
+ // round the size to a multiple of 4
+
+ // add eax, 3
+ sl.X86EmitAddReg(kEAX, (DATA_ALIGNMENT-1));
+
+ // and eax, ~3
+ sl.Emit16(0xe083);
+ sl.Emit8(~(DATA_ALIGNMENT-1));
+ }
+
+ flags = (Flags)(flags | SIZE_IN_EAX);
+
+ // Emit the main body of the trial allocator, be it SP or MP
+ EmitCore(&sl, noLock, noAlloc, flags);
+
+ // Here we are at the end of the success case - store element count
+ // and possibly the element type descriptor and return
+
+ // pop edx - element count
+ sl.X86EmitPopReg(kEDX);
+
+ // pop ecx - array type descriptor
+ sl.X86EmitPopReg(kECX);
+
+ // mov dword ptr [eax]ArrayBase.m_NumComponents, edx
+ sl.X86EmitIndexRegStore(kEAX, offsetof(ArrayBase,m_NumComponents), kEDX);
+
+#if CHECK_APP_DOMAIN_LEAKS
+ EmitSetAppDomain(&sl);
+#endif
+
+ // no stack parameters
+ sl.X86EmitReturn(0);
+
+ // Come here in case of no space
+ sl.EmitLabel(noAlloc);
+
+ // Release the lock in the uniprocessor case
+ EmitNoAllocCode(&sl, flags);
+
+ // Come here in case of failure to get the lock
+ sl.EmitLabel(noLock);
+
+ // pop edx - element count
+ sl.X86EmitPopReg(kEDX);
+
+ // pop ecx - array type descriptor
+ sl.X86EmitPopReg(kECX);
+
+ CodeLabel * target;
+ if (flags & NO_FRAME)
+ {
+ if (flags & OBJ_ARRAY)
+ {
+ // Jump to the unframed helper
+ target = sl.NewExternalCodeLabel((LPVOID)UnframedAllocateObjectArray);
+ _ASSERTE(target->e.m_pExternalAddress);
+ }
+ else
+ {
+ // Jump to the unframed helper
+ target = sl.NewExternalCodeLabel((LPVOID)UnframedAllocatePrimitiveArray);
+ _ASSERTE(target->e.m_pExternalAddress);
+ }
+ }
+ else
+ {
+ // Jump to the framed helper
+ target = sl.NewExternalCodeLabel((LPVOID)JIT_NewArr1);
+ _ASSERTE(target->e.m_pExternalAddress);
+ }
+ sl.X86EmitNearJump(target);
+
+ Stub *pStub = sl.Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap());
+
+ return (void *)pStub->GetEntryPoint();
+}
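+// Illustrative sketch (not part of the original source): the element-count clamp and the size
+// arithmetic emitted above can be restated in C++ roughly as below. Clamping the count first is what
+// keeps the later "add the size to the allocation pointer" step inside EmitCore from overflowing.
+// The helper name and out-parameter are hypothetical.
+static bool Sketch_ComputeSmallArraySize(DWORD cElements, DWORD componentSize, DWORD baseSize,
+                                         DWORD maxElems, DWORD *pSize)
+{
+    if (cElements >= maxElems)
+        return false;                                   // "jae noLock" - use the slow helper
+
+    DWORD size = baseSize + componentSize * cElements;
+    if (size > LARGE_OBJECT_SIZE)
+        return false;                                   // value-class arrays get this second check
+
+    // round up to DATA_ALIGNMENT (4 on x86): "add eax, 3 / and eax, ~3"
+    *pSize = (size + (DATA_ALIGNMENT - 1)) & ~(DWORD)(DATA_ALIGNMENT - 1);
+    return true;
+}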
+
+
+void *JIT_TrialAlloc::GenAllocString(Flags flags)
+{
+ STANDARD_VM_CONTRACT;
+
+ CPUSTUBLINKER sl;
+
+ CodeLabel *noLock = sl.NewCodeLabel();
+ CodeLabel *noAlloc = sl.NewCodeLabel();
+
+ // We were passed the number of characters in ECX
+
+ // push ecx
+ sl.X86EmitPushReg(kECX);
+
+ // mov eax, ecx
+ sl.Emit16(0xc18b);
+
+ // we need to load the method table for string from the global
+
+ // mov ecx, [g_pStringClass]
+ sl.Emit16(0x0d8b);
+ sl.Emit32((int)(size_t)&g_pStringClass);
+
+ // Instead of doing elaborate overflow checks, we just limit the number of elements
+ // to (LARGE_OBJECT_SIZE - 256)/sizeof(WCHAR) or less.
+ // This will avoid all overflow problems, as well as making sure
+ // big string objects are correctly allocated in the big object heap.
+
+ _ASSERTE(sizeof(WCHAR) == 2);
+
+ // cmp eax, (LARGE_OBJECT_SIZE - 256)/sizeof(WCHAR)
+ sl.Emit16(0xf881);
+ sl.Emit32((LARGE_OBJECT_SIZE - 256)/sizeof(WCHAR));
+
+ // jae noLock - seems tempting to jump to noAlloc, but we haven't taken the lock yet
+ sl.X86EmitCondJump(noLock, X86CondCode::kJAE);
+
+ // mov edx, [ecx]MethodTable.m_BaseSize
+ sl.X86EmitIndexRegLoad(kEDX, kECX, offsetof(MethodTable,m_BaseSize));
+
+ // Calculate the final size to allocate.
+ // We need to calculate baseSize + cnt*2, then round that up by adding 3 and ANDing with ~3.
+
+ // lea eax, [edx+eax*2+3]
+ sl.X86EmitOp(0x8d, kEAX, kEDX, (DATA_ALIGNMENT-1), kEAX, 2);
+
+ // and eax, ~3
+ sl.Emit16(0xe083);
+ sl.Emit8(~(DATA_ALIGNMENT-1));
+
+ flags = (Flags)(flags | SIZE_IN_EAX);
+
+ // Emit the main body of the trial allocator, be it SP or MP
+ EmitCore(&sl, noLock, noAlloc, flags);
+
+ // Here we are at the end of the success case - store the string length and return
+
+ // pop ecx - element count
+ sl.X86EmitPopReg(kECX);
+
+ // mov dword ptr [eax]StringObject.m_StringLength, ecx
+ sl.X86EmitIndexRegStore(kEAX, offsetof(StringObject,m_StringLength), kECX);
+
+#if CHECK_APP_DOMAIN_LEAKS
+ EmitSetAppDomain(&sl);
+#endif
+
+ // no stack parameters
+ sl.X86EmitReturn(0);
+
+ // Come here in case of no space
+ sl.EmitLabel(noAlloc);
+
+ // Release the lock in the uniprocessor case
+ EmitNoAllocCode(&sl, flags);
+
+ // Come here in case of failure to get the lock
+ sl.EmitLabel(noLock);
+
+ // pop ecx - element count
+ sl.X86EmitPopReg(kECX);
+
+ CodeLabel * target;
+ if (flags & NO_FRAME)
+ {
+ // Jump to the unframed helper
+ target = sl.NewExternalCodeLabel((LPVOID)UnframedAllocateString);
+ }
+ else
+ {
+ // Jump to the framed helper
+ target = sl.NewExternalCodeLabel((LPVOID)FramedAllocateString);
+ }
+ sl.X86EmitNearJump(target);
+
+ Stub *pStub = sl.Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap());
+
+ return (void *)pStub->GetEntryPoint();
+}
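+// Illustrative sketch (not part of the original source): the "lea eax, [edx+eax*2+3]" followed by
+// "and eax, ~3" above computes
+//     size = (baseSize + 2*charCount + 3) & ~3
+// i.e. the two-byte character payload added to the base size, rounded up to a 4-byte boundary.
+// A hedged C++ equivalent, with hypothetical names:
+static DWORD Sketch_StringAllocSize(DWORD baseSize, DWORD charCount)
+{
+    return (baseSize + 2 * charCount + (DATA_ALIGNMENT - 1)) & ~(DWORD)(DATA_ALIGNMENT - 1);
+}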
+
+
+FastStringAllocatorFuncPtr fastStringAllocator = UnframedAllocateString;
+
+FastObjectArrayAllocatorFuncPtr fastObjectArrayAllocator = UnframedAllocateObjectArray;
+
+FastPrimitiveArrayAllocatorFuncPtr fastPrimitiveArrayAllocator = UnframedAllocatePrimitiveArray;
+
+// For this helper,
+// if bCCtorCheck == true
+//    ECX contains the domain neutral module ID
+//    EDX contains the class domain ID
+// else
+//    ECX contains the domain neutral module ID
+//    EDX is junk
+// The shared static base is returned in EAX.
+
+// "init" should be the address of a routine which takes an argument of
+// the module domain ID, the class domain ID, and returns the static base pointer
+void EmitFastGetSharedStaticBase(CPUSTUBLINKER *psl, CodeLabel *init, bool bCCtorCheck, bool bGCStatic, bool bSingleAppDomain)
+{
+ STANDARD_VM_CONTRACT;
+
+ CodeLabel *DoInit = 0;
+ if (bCCtorCheck)
+ {
+ DoInit = psl->NewCodeLabel();
+ }
+
+ // mov eax, ecx
+ psl->Emit8(0x89);
+ psl->Emit8(0xc8);
+
+ if(!bSingleAppDomain)
+ {
+ // Check tag
+ CodeLabel *cctorCheck = psl->NewCodeLabel();
+
+
+ // test eax, 1
+ psl->Emit8(0xa9);
+ psl->Emit32(1);
+
+ // jz cctorCheck
+ psl->X86EmitCondJump(cctorCheck, X86CondCode::kJZ);
+
+ // mov eax GetAppDomain()
+ psl->X86EmitCurrentAppDomainFetch(kEAX, (1<<kECX)|(1<<kEDX));
+
+ // mov eax [eax->m_sDomainLocalBlock.m_pModuleSlots]
+ psl->X86EmitIndexRegLoad(kEAX, kEAX, (__int32) AppDomain::GetOffsetOfModuleSlotsPointer());
+
+ // Note: weird address arithmetic effectively does:
+ // shift over 1 to remove tag bit (which is always 1), then multiply by 4.
+ // mov eax [eax + ecx*2 - 2]
+ psl->X86EmitOp(0x8b, kEAX, kEAX, -2, kECX, 2);
+
+ // cctorCheck:
+ psl->EmitLabel(cctorCheck);
+
+ }
+
+ if (bCCtorCheck)
+ {
+ // test [eax + edx + offsetof(DomainLocalModule, m_pDataBlob)], ClassInitFlags::INITIALIZED_FLAG // Is the class inited?
+ _ASSERTE(FitsInI1(ClassInitFlags::INITIALIZED_FLAG));
+ _ASSERTE(FitsInI1(DomainLocalModule::GetOffsetOfDataBlob()));
+
+ BYTE testClassInit[] = { 0xF6, 0x44, 0x10,
+ (BYTE) DomainLocalModule::GetOffsetOfDataBlob(), (BYTE)ClassInitFlags::INITIALIZED_FLAG };
+
+ psl->EmitBytes(testClassInit, sizeof(testClassInit));
+
+ // jz init // no, init it
+ psl->X86EmitCondJump(DoInit, X86CondCode::kJZ);
+ }
+
+ if (bGCStatic)
+ {
+ // Indirect to get the pointer to the first GC Static
+ psl->X86EmitIndexRegLoad(kEAX, kEAX, (__int32) DomainLocalModule::GetOffsetOfGCStaticPointer());
+ }
+
+ // ret
+ psl->X86EmitReturn(0);
+
+ if (bCCtorCheck)
+ {
+ // DoInit:
+ psl->EmitLabel(DoInit);
+
+ // push edx (must be preserved)
+ psl->X86EmitPushReg(kEDX);
+
+ // call init
+ psl->X86EmitCall(init, 0);
+
+ // pop edx
+ psl->X86EmitPopReg(kEDX);
+
+ // ret
+ psl->X86EmitReturn(0);
+ }
+
+}
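+// Illustrative sketch (not part of the original source): the "weird address arithmetic" above,
+// [eax + ecx*2 - 2], is an ordinary array index once the tag bit is stripped. With a tagged module
+// ID of the form (index << 1) | 1, the slot address is
+//     pSlots + index*sizeof(void*)  ==  (BYTE*)pSlots + id*2 - 2
+// which is exactly what the scaled-index addressing mode computes. Hypothetical helper:
+static void *Sketch_ResolveModuleSlot(void **pModuleSlots, UINT_PTR taggedModuleID)
+{
+    _ASSERTE(taggedModuleID & 1);               // the tag bit is always set on this path
+    return pModuleSlots[taggedModuleID >> 1];   // same value the stub leaves in EAX
+}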
+
+void *GenFastGetSharedStaticBase(bool bCheckCCtor, bool bGCStatic, bool bSingleAppDomain)
+{
+ STANDARD_VM_CONTRACT;
+
+ CPUSTUBLINKER sl;
+
+ CodeLabel *init;
+ if (bGCStatic)
+ {
+ init = sl.NewExternalCodeLabel((LPVOID)JIT_GetSharedGCStaticBase);
+ }
+ else
+ {
+ init = sl.NewExternalCodeLabel((LPVOID)JIT_GetSharedNonGCStaticBase);
+ }
+
+ EmitFastGetSharedStaticBase(&sl, init, bCheckCCtor, bGCStatic, bSingleAppDomain);
+
+ Stub *pStub = sl.Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap());
+
+ return (void*) pStub->GetEntryPoint();
+}
+
+
+#ifdef ENABLE_FAST_GCPOLL_HELPER
+void EnableJitGCPoll()
+{
+ SetJitHelperFunction(CORINFO_HELP_POLL_GC, (void*)JIT_PollGC);
+}
+void DisableJitGCPoll()
+{
+ SetJitHelperFunction(CORINFO_HELP_POLL_GC, (void*)JIT_PollGC_Nop);
+}
+#endif
+
+#define NUM_WRITE_BARRIERS 6
+
+static const BYTE c_rgWriteBarrierRegs[NUM_WRITE_BARRIERS] = {
+ 0, // EAX
+ 1, // ECX
+ 3, // EBX
+ 6, // ESI
+ 7, // EDI
+ 5, // EBP
+};
+
+static const void * const c_rgWriteBarriers[NUM_WRITE_BARRIERS] = {
+ (void *)JIT_WriteBarrierEAX,
+ (void *)JIT_WriteBarrierECX,
+ (void *)JIT_WriteBarrierEBX,
+ (void *)JIT_WriteBarrierESI,
+ (void *)JIT_WriteBarrierEDI,
+ (void *)JIT_WriteBarrierEBP,
+};
+
+#ifdef WRITE_BARRIER_CHECK
+static const void * const c_rgDebugWriteBarriers[NUM_WRITE_BARRIERS] = {
+ (void *)JIT_DebugWriteBarrierEAX,
+ (void *)JIT_DebugWriteBarrierECX,
+ (void *)JIT_DebugWriteBarrierEBX,
+ (void *)JIT_DebugWriteBarrierESI,
+ (void *)JIT_DebugWriteBarrierEDI,
+ (void *)JIT_DebugWriteBarrierEBP,
+};
+#endif // WRITE_BARRIER_CHECK
+
+#define DEBUG_RANDOM_BARRIER_CHECK DbgGetEXETimeStamp() % 7 == 4
+
+/*********************************************************************/
+// Initialize the part of the JIT helpers that require very little of
+// EE infrastructure to be in place.
+/*********************************************************************/
+void InitJITHelpers1()
+{
+ STANDARD_VM_CONTRACT;
+
+#define ETW_NUM_JIT_HELPERS 10
+ static const LPCWSTR pHelperNames[ETW_NUM_JIT_HELPERS] = {
+ W("@NewObject"),
+ W("@NewObjectAlign8"),
+ W("@Box"),
+ W("@NewArray1Object"),
+ W("@NewArray1ValueType"),
+ W("@NewArray1ObjectAlign8"),
+ W("@StaticBaseObject"),
+ W("@StaticBaseNonObject"),
+ W("@StaticBaseObjectNoCCtor"),
+ W("@StaticBaseNonObjectNoCCtor")
+ };
+
+ PVOID pMethodAddresses[ETW_NUM_JIT_HELPERS]={0};
+
+ _ASSERTE(g_SystemInfo.dwNumberOfProcessors != 0);
+
+ JIT_TrialAlloc::Flags flags = GCHeap::UseAllocationContexts() ?
+ JIT_TrialAlloc::MP_ALLOCATOR : JIT_TrialAlloc::NORMAL;
+
+ // Get CPU features and check for SSE2 support.
+ // This code should eventually probably be moved into codeman.cpp,
+ // where we set the cpu feature flags for the JIT based on CPU type and features.
+ DWORD dwCPUFeaturesECX;
+ DWORD dwCPUFeaturesEDX;
+
+ __asm
+ {
+ pushad
+ mov eax, 1
+ cpuid
+ mov dwCPUFeaturesECX, ecx
+ mov dwCPUFeaturesEDX, edx
+ popad
+ }
+
+ // If bit 26 (SSE2) is set, then we can use the SSE2 flavors
+ // and the faster P4 x87 implementation of Dbl2Lng.
+ if (dwCPUFeaturesEDX & (1<<26))
+ {
+ SetJitHelperFunction(CORINFO_HELP_DBL2INT, JIT_Dbl2IntSSE2);
+ if (dwCPUFeaturesECX & 1) // check SSE3
+ {
+ SetJitHelperFunction(CORINFO_HELP_DBL2UINT, JIT_Dbl2LngSSE3);
+ SetJitHelperFunction(CORINFO_HELP_DBL2LNG, JIT_Dbl2LngSSE3);
+ }
+ else
+ {
+ SetJitHelperFunction(CORINFO_HELP_DBL2UINT, JIT_Dbl2LngP4x87); // SSE2 only for signed
+ SetJitHelperFunction(CORINFO_HELP_DBL2LNG, JIT_Dbl2LngP4x87);
+ }
+ }
+
+ if (!(TrackAllocationsEnabled()
+ || LoggingOn(LF_GCALLOC, LL_INFO10)
+#ifdef _DEBUG
+ || (g_pConfig->ShouldInjectFault(INJECTFAULT_GCHEAP) != 0)
+#endif
+ )
+ )
+ {
+ // Replace the slow helpers with the faster versions
+
+ pMethodAddresses[0] = JIT_TrialAlloc::GenAllocSFast(flags);
+ SetJitHelperFunction(CORINFO_HELP_NEWSFAST, pMethodAddresses[0]);
+ pMethodAddresses[1] = JIT_TrialAlloc::GenAllocSFast((JIT_TrialAlloc::Flags)(flags|JIT_TrialAlloc::ALIGN8 | JIT_TrialAlloc::ALIGN8OBJ));
+ SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, pMethodAddresses[1]);
+ pMethodAddresses[2] = JIT_TrialAlloc::GenBox(flags);
+ SetJitHelperFunction(CORINFO_HELP_BOX, pMethodAddresses[2]);
+ pMethodAddresses[3] = JIT_TrialAlloc::GenAllocArray((JIT_TrialAlloc::Flags)(flags|JIT_TrialAlloc::OBJ_ARRAY));
+ SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, pMethodAddresses[3]);
+ pMethodAddresses[4] = JIT_TrialAlloc::GenAllocArray(flags);
+ SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, pMethodAddresses[4]);
+ pMethodAddresses[5] = JIT_TrialAlloc::GenAllocArray((JIT_TrialAlloc::Flags)(flags|JIT_TrialAlloc::ALIGN8));
+ SetJitHelperFunction(CORINFO_HELP_NEWARR_1_ALIGN8, pMethodAddresses[5]);
+
+ fastObjectArrayAllocator = (FastObjectArrayAllocatorFuncPtr)JIT_TrialAlloc::GenAllocArray((JIT_TrialAlloc::Flags)(flags|JIT_TrialAlloc::NO_FRAME|JIT_TrialAlloc::OBJ_ARRAY));
+ fastPrimitiveArrayAllocator = (FastPrimitiveArrayAllocatorFuncPtr)JIT_TrialAlloc::GenAllocArray((JIT_TrialAlloc::Flags)(flags|JIT_TrialAlloc::NO_FRAME));
+
+ // If allocation logging is on, then we divert calls to FastAllocateString to an Ecall method, not this
+ // generated method. Find this workaround in Ecall::Init() in ecall.cpp.
+ ECall::DynamicallyAssignFCallImpl((PCODE) JIT_TrialAlloc::GenAllocString(flags), ECall::FastAllocateString);
+
+ // generate another allocator for use from unmanaged code (won't need a frame)
+ fastStringAllocator = (FastStringAllocatorFuncPtr) JIT_TrialAlloc::GenAllocString((JIT_TrialAlloc::Flags)(flags|JIT_TrialAlloc::NO_FRAME));
+ //UnframedAllocateString;
+ }
+
+ bool bSingleAppDomain = IsSingleAppDomain();
+
+ // Replace static helpers with faster assembly versions
+ pMethodAddresses[6] = GenFastGetSharedStaticBase(true, true, bSingleAppDomain);
+ SetJitHelperFunction(CORINFO_HELP_GETSHARED_GCSTATIC_BASE, pMethodAddresses[6]);
+ pMethodAddresses[7] = GenFastGetSharedStaticBase(true, false, bSingleAppDomain);
+ SetJitHelperFunction(CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE, pMethodAddresses[7]);
+ pMethodAddresses[8] = GenFastGetSharedStaticBase(false, true, bSingleAppDomain);
+ SetJitHelperFunction(CORINFO_HELP_GETSHARED_GCSTATIC_BASE_NOCTOR, pMethodAddresses[8]);
+ pMethodAddresses[9] = GenFastGetSharedStaticBase(false, false, bSingleAppDomain);
+ SetJitHelperFunction(CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR, pMethodAddresses[9]);
+
+ ETW::MethodLog::StubsInitialized(pMethodAddresses, (PVOID *)pHelperNames, ETW_NUM_JIT_HELPERS);
+
+#ifdef ENABLE_FAST_GCPOLL_HELPER
+ // code:JIT_PollGC_Nop
+ SetJitHelperFunction(CORINFO_HELP_POLL_GC, (void*)JIT_PollGC_Nop);
+#endif //ENABLE_FAST_GCPOLL_HELPER
+
+ // All write barrier helpers should fit into one page.
+ // If you hit this assert on a retail build, there is most likely a problem with the BBT script.
+ _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", (BYTE*)JIT_WriteBarrierLast - (BYTE*)JIT_WriteBarrierStart < PAGE_SIZE);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", (BYTE*)JIT_PatchedWriteBarrierLast - (BYTE*)JIT_PatchedWriteBarrierStart < PAGE_SIZE);
+
+ // Copy the write barriers to their final resting place.
+ for (int iBarrier = 0; iBarrier < NUM_WRITE_BARRIERS; iBarrier++)
+ {
+ BYTE * pfunc = (BYTE *) JIT_WriteBarrierReg_PreGrow;
+
+ BYTE * pBuf = (BYTE *)c_rgWriteBarriers[iBarrier];
+ int reg = c_rgWriteBarrierRegs[iBarrier];
+
+ memcpy(pBuf, pfunc, 34);
+
+ // assert the copied code ends in a ret to make sure we got the right length
+ _ASSERTE(pBuf[33] == 0xC3);
+
+ // We need to adjust registers in a couple of instructions
+ // It would be nice to have the template contain all zeroes for
+ // the register fields (corresponding to EAX), but that doesn't
+ // work because then we get a smaller encoding for the compares
+ // that only works for EAX but not the other registers.
+ // So we always have to clear the register fields before updating them.
+
+ // First instruction to patch is a mov [edx], reg
+
+ _ASSERTE(pBuf[0] == 0x89);
+ // Update the reg field (bits 3..5) of the ModR/M byte of this instruction
+ pBuf[1] &= 0xc7;
+ pBuf[1] |= reg << 3;
+
+ // Second instruction to patch is cmp reg, imm32 (low bound)
+
+ _ASSERTE(pBuf[2] == 0x81);
+ // Here the lowest three bits in ModR/M field are the register
+ pBuf[3] &= 0xf8;
+ pBuf[3] |= reg;
+
+#ifdef WRITE_BARRIER_CHECK
+ // Don't do the fancy optimization just jump to the old one
+ // Use the slow one from time to time in a debug build because
+ // there are some good asserts in the unoptimized one
+ if ((g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_BARRIERCHECK) || DEBUG_RANDOM_BARRIER_CHECK) {
+ pfunc = &pBuf[0];
+ *pfunc++ = 0xE9; // JMP c_rgDebugWriteBarriers[iBarrier]
+ *((DWORD*) pfunc) = (BYTE*) c_rgDebugWriteBarriers[iBarrier] - (pfunc + sizeof(DWORD));
+ }
+#endif // WRITE_BARRIER_CHECK
+ }
+
+#ifndef CODECOVERAGE
+ ValidateWriteBarrierHelpers();
+#endif
+
+ // Leave the patched region writable for StompWriteBarrierEphemeral(), StompWriteBarrierResize()
+ // and CTPMethodTable::ActivatePrecodeRemotingThunk
+
+ // Initialize g_TailCallFrameVptr for JIT_TailCall helper
+ g_TailCallFrameVptr = (void*)TailCallFrame::GetMethodFrameVPtr();
+}
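+// Illustrative sketch (not part of the original source): the register patching done on the copied
+// write-barrier template above rewrites two ModR/M bytes. For "mov [edx], reg" the register number
+// occupies bits 3..5 of the ModR/M byte; for "cmp reg, imm32" it occupies bits 0..2. A hedged C++
+// restatement of that patching, with a hypothetical name:
+static void Sketch_PatchBarrierRegister(BYTE *pBuf, int reg)
+{
+    _ASSERTE(pBuf[0] == 0x89);                          // mov r/m32, r32
+    pBuf[1] = (BYTE)((pBuf[1] & 0xC7) | (reg << 3));    // clear, then set, the reg field (bits 3..5)
+
+    _ASSERTE(pBuf[2] == 0x81);                          // cmp r/m32, imm32
+    pBuf[3] = (BYTE)((pBuf[3] & 0xF8) | reg);           // clear, then set, the r/m field (bits 0..2)
+}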
+
+// These constants are offsets into our write barrier helpers for values that get updated as the bounds of the managed heap change.
+// ephemeral region
+const int AnyGrow_EphemeralLowerBound = 4; // offset is the same for both pre and post grow functions
+const int PostGrow_EphemeralUpperBound = 12;
+
+// card table
+const int PreGrow_CardTableFirstLocation = 16;
+const int PreGrow_CardTableSecondLocation = 28;
+const int PostGrow_CardTableFirstLocation = 24;
+const int PostGrow_CardTableSecondLocation = 36;
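+// Illustrative sketch (not part of the original source): each constant above is the byte offset of a
+// 32-bit immediate inside the copied write-barrier code. Updating a bound or the card table address
+// is a single aligned DWORD store at that offset, which is why the offsets must keep the immediates
+// naturally aligned (the stores can then happen atomically while the EE is running). Hypothetical
+// helper for illustration only:
+static void Sketch_PatchBarrierImmediate(BYTE *pBarrier, int offset, size_t newValue)
+{
+    size_t *pImm = (size_t *)(pBarrier + offset);
+    _ASSERTE(((size_t)pImm & (sizeof(size_t) - 1)) == 0);   // must be naturally aligned
+    if (*pImm != newValue)                                   // avoid trivial self-modifying code
+        *pImm = newValue;
+}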
+
+
+#ifndef CODECOVERAGE // Deactivate alignment validation for code coverage builds
+ // because the instrumented binaries will not preserve alignment constraints and we will fail.
+
+void ValidateWriteBarrierHelpers()
+{
+ // We have an invariant that the addresses of all the values that we update in our write barrier
+ // helpers must be naturally aligned. This is so that the updates can happen atomically, since there
+ // are places where we update these values while the EE is running.
+
+#ifdef WRITE_BARRIER_CHECK
+ // write barrier checking uses the slower helpers that we don't bash so there is no need for validation
+ if ((g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_BARRIERCHECK) || DEBUG_RANDOM_BARRIER_CHECK)
+ return;
+#endif // WRITE_BARRIER_CHECK
+
+ // first validate the PreGrow helper
+ BYTE* pWriteBarrierFunc = reinterpret_cast<BYTE*>(JIT_WriteBarrierEAX);
+
+ // ephemeral region
+ DWORD* pLocation = reinterpret_cast<DWORD*>(&pWriteBarrierFunc[AnyGrow_EphemeralLowerBound]);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", (reinterpret_cast<DWORD>(pLocation) & 0x3) == 0);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", *pLocation == 0xf0f0f0f0);
+
+ // card table
+ pLocation = reinterpret_cast<DWORD*>(&pWriteBarrierFunc[PreGrow_CardTableFirstLocation]);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", (reinterpret_cast<DWORD>(pLocation) & 0x3) == 0);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", *pLocation == 0xf0f0f0f0);
+ pLocation = reinterpret_cast<DWORD*>(&pWriteBarrierFunc[PreGrow_CardTableSecondLocation]);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", (reinterpret_cast<DWORD>(pLocation) & 0x3) == 0);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", *pLocation == 0xf0f0f0f0);
+
+ // now validate the PostGrow helper
+ pWriteBarrierFunc = reinterpret_cast<BYTE*>(JIT_WriteBarrierReg_PostGrow);
+
+ // ephemeral region
+ pLocation = reinterpret_cast<DWORD*>(&pWriteBarrierFunc[AnyGrow_EphemeralLowerBound]);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", (reinterpret_cast<DWORD>(pLocation) & 0x3) == 0);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", *pLocation == 0xf0f0f0f0);
+ pLocation = reinterpret_cast<DWORD*>(&pWriteBarrierFunc[PostGrow_EphemeralUpperBound]);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", (reinterpret_cast<DWORD>(pLocation) & 0x3) == 0);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", *pLocation == 0xf0f0f0f0);
+
+ // card table
+ pLocation = reinterpret_cast<DWORD*>(&pWriteBarrierFunc[PostGrow_CardTableFirstLocation]);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", (reinterpret_cast<DWORD>(pLocation) & 0x3) == 0);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", *pLocation == 0xf0f0f0f0);
+ pLocation = reinterpret_cast<DWORD*>(&pWriteBarrierFunc[PostGrow_CardTableSecondLocation]);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", (reinterpret_cast<DWORD>(pLocation) & 0x3) == 0);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", *pLocation == 0xf0f0f0f0);
+}
+
+#endif //CODECOVERAGE
+/*********************************************************************/
+
+#define WriteBarrierIsPreGrow() (((BYTE *)JIT_WriteBarrierEAX)[10] == 0xc1)
+
+
+/*********************************************************************/
+// When a GC happens, the upper and lower bounds of the ephemeral
+// generation change. This routine updates the WriteBarrier thunks
+// with the new values.
+void StompWriteBarrierEphemeral(bool /* isRuntimeSuspended */)
+{
+ CONTRACTL {
+ NOTHROW;
+ GC_NOTRIGGER;
+ } CONTRACTL_END;
+
+#ifdef WRITE_BARRIER_CHECK
+ // Don't do the fancy optimization if we are checking write barrier
+ if (((BYTE *)JIT_WriteBarrierEAX)[0] == 0xE9) // we are using slow write barrier
+ return;
+#endif // WRITE_BARRIER_CHECK
+
+ BOOL flushICache = FALSE;
+
+ // Update the lower bound.
+ for (int iBarrier = 0; iBarrier < NUM_WRITE_BARRIERS; iBarrier++)
+ {
+ BYTE * pBuf = (BYTE *)c_rgWriteBarriers[iBarrier];
+
+ // assert there is in fact a cmp r/m32, imm32 there
+ _ASSERTE(pBuf[2] == 0x81);
+
+ // Update the immediate which is the lower bound of the ephemeral generation
+ size_t *pfunc = (size_t *) &pBuf[AnyGrow_EphemeralLowerBound];
+ //avoid trivial self modifying code
+ if (*pfunc != (size_t) g_ephemeral_low)
+ {
+ flushICache = TRUE;
+ *pfunc = (size_t) g_ephemeral_low;
+ }
+ if (!WriteBarrierIsPreGrow())
+ {
+ // assert there is in fact a cmp r/m32, imm32 there
+ _ASSERTE(pBuf[10] == 0x81);
+
+ // Update the upper bound if we are using the PostGrow thunk.
+ pfunc = (size_t *) &pBuf[PostGrow_EphemeralUpperBound];
+ //avoid trivial self modifying code
+ if (*pfunc != (size_t) g_ephemeral_high)
+ {
+ flushICache = TRUE;
+ *pfunc = (size_t) g_ephemeral_high;
+ }
+ }
+ }
+
+ if (flushICache)
+ FlushInstructionCache(GetCurrentProcess(), (void *)JIT_PatchedWriteBarrierStart,
+ (BYTE*)JIT_PatchedWriteBarrierLast - (BYTE*)JIT_PatchedWriteBarrierStart);
+}
+
+/*********************************************************************/
+// When the GC heap grows, the ephemeral generation may no longer
+// be after the older generations. If this happens, we need to switch
+// to the PostGrow thunk that checks both upper and lower bounds.
+// Regardless, we need to update the thunk with the
+// new card_table - lowest_address.
+void StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck)
+{
+ CONTRACTL {
+ NOTHROW;
+ if (GetThread()) {GC_TRIGGERS;} else {GC_NOTRIGGER;}
+ } CONTRACTL_END;
+
+#ifdef WRITE_BARRIER_CHECK
+ // Don't do the fancy optimization if we are checking write barrier
+ if (((BYTE *)JIT_WriteBarrierEAX)[0] == 0xE9) // we are using slow write barrier
+ return;
+#endif // WRITE_BARRIER_CHECK
+
+ bool bWriteBarrierIsPreGrow = WriteBarrierIsPreGrow();
+ bool bStompWriteBarrierEphemeral = false;
+
+ BOOL bEESuspendedHere = FALSE;
+
+ for (int iBarrier = 0; iBarrier < NUM_WRITE_BARRIERS; iBarrier++)
+ {
+ BYTE * pBuf = (BYTE *)c_rgWriteBarriers[iBarrier];
+ int reg = c_rgWriteBarrierRegs[iBarrier];
+
+ size_t *pfunc;
+
+ // Check if we are still using the pre-grow version of the write barrier.
+ if (bWriteBarrierIsPreGrow)
+ {
+ // Check if we need to use the upper bounds checking barrier stub.
+ if (bReqUpperBoundsCheck)
+ {
+ GCX_MAYBE_COOP_NO_THREAD_BROKEN((GetThread()!=NULL));
+ if( !isRuntimeSuspended && !bEESuspendedHere) {
+ ThreadSuspend::SuspendEE(ThreadSuspend::SUSPEND_FOR_GC_PREP);
+ bEESuspendedHere = TRUE;
+ }
+
+ pfunc = (size_t *) JIT_WriteBarrierReg_PostGrow;
+ memcpy(pBuf, pfunc, 42);
+
+ // assert the copied code ends in a ret to make sure we got the right length
+ _ASSERTE(pBuf[41] == 0xC3);
+
+ // We need to adjust registers in a couple of instructions
+ // It would be nice to have the template contain all zeroes for
+ // the register fields (corresponding to EAX), but that doesn't
+ // work because then we get a smaller encoding for the compares
+ // that only works for EAX but not the other registers
+ // So we always have to clear the register fields before updating them.
+
+ // First instruction to patch is a mov [edx], reg
+
+ _ASSERTE(pBuf[0] == 0x89);
+ // Update the reg field (bits 3..5) of the ModR/M byte of this instruction
+ pBuf[1] &= 0xc7;
+ pBuf[1] |= reg << 3;
+
+ // Second instruction to patch is cmp reg, imm32 (low bound)
+
+ _ASSERTE(pBuf[2] == 0x81);
+ // Here the lowest three bits in ModR/M field are the register
+ pBuf[3] &= 0xf8;
+ pBuf[3] |= reg;
+
+ // Third instruction to patch is another cmp reg, imm32 (high bound)
+
+ _ASSERTE(pBuf[10] == 0x81);
+ // Here the lowest three bits in ModR/M field are the register
+ pBuf[11] &= 0xf8;
+ pBuf[11] |= reg;
+
+ bStompWriteBarrierEphemeral = true;
+ // What we're trying to update is the offset field of a
+ // cmp offset[edx], 0ffh instruction
+ _ASSERTE(pBuf[22] == 0x80);
+ pfunc = (size_t *) &pBuf[PostGrow_CardTableFirstLocation];
+ *pfunc = (size_t) g_card_table;
+
+ // What we're trying to update is the offset field of a
+ // mov offset[edx], 0ffh instruction
+ _ASSERTE(pBuf[34] == 0xC6);
+ pfunc = (size_t *) &pBuf[PostGrow_CardTableSecondLocation];
+
+ }
+ else
+ {
+ // What we're trying to update is the offset field of a
+ // cmp offset[edx], 0ffh instruction
+ _ASSERTE(pBuf[14] == 0x80);
+ pfunc = (size_t *) &pBuf[PreGrow_CardTableFirstLocation];
+ *pfunc = (size_t) g_card_table;
+
+ // What we're trying to update is the offset field of a
+ // mov offset[edx], 0ffh instruction
+ _ASSERTE(pBuf[26] == 0xC6);
+ pfunc = (size_t *) &pBuf[PreGrow_CardTableSecondLocation];
+ }
+ }
+ else
+ {
+ // What we're trying to update is the offset field of a
+ // cmp offset[edx], 0ffh instruction
+ _ASSERTE(pBuf[22] == 0x80);
+ pfunc = (size_t *) &pBuf[PostGrow_CardTableFirstLocation];
+ *pfunc = (size_t) g_card_table;
+
+ // What we're trying to update is the offset field of a
+ // mov offset[edx], 0ffh instruction
+ _ASSERTE(pBuf[34] == 0xC6);
+ pfunc = (size_t *) &pBuf[PostGrow_CardTableSecondLocation];
+ }
+
+ // Stick in the adjustment value.
+ *pfunc = (size_t) g_card_table;
+ }
+
+ if (bStompWriteBarrierEphemeral)
+ {
+ _ASSERTE(isRuntimeSuspended || bEESuspendedHere);
+ StompWriteBarrierEphemeral(true);
+ }
+ else
+ {
+ FlushInstructionCache(GetCurrentProcess(), (void *)JIT_PatchedWriteBarrierStart,
+ (BYTE*)JIT_PatchedWriteBarrierLast - (BYTE*)JIT_PatchedWriteBarrierStart);
+ }
+
+ if(bEESuspendedHere)
+ ThreadSuspend::RestartEE(FALSE, TRUE);
+}
+
diff --git a/src/vm/i386/profiler.cpp b/src/vm/i386/profiler.cpp
new file mode 100644
index 0000000000..11d4247aef
--- /dev/null
+++ b/src/vm/i386/profiler.cpp
@@ -0,0 +1,336 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+// FILE: profiler.cpp
+//
+
+//
+
+//
+// ======================================================================================
+
+#include "common.h"
+
+#ifdef PROFILING_SUPPORTED
+#include "proftoeeinterfaceimpl.h"
+
+//
+// The following structure is the format on x86 builds of the data
+// being passed in platformSpecificHandle for ProfileEnter/Leave/Tailcall
+//
+typedef struct _PROFILE_PLATFORM_SPECIFIC_DATA
+{
+ FunctionID functionId;
+ DWORD doubleBuffer1;
+ DWORD doubleBuffer2;
+ DWORD floatBuffer;
+ DWORD floatingPointValuePresent;
+ UINT_PTR eax; // eax and edx must be contiguous in this structure to make getting 64-bit return values easier.
+ UINT_PTR edx;
+ UINT_PTR ecx;
+ UINT_PTR esp;
+ UINT_PTR ip;
+} PROFILE_PLATFORM_SPECIFIC_DATA, *PPROFILE_PLATFORM_SPECIFIC_DATA;
+
+
+/*
+ * ProfileGetIPFromPlatformSpecificHandle
+ *
+ * This routine takes the platformSpecificHandle and retrieves from it the
+ * IP value.
+ *
+ * Parameters:
+ * handle - the platformSpecificHandle passed to ProfileEnter/Leave/Tailcall
+ *
+ * Returns:
+ * The IP value stored in the handle.
+ */
+UINT_PTR ProfileGetIPFromPlatformSpecificHandle(void *handle)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ return ((PROFILE_PLATFORM_SPECIFIC_DATA *)handle)->ip;
+}
+
+
+/*
+ * ProfileSetFunctionIDInPlatformSpecificHandle
+ *
+ * This routine takes the platformSpecificHandle and functionID, and assigns
+ * functionID to the functionId field of the platformSpecificHandle.
+ *
+ * Parameters:
+ * pPlatformSpecificHandle - the platformSpecificHandle passed to ProfileEnter/Leave/Tailcall
+ * functionID - the FunctionID to be assigned
+ *
+ * Returns:
+ * None
+ */
+void ProfileSetFunctionIDInPlatformSpecificHandle(void * pPlatformSpecificHandle, FunctionID functionID)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ _ASSERTE(pPlatformSpecificHandle != NULL);
+ _ASSERTE(functionID != NULL);
+
+ PROFILE_PLATFORM_SPECIFIC_DATA * pData = reinterpret_cast<PROFILE_PLATFORM_SPECIFIC_DATA *>(pPlatformSpecificHandle);
+ pData->functionId = functionID;
+}
+
+/*
+ * ProfileArgIterator::ProfileArgIterator
+ *
+ * Constructor. Initializes for arg iteration.
+ *
+ * Parameters:
+ * pMetaSig - The signature of the method we are going to iterate over
+ * platformSpecificHandle - the value passed to ProfileEnter/Leave/Tailcall
+ *
+ * Returns:
+ * None.
+ */
+ProfileArgIterator::ProfileArgIterator(MetaSig * pMetaSig, void * platformSpecificHandle):
+ m_argIterator(pMetaSig)
+{
+ //
+ // It would be really nice to contract this, but the underlying functions are convolutedly
+ // contracted. Basically everything should be loaded by the time the profiler gets a call
+ // back, so everything is NOTHROW/NOTRIGGER, but there is currently no mechanism for saying that
+ // the contracts in called functions should be for the best case, not the worst case.
+ //
+ WRAPPER_NO_CONTRACT;
+
+ m_handle = platformSpecificHandle;
+}
+
+/*
+ * ProfileArgIterator::~ProfileArgIterator
+ *
+ * Destructor, releases all resources.
+ *
+ */
+ProfileArgIterator::~ProfileArgIterator()
+{
+ LIMITED_METHOD_CONTRACT;
+}
+
+/*
+ * ProfileArgIterator::GetNextArgAddr
+ *
+ * After initialization, this method is called repeatedly until it
+ * returns NULL to get the address of each arg. Note: this address
+ * could be anywhere on the stack.
+ *
+ * Returns:
+ * Address of the argument, or NULL if iteration is complete.
+ */
+LPVOID ProfileArgIterator::GetNextArgAddr()
+{
+ //
+ // It would be really nice to contract this, but the underlying functions are convolutedly
+ // contracted. Basically everything should be loaded by the time the profiler gets a call
+ // back, so everything is NOTHROW/NOTRIGGER, but there is currently no mechanism for saying that
+ // the contracts in called functions should be for the best case, not the worst case.
+ //
+ WRAPPER_NO_CONTRACT;
+
+ int argOffset = m_argIterator.GetNextOffset();
+
+ //
+ // Value is enregistered, figure out where and return that.
+ //
+ PROFILE_PLATFORM_SPECIFIC_DATA *pData = (PROFILE_PLATFORM_SPECIFIC_DATA *)m_handle;
+
+ //
+ // TransitionBlock::InvalidOffset indicates the end of the args.
+ //
+ if (argOffset == TransitionBlock::InvalidOffset)
+ {
+ return NULL;
+ }
+
+ if (pData == NULL)
+ {
+ //
+ // Something wrong.
+ //
+ _ASSERTE(!"Why do we have a NULL data pointer here?");
+ return NULL;
+ }
+
+ //
+ // If this is not enregistered, return the value
+ //
+ if (TransitionBlock::IsStackArgumentOffset(argOffset))
+ {
+ return ((LPBYTE)pData->esp) + (argOffset - TransitionBlock::GetOffsetOfArgs());
+ }
+
+ switch (argOffset - TransitionBlock::GetOffsetOfArgumentRegisters())
+ {
+ case offsetof(ArgumentRegisters, ECX):
+ return &(pData->ecx);
+ case offsetof(ArgumentRegisters, EDX):
+ return &(pData->edx);
+ }
+
+ _ASSERTE(!"Arg is an unsaved register!");
+ return NULL;
+}
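+/*
+ * Illustrative sketch (not part of the original source): the offset-to-address mapping used above can
+ * be summarized as follows. Stack arguments live at a displacement from the ESP captured in the data
+ * block; the only enregistered arguments on x86 are ECX and EDX, which the assembly stub spilled into
+ * the block. The helper name is hypothetical.
+ */
+static LPVOID Sketch_ArgOffsetToAddress(PROFILE_PLATFORM_SPECIFIC_DATA *pData, int argOffset)
+{
+    if (TransitionBlock::IsStackArgumentOffset(argOffset))
+        return ((LPBYTE)pData->esp) + (argOffset - TransitionBlock::GetOffsetOfArgs());
+
+    // otherwise it must be ECX or EDX (a real implementation would assert on any other offset)
+    int regOffset = argOffset - TransitionBlock::GetOffsetOfArgumentRegisters();
+    return (regOffset == offsetof(ArgumentRegisters, ECX)) ? (LPVOID)&pData->ecx
+                                                           : (LPVOID)&pData->edx;
+}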
+
+/*
+ * ProfileArgIterator::GetHiddenArgValue
+ *
+ * Called after initialization, any number of times, to retrieve any
+ * hidden argument, so that resolution for Generics can be done.
+ *
+ * Parameters:
+ * None.
+ *
+ * Returns:
+ * Value of the hidden parameter, or NULL if none exists.
+ */
+LPVOID ProfileArgIterator::GetHiddenArgValue(void)
+{
+ //
+ // It would be really nice to contract this, but the underlying functions are convolutedly
+ // contracted. Basically everything should be loaded by the time the profiler gets a call
+ // back, so everything is NOTHROW/NOTRIGGER, but there is currently no mechanism for saying that
+ // the contracts in called functions should be for the best case, not the worst case.
+ //
+ WRAPPER_NO_CONTRACT;
+
+ PROFILE_PLATFORM_SPECIFIC_DATA *pData = (PROFILE_PLATFORM_SPECIFIC_DATA *)m_handle;
+
+ MethodDesc *pMethodDesc = FunctionIdToMethodDesc(pData->functionId);
+
+ if (!pMethodDesc->RequiresInstArg())
+ {
+ return NULL;
+ }
+
+ //
+ // ArgIterator::GetParamTypeArgOffset() can only be called after calling GetNextOffset until the
+ // entire signature has been walked, but *before* GetNextOffset returns TransitionBlock::InvalidOffset,
+ // which indicates the end.
+ //
+
+ //
+ // Get the offset of the hidden arg
+ //
+ int argOffset = m_argIterator.GetParamTypeArgOffset();
+
+ //
+ // If this is not enregistered, return the value
+ //
+ if (TransitionBlock::IsStackArgumentOffset(argOffset))
+ {
+ return *(LPVOID *)(((LPBYTE)pData->esp) + (argOffset - TransitionBlock::GetOffsetOfArgs()));
+ }
+
+ switch (argOffset - TransitionBlock::GetOffsetOfArgumentRegisters())
+ {
+ case offsetof(ArgumentRegisters, ECX):
+ return (LPVOID)(pData->ecx);
+ case offsetof(ArgumentRegisters, EDX):
+ return (LPVOID)(pData->edx);
+ }
+
+ _ASSERTE(!"Arg is an unsaved register!");
+ return NULL;
+}
+
+/*
+ * ProfileArgIterator::GetThis
+ *
+ * Called after initialization, any number of times, to retrieve the
+ * value of 'this'.
+ *
+ * Parameters:
+ * None.
+ *
+ * Returns:
+ * value of the 'this' parameter, or NULL if none exists.
+ */
+LPVOID ProfileArgIterator::GetThis(void)
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ }
+ CONTRACTL_END;
+
+ PROFILE_PLATFORM_SPECIFIC_DATA *pData = (PROFILE_PLATFORM_SPECIFIC_DATA *)m_handle;
+
+ if (pData->ip == 0)
+ {
+ return NULL;
+ }
+
+ if (!m_argIterator.HasThis())
+ {
+ return NULL;
+ }
+
+ switch (offsetof(ArgumentRegisters, THIS_REG))
+ {
+ case offsetof(ArgumentRegisters, ECX):
+ return (LPVOID)pData->ecx;
+
+ case offsetof(ArgumentRegisters, EDX):
+ return (LPVOID)pData->edx;
+ }
+
+ _ASSERTE(!"This is an unsaved register!");
+ return NULL;
+}
+
+
+
+/*
+ * ProfileArgIterator::GetReturnBufferAddr
+ *
+ * Called after initialization, any number of times, to retrieve the
+ * address of the return buffer. NULL indicates no return value.
+ *
+ * Parameters:
+ * None.
+ *
+ * Returns:
+ * Address of the return buffer, or NULL if none exists.
+ */
+LPVOID ProfileArgIterator::GetReturnBufferAddr(void)
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ }
+ CONTRACTL_END;
+
+ PROFILE_PLATFORM_SPECIFIC_DATA *pData = (PROFILE_PLATFORM_SPECIFIC_DATA *)m_handle;
+
+ if (m_argIterator.HasRetBuffArg())
+ {
+ return (void *)(pData->eax);
+ }
+
+ switch (m_argIterator.GetSig()->GetReturnType())
+ {
+ case ELEMENT_TYPE_R8:
+ _ASSERTE(pData->floatingPointValuePresent);
+ return (void *)(&(pData->doubleBuffer1));
+
+ case ELEMENT_TYPE_R4:
+ _ASSERTE(pData->floatingPointValuePresent);
+ return (void *)(&(pData->floatBuffer));
+
+ default:
+ return &(pData->eax);
+ }
+}
+
+#endif // PROFILING_SUPPORTED
+
diff --git a/src/vm/i386/remotingx86.cpp b/src/vm/i386/remotingx86.cpp
new file mode 100644
index 0000000000..3a9e891267
--- /dev/null
+++ b/src/vm/i386/remotingx86.cpp
@@ -0,0 +1,225 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+//
+//
+// File: remotingx86.cpp
+//
+
+//
+//
+// Purpose: Defines various remoting related functions for the x86 architecture
+//
+
+//
+//
+
+//
+
+#include "common.h"
+
+#ifdef FEATURE_REMOTING
+
+#include "excep.h"
+#include "comdelegate.h"
+#include "remoting.h"
+#include "field.h"
+#include "siginfo.hpp"
+#include "stackbuildersink.h"
+#include "threads.h"
+#include "method.hpp"
+#include "asmconstants.h"
+#include "interoputil.h"
+#include "virtualcallstub.h"
+
+#ifdef FEATURE_COMINTEROP
+#include "comcallablewrapper.h"
+#include "comcache.h"
+#endif // FEATURE_COMINTEROP
+
+//+----------------------------------------------------------------------------
+//
+// Method: CTPMethodTable::CreateThunkForVirtualMethod private
+//
+// Synopsis: Creates the thunk that pushes the supplied slot number and jumps
+// to TP Stub
+//
+//+----------------------------------------------------------------------------
+PCODE CTPMethodTable::CreateThunkForVirtualMethod(DWORD dwSlot, BYTE *startaddr)
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_ANY;
+ PRECONDITION(CheckPointer(startaddr));
+ }
+ CONTRACTL_END;
+
+ BYTE *pCode = startaddr;
+
+ // 0000 B8 67 45 23 01 MOV EAX, dwSlot
+ // 0005 E9 ?? ?? ?? ?? JMP TransparentProxyStub
+ *pCode++ = 0xB8;
+ *((DWORD *) pCode) = dwSlot;
+ pCode += sizeof(DWORD);
+ *pCode++ = 0xE9;
+ // self-relative jump, relative to the start of the next instruction.
+ *((LONG *) pCode) = (LONG)((size_t)GetTPStubEntryPoint() - (size_t) (pCode + sizeof(LONG)));
+
+ _ASSERTE(CVirtualThunkMgr::IsThunkByASM((PCODE)startaddr));
+
+ return (PCODE)startaddr;
+}
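+// Illustrative sketch (not part of the original source): the 10-byte thunk written above is
+//     B8 <dwSlot>    mov eax, dwSlot
+//     E9 <rel32>     jmp TransparentProxyStub
+// and, as for any x86 near jump, rel32 = target - (address of the byte just past the rel32 field).
+// A hedged restatement of the displacement computation, with a hypothetical name:
+static LONG Sketch_ComputeRel32(BYTE *pRel32Field, size_t target)
+{
+    // the CPU adds the displacement to the address of the *next* instruction
+    return (LONG)(target - (size_t)(pRel32Field + sizeof(LONG)));
+}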
+
+
+//+----------------------------------------------------------------------------
+//
+// Method: CTPMethodTable::ActivatePrecodeRemotingThunk private
+//
+// Synopsis: Patch the precode remoting thunk to begin interception
+//
+//+----------------------------------------------------------------------------
+void CTPMethodTable::ActivatePrecodeRemotingThunk()
+{
+ CONTRACTL
+ {
+ THROWS;
+ GC_TRIGGERS;
+ MODE_ANY;
+ }
+ CONTRACTL_END;
+
+ // Before activation:
+ // 0000 C3 ret
+ // 0001 90 nop
+
+ // After activation:
+ // 0000 85 C9 test ecx,ecx
+
+ // 0002 74 XX je RemotingDone
+ // 0004 81 39 XX XX XX XX cmp dword ptr [ecx],11111111h
+ // 000A 74 XX je RemotingCheck
+
+ // Switch offset and size of patch based on the jump opcode used.
+ BYTE* pCode = (BYTE*)PrecodeRemotingThunk;
+
+ SIZE_T mtOffset = 0x0006;
+ SIZE_T size = 0x000A;
+
+ // Patch "ret + nop" to "test ecx,ecx"
+ *(UINT16 *)pCode = 0xC985;
+
+ // Replace placeholder value with the actual address of TP method table
+ _ASSERTE(*(PVOID*)(pCode+mtOffset) == (PVOID*)0x11111111);
+ *(PVOID*)(pCode+mtOffset) = GetMethodTable();
+
+ FlushInstructionCache(GetCurrentProcess(), pCode, size);
+}
+
+//+----------------------------------------------------------------------------
+//
+// Method: CVirtualThunkMgr::DoTraceStub public
+//
+// Synopsis: Traces the stub given the starting address
+//
+//+----------------------------------------------------------------------------
+BOOL CVirtualThunkMgr::DoTraceStub(PCODE stubStartAddress, TraceDestination *trace)
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_ANY;
+ PRECONDITION(stubStartAddress != NULL);
+ PRECONDITION(CheckPointer(trace));
+ }
+ CONTRACTL_END;
+
+ BOOL bIsStub = FALSE;
+
+ // Find a thunk whose code address matches the starting address
+ LPBYTE pThunk = FindThunk((LPBYTE)stubStartAddress);
+ if(NULL != pThunk)
+ {
+ LPBYTE pbAddr = NULL;
+ LONG destAddress = 0;
+ if((LPBYTE)stubStartAddress == pThunk)
+ {
+
+ // Extract the long which gives the self relative address
+ // of the destination
+ pbAddr = pThunk + sizeof(BYTE) + sizeof(DWORD) + sizeof(BYTE);
+ destAddress = *(LONG *)pbAddr;
+
+ // Calculate the absolute address by adding the address of the next
+ // instruction after the jump instruction
+ destAddress += (LONG)(size_t)(pbAddr + sizeof(LONG));
+
+ }
+
+ // We cannot tell where the stub will end up until OnCall is reached.
+ // So we tell the debugger to run till OnCall is reached and then
+ // come back and ask us again for the actual destination address of
+ // the call
+
+ Stub *stub = Stub::RecoverStub((TADDR)destAddress);
+
+ trace->InitForFramePush(stub->GetPatchAddress());
+ bIsStub = TRUE;
+ }
+
+ return bIsStub;
+}
+
+//+----------------------------------------------------------------------------
+//
+// Method: CVirtualThunkMgr::IsThunkByASM public
+//
+// Synopsis: Check assembly to see if this one of our thunks
+//
+//+----------------------------------------------------------------------------
+BOOL CVirtualThunkMgr::IsThunkByASM(PCODE startaddr)
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_ANY;
+ PRECONDITION(startaddr != NULL);
+ }
+ CONTRACTL_END;
+
+ PTR_BYTE pbCode = PTR_BYTE(startaddr);
+
+ return ((pbCode[0] == 0xB8) &&
+ (pbCode[5] == 0xe9) &&
+ (rel32Decode((TADDR)(pbCode + 6)) == CTPMethodTable::GetTPStubEntryPoint()));
+}
+
+//+----------------------------------------------------------------------------
+//
+// Method: CVirtualThunkMgr::GetMethodDescByASM public
+//
+// Synopsis: Parses MethodDesc out of assembly code
+//
+//+----------------------------------------------------------------------------
+MethodDesc *CVirtualThunkMgr::GetMethodDescByASM(PCODE startaddr, MethodTable *pMT)
+{
+ CONTRACT (MethodDesc*)
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_ANY;
+ PRECONDITION(startaddr != NULL);
+ PRECONDITION(CheckPointer(pMT));
+ POSTCONDITION(CheckPointer(RETVAL));
+ }
+ CONTRACT_END;
+
+ RETURN (pMT->GetMethodDescForSlot(*((DWORD *) (startaddr + 1))));
+}
+
+#endif// FEATURE_REMOTING
+
diff --git a/src/vm/i386/stublinkerx86.cpp b/src/vm/i386/stublinkerx86.cpp
new file mode 100644
index 0000000000..0037a7d3e6
--- /dev/null
+++ b/src/vm/i386/stublinkerx86.cpp
@@ -0,0 +1,6806 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+
+// NOTE on Frame Size C_ASSERT usage in this file
+// If the frame size changes, then the stubs have to be revisited for correctness.
+// Kindly revisit the logic and then update the constants so that the C_ASSERT will again fire
+// if someone changes the frame size. You are expected to keep this hard-coded constant
+// up to date so that changes in the frame size trigger errors at compile time if the code is not altered.
+
+// Precompiled Header
+
+#include "common.h"
+
+#include "field.h"
+#include "stublink.h"
+
+#include "tls.h"
+#include "frames.h"
+#include "excep.h"
+#include "dllimport.h"
+#include "log.h"
+#include "security.h"
+#include "comdelegate.h"
+#include "array.h"
+#include "jitinterface.h"
+#include "codeman.h"
+#ifdef FEATURE_REMOTING
+#include "remoting.h"
+#endif
+#include "dbginterface.h"
+#include "eeprofinterfaces.h"
+#include "eeconfig.h"
+#include "securitydeclarative.h"
+#ifdef _TARGET_X86_
+#include "asmconstants.h"
+#endif // _TARGET_X86_
+#include "class.h"
+#include "stublink.inl"
+
+#ifdef FEATURE_COMINTEROP
+#include "comtoclrcall.h"
+#include "runtimecallablewrapper.h"
+#include "comcache.h"
+#include "olevariant.h"
+#include "notifyexternals.h"
+#endif // FEATURE_COMINTEROP
+
+#ifdef FEATURE_PREJIT
+#include "compile.h"
+#endif
+
+#if defined(_DEBUG) && defined(STUBLINKER_GENERATES_UNWIND_INFO)
+#include <psapi.h>
+#endif
+
+
+#ifndef DACCESS_COMPILE
+
+extern "C" VOID __cdecl StubRareEnable(Thread *pThread);
+#ifdef FEATURE_COMINTEROP
+extern "C" HRESULT __cdecl StubRareDisableHR(Thread *pThread);
+#endif // FEATURE_COMINTEROP
+extern "C" VOID __cdecl StubRareDisableTHROW(Thread *pThread, Frame *pFrame);
+
+extern "C" VOID __cdecl ArrayOpStubNullException(void);
+extern "C" VOID __cdecl ArrayOpStubRangeException(void);
+extern "C" VOID __cdecl ArrayOpStubTypeMismatchException(void);
+
+#if defined(_TARGET_AMD64_)
+#define EXCEPTION_HELPERS(base) \
+ extern "C" VOID __cdecl base##_RSIRDI_ScratchArea(void); \
+ extern "C" VOID __cdecl base##_ScratchArea(void); \
+ extern "C" VOID __cdecl base##_RSIRDI(void); \
+ extern "C" VOID __cdecl base(void)
+EXCEPTION_HELPERS(ArrayOpStubNullException);
+EXCEPTION_HELPERS(ArrayOpStubRangeException);
+EXCEPTION_HELPERS(ArrayOpStubTypeMismatchException);
+#undef EXCEPTION_HELPERS
+
+#if defined(_DEBUG)
+extern "C" VOID __cdecl DebugCheckStubUnwindInfo();
+#endif
+#endif // _TARGET_AMD64_
+
+// Presumably this code knows what it is doing with TLS. If we are hiding these
+// services from normal code, reveal them here.
+#ifdef TlsGetValue
+#undef TlsGetValue
+#endif
+
+#ifdef FEATURE_COMINTEROP
+Thread* __stdcall CreateThreadBlockReturnHr(ComMethodFrame *pFrame);
+#endif
+
+
+
+#ifdef _TARGET_AMD64_
+
+BOOL IsPreservedReg (X86Reg reg)
+{
+ UINT16 PreservedRegMask =
+ (1 << kRBX)
+ | (1 << kRBP)
+ | (1 << kRSI)
+ | (1 << kRDI)
+ | (1 << kR12)
+ | (1 << kR13)
+ | (1 << kR14)
+ | (1 << kR15);
+ return PreservedRegMask & (1 << reg);
+}
+
+#endif // _TARGET_AMD64_
+
+#ifdef _TARGET_AMD64_
+//-----------------------------------------------------------------------
+// InstructionFormat for near Jump and short Jump
+//-----------------------------------------------------------------------
+
+//X64EmitTailcallWithRSPAdjust
+class X64NearJumpSetup : public InstructionFormat
+{
+ public:
+ X64NearJumpSetup() : InstructionFormat( InstructionFormat::k8|InstructionFormat::k32
+ | InstructionFormat::k64Small | InstructionFormat::k64
+ )
+ {
+ LIMITED_METHOD_CONTRACT;
+ }
+
+ virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
+ {
+ LIMITED_METHOD_CONTRACT
+ switch (refsize)
+ {
+ case k8:
+ return 0;
+
+ case k32:
+ return 0;
+
+ case k64Small:
+ return 5;
+
+ case k64:
+ return 10;
+
+ default:
+ _ASSERTE(!"unexpected refsize");
+ return 0;
+
+ }
+ }
+
+ virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
+ {
+ LIMITED_METHOD_CONTRACT
+ if (k8 == refsize)
+ {
+ // do nothing, X64NearJump will take care of this
+ }
+ else if (k32 == refsize)
+ {
+ // do nothing, X64NearJump will take care of this
+ }
+ else if (k64Small == refsize)
+ {
+ UINT64 TargetAddress = (INT64)pOutBuffer + fixedUpReference + GetSizeOfInstruction(refsize, variationCode);
+ _ASSERTE(FitsInU4(TargetAddress));
+
+ // mov eax, imm32 ; zero-extended
+ pOutBuffer[0] = 0xB8;
+ *((UINT32*)&pOutBuffer[1]) = (UINT32)TargetAddress;
+ }
+ else if (k64 == refsize)
+ {
+ // mov rax, imm64
+ pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+ pOutBuffer[1] = 0xB8;
+ *((UINT64*)&pOutBuffer[2]) = (UINT64)(((INT64)pOutBuffer) + fixedUpReference + GetSizeOfInstruction(refsize, variationCode));
+ }
+ else
+ {
+ _ASSERTE(!"unreached");
+ }
+ }
+
+ virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset)
+ {
+ STATIC_CONTRACT_NOTHROW;
+ STATIC_CONTRACT_GC_NOTRIGGER;
+ STATIC_CONTRACT_FORBID_FAULT;
+
+
+ if (fExternal)
+ {
+ switch (refsize)
+ {
+ case InstructionFormat::k8:
+ // For external, we don't have enough info to predict
+ // the offset.
+ return FALSE;
+
+ case InstructionFormat::k32:
+ return sizeof(PVOID) <= sizeof(UINT32);
+
+ case InstructionFormat::k64Small:
+ return FitsInI4(offset);
+
+ case InstructionFormat::k64:
+ // intentional fallthru
+ case InstructionFormat::kAllowAlways:
+ return TRUE;
+
+ default:
+ _ASSERTE(0);
+ return FALSE;
+ }
+ }
+ else
+ {
+ switch (refsize)
+ {
+ case InstructionFormat::k8:
+ return FitsInI1(offset);
+
+ case InstructionFormat::k32:
+ return FitsInI4(offset);
+
+ case InstructionFormat::k64Small:
+ // EmitInstruction emits a non-relative jmp for
+ // k64Small. We don't have enough info to predict the
+ // target address. (Even if we did, this would only
+ // handle the set of unsigned offsets with bit 31 set
+ // and no higher bits set, too uncommon/hard to test.)
+ return FALSE;
+
+ case InstructionFormat::k64:
+ // intentional fallthru
+ case InstructionFormat::kAllowAlways:
+ return TRUE;
+ default:
+ _ASSERTE(0);
+ return FALSE;
+ }
+ }
+ }
+};
+
+class X64NearJumpExecute : public InstructionFormat
+{
+ public:
+ X64NearJumpExecute() : InstructionFormat( InstructionFormat::k8|InstructionFormat::k32
+ | InstructionFormat::k64Small | InstructionFormat::k64
+ )
+ {
+ LIMITED_METHOD_CONTRACT;
+ }
+
+ virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
+ {
+ LIMITED_METHOD_CONTRACT
+ switch (refsize)
+ {
+ case k8:
+ return 2;
+
+ case k32:
+ return 5;
+
+ case k64Small:
+ return 3;
+
+ case k64:
+ return 3;
+
+ default:
+ _ASSERTE(!"unexpected refsize");
+ return 0;
+
+ }
+ }
+
+ virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
+ {
+ LIMITED_METHOD_CONTRACT
+ if (k8 == refsize)
+ {
+ pOutBuffer[0] = 0xeb;
+ *((__int8*)(pOutBuffer+1)) = (__int8)fixedUpReference;
+ }
+ else if (k32 == refsize)
+ {
+ pOutBuffer[0] = 0xe9;
+ *((__int32*)(pOutBuffer+1)) = (__int32)fixedUpReference;
+ }
+ else if (k64Small == refsize)
+ {
+ // REX.W jmp rax
+ pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+ pOutBuffer[1] = 0xFF;
+ pOutBuffer[2] = 0xE0;
+ }
+ else if (k64 == refsize)
+ {
+ // REX.W jmp rax
+ pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+ pOutBuffer[1] = 0xFF;
+ pOutBuffer[2] = 0xE0;
+ }
+ else
+ {
+ _ASSERTE(!"unreached");
+ }
+ }
+
+ virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset)
+ {
+ STATIC_CONTRACT_NOTHROW;
+ STATIC_CONTRACT_GC_NOTRIGGER;
+ STATIC_CONTRACT_FORBID_FAULT;
+
+
+ if (fExternal)
+ {
+ switch (refsize)
+ {
+ case InstructionFormat::k8:
+ // For external, we don't have enough info to predict
+ // the offset.
+ return FALSE;
+
+ case InstructionFormat::k32:
+ return sizeof(PVOID) <= sizeof(UINT32);
+
+ case InstructionFormat::k64Small:
+ return FitsInI4(offset);
+
+ case InstructionFormat::k64:
+ // intentional fallthru
+ case InstructionFormat::kAllowAlways:
+ return TRUE;
+
+ default:
+ _ASSERTE(0);
+ return FALSE;
+ }
+ }
+ else
+ {
+ switch (refsize)
+ {
+ case InstructionFormat::k8:
+ return FitsInI1(offset);
+
+ case InstructionFormat::k32:
+ return FitsInI4(offset);
+
+ case InstructionFormat::k64Small:
+ // EmitInstruction emits a non-relative jmp for
+ // k64Small. We don't have enough info to predict the
+ // target address. (Even if we did, this would only
+ // handle the set of unsigned offsets with bit 31 set
+ // and no higher bits set, too uncommon/hard to test.)
+ return FALSE;
+
+ case InstructionFormat::k64:
+ // intentional fallthru
+ case InstructionFormat::kAllowAlways:
+ return TRUE;
+ default:
+ _ASSERTE(0);
+ return FALSE;
+ }
+ }
+ }
+};
+
+#endif
+
+//-----------------------------------------------------------------------
+// InstructionFormat for near Jump and short Jump
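+ // (For reference: the k8 form emits a 2-byte "EB rel8" short jump and the k32
+ // form a 5-byte "E9 rel32" near jump; on AMD64 the k64Small/k64 forms load the
+ // absolute target into eax/rax and jump through the register.)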
+//-----------------------------------------------------------------------
+class X86NearJump : public InstructionFormat
+{
+ public:
+ X86NearJump() : InstructionFormat( InstructionFormat::k8|InstructionFormat::k32
+#ifdef _TARGET_AMD64_
+ | InstructionFormat::k64Small | InstructionFormat::k64
+#endif // _TARGET_AMD64_
+ )
+ {
+ LIMITED_METHOD_CONTRACT;
+ }
+
+ virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
+ {
+ LIMITED_METHOD_CONTRACT
+ switch (refsize)
+ {
+ case k8:
+ return 2;
+
+ case k32:
+ return 5;
+#ifdef _TARGET_AMD64_
+ case k64Small:
+ return 5 + 2;
+
+ case k64:
+ return 12;
+#endif // _TARGET_AMD64_
+ default:
+ _ASSERTE(!"unexpected refsize");
+ return 0;
+
+ }
+ }
+
+ virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
+ {
+ LIMITED_METHOD_CONTRACT
+ if (k8 == refsize)
+ {
+ pOutBuffer[0] = 0xeb;
+ *((__int8*)(pOutBuffer+1)) = (__int8)fixedUpReference;
+ }
+ else if (k32 == refsize)
+ {
+ pOutBuffer[0] = 0xe9;
+ *((__int32*)(pOutBuffer+1)) = (__int32)fixedUpReference;
+ }
+#ifdef _TARGET_AMD64_
+ else if (k64Small == refsize)
+ {
+ UINT64 TargetAddress = (INT64)pOutBuffer + fixedUpReference + GetSizeOfInstruction(refsize, variationCode);
+ _ASSERTE(FitsInU4(TargetAddress));
+
+ // mov eax, imm32 ; zero-extended
+ pOutBuffer[0] = 0xB8;
+ *((UINT32*)&pOutBuffer[1]) = (UINT32)TargetAddress;
+
+ // jmp rax
+ pOutBuffer[5] = 0xFF;
+ pOutBuffer[6] = 0xE0;
+ }
+ else if (k64 == refsize)
+ {
+ // mov rax, imm64
+ pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+ pOutBuffer[1] = 0xB8;
+ *((UINT64*)&pOutBuffer[2]) = (UINT64)(((INT64)pOutBuffer) + fixedUpReference + GetSizeOfInstruction(refsize, variationCode));
+
+ // jmp rax
+ pOutBuffer[10] = 0xFF;
+ pOutBuffer[11] = 0xE0;
+ }
+#endif // _TARGET_AMD64_
+ else
+ {
+ _ASSERTE(!"unreached");
+ }
+ }
+
+ virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset)
+ {
+ STATIC_CONTRACT_NOTHROW;
+ STATIC_CONTRACT_GC_NOTRIGGER;
+ STATIC_CONTRACT_FORBID_FAULT;
+
+
+ if (fExternal)
+ {
+ switch (refsize)
+ {
+ case InstructionFormat::k8:
+ // For external, we don't have enough info to predict
+ // the offset.
+ return FALSE;
+
+ case InstructionFormat::k32:
+ return sizeof(PVOID) <= sizeof(UINT32);
+
+#ifdef _TARGET_AMD64_
+ case InstructionFormat::k64Small:
+ return FitsInI4(offset);
+
+ case InstructionFormat::k64:
+ // intentional fallthru
+#endif
+ case InstructionFormat::kAllowAlways:
+ return TRUE;
+
+ default:
+ _ASSERTE(0);
+ return FALSE;
+ }
+ }
+ else
+ {
+ switch (refsize)
+ {
+ case InstructionFormat::k8:
+ return FitsInI1(offset);
+
+ case InstructionFormat::k32:
+#ifdef _TARGET_AMD64_
+ return FitsInI4(offset);
+#else
+ return TRUE;
+#endif
+
+#ifdef _TARGET_AMD64_
+ case InstructionFormat::k64Small:
+ // EmitInstruction emits a non-relative jmp for
+ // k64Small. We don't have enough info to predict the
+ // target address. (Even if we did, this would only
+ // handle the set of unsigned offsets with bit 31 set
+ // and no higher bits set, too uncommon/hard to test.)
+ return FALSE;
+
+ case InstructionFormat::k64:
+ // intentional fallthru
+#endif
+ case InstructionFormat::kAllowAlways:
+ return TRUE;
+ default:
+ _ASSERTE(0);
+ return FALSE;
+ }
+ }
+ }
+};
+
+
+//-----------------------------------------------------------------------
+// InstructionFormat for conditional jump. Set the variationCode
+// to members of X86CondCode.
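+ // (Illustrative, assuming X86CondCode uses the standard x86 condition encodings,
+ // e.g. 4 = equal/zero: the short form of "je" would be 74 rel8 and the long form
+ // 0F 84 rel32.)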
+//-----------------------------------------------------------------------
+class X86CondJump : public InstructionFormat
+{
+ public:
+ X86CondJump(UINT allowedSizes) : InstructionFormat(allowedSizes)
+ {
+ LIMITED_METHOD_CONTRACT;
+ }
+
+ virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
+ {
+ LIMITED_METHOD_CONTRACT
+ return (refsize == k8 ? 2 : 6);
+ }
+
+ virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
+ {
+ LIMITED_METHOD_CONTRACT
+ if (refsize == k8)
+ {
+ pOutBuffer[0] = static_cast<BYTE>(0x70 | variationCode);
+ *((__int8*)(pOutBuffer+1)) = (__int8)fixedUpReference;
+ }
+ else
+ {
+ pOutBuffer[0] = 0x0f;
+ pOutBuffer[1] = static_cast<BYTE>(0x80 | variationCode);
+ *((__int32*)(pOutBuffer+2)) = (__int32)fixedUpReference;
+ }
+ }
+};
+
+
+//-----------------------------------------------------------------------
+// InstructionFormat for near call.
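+ // (For reference: the k32 form emits a 5-byte "E8 rel32" call; on AMD64 the
+ // k64Small/k64 forms load the absolute target into eax/rax and emit "call rax".)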
+//-----------------------------------------------------------------------
+class X86Call : public InstructionFormat
+{
+ public:
+ X86Call ()
+ : InstructionFormat( InstructionFormat::k32
+#ifdef _TARGET_AMD64_
+ | InstructionFormat::k64Small | InstructionFormat::k64
+#endif // _TARGET_AMD64_
+ )
+ {
+ LIMITED_METHOD_CONTRACT;
+ }
+
+ virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ switch (refsize)
+ {
+ case k32:
+ return 5;
+
+#ifdef _TARGET_AMD64_
+ case k64Small:
+ return 5 + 2;
+
+ case k64:
+ return 10 + 2;
+#endif // _TARGET_AMD64_
+
+ default:
+ _ASSERTE(!"unexpected refsize");
+ return 0;
+ }
+ }
+
+ virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
+ {
+ LIMITED_METHOD_CONTRACT
+
+ switch (refsize)
+ {
+ case k32:
+ pOutBuffer[0] = 0xE8;
+ *((__int32*)(1+pOutBuffer)) = (__int32)fixedUpReference;
+ break;
+
+#ifdef _TARGET_AMD64_
+ case k64Small:
+ UINT64 TargetAddress;
+
+ TargetAddress = (INT64)pOutBuffer + fixedUpReference + GetSizeOfInstruction(refsize, variationCode);
+ _ASSERTE(FitsInU4(TargetAddress));
+
+ // mov eax,<fixedUpReference> ; zero-extends
+ pOutBuffer[0] = 0xB8;
+ *((UINT32*)&pOutBuffer[1]) = (UINT32)TargetAddress;
+
+ // call rax
+ pOutBuffer[5] = 0xff;
+ pOutBuffer[6] = 0xd0;
+ break;
+
+ case k64:
+ // mov rax,<fixedUpReference>
+ pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+ pOutBuffer[1] = 0xB8;
+ *((UINT64*)&pOutBuffer[2]) = (UINT64)(((INT64)pOutBuffer) + fixedUpReference + GetSizeOfInstruction(refsize, variationCode));
+
+ // call rax
+ pOutBuffer[10] = 0xff;
+ pOutBuffer[11] = 0xd0;
+ break;
+#endif // _TARGET_AMD64_
+
+ default:
+ _ASSERTE(!"unreached");
+ break;
+ }
+ }
+
+// For x86, the default CanReach implementation will suffice. It only needs
+// to handle k32.
+#ifdef _TARGET_AMD64_
+ virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset)
+ {
+ if (fExternal)
+ {
+ switch (refsize)
+ {
+ case InstructionFormat::k32:
+ // For external, we don't have enough info to predict
+ // the offset.
+ return FALSE;
+
+ case InstructionFormat::k64Small:
+ return FitsInI4(offset);
+
+ case InstructionFormat::k64:
+ // intentional fallthru
+ case InstructionFormat::kAllowAlways:
+ return TRUE;
+
+ default:
+ _ASSERTE(0);
+ return FALSE;
+ }
+ }
+ else
+ {
+ switch (refsize)
+ {
+ case InstructionFormat::k32:
+ return FitsInI4(offset);
+
+ case InstructionFormat::k64Small:
+ // EmitInstruction emits a non-relative jmp for
+ // k64Small. We don't have enough info to predict the
+ // target address. (Even if we did, this would only
+ // handle the set of unsigned offsets with bit 31 set
+ // and no higher bits set, too uncommon/hard to test.)
+ return FALSE;
+
+ case InstructionFormat::k64:
+ // intentional fallthru
+ case InstructionFormat::kAllowAlways:
+ return TRUE;
+ default:
+ _ASSERTE(0);
+ return FALSE;
+ }
+ }
+ }
+#endif // _TARGET_AMD64_
+};
+
+
+//-----------------------------------------------------------------------
+// InstructionFormat for push imm32.
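+ // (Always emitted as the 5-byte "68 imm32" form.)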
+//-----------------------------------------------------------------------
+class X86PushImm32 : public InstructionFormat
+{
+ public:
+ X86PushImm32(UINT allowedSizes) : InstructionFormat(allowedSizes)
+ {
+ LIMITED_METHOD_CONTRACT;
+ }
+
+ virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ return 5;
+ }
+
+ virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ pOutBuffer[0] = 0x68;
+ // Only an absolute pushimm32 of the label address is supported. The fixedUpReference
+ // is the offset to the label from the current point, so add it to get the address.
+ *((__int32*)(1+pOutBuffer)) = (__int32)(fixedUpReference);
+ }
+};
+
+#if defined(_TARGET_AMD64_)
+//-----------------------------------------------------------------------
+// InstructionFormat for lea reg, [RIP relative].
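+ // (Emits the 7-byte form "REX.W lea reg, [rip+disp32]", i.e. 48 8D /r disp32,
+ // with REX.R added for r8-r15.)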
+//-----------------------------------------------------------------------
+class X64LeaRIP : public InstructionFormat
+{
+ public:
+ X64LeaRIP() : InstructionFormat(InstructionFormat::k64Small)
+ {
+ LIMITED_METHOD_CONTRACT;
+ }
+
+ virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ return 7;
+ }
+
+ virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset)
+ {
+ if (fExternal)
+ {
+ switch (refsize)
+ {
+ case InstructionFormat::k64Small:
+ // For external, we don't have enough info to predict
+ // the offset.
+ return FALSE;
+
+ case InstructionFormat::k64:
+ // intentional fallthru
+ case InstructionFormat::kAllowAlways:
+ return TRUE;
+
+ default:
+ _ASSERTE(0);
+ return FALSE;
+ }
+ }
+ else
+ {
+ switch (refsize)
+ {
+ case InstructionFormat::k64Small:
+ return FitsInI4(offset);
+
+ case InstructionFormat::k64:
+ // intentional fallthru
+ case InstructionFormat::kAllowAlways:
+ return TRUE;
+
+ default:
+ _ASSERTE(0);
+ return FALSE;
+ }
+ }
+ }
+
+ virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ X86Reg reg = (X86Reg)variationCode;
+ BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+
+ if (reg >= kR8)
+ {
+ rex |= REX_MODRM_REG_EXT;
+ reg = X86RegFromAMD64Reg(reg);
+ }
+
+ pOutBuffer[0] = rex;
+ pOutBuffer[1] = 0x8D;
+ pOutBuffer[2] = 0x05 | (reg << 3);
+ // Emit a RIP-relative lea of the label address. The fixedUpReference is the
+ // offset to the label from the current point and is stored as the 32-bit displacement.
+ *((__int32*)(3+pOutBuffer)) = (__int32)(fixedUpReference);
+ }
+};
+
+#endif // _TARGET_AMD64_
+
+#if defined(_TARGET_AMD64_)
+static BYTE gX64NearJumpSetup[sizeof(X64NearJumpSetup)];
+static BYTE gX64NearJumpExecute[sizeof(X64NearJumpExecute)];
+static BYTE gX64LeaRIP[sizeof(X64LeaRIP)];
+#endif
+
+static BYTE gX86NearJump[sizeof(X86NearJump)];
+static BYTE gX86CondJump[sizeof(X86CondJump)];
+static BYTE gX86Call[sizeof(X86Call)];
+static BYTE gX86PushImm32[sizeof(X86PushImm32)];
+
+/* static */ void StubLinkerCPU::Init()
+{
+ CONTRACTL
+ {
+ THROWS;
+ GC_NOTRIGGER;
+ INJECT_FAULT(COMPlusThrowOM(););
+ }
+ CONTRACTL_END;
+ new (gX86NearJump) X86NearJump();
+ new (gX86CondJump) X86CondJump( InstructionFormat::k8|InstructionFormat::k32);
+ new (gX86Call) X86Call();
+ new (gX86PushImm32) X86PushImm32(InstructionFormat::k32);
+
+#if defined(_TARGET_AMD64_)
+ new (gX64NearJumpSetup) X64NearJumpSetup();
+ new (gX64NearJumpExecute) X64NearJumpExecute();
+ new (gX64LeaRIP) X64LeaRIP();
+#endif
+}
+
+//---------------------------------------------------------------
+// Emits:
+// mov destReg, srcReg
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitMovRegReg(X86Reg destReg, X86Reg srcReg)
+{
+ STANDARD_VM_CONTRACT;
+
+#ifdef _TARGET_AMD64_
+ BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+
+ if (destReg >= kR8)
+ {
+ rex |= REX_MODRM_RM_EXT;
+ destReg = X86RegFromAMD64Reg(destReg);
+ }
+ if (srcReg >= kR8)
+ {
+ rex |= REX_MODRM_REG_EXT;
+ srcReg = X86RegFromAMD64Reg(srcReg);
+ }
+ Emit8(rex);
+#endif
+
+ Emit8(0x89);
+ Emit8(static_cast<UINT8>(0xC0 | (srcReg << 3) | destReg));
+}
+
+//---------------------------------------------------------------
+
+VOID StubLinkerCPU::X86EmitMovSPReg(X86Reg srcReg)
+{
+ STANDARD_VM_CONTRACT;
+ const X86Reg kESP = (X86Reg)4;
+ X86EmitMovRegReg(kESP, srcReg);
+}
+
+VOID StubLinkerCPU::X86EmitMovRegSP(X86Reg destReg)
+{
+ STANDARD_VM_CONTRACT;
+ const X86Reg kESP = (X86Reg)4;
+ X86EmitMovRegReg(destReg, kESP);
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// PUSH <reg32>
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitPushReg(X86Reg reg)
+{
+ STANDARD_VM_CONTRACT;
+
+#ifdef STUBLINKER_GENERATES_UNWIND_INFO
+ X86Reg origReg = reg;
+#endif
+
+#ifdef _TARGET_AMD64_
+ if (reg >= kR8)
+ {
+ Emit8(REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT | REX_OPCODE_REG_EXT);
+ reg = X86RegFromAMD64Reg(reg);
+ }
+#endif
+ Emit8(static_cast<UINT8>(0x50 + reg));
+
+#ifdef STUBLINKER_GENERATES_UNWIND_INFO
+ if (IsPreservedReg(origReg))
+ {
+ UnwindPushedReg(origReg);
+ }
+ else
+#endif
+ {
+ Push(sizeof(void*));
+ }
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// POP <reg32>
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitPopReg(X86Reg reg)
+{
+ STANDARD_VM_CONTRACT;
+
+#ifdef _TARGET_AMD64_
+ if (reg >= kR8)
+ {
+ Emit8(REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT | REX_OPCODE_REG_EXT);
+ reg = X86RegFromAMD64Reg(reg);
+ }
+#endif // _TARGET_AMD64_
+
+ Emit8(static_cast<UINT8>(0x58 + reg));
+ Pop(sizeof(void*));
+}
+
+//---------------------------------------------------------------
+// Emits:
+// PUSH <imm32>
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitPushImm32(UINT32 value)
+{
+ STANDARD_VM_CONTRACT;
+
+ Emit8(0x68);
+ Emit32(value);
+ Push(sizeof(void*));
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// PUSH <imm32>
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitPushImm32(CodeLabel &target)
+{
+ STANDARD_VM_CONTRACT;
+
+ EmitLabelRef(&target, reinterpret_cast<X86PushImm32&>(gX86PushImm32), 0);
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// PUSH <imm8>
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitPushImm8(BYTE value)
+{
+ STANDARD_VM_CONTRACT;
+
+ Emit8(0x6a);
+ Emit8(value);
+ Push(sizeof(void*));
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// PUSH <ptr>
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitPushImmPtr(LPVOID value WIN64_ARG(X86Reg tmpReg /*=kR10*/))
+{
+ STANDARD_VM_CONTRACT;
+
+#ifdef _TARGET_AMD64_
+ X86EmitRegLoad(tmpReg, (UINT_PTR) value);
+ X86EmitPushReg(tmpReg);
+#else
+ X86EmitPushImm32((UINT_PTR) value);
+#endif
+}
+
+//---------------------------------------------------------------
+// Emits:
+// XOR <reg32>,<reg32>
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitZeroOutReg(X86Reg reg)
+{
+ STANDARD_VM_CONTRACT;
+
+#ifdef _TARGET_AMD64_
+ // 32-bit results are zero-extended, so we only need the REX byte if
+ // it's an extended register.
+ if (reg >= kR8)
+ {
+ Emit8(REX_PREFIX_BASE | REX_MODRM_REG_EXT | REX_MODRM_RM_EXT);
+ reg = X86RegFromAMD64Reg(reg);
+ }
+#endif
+ Emit8(0x33);
+ Emit8(static_cast<UINT8>(0xc0 | (reg << 3) | reg));
+}
+
+//---------------------------------------------------------------
+// Emits:
+ // jmp reg
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitJumpReg(X86Reg reg)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+ }
+ CONTRACTL_END;
+
+ Emit8(0xff);
+ Emit8(static_cast<BYTE>(0xe0) | static_cast<BYTE>(reg));
+}
+
+//---------------------------------------------------------------
+// Emits:
+// CMP <reg32>,imm32
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitCmpRegImm32(X86Reg reg, INT32 imm32)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+ PRECONDITION((int) reg < NumX86Regs);
+ }
+ CONTRACTL_END;
+
+#ifdef _TARGET_AMD64_
+ BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+
+ if (reg >= kR8)
+ {
+ rex |= REX_OPCODE_REG_EXT;
+ reg = X86RegFromAMD64Reg(reg);
+ }
+ Emit8(rex);
+#endif
+
+ if (FitsInI1(imm32)) {
+ Emit8(0x83);
+ Emit8(static_cast<UINT8>(0xF8 | reg));
+ Emit8((INT8)imm32);
+ } else {
+ Emit8(0x81);
+ Emit8(static_cast<UINT8>(0xF8 | reg));
+ Emit32(imm32);
+ }
+}
+
+#ifdef _TARGET_AMD64_
+//---------------------------------------------------------------
+// Emits:
+// CMP [reg+offs], imm32
+// CMP [reg], imm32
+//---------------------------------------------------------------
+VOID StubLinkerCPU:: X86EmitCmpRegIndexImm32(X86Reg reg, INT32 offs, INT32 imm32)
+{
+ STANDARD_VM_CONTRACT;
+
+ BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+
+ if (reg >= kR8)
+ {
+ rex |= REX_OPCODE_REG_EXT;
+ reg = X86RegFromAMD64Reg(reg);
+ }
+ Emit8(rex);
+
+ X64EmitCmp32RegIndexImm32(reg, offs, imm32);
+}
+
+VOID StubLinkerCPU:: X64EmitCmp32RegIndexImm32(X86Reg reg, INT32 offs, INT32 imm32)
+#else // _TARGET_AMD64_
+VOID StubLinkerCPU:: X86EmitCmpRegIndexImm32(X86Reg reg, INT32 offs, INT32 imm32)
+#endif // _TARGET_AMD64_
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+ PRECONDITION((int) reg < NumX86Regs);
+ }
+ CONTRACTL_END;
+
+ //
+ // The binary representation of "cmp [mem], imm32" is:
+ //   1000-00sw  mod-111-r/m
+ //
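+ // Illustrative encoding (a sketch, not emitted verbatim): with a byte-sized
+ // displacement and an imm32 that does not fit in a byte, this emits the 81 /7
+ // form; e.g. reg=ECX, offs=8, imm32=0x12345678 -> 81 79 08 78 56 34 12
+ // (cmp dword ptr [ecx+8], 12345678h; the AMD64 wrapper prepends a REX prefix first).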
+
+ unsigned wBit = (FitsInI1(imm32) ? 0 : 1);
+ Emit8(static_cast<UINT8>(0x80 | wBit));
+
+ unsigned modBits;
+ if (offs == 0)
+ modBits = 0;
+ else if (FitsInI1(offs))
+ modBits = 1;
+ else
+ modBits = 2;
+
+ Emit8(static_cast<UINT8>((modBits << 6) | 0x38 | reg));
+
+ if (offs)
+ {
+ if (FitsInI1(offs))
+ Emit8((INT8)offs);
+ else
+ Emit32(offs);
+ }
+
+ if (FitsInI1(imm32))
+ Emit8((INT8)imm32);
+ else
+ Emit32(imm32);
+}
+
+//---------------------------------------------------------------
+// Emits:
+#if defined(_TARGET_AMD64_)
+// mov rax, <target>
+// add rsp, imm32
+// jmp rax
+#else
+// add rsp, imm32
+// jmp <target>
+#endif
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitTailcallWithESPAdjust(CodeLabel *pTarget, INT32 imm32)
+{
+ STANDARD_VM_CONTRACT;
+
+#if defined(_TARGET_AMD64_)
+ EmitLabelRef(pTarget, reinterpret_cast<X64NearJumpSetup&>(gX64NearJumpSetup), 0);
+ X86EmitAddEsp(imm32);
+ EmitLabelRef(pTarget, reinterpret_cast<X64NearJumpExecute&>(gX64NearJumpExecute), 0);
+#else
+ X86EmitAddEsp(imm32);
+ X86EmitNearJump(pTarget);
+#endif
+}
+
+//---------------------------------------------------------------
+// Emits:
+#if defined(_TARGET_AMD64_)
+// mov rax, <target>
+// pop reg
+// jmp rax
+#else
+// pop reg
+// jmp <target>
+#endif
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitTailcallWithSinglePop(CodeLabel *pTarget, X86Reg reg)
+{
+ STANDARD_VM_CONTRACT;
+
+#if defined(_TARGET_AMD64_)
+ EmitLabelRef(pTarget, reinterpret_cast<X64NearJumpSetup&>(gX64NearJumpSetup), 0);
+ X86EmitPopReg(reg);
+ EmitLabelRef(pTarget, reinterpret_cast<X64NearJumpExecute&>(gX64NearJumpExecute), 0);
+#else
+ X86EmitPopReg(reg);
+ X86EmitNearJump(pTarget);
+#endif
+}
+
+//---------------------------------------------------------------
+// Emits:
+// JMP <ofs8> or
+ // JMP <ofs32>
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitNearJump(CodeLabel *target)
+{
+ STANDARD_VM_CONTRACT;
+ EmitLabelRef(target, reinterpret_cast<X86NearJump&>(gX86NearJump), 0);
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// Jcc <ofs8> or
+// Jcc <ofs32>
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitCondJump(CodeLabel *target, X86CondCode::cc condcode)
+{
+ STANDARD_VM_CONTRACT;
+ EmitLabelRef(target, reinterpret_cast<X86CondJump&>(gX86CondJump), condcode);
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// call <ofs32>
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitCall(CodeLabel *target, int iArgBytes)
+{
+ STANDARD_VM_CONTRACT;
+
+ EmitLabelRef(target, reinterpret_cast<X86Call&>(gX86Call), 0);
+
+ INDEBUG(Emit8(0x90)); // Emit a nop after the call in debug so that
+ // we know that this is a call that can directly call
+ // managed code
+#ifndef _TARGET_AMD64_
+ Pop(iArgBytes);
+#endif // !_TARGET_AMD64_
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// ret n
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitReturn(WORD wArgBytes)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+#ifdef _TARGET_AMD64_
+ PRECONDITION(wArgBytes == 0);
+#endif
+
+ }
+ CONTRACTL_END;
+
+ if (wArgBytes == 0)
+ Emit8(0xc3);
+ else
+ {
+ Emit8(0xc2);
+ Emit16(wArgBytes);
+ }
+
+ Pop(wArgBytes);
+}
+
+#ifdef _TARGET_AMD64_
+//---------------------------------------------------------------
+// Emits:
+ // lea <reg>, [rip + <label>]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitLeaRIP(CodeLabel *target, X86Reg reg)
+{
+ STANDARD_VM_CONTRACT;
+ EmitLabelRef(target, reinterpret_cast<X64LeaRIP&>(gX64LeaRIP), reg);
+}
+#endif // _TARGET_AMD64_
+
+
+
+VOID StubLinkerCPU::X86EmitPushRegs(unsigned regSet)
+{
+ STANDARD_VM_CONTRACT;
+
+ for (X86Reg r = kEAX; r <= NumX86Regs; r = (X86Reg)(r+1))
+ if (regSet & (1U<<r))
+ {
+ X86EmitPushReg(r);
+ }
+}
+
+
+VOID StubLinkerCPU::X86EmitPopRegs(unsigned regSet)
+{
+ STANDARD_VM_CONTRACT;
+
+ for (X86Reg r = NumX86Regs; r >= kEAX; r = (X86Reg)(r-1))
+ if (regSet & (1U<<r))
+ X86EmitPopReg(r);
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// mov <dstreg>, [<srcreg> + <ofs>]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitIndexRegLoad(X86Reg dstreg,
+ X86Reg srcreg,
+ __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+ X86EmitOffsetModRM(0x8b, dstreg, srcreg, ofs);
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// mov [<dstreg> + <ofs>],<srcreg>
+//
+// Note: If you intend to use this to perform 64bit moves to a RSP
+// based offset, then this method may not work. Consider
+// using X86EmitIndexRegStoreRSP.
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitIndexRegStore(X86Reg dstreg,
+ __int32 ofs,
+ X86Reg srcreg)
+{
+ STANDARD_VM_CONTRACT;
+
+ if (dstreg != kESP_Unsafe)
+ X86EmitOffsetModRM(0x89, srcreg, dstreg, ofs);
+ else
+ X86EmitOp(0x89, srcreg, (X86Reg)kESP_Unsafe, ofs);
+}
+
+#if defined(_TARGET_AMD64_)
+//---------------------------------------------------------------
+// Emits:
+// mov [RSP + <ofs>],<srcreg>
+//
+ // It marks the instruction as 64-bit so that the processor
+ // performs an 8-byte data move to an RSP-based stack location.
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitIndexRegStoreRSP(__int32 ofs,
+ X86Reg srcreg)
+{
+ STANDARD_VM_CONTRACT;
+
+ X86EmitOp(0x89, srcreg, (X86Reg)kESP_Unsafe, ofs, (X86Reg)0, 0, k64BitOp);
+}
+
+//---------------------------------------------------------------
+// Emits:
+// mov [R12 + <ofs>],<srcreg>
+//
+ // It marks the instruction as 64-bit so that the processor
+ // performs an 8-byte data move to an R12-based stack location.
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitIndexRegStoreR12(__int32 ofs,
+ X86Reg srcreg)
+{
+ STANDARD_VM_CONTRACT;
+
+ X86EmitOp(0x89, srcreg, (X86Reg)kR12, ofs, (X86Reg)0, 0, k64BitOp);
+}
+#endif // defined(_TARGET_AMD64_)
+
+//---------------------------------------------------------------
+// Emits:
+// push dword ptr [<srcreg> + <ofs>]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitIndexPush(X86Reg srcreg, __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+
+ if(srcreg != kESP_Unsafe)
+ X86EmitOffsetModRM(0xff, (X86Reg)0x6, srcreg, ofs);
+ else
+ X86EmitOp(0xff,(X86Reg)0x6, srcreg, ofs);
+
+ Push(sizeof(void*));
+}
+
+//---------------------------------------------------------------
+// Emits:
+// push dword ptr [<baseReg> + <indexReg>*<scale> + <ofs>]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitBaseIndexPush(
+ X86Reg baseReg,
+ X86Reg indexReg,
+ __int32 scale,
+ __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+
+ X86EmitOffsetModRmSIB(0xff, (X86Reg)0x6, baseReg, indexReg, scale, ofs);
+ Push(sizeof(void*));
+}
+
+//---------------------------------------------------------------
+// Emits:
+// push dword ptr [ESP + <ofs>]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitSPIndexPush(__int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+
+ __int8 ofs8 = (__int8) ofs;
+ if (ofs == (__int32) ofs8)
+ {
+ // The offset can be expressed in a byte (can use the byte
+ // form of the push esp instruction)
+
+ BYTE code[] = {0xff, 0x74, 0x24, ofs8};
+ EmitBytes(code, sizeof(code));
+ }
+ else
+ {
+ // The offset requires 4 bytes (need to use the long form
+ // of the push esp instruction)
+
+ BYTE code[] = {0xff, 0xb4, 0x24, 0x0, 0x0, 0x0, 0x0};
+ *(__int32 *)(&code[3]) = ofs;
+ EmitBytes(code, sizeof(code));
+ }
+
+ Push(sizeof(void*));
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// pop dword ptr [<srcreg> + <ofs>]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitIndexPop(X86Reg srcreg, __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+
+ if(srcreg != kESP_Unsafe)
+ X86EmitOffsetModRM(0x8f, (X86Reg)0x0, srcreg, ofs);
+ else
+ X86EmitOp(0x8f,(X86Reg)0x0, srcreg, ofs);
+
+ Pop(sizeof(void*));
+}
+
+//---------------------------------------------------------------
+// Emits:
+ // lea <dstreg>, [<srcreg> + <ofs>]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitIndexLea(X86Reg dstreg, X86Reg srcreg, __int32 ofs)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+ PRECONDITION((int) dstreg < NumX86Regs);
+ PRECONDITION((int) srcreg < NumX86Regs);
+ }
+ CONTRACTL_END;
+
+ X86EmitOffsetModRM(0x8d, dstreg, srcreg, ofs);
+}
+
+#if defined(_TARGET_AMD64_)
+VOID StubLinkerCPU::X86EmitIndexLeaRSP(X86Reg dstreg, X86Reg srcreg, __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+
+ X86EmitOp(0x8d, dstreg, (X86Reg)kESP_Unsafe, ofs, (X86Reg)0, 0, k64BitOp);
+}
+#endif // defined(_TARGET_AMD64_)
+
+//---------------------------------------------------------------
+// Emits:
+// sub esp, IMM
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitSubEsp(INT32 imm32)
+{
+ STANDARD_VM_CONTRACT;
+
+ if (imm32 < 0x1000-100)
+ {
+ // As long as the esp adjustment is less than one page (minus a small
+ // safety fudge factor), we can just bump esp.
+ X86EmitSubEspWorker(imm32);
+ }
+ else
+ {
+ // Otherwise, must touch at least one byte for each page.
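+ // For example (illustrative only), imm32 == 0x2500 unrolls to:
+ //   sub esp, 0xFFC / push eax / sub esp, 0xFFC / push eax / sub esp, 0x4FC / push eax
+ // which adjusts esp by 0x2500 in total while touching each newly committed page.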
+ while (imm32 >= 0x1000)
+ {
+
+ X86EmitSubEspWorker(0x1000-4);
+ X86EmitPushReg(kEAX);
+
+ imm32 -= 0x1000;
+ }
+ if (imm32 < 500)
+ {
+ X86EmitSubEspWorker(imm32);
+ }
+ else
+ {
+ // If the remainder is large, touch the last byte - again,
+ // as a fudge factor.
+ X86EmitSubEspWorker(imm32-4);
+ X86EmitPushReg(kEAX);
+ }
+ }
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// sub esp, IMM
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitSubEspWorker(INT32 imm32)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+
+ // On Win32, stacks must be faulted in one page at a time.
+ PRECONDITION(imm32 < 0x1000);
+ }
+ CONTRACTL_END;
+
+ if (!imm32)
+ {
+ // nop
+ }
+ else
+ {
+ X86_64BitOperands();
+
+ if (FitsInI1(imm32))
+ {
+ Emit16(0xec83);
+ Emit8((INT8)imm32);
+ }
+ else
+ {
+ Emit16(0xec81);
+ Emit32(imm32);
+ }
+
+ Push(imm32);
+ }
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// add esp, IMM
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitAddEsp(INT32 imm32)
+{
+ STANDARD_VM_CONTRACT;
+
+ if (!imm32)
+ {
+ // nop
+ }
+ else
+ {
+ X86_64BitOperands();
+
+ if (FitsInI1(imm32))
+ {
+ Emit16(0xc483);
+ Emit8((INT8)imm32);
+ }
+ else
+ {
+ Emit16(0xc481);
+ Emit32(imm32);
+ }
+ }
+ Pop(imm32);
+}
+
+VOID StubLinkerCPU::X86EmitAddReg(X86Reg reg, INT32 imm32)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+ PRECONDITION((int) reg < NumX86Regs);
+ }
+ CONTRACTL_END;
+
+ if (imm32 == 0)
+ return;
+
+#ifdef _TARGET_AMD64_
+ BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+
+ if (reg >= kR8)
+ {
+ rex |= REX_OPCODE_REG_EXT;
+ reg = X86RegFromAMD64Reg(reg);
+ }
+ Emit8(rex);
+#endif
+
+ if (FitsInI1(imm32)) {
+ Emit8(0x83);
+ Emit8(static_cast<UINT8>(0xC0 | reg));
+ Emit8(static_cast<UINT8>(imm32));
+ } else {
+ Emit8(0x81);
+ Emit8(static_cast<UINT8>(0xC0 | reg));
+ Emit32(imm32);
+ }
+}
+
+//---------------------------------------------------------------
+// Emits: add destReg, srcReg
+//---------------------------------------------------------------
+
+VOID StubLinkerCPU::X86EmitAddRegReg(X86Reg destReg, X86Reg srcReg)
+{
+ STANDARD_VM_CONTRACT;
+
+ X86EmitR2ROp(0x01, srcReg, destReg);
+}
+
+
+
+
+VOID StubLinkerCPU::X86EmitSubReg(X86Reg reg, INT32 imm32)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+ PRECONDITION((int) reg < NumX86Regs);
+ }
+ CONTRACTL_END;
+
+#ifdef _TARGET_AMD64_
+ BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+
+ if (reg >= kR8)
+ {
+ rex |= REX_OPCODE_REG_EXT;
+ reg = X86RegFromAMD64Reg(reg);
+ }
+ Emit8(rex);
+#endif
+
+ if (FitsInI1(imm32)) {
+ Emit8(0x83);
+ Emit8(static_cast<UINT8>(0xE8 | reg));
+ Emit8(static_cast<UINT8>(imm32));
+ } else {
+ Emit8(0x81);
+ Emit8(static_cast<UINT8>(0xE8 | reg));
+ Emit32(imm32);
+ }
+}
+
+//---------------------------------------------------------------
+// Emits: sub destReg, srcReg
+//---------------------------------------------------------------
+
+VOID StubLinkerCPU::X86EmitSubRegReg(X86Reg destReg, X86Reg srcReg)
+{
+ STANDARD_VM_CONTRACT;
+
+ X86EmitR2ROp(0x29, srcReg, destReg);
+}
+
+#if defined(_TARGET_AMD64_)
+
+//---------------------------------------------------------------
+// movdqa destXmmreg, srcXmmReg
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X64EmitMovXmmXmm(X86Reg destXmmreg, X86Reg srcXmmReg)
+{
+ STANDARD_VM_CONTRACT;
+ // There are several instructions that could be used to move xmm registers; movaps is
+ // what the C++ compiler uses, so let's use it here too.
+ X86EmitR2ROp(X86_INSTR_MOVAPS_R_RM, destXmmreg, srcXmmReg, k32BitOp);
+}
+
+//---------------------------------------------------------------
+// movdqa XmmN, [baseReg + offset]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X64EmitMovdqaFromMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+ X64EmitMovXmmWorker(0x66, 0x6F, Xmmreg, baseReg, ofs);
+}
+
+//---------------------------------------------------------------
+// movdqa [baseReg + offset], XmmN
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X64EmitMovdqaToMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+ X64EmitMovXmmWorker(0x66, 0x7F, Xmmreg, baseReg, ofs);
+}
+
+//---------------------------------------------------------------
+// movsd XmmN, [baseReg + offset]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X64EmitMovSDFromMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+ X64EmitMovXmmWorker(0xF2, 0x10, Xmmreg, baseReg, ofs);
+}
+
+//---------------------------------------------------------------
+// movsd [baseReg + offset], XmmN
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X64EmitMovSDToMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+ X64EmitMovXmmWorker(0xF2, 0x11, Xmmreg, baseReg, ofs);
+}
+
+//---------------------------------------------------------------
+// movss XmmN, [baseReg + offset]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X64EmitMovSSFromMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+ X64EmitMovXmmWorker(0xF3, 0x10, Xmmreg, baseReg, ofs);
+}
+
+//---------------------------------------------------------------
+// movss [baseReg + offset], XmmN
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X64EmitMovSSToMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+ X64EmitMovXmmWorker(0xF3, 0x11, Xmmreg, baseReg, ofs);
+}
+
+//---------------------------------------------------------------
+// Helper method for emitting of XMM from/to memory moves
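+ // For example (a sketch, assuming kXMM1 and kECX both encode as register 1):
+ // prefix=0xF2, opcode=0x10, Xmmreg=XMM1, baseReg=RCX, ofs=8 produces
+ // F2 0F 10 49 08, i.e. movsd xmm1, qword ptr [rcx+8].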
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X64EmitMovXmmWorker(BYTE prefix, BYTE opcode, X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+
+ BYTE codeBuffer[10];
+ unsigned int nBytes = 0;
+
+ // Set up the legacy prefix for the SSE move (0x66/0xF2/0xF3)
+ codeBuffer[nBytes++] = prefix;
+
+ // By default, assume we don't have to emit the REX byte.
+ bool fEmitRex = false;
+
+ BYTE rex = REX_PREFIX_BASE;
+
+ if (baseReg >= kR8)
+ {
+ rex |= REX_MODRM_RM_EXT;
+ baseReg = X86RegFromAMD64Reg(baseReg);
+ fEmitRex = true;
+ }
+ if (Xmmreg >= kXMM8)
+ {
+ rex |= REX_MODRM_REG_EXT;
+ Xmmreg = X86RegFromAMD64Reg(Xmmreg);
+ fEmitRex = true;
+ }
+
+ if (fEmitRex == true)
+ {
+ codeBuffer[nBytes++] = rex;
+ }
+
+ // Next, specify the two byte opcode - first byte is always 0x0F.
+ codeBuffer[nBytes++] = 0x0F;
+ codeBuffer[nBytes++] = opcode;
+
+ BYTE modrm = static_cast<BYTE>((Xmmreg << 3) | baseReg);
+ bool fOffsetFitsInSignedByte = FitsInI1(ofs)?true:false;
+
+ if (fOffsetFitsInSignedByte)
+ codeBuffer[nBytes++] = 0x40|modrm;
+ else
+ codeBuffer[nBytes++] = 0x80|modrm;
+
+ // If we are dealing with RSP or R12 as the baseReg, we need to emit the SIB byte.
+ if ((baseReg == (X86Reg)4 /*kRSP*/) || (baseReg == kR12))
+ {
+ codeBuffer[nBytes++] = 0x24;
+ }
+
+ // Finally, specify the offset
+ if (fOffsetFitsInSignedByte)
+ {
+ codeBuffer[nBytes++] = (BYTE)ofs;
+ }
+ else
+ {
+ *((__int32*)(codeBuffer+nBytes)) = ofs;
+ nBytes += 4;
+ }
+
+ _ASSERTE(nBytes <= _countof(codeBuffer));
+
+ // Lastly, emit the encoded bytes
+ EmitBytes(codeBuffer, nBytes);
+}
+
+#endif // defined(_TARGET_AMD64_)
+
+//---------------------------------------------------------------
+// Emits a MOD/RM for accessing a dword at [<indexreg> + ofs32]
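+ // For example (illustrative): opcode=0x8B, opcodereg=EDX, indexreg=ESI, ofs=0x20
+ // emits 8B 56 20 (mov edx, [esi+20h]) on x86; on AMD64 a REX.W prefix (48h) is
+ // prepended, making it a 64-bit load.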
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitOffsetModRM(BYTE opcode, X86Reg opcodereg, X86Reg indexreg, __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+
+ BYTE codeBuffer[7];
+ BYTE* code = codeBuffer;
+ int nBytes = 0;
+#ifdef _TARGET_AMD64_
+ code++;
+ //
+ // code points to base X86 instruction,
+ // codeBuffer points to full AMD64 instruction
+ //
+ BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+
+ if (indexreg >= kR8)
+ {
+ rex |= REX_MODRM_RM_EXT;
+ indexreg = X86RegFromAMD64Reg(indexreg);
+ }
+ if (opcodereg >= kR8)
+ {
+ rex |= REX_MODRM_REG_EXT;
+ opcodereg = X86RegFromAMD64Reg(opcodereg);
+ }
+
+ nBytes++;
+ code[-1] = rex;
+#endif
+ code[0] = opcode;
+ nBytes++;
+ BYTE modrm = static_cast<BYTE>((opcodereg << 3) | indexreg);
+ if (ofs == 0 && indexreg != kEBP)
+ {
+ code[1] = modrm;
+ nBytes++;
+ EmitBytes(codeBuffer, nBytes);
+ }
+ else if (FitsInI1(ofs))
+ {
+ code[1] = 0x40|modrm;
+ code[2] = (BYTE)ofs;
+ nBytes += 2;
+ EmitBytes(codeBuffer, nBytes);
+ }
+ else
+ {
+ code[1] = 0x80|modrm;
+ *((__int32*)(2+code)) = ofs;
+ nBytes += 5;
+ EmitBytes(codeBuffer, nBytes);
+ }
+}
+
+//---------------------------------------------------------------
+// Emits a MOD/RM for accessing a dword at [<baseReg> + <indexReg>*<scale> + ofs32]
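+ // For example (illustrative, x86 only since the AMD64 path asserts NYI):
+ // opcode=0xFF, opcodeOrReg=6, baseReg=EAX, indexReg=EDX, scale=4, ofs=8
+ // emits FF 74 90 08, i.e. push dword ptr [eax+edx*4+8].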
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitOffsetModRmSIB(BYTE opcode, X86Reg opcodeOrReg, X86Reg baseReg, X86Reg indexReg, __int32 scale, __int32 ofs)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+ PRECONDITION(scale == 1 || scale == 2 || scale == 4 || scale == 8);
+ PRECONDITION(indexReg != kESP_Unsafe);
+ }
+ CONTRACTL_END;
+
+ BYTE codeBuffer[8];
+ BYTE* code = codeBuffer;
+ int nBytes = 0;
+
+#ifdef _TARGET_AMD64_
+ _ASSERTE(!"NYI");
+#endif
+ code[0] = opcode;
+ nBytes++;
+
+ BYTE scaleEnc = 0;
+ switch(scale)
+ {
+ case 1: scaleEnc = 0; break;
+ case 2: scaleEnc = 1; break;
+ case 4: scaleEnc = 2; break;
+ case 8: scaleEnc = 3; break;
+ default: _ASSERTE(!"Unexpected");
+ }
+
+ BYTE sib = static_cast<BYTE>((scaleEnc << 6) | (indexReg << 3) | baseReg);
+
+ if (FitsInI1(ofs))
+ {
+ code[1] = static_cast<BYTE>(0x44 | (opcodeOrReg << 3));
+ code[2] = sib;
+ code[3] = (BYTE)ofs;
+ nBytes += 3;
+ EmitBytes(codeBuffer, nBytes);
+ }
+ else
+ {
+ code[1] = static_cast<BYTE>(0x84 | (opcodeOrReg << 3));
+ code[2] = sib;
+ *(__int32*)(&code[3]) = ofs;
+ nBytes += 6;
+ EmitBytes(codeBuffer, nBytes);
+ }
+}
+
+
+
+VOID StubLinkerCPU::X86EmitRegLoad(X86Reg reg, UINT_PTR imm)
+{
+ STANDARD_VM_CONTRACT;
+
+ if (!imm)
+ {
+ X86EmitZeroOutReg(reg);
+ return;
+ }
+
+ UINT cbimm = sizeof(void*);
+
+#ifdef _TARGET_AMD64_
+ // amd64 zero-extends all 32-bit operations. If the immediate will fit in
+ // 32 bits, use the smaller encoding.
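+ // For example (illustrative): X86EmitRegLoad(kEAX, 0x12345678) emits
+ // B8 78 56 34 12 (mov eax, 12345678h), while an immediate above 4GB or a
+ // target of r8-r15 emits a REX-prefixed 10-byte mov (e.g. 48 B8 imm64).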
+
+ if (reg >= kR8 || !FitsInU4(imm))
+ {
+ BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+ if (reg >= kR8)
+ {
+ rex |= REX_MODRM_RM_EXT;
+ reg = X86RegFromAMD64Reg(reg);
+ }
+ Emit8(rex);
+ }
+ else
+ {
+ // amd64 is little endian, so the &imm below will correctly read off
+ // the low 4 bytes.
+ cbimm = sizeof(UINT32);
+ }
+#endif // _TARGET_AMD64_
+ Emit8(0xB8 | (BYTE)reg);
+ EmitBytes((BYTE*)&imm, cbimm);
+}
+
+
+//---------------------------------------------------------------
+// Emits the most efficient form of the operation:
+//
+// opcode altreg, [basereg + scaledreg*scale + ofs]
+//
+// or
+//
+// opcode [basereg + scaledreg*scale + ofs], altreg
+//
+// (the opcode determines which comes first.)
+//
+//
+// Limitations:
+//
+// scale must be 0,1,2,4 or 8.
+// if scale == 0, scaledreg is ignored.
+// basereg and altreg may be equal to 4 (ESP) but scaledreg cannot
+// for some opcodes, "altreg" may actually select an operation
+// rather than a second register argument.
+// if basereg is EBP, scale must be 0.
+//
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitOp(WORD opcode,
+ X86Reg altreg,
+ X86Reg basereg,
+ __int32 ofs /*=0*/,
+ X86Reg scaledreg /*=0*/,
+ BYTE scale /*=0*/
+ AMD64_ARG(X86OperandSize OperandSize /*= k32BitOp*/))
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+
+ // All 2-byte opcodes start with 0x0f.
+ PRECONDITION(!(opcode >> 8) || (opcode & 0xff) == 0x0f);
+
+ PRECONDITION(scale == 0 || scale == 1 || scale == 2 || scale == 4 || scale == 8);
+ PRECONDITION(scaledreg != (X86Reg)4);
+ PRECONDITION(!(basereg == kEBP && scale != 0));
+
+ PRECONDITION( ((UINT)basereg) < NumX86Regs );
+ PRECONDITION( ((UINT)scaledreg) < NumX86Regs );
+ PRECONDITION( ((UINT)altreg) < NumX86Regs );
+ }
+ CONTRACTL_END;
+
+#ifdef _TARGET_AMD64_
+ if ( k64BitOp == OperandSize
+ || altreg >= kR8
+ || basereg >= kR8
+ || scaledreg >= kR8)
+ {
+ BYTE rex = REX_PREFIX_BASE;
+
+ if (k64BitOp == OperandSize)
+ rex |= REX_OPERAND_SIZE_64BIT;
+
+ if (altreg >= kR8)
+ {
+ rex |= REX_MODRM_REG_EXT;
+ altreg = X86RegFromAMD64Reg(altreg);
+ }
+
+ if (basereg >= kR8)
+ {
+ // basereg might be in the modrm or sib fields. This will be
+ // decided below, but the encodings are the same either way.
+ _ASSERTE(REX_SIB_BASE_EXT == REX_MODRM_RM_EXT);
+ rex |= REX_SIB_BASE_EXT;
+ basereg = X86RegFromAMD64Reg(basereg);
+ }
+
+ if (scaledreg >= kR8)
+ {
+ rex |= REX_SIB_INDEX_EXT;
+ scaledreg = X86RegFromAMD64Reg(scaledreg);
+ }
+
+ Emit8(rex);
+ }
+#endif // _TARGET_AMD64_
+
+ BYTE modrmbyte = static_cast<BYTE>(altreg << 3);
+ BOOL fNeedSIB = FALSE;
+ BYTE SIBbyte = 0;
+ BYTE ofssize;
+ BYTE scaleselect= 0;
+
+ if (ofs == 0 && basereg != kEBP)
+ {
+ ofssize = 0; // Don't change this constant!
+ }
+ else if (FitsInI1(ofs))
+ {
+ ofssize = 1; // Don't change this constant!
+ }
+ else
+ {
+ ofssize = 2; // Don't change this constant!
+ }
+
+ switch (scale)
+ {
+ case 1: scaleselect = 0; break;
+ case 2: scaleselect = 1; break;
+ case 4: scaleselect = 2; break;
+ case 8: scaleselect = 3; break;
+ }
+
+ if (scale == 0 && basereg != (X86Reg)4 /*ESP*/)
+ {
+ // [basereg + ofs]
+ modrmbyte |= basereg | (ofssize << 6);
+ }
+ else if (scale == 0)
+ {
+ // [esp + ofs]
+ _ASSERTE(basereg == (X86Reg)4);
+ fNeedSIB = TRUE;
+ SIBbyte = 0044;
+
+ modrmbyte |= 4 | (ofssize << 6);
+ }
+ else
+ {
+
+ //[basereg + scaledreg*scale + ofs]
+
+ modrmbyte |= 0004 | (ofssize << 6);
+ fNeedSIB = TRUE;
+ SIBbyte = static_cast<BYTE>((scaleselect << 6) | (scaledreg << 3) | basereg);
+
+ }
+
+ //Some sanity checks:
+ _ASSERTE(!(fNeedSIB && basereg == kEBP)); // EBP not valid as a SIB base register.
+ _ASSERTE(!( (!fNeedSIB) && basereg == (X86Reg)4 )) ; // ESP addressing requires SIB byte
+
+ Emit8((BYTE)opcode);
+
+ if (opcode >> 8)
+ Emit8(opcode >> 8);
+
+ Emit8(modrmbyte);
+ if (fNeedSIB)
+ {
+ Emit8(SIBbyte);
+ }
+ switch (ofssize)
+ {
+ case 0: break;
+ case 1: Emit8( (__int8)ofs ); break;
+ case 2: Emit32( ofs ); break;
+ default: _ASSERTE(!"Can't get here.");
+ }
+}
+
+
+// Emits
+//
+// opcode altreg, modrmreg
+//
+// or
+//
+// opcode modrmreg, altreg
+//
+// (the opcode determines which one comes first)
+//
+// For single-operand opcodes, "altreg" actually selects
+// an operation rather than a register.
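+ // For example (illustrative): X86EmitR2ROp(0x8b, kEAX, kECX) emits 8B C1
+ // (mov eax, ecx) on x86; on AMD64 the default 64-bit operand size adds a
+ // REX.W prefix, giving 48 8B C1 (mov rax, rcx).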
+
+VOID StubLinkerCPU::X86EmitR2ROp (WORD opcode,
+ X86Reg altreg,
+ X86Reg modrmreg
+ AMD64_ARG(X86OperandSize OperandSize /*= k64BitOp*/)
+ )
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+
+ // All 2-byte opcodes start with 0x0f.
+ PRECONDITION(!(opcode >> 8) || (opcode & 0xff) == 0x0f);
+
+ PRECONDITION( ((UINT)altreg) < NumX86Regs );
+ PRECONDITION( ((UINT)modrmreg) < NumX86Regs );
+ }
+ CONTRACTL_END;
+
+#ifdef _TARGET_AMD64_
+ BYTE rex = 0;
+
+ if (modrmreg >= kR8)
+ {
+ rex |= REX_MODRM_RM_EXT;
+ modrmreg = X86RegFromAMD64Reg(modrmreg);
+ }
+
+ if (altreg >= kR8)
+ {
+ rex |= REX_MODRM_REG_EXT;
+ altreg = X86RegFromAMD64Reg(altreg);
+ }
+
+ if (k64BitOp == OperandSize)
+ rex |= REX_OPERAND_SIZE_64BIT;
+
+ if (rex)
+ Emit8(REX_PREFIX_BASE | rex);
+#endif // _TARGET_AMD64_
+
+ Emit8((BYTE)opcode);
+
+ if (opcode >> 8)
+ Emit8(opcode >> 8);
+
+ Emit8(static_cast<UINT8>(0300 | (altreg << 3) | modrmreg));
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// op altreg, [esp+ofs]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitEspOffset(BYTE opcode,
+ X86Reg altreg,
+ __int32 ofs
+ AMD64_ARG(X86OperandSize OperandSize /*= k64BitOp*/)
+ )
+{
+ STANDARD_VM_CONTRACT;
+
+ BYTE codeBuffer[8];
+ BYTE *code = codeBuffer;
+ int nBytes;
+
+#ifdef _TARGET_AMD64_
+ BYTE rex = 0;
+
+ if (k64BitOp == OperandSize)
+ rex |= REX_OPERAND_SIZE_64BIT;
+
+ if (altreg >= kR8)
+ {
+ rex |= REX_MODRM_REG_EXT;
+ altreg = X86RegFromAMD64Reg(altreg);
+ }
+
+ if (rex)
+ {
+ *code = (REX_PREFIX_BASE | rex);
+ code++;
+ nBytes = 1;
+ }
+ else
+#endif // _TARGET_AMD64_
+ {
+ nBytes = 0;
+ }
+
+ code[0] = opcode;
+ BYTE modrm = static_cast<BYTE>((altreg << 3) | 004);
+ if (ofs == 0)
+ {
+ code[1] = modrm;
+ code[2] = 0044;
+ EmitBytes(codeBuffer, 3 + nBytes);
+ }
+ else if (FitsInI1(ofs))
+ {
+ code[1] = 0x40|modrm;
+ code[2] = 0044;
+ code[3] = (BYTE)ofs;
+ EmitBytes(codeBuffer, 4 + nBytes);
+ }
+ else
+ {
+ code[1] = 0x80|modrm;
+ code[2] = 0044;
+ *((__int32*)(3+code)) = ofs;
+ EmitBytes(codeBuffer, 7 + nBytes);
+ }
+
+}
+
+//---------------------------------------------------------------
+
+VOID StubLinkerCPU::X86EmitPushEBPframe()
+{
+ STANDARD_VM_CONTRACT;
+
+ // push ebp
+ X86EmitPushReg(kEBP);
+ // mov ebp,esp
+ X86EmitMovRegSP(kEBP);
+}
+
+#ifdef _DEBUG
+//---------------------------------------------------------------
+// Emits:
+// mov <reg32>,0xcccccccc
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitDebugTrashReg(X86Reg reg)
+{
+ STANDARD_VM_CONTRACT;
+
+#ifdef _TARGET_AMD64_
+ BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+
+ if (reg >= kR8)
+ {
+ rex |= REX_OPCODE_REG_EXT;
+ reg = X86RegFromAMD64Reg(reg);
+ }
+ Emit8(rex);
+ Emit8(0xb8|reg);
+ Emit64(0xcccccccccccccccc);
+#else
+ Emit8(static_cast<UINT8>(0xb8 | reg));
+ Emit32(0xcccccccc);
+#endif
+}
+#endif //_DEBUG
+
+
+ // Get X86Reg indexes of argument registers based on offset into ArgumentRegisters
+X86Reg GetX86ArgumentRegisterFromOffset(size_t ofs)
+{
+ CONTRACT(X86Reg)
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+
+ }
+ CONTRACT_END;
+
+ #define ARGUMENT_REGISTER(reg) if (ofs == offsetof(ArgumentRegisters, reg)) RETURN k##reg ;
+ ENUM_ARGUMENT_REGISTERS();
+ #undef ARGUMENT_REGISTER
+
+ _ASSERTE(0);//Can't get here.
+ RETURN kEBP;
+}
+
+
+#ifdef _TARGET_AMD64_
+static const X86Reg c_argRegs[] = {
+ #define ARGUMENT_REGISTER(regname) k##regname,
+ ENUM_ARGUMENT_REGISTERS()
+ #undef ARGUMENT_REGISTER
+};
+#endif
+
+
+#ifndef CROSSGEN_COMPILE
+
+#if defined(_DEBUG) && (defined(_TARGET_AMD64_) || defined(_TARGET_X86_)) && !defined(FEATURE_PAL)
+void StubLinkerCPU::EmitJITHelperLoggingThunk(PCODE pJitHelper, LPVOID helperFuncCount)
+{
+ STANDARD_VM_CONTRACT;
+
+ VMHELPCOUNTDEF* pHelperFuncCount = (VMHELPCOUNTDEF*)helperFuncCount;
+/*
+ push rcx
+ mov rcx, &(pHelperFuncCount->count)
+ lock inc [rcx]
+ pop rcx
+#ifdef _TARGET_AMD64_
+ mov rax, <pJitHelper>
+ jmp rax
+#else
+ jmp <pJitHelper>
+#endif
+*/
+
+ // push rcx
+ // mov rcx, &(pHelperFuncCount->count)
+ X86EmitPushReg(kECX);
+ X86EmitRegLoad(kECX, (UINT_PTR)(&(pHelperFuncCount->count)));
+
+ // lock inc [rcx]
+ BYTE lock_inc_RCX[] = { 0xf0, 0xff, 0x01 };
+ EmitBytes(lock_inc_RCX, sizeof(lock_inc_RCX));
+
+#if defined(_TARGET_AMD64_)
+ // mov rax, <pJitHelper>
+ // pop rcx
+ // jmp rax
+#else
+ // pop rcx
+ // jmp <pJitHelper>
+#endif
+ X86EmitTailcallWithSinglePop(NewExternalCodeLabel(pJitHelper), kECX);
+}
+#endif // _DEBUG && (_TARGET_AMD64_ || _TARGET_X86_) && !FEATURE_PAL
+
+#ifndef FEATURE_IMPLICIT_TLS
+//---------------------------------------------------------------
+// Emit code to store the current Thread structure in dstreg
+// preservedRegSet is a set of registers to be preserved
+// TRASHES EAX, EDX, ECX unless they are in preservedRegSet.
+// RESULTS dstreg = current Thread
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitTLSFetch(DWORD idx, X86Reg dstreg, unsigned preservedRegSet)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+
+ // It doesn't make sense to have the destination register be preserved
+ PRECONDITION((preservedRegSet & (1<<dstreg)) == 0);
+ AMD64_ONLY(PRECONDITION(dstreg < 8)); // code below doesn't support high registers
+ }
+ CONTRACTL_END;
+
+ TLSACCESSMODE mode = GetTLSAccessMode(idx);
+
+#ifdef _DEBUG
+ {
+ static BOOL f = TRUE;
+ f = !f;
+ if (f)
+ {
+ mode = TLSACCESS_GENERIC;
+ }
+ }
+#endif
+
+ switch (mode)
+ {
+ case TLSACCESS_WNT:
+ {
+ unsigned __int32 tlsofs = offsetof(TEB, TlsSlots) + (idx * sizeof(void*));
+#ifdef _TARGET_AMD64_
+ BYTE code[] = {0x65,0x48,0x8b,0x04,0x25}; // mov dstreg, qword ptr gs:[IMM32]
+ static const int regByteIndex = 3;
+#elif defined(_TARGET_X86_)
+ BYTE code[] = {0x64,0x8b,0x05}; // mov dstreg, dword ptr fs:[IMM32]
+ static const int regByteIndex = 2;
+#endif
+ code[regByteIndex] |= (dstreg << 3);
+
+ EmitBytes(code, sizeof(code));
+ Emit32(tlsofs);
+ }
+ break;
+
+ case TLSACCESS_GENERIC:
+
+ X86EmitPushRegs(preservedRegSet & ((1<<kEAX)|(1<<kEDX)|(1<<kECX)));
+
+ X86EmitPushImm32(idx);
+#ifdef _TARGET_AMD64_
+ X86EmitPopReg (kECX); // arg in reg
+#endif
+
+ // call TLSGetValue
+ X86EmitCall(NewExternalCodeLabel((LPVOID) TlsGetValue), sizeof(void*));
+
+ // mov dstreg, eax
+ X86EmitMovRegReg(dstreg, kEAX);
+
+ X86EmitPopRegs(preservedRegSet & ((1<<kEAX)|(1<<kEDX)|(1<<kECX)));
+
+ break;
+
+ default:
+ _ASSERTE(0);
+ }
+
+#ifdef _DEBUG
+ // Trash caller saved regs that we were not told to preserve, and that aren't the dstreg.
+ preservedRegSet |= 1<<dstreg;
+ if (!(preservedRegSet & (1<<kEAX)))
+ X86EmitDebugTrashReg(kEAX);
+ if (!(preservedRegSet & (1<<kEDX)))
+ X86EmitDebugTrashReg(kEDX);
+ if (!(preservedRegSet & (1<<kECX)))
+ X86EmitDebugTrashReg(kECX);
+#endif
+
+}
+#endif // FEATURE_IMPLICIT_TLS
+
+VOID StubLinkerCPU::X86EmitCurrentThreadFetch(X86Reg dstreg, unsigned preservedRegSet)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+
+ // It doesn't make sense to have the destination register be preserved
+ PRECONDITION((preservedRegSet & (1<<dstreg)) == 0);
+ AMD64_ONLY(PRECONDITION(dstreg < 8)); // code below doesn't support high registers
+ }
+ CONTRACTL_END;
+
+#ifdef FEATURE_IMPLICIT_TLS
+
+ X86EmitPushRegs(preservedRegSet & ((1<<kEAX)|(1<<kEDX)|(1<<kECX)));
+
+ //TODO: Inline the instruction instead of a call
+ // call GetThread
+ X86EmitCall(NewExternalCodeLabel((LPVOID) GetThread), sizeof(void*));
+
+ // mov dstreg, eax
+ X86EmitMovRegReg(dstreg, kEAX);
+
+ X86EmitPopRegs(preservedRegSet & ((1<<kEAX)|(1<<kEDX)|(1<<kECX)));
+
+#ifdef _DEBUG
+ // Trash caller saved regs that we were not told to preserve, and that aren't the dstreg.
+ preservedRegSet |= 1<<dstreg;
+ if (!(preservedRegSet & (1<<kEAX)))
+ X86EmitDebugTrashReg(kEAX);
+ if (!(preservedRegSet & (1<<kEDX)))
+ X86EmitDebugTrashReg(kEDX);
+ if (!(preservedRegSet & (1<<kECX)))
+ X86EmitDebugTrashReg(kECX);
+#endif // _DEBUG
+
+#else // FEATURE_IMPLICIT_TLS
+
+ X86EmitTLSFetch(GetThreadTLSIndex(), dstreg, preservedRegSet);
+
+#endif // FEATURE_IMPLICIT_TLS
+
+}
+
+VOID StubLinkerCPU::X86EmitCurrentAppDomainFetch(X86Reg dstreg, unsigned preservedRegSet)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+
+ // It doesn't make sense to have the destination register be preserved
+ PRECONDITION((preservedRegSet & (1<<dstreg)) == 0);
+ AMD64_ONLY(PRECONDITION(dstreg < 8)); // code below doesn't support high registers
+ }
+ CONTRACTL_END;
+
+#ifdef FEATURE_IMPLICIT_TLS
+ X86EmitPushRegs(preservedRegSet & ((1<<kEAX)|(1<<kEDX)|(1<<kECX)));
+
+ //TODO: Inline the instruction instead of a call
+ // call GetAppDomain
+ X86EmitCall(NewExternalCodeLabel((LPVOID) GetAppDomain), sizeof(void*));
+
+ // mov dstreg, eax
+ X86EmitMovRegReg(dstreg, kEAX);
+
+ X86EmitPopRegs(preservedRegSet & ((1<<kEAX)|(1<<kEDX)|(1<<kECX)));
+
+#ifdef _DEBUG
+ // Trash caller saved regs that we were not told to preserve, and that aren't the dstreg.
+ preservedRegSet |= 1<<dstreg;
+ if (!(preservedRegSet & (1<<kEAX)))
+ X86EmitDebugTrashReg(kEAX);
+ if (!(preservedRegSet & (1<<kEDX)))
+ X86EmitDebugTrashReg(kEDX);
+ if (!(preservedRegSet & (1<<kECX)))
+ X86EmitDebugTrashReg(kECX);
+#endif
+
+#else // FEATURE_IMPLICIT_TLS
+
+ X86EmitTLSFetch(GetAppDomainTLSIndex(), dstreg, preservedRegSet);
+
+#endif // FEATURE_IMPLICIT_TLS
+}
+
+#ifdef _TARGET_X86_
+
+#ifdef PROFILING_SUPPORTED
+VOID StubLinkerCPU::EmitProfilerComCallProlog(TADDR pFrameVptr, X86Reg regFrame)
+{
+ STANDARD_VM_CONTRACT;
+
+ if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr())
+ {
+ // Load the methoddesc into ECX (UMThkCallFrame->m_pvDatum->m_pMD)
+ X86EmitIndexRegLoad(kECX, regFrame, UMThkCallFrame::GetOffsetOfDatum());
+ X86EmitIndexRegLoad(kECX, kECX, UMEntryThunk::GetOffsetOfMethodDesc());
+
+ // Push arguments and notify profiler
+ X86EmitPushImm32(COR_PRF_TRANSITION_CALL); // Reason
+ X86EmitPushReg(kECX); // MethodDesc*
+ X86EmitCall(NewExternalCodeLabel((LPVOID) ProfilerUnmanagedToManagedTransitionMD), 2*sizeof(void*));
+ }
+
+#ifdef FEATURE_COMINTEROP
+ else if (pFrameVptr == ComMethodFrame::GetMethodFrameVPtr())
+ {
+ // Load the methoddesc into ECX (Frame->m_pvDatum->m_pMD)
+ X86EmitIndexRegLoad(kECX, regFrame, ComMethodFrame::GetOffsetOfDatum());
+ X86EmitIndexRegLoad(kECX, kECX, ComCallMethodDesc::GetOffsetOfMethodDesc());
+
+ // Push arguments and notify profiler
+ X86EmitPushImm32(COR_PRF_TRANSITION_CALL); // Reason
+ X86EmitPushReg(kECX); // MethodDesc*
+ X86EmitCall(NewExternalCodeLabel((LPVOID) ProfilerUnmanagedToManagedTransitionMD), 2*sizeof(void*));
+ }
+#endif // FEATURE_COMINTEROP
+
+ // Unrecognized frame vtbl
+ else
+ {
+ _ASSERTE(!"Unrecognized vtble passed to EmitComMethodStubProlog with profiling turned on.");
+ }
+}
+
+
+VOID StubLinkerCPU::EmitProfilerComCallEpilog(TADDR pFrameVptr, X86Reg regFrame)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+#ifdef FEATURE_COMINTEROP
+ PRECONDITION(pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr() || pFrameVptr == ComMethodFrame::GetMethodFrameVPtr());
+#else
+ PRECONDITION(pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr());
+#endif // FEATURE_COMINTEROP
+ }
+ CONTRACTL_END;
+
+ if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr())
+ {
+ // Load the methoddesc into ECX (UMThkCallFrame->m_pvDatum->m_pMD)
+ X86EmitIndexRegLoad(kECX, regFrame, UMThkCallFrame::GetOffsetOfDatum());
+ X86EmitIndexRegLoad(kECX, kECX, UMEntryThunk::GetOffsetOfMethodDesc());
+
+ // Push arguments and notify profiler
+ X86EmitPushImm32(COR_PRF_TRANSITION_RETURN); // Reason
+ X86EmitPushReg(kECX); // MethodDesc*
+ X86EmitCall(NewExternalCodeLabel((LPVOID) ProfilerManagedToUnmanagedTransitionMD), 2*sizeof(void*));
+ }
+
+#ifdef FEATURE_COMINTEROP
+ else if (pFrameVptr == ComMethodFrame::GetMethodFrameVPtr())
+ {
+ // Load the methoddesc into ECX (Frame->m_pvDatum->m_pMD)
+ X86EmitIndexRegLoad(kECX, regFrame, ComMethodFrame::GetOffsetOfDatum());
+ X86EmitIndexRegLoad(kECX, kECX, ComCallMethodDesc::GetOffsetOfMethodDesc());
+
+ // Push arguments and notify profiler
+ X86EmitPushImm32(COR_PRF_TRANSITION_RETURN); // Reason
+ X86EmitPushReg(kECX); // MethodDesc*
+ X86EmitCall(NewExternalCodeLabel((LPVOID) ProfilerManagedToUnmanagedTransitionMD), 2*sizeof(void*));
+ }
+#endif // FEATURE_COMINTEROP
+
+ // Unrecognized frame vtbl
+ else
+ {
+ _ASSERTE(!"Unrecognized vtble passed to EmitComMethodStubEpilog with profiling turned on.");
+ }
+}
+#endif // PROFILING_SUPPORTED
+
+
+//========================================================================
+// Prolog for entering managed code from COM
+// pushes the appropriate frame ptr
+// sets up a thread and returns a label that needs to be emitted by the caller
+// At the end:
+// ESI will hold the pointer to the ComMethodFrame or UMThkCallFrame
+// EBX will hold the result of GetThread()
+// EDI will hold the previous Frame ptr
+
+void StubLinkerCPU::EmitComMethodStubProlog(TADDR pFrameVptr,
+ CodeLabel** rgRareLabels,
+ CodeLabel** rgRejoinLabels,
+ BOOL bShouldProfile)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+
+ PRECONDITION(rgRareLabels != NULL);
+ PRECONDITION(rgRareLabels[0] != NULL && rgRareLabels[1] != NULL && rgRareLabels[2] != NULL);
+ PRECONDITION(rgRejoinLabels != NULL);
+ PRECONDITION(rgRejoinLabels[0] != NULL && rgRejoinLabels[1] != NULL && rgRejoinLabels[2] != NULL);
+ }
+ CONTRACTL_END;
+
+    // push ebp     ;; save callee-saved register
+    // mov ebp,esp
+    // push ebx     ;; save callee-saved register
+    // push esi     ;; save callee-saved register
+    // push edi     ;; save callee-saved register
+ X86EmitPushEBPframe();
+
+ X86EmitPushReg(kEBX);
+ X86EmitPushReg(kESI);
+ X86EmitPushReg(kEDI);
+
+ // push eax ; datum
+ X86EmitPushReg(kEAX);
+
+ // push edx ;leave room for m_next (edx is an arbitrary choice)
+ X86EmitPushReg(kEDX);
+
+ // push IMM32 ; push Frame vptr
+ X86EmitPushImmPtr((LPVOID) pFrameVptr);
+
+ X86EmitPushImmPtr((LPVOID)GetProcessGSCookie());
+
+ // lea esi, [esp+4] ;; set ESI -> new frame
+ X86EmitEspOffset(0x8d, kESI, 4); // lea ESI, [ESP+4]
+
+ if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr())
+ {
+ // Preserve argument registers for thiscall/fastcall
+ X86EmitPushReg(kECX);
+ X86EmitPushReg(kEDX);
+ }
+
+ // Emit Setup thread
+ EmitSetup(rgRareLabels[0]); // rareLabel for rare setup
+ EmitLabel(rgRejoinLabels[0]); // rejoin label for rare setup
+
+#ifdef PROFILING_SUPPORTED
+ // If profiling is active, emit code to notify profiler of transition
+ // Must do this before preemptive GC is disabled, so no problem if the
+ // profiler blocks.
+ if (CORProfilerTrackTransitions() && bShouldProfile)
+ {
+ EmitProfilerComCallProlog(pFrameVptr, /*Frame*/ kESI);
+ }
+#endif // PROFILING_SUPPORTED
+
+ //-----------------------------------------------------------------------
+ // Generate the inline part of disabling preemptive GC. It is critical
+ // that this part happen before we link in the frame. That's because
+ // we won't be able to unlink the frame from preemptive mode. And during
+ // shutdown, we cannot switch to cooperative mode under some circumstances
+ //-----------------------------------------------------------------------
+ EmitDisable(rgRareLabels[1], /*fCallIn=*/TRUE, kEBX); // rare disable gc
+ EmitLabel(rgRejoinLabels[1]); // rejoin for rare disable gc
+
+    // If we take an SO after installing the new frame but before getting the exception
+    // handlers in place, we will have a corrupt frame stack. So probe-by-touch first for
+    // sufficient stack space to erect the handler. Because we know we will be touching
+    // that stack right away when we install the handler, this probe-by-touch will not incur
+    // unnecessary cache misses. And it allows us to do the probe with one instruction.
+
+    // Note that for Win64, the personality routine will handle unlinking the frame, so
+    // we don't need to probe in the Win64 stubs. The exception is ComToCLRWorker,
+    // where we don't set up a personality routine. However, we push the frame inside
+    // that function and it is probe-protected with an entry-point probe first, so we are
+    // OK there too.
+
+    // We push two registers to set up the EH handler and none to set up the frame,
+    // so probe for double that to give ourselves a small margin for error.
+    // mov eax, [esp+n] ;; probe for sufficient stack to set up EH
+ X86EmitEspOffset(0x8B, kEAX, -0x20);
+ // mov edi,[ebx + Thread.GetFrame()] ;; get previous frame
+ X86EmitIndexRegLoad(kEDI, kEBX, Thread::GetOffsetOfCurrentFrame());
+
+ // mov [esi + Frame.m_next], edi
+ X86EmitIndexRegStore(kESI, Frame::GetOffsetOfNextLink(), kEDI);
+
+ // mov [ebx + Thread.GetFrame()], esi
+ X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kESI);
+
+ if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr())
+ {
+        // push the SEH handler (UMThunkPrestubHandler)
+ X86EmitPushImmPtr((LPVOID)UMThunkPrestubHandler);
+
+ // mov eax, fs:[0]
+ static const BYTE codeSEH1[] = { 0x64, 0xA1, 0x0, 0x0, 0x0, 0x0};
+ EmitBytes(codeSEH1, sizeof(codeSEH1));
+
+ // push eax
+ X86EmitPushReg(kEAX);
+
+ // mov dword ptr fs:[0], esp
+ static const BYTE codeSEH2[] = { 0x64, 0x89, 0x25, 0x0, 0x0, 0x0, 0x0};
+ EmitBytes(codeSEH2, sizeof(codeSEH2));
+ }
+
+#if _DEBUG
+ if (Frame::ShouldLogTransitions())
+ {
+ // call LogTransition
+ X86EmitPushReg(kESI);
+ X86EmitCall(NewExternalCodeLabel((LPVOID) Frame::LogTransition), sizeof(void*));
+ }
+#endif
+}
+
+//========================================================================
+// Epilog for stubs that enter managed code from COM
+//
+// At this point of the stub, the state should be as follows:
+// ESI holds the ComMethodFrame or UMThkCallFrame ptr
+// EBX holds the result of GetThread()
+// EDI holds the previous Frame ptr
+//
+void StubLinkerCPU::EmitComMethodStubEpilog(TADDR pFrameVptr,
+ CodeLabel** rgRareLabels,
+ CodeLabel** rgRejoinLabels,
+ BOOL bShouldProfile)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+
+ PRECONDITION(rgRareLabels != NULL);
+ PRECONDITION(rgRareLabels[0] != NULL && rgRareLabels[1] != NULL && rgRareLabels[2] != NULL);
+ PRECONDITION(rgRejoinLabels != NULL);
+ PRECONDITION(rgRejoinLabels[0] != NULL && rgRejoinLabels[1] != NULL && rgRejoinLabels[2] != NULL);
+ }
+ CONTRACTL_END;
+
+ EmitCheckGSCookie(kESI, UnmanagedToManagedFrame::GetOffsetOfGSCookie());
+
+ if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr())
+ {
+ // if we are using exceptions, unlink the SEH
+ // mov ecx,[esp] ;;pointer to the next exception record
+ X86EmitEspOffset(0x8b, kECX, 0);
+
+ // mov dword ptr fs:[0], ecx
+ static const BYTE codeSEH[] = { 0x64, 0x89, 0x0D, 0x0, 0x0, 0x0, 0x0 };
+ EmitBytes(codeSEH, sizeof(codeSEH));
+
+ X86EmitAddEsp(sizeof(EXCEPTION_REGISTRATION_RECORD));
+ }
+
+ // mov [ebx + Thread.GetFrame()], edi ;; restore previous frame
+ X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kEDI);
+
+    //-----------------------------------------------------------------------
+    // Generate the inline part of enabling preemptive GC
+    //-----------------------------------------------------------------------
+    EmitEnable(rgRareLabels[2]); // rare enable gc
+    EmitLabel(rgRejoinLabels[2]);  // rejoin for rare enable gc
+
+ if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr())
+ {
+ // Restore argument registers for thiscall/fastcall
+ X86EmitPopReg(kEDX);
+ X86EmitPopReg(kECX);
+ }
+
+ // add esp, popstack
+ X86EmitAddEsp(sizeof(GSCookie) + UnmanagedToManagedFrame::GetOffsetOfCalleeSavedRegisters());
+
+ // pop edi ; restore callee-saved registers
+ // pop esi
+ // pop ebx
+ // pop ebp
+ X86EmitPopReg(kEDI);
+ X86EmitPopReg(kESI);
+ X86EmitPopReg(kEBX);
+ X86EmitPopReg(kEBP);
+
+ // jmp eax //reexecute!
+ X86EmitR2ROp(0xff, (X86Reg)4, kEAX);
+
+ // ret
+ // This will never be executed. It is just to help out stack-walking logic
+ // which disassembles the epilog to unwind the stack. A "ret" instruction
+ // indicates that no more code needs to be disassembled, if the stack-walker
+ // keeps on going past the previous "jmp eax".
+ X86EmitReturn(0);
+
+ //-----------------------------------------------------------------------
+ // The out-of-line portion of enabling preemptive GC - rarely executed
+ //-----------------------------------------------------------------------
+ EmitLabel(rgRareLabels[2]); // label for rare enable gc
+ EmitRareEnable(rgRejoinLabels[2]); // emit rare enable gc
+
+ //-----------------------------------------------------------------------
+ // The out-of-line portion of disabling preemptive GC - rarely executed
+ //-----------------------------------------------------------------------
+ EmitLabel(rgRareLabels[1]); // label for rare disable gc
+ EmitRareDisable(rgRejoinLabels[1]); // emit rare disable gc
+
+ //-----------------------------------------------------------------------
+ // The out-of-line portion of setup thread - rarely executed
+ //-----------------------------------------------------------------------
+ EmitLabel(rgRareLabels[0]); // label for rare setup thread
+ EmitRareSetup(rgRejoinLabels[0], /*fThrow*/ TRUE); // emit rare setup thread
+}
+
+//---------------------------------------------------------------
+// Emit code to fetch the current Thread structure into ebx, jumping to
+// pForwardRef if the Thread has not been set up yet.
+// TRASHES eax, ecx, edx.
+// RESULTS ebx = current Thread
+//---------------------------------------------------------------
+VOID StubLinkerCPU::EmitSetup(CodeLabel *pForwardRef)
+{
+ STANDARD_VM_CONTRACT;
+
+#ifdef FEATURE_IMPLICIT_TLS
+ DWORD idx = 0;
+ TLSACCESSMODE mode = TLSACCESS_GENERIC;
+#else
+ DWORD idx = GetThreadTLSIndex();
+ TLSACCESSMODE mode = GetTLSAccessMode(idx);
+#endif
+
+#ifdef _DEBUG
+ {
+ static BOOL f = TRUE;
+ f = !f;
+ if (f)
+ {
+ mode = TLSACCESS_GENERIC;
+ }
+ }
+#endif
+
+ switch (mode)
+ {
+ case TLSACCESS_WNT:
+ {
+ unsigned __int32 tlsofs = offsetof(TEB, TlsSlots) + (idx * sizeof(void*));
+
+ static const BYTE code[] = {0x64,0x8b,0x1d}; // mov ebx, dword ptr fs:[IMM32]
+ EmitBytes(code, sizeof(code));
+ Emit32(tlsofs);
+ }
+ break;
+
+ case TLSACCESS_GENERIC:
+#ifdef FEATURE_IMPLICIT_TLS
+ X86EmitCall(NewExternalCodeLabel((LPVOID) GetThread), sizeof(void*));
+#else
+ X86EmitPushImm32(idx);
+
+ // call TLSGetValue
+ X86EmitCall(NewExternalCodeLabel((LPVOID) TlsGetValue), sizeof(void*));
+#endif
+ // mov ebx,eax
+ Emit16(0xc389);
+ break;
+ default:
+ _ASSERTE(0);
+ }
+
+ // cmp ebx, 0
+ static const BYTE b[] = { 0x83, 0xFB, 0x0};
+
+ EmitBytes(b, sizeof(b));
+
+ // jz RarePath
+ X86EmitCondJump(pForwardRef, X86CondCode::kJZ);
+
+#ifdef _DEBUG
+ X86EmitDebugTrashReg(kECX);
+ X86EmitDebugTrashReg(kEDX);
+#endif
+
+}
+
+VOID StubLinkerCPU::EmitRareSetup(CodeLabel *pRejoinPoint, BOOL fThrow)
+{
+ STANDARD_VM_CONTRACT;
+
+#ifndef FEATURE_COMINTEROP
+ _ASSERTE(fThrow);
+#else // !FEATURE_COMINTEROP
+ if (!fThrow)
+ {
+ X86EmitPushReg(kESI);
+ X86EmitCall(NewExternalCodeLabel((LPVOID) CreateThreadBlockReturnHr), sizeof(void*));
+ }
+ else
+#endif // !FEATURE_COMINTEROP
+ {
+ X86EmitCall(NewExternalCodeLabel((LPVOID) CreateThreadBlockThrow), 0);
+ }
+
+ // mov ebx,eax
+ Emit16(0xc389);
+ X86EmitNearJump(pRejoinPoint);
+}
+
+//========================================================================
+#endif // _TARGET_X86_
+//========================================================================
+#if defined(FEATURE_COMINTEROP) && defined(_TARGET_X86_)
+//========================================================================
+// Epilog for stubs that enter managed code from COM
+//
+// On entry, ESI points to the Frame
+// ESP points to below FramedMethodFrame::m_vc5Frame
+// EBX holds the result of GetThread()
+// EDI holds the previous Frame
+
+void StubLinkerCPU::EmitSharedComMethodStubEpilog(TADDR pFrameVptr,
+ CodeLabel** rgRareLabels,
+ CodeLabel** rgRejoinLabels,
+ unsigned offsetRetThunk,
+ BOOL bShouldProfile)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+
+ PRECONDITION(rgRareLabels != NULL);
+ PRECONDITION(rgRareLabels[0] != NULL && rgRareLabels[1] != NULL && rgRareLabels[2] != NULL);
+ PRECONDITION(rgRejoinLabels != NULL);
+ PRECONDITION(rgRejoinLabels[0] != NULL && rgRejoinLabels[1] != NULL && rgRejoinLabels[2] != NULL);
+ }
+ CONTRACTL_END;
+
+ CodeLabel *NoEntryLabel;
+ NoEntryLabel = NewCodeLabel();
+
+ EmitCheckGSCookie(kESI, UnmanagedToManagedFrame::GetOffsetOfGSCookie());
+
+ // mov [ebx + Thread.GetFrame()], edi ;; restore previous frame
+ X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kEDI);
+
+ //-----------------------------------------------------------------------
+ // Generate the inline part of enabling preemptive GC
+ //-----------------------------------------------------------------------
+    EmitLabel(NoEntryLabel); // we need to enable preemptive mode even when the disable fails, since the rare disable path returns in cooperative mode
+
+ EmitEnable(rgRareLabels[2]); // rare enable gc
+ EmitLabel(rgRejoinLabels[2]); // rejoin for rare enable gc
+
+#ifdef PROFILING_SUPPORTED
+ // If profiling is active, emit code to notify profiler of transition
+ if (CORProfilerTrackTransitions() && bShouldProfile)
+ {
+ // Save return value
+ X86EmitPushReg(kEAX);
+ X86EmitPushReg(kEDX);
+
+ EmitProfilerComCallEpilog(pFrameVptr, kESI);
+
+ // Restore return value
+ X86EmitPopReg(kEDX);
+ X86EmitPopReg(kEAX);
+ }
+#endif // PROFILING_SUPPORTED
+
+ X86EmitAddEsp(sizeof(GSCookie) + UnmanagedToManagedFrame::GetOffsetOfDatum());
+
+ // pop ecx
+ X86EmitPopReg(kECX); // pop the MethodDesc*
+
+ // pop edi ; restore callee-saved registers
+ // pop esi
+ // pop ebx
+ // pop ebp
+ X86EmitPopReg(kEDI);
+ X86EmitPopReg(kESI);
+ X86EmitPopReg(kEBX);
+ X86EmitPopReg(kEBP);
+
+ // add ecx, offsetRetThunk
+ X86EmitAddReg(kECX, offsetRetThunk);
+
+ // jmp ecx
+ // This will jump to the "ret cbStackArgs" instruction in COMMETHOD_PREPAD.
+ static const BYTE bjmpecx[] = { 0xff, 0xe1 };
+ EmitBytes(bjmpecx, sizeof(bjmpecx));
+
+ // ret
+ // This will never be executed. It is just to help out stack-walking logic
+ // which disassembles the epilog to unwind the stack. A "ret" instruction
+ // indicates that no more code needs to be disassembled, if the stack-walker
+ // keeps on going past the previous "jmp ecx".
+ X86EmitReturn(0);
+
+ //-----------------------------------------------------------------------
+ // The out-of-line portion of enabling preemptive GC - rarely executed
+ //-----------------------------------------------------------------------
+ EmitLabel(rgRareLabels[2]); // label for rare enable gc
+ EmitRareEnable(rgRejoinLabels[2]); // emit rare enable gc
+
+ //-----------------------------------------------------------------------
+ // The out-of-line portion of disabling preemptive GC - rarely executed
+ //-----------------------------------------------------------------------
+ EmitLabel(rgRareLabels[1]); // label for rare disable gc
+ EmitRareDisableHRESULT(rgRejoinLabels[1], NoEntryLabel);
+
+ //-----------------------------------------------------------------------
+ // The out-of-line portion of setup thread - rarely executed
+ //-----------------------------------------------------------------------
+ EmitLabel(rgRareLabels[0]); // label for rare setup thread
+ EmitRareSetup(rgRejoinLabels[0],/*fThrow*/ FALSE); // emit rare setup thread
+}
+
+//========================================================================
+#endif // defined(FEATURE_COMINTEROP) && defined(_TARGET_X86_)
+
+#ifndef FEATURE_STUBS_AS_IL
+/*==============================================================================
+ Pushes a TransitionFrame on the stack
+ If you make any changes to the prolog instruction sequence, be sure
+ to update UpdateRegdisplay, too!! This service should only be called from
+ within the runtime. It should not be called for any unmanaged -> managed call-ins.
+
+ At the end of the generated prolog stub code:
+ pFrame is in ESI/RSI.
+ the previous pFrame is in EDI/RDI
+ The current Thread* is in EBX/RBX.
+ For x86, ESP points to TransitionFrame
+ For amd64, ESP points to the space reserved for the outgoing argument registers
+*/
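+
+/* A rough x86 picture of what the prolog leaves on the stack, inferred from the
+   push sequence below (illustrative only; the authoritative layout is given by
+   the TransitionBlock/FramedMethodFrame definitions):
+
+        [esp]      GSCookie
+        [esi]      Frame vptr                       <-- ESI (pFrame)
+        [esi+04h]  m_next (previous Frame, linked in below)
+        [esi+08h]  m_datum (from EAX)
+        [esi+0Ch]  ArgumentRegisters (ecx/edx)
+        ...        saved EDI/ESI/EBX/EBP, then the return address
+*/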
+
+VOID StubLinkerCPU::EmitMethodStubProlog(TADDR pFrameVptr, int transitionBlockOffset)
+{
+ STANDARD_VM_CONTRACT;
+
+#ifdef _TARGET_AMD64_
+ X86EmitPushReg(kR15); // CalleeSavedRegisters
+ X86EmitPushReg(kR14);
+ X86EmitPushReg(kR13);
+ X86EmitPushReg(kR12);
+ X86EmitPushReg(kRBP);
+ X86EmitPushReg(kRBX);
+ X86EmitPushReg(kRSI);
+ X86EmitPushReg(kRDI);
+
+ // Push m_datum
+ X86EmitPushReg(SCRATCH_REGISTER_X86REG);
+
+ // push edx ;leave room for m_next (edx is an arbitrary choice)
+ X86EmitPushReg(kEDX);
+
+ // push Frame vptr
+ X86EmitPushImmPtr((LPVOID) pFrameVptr);
+
+ // mov rsi, rsp
+ X86EmitR2ROp(0x8b, kRSI, (X86Reg)4 /*kESP*/);
+ UnwindSetFramePointer(kRSI);
+
+ // Save ArgumentRegisters
+ #define ARGUMENT_REGISTER(regname) X86EmitRegSave(k##regname, SecureDelegateFrame::GetOffsetOfTransitionBlock() + \
+ sizeof(TransitionBlock) + offsetof(ArgumentRegisters, regname));
+ ENUM_ARGUMENT_REGISTERS();
+ #undef ARGUMENT_REGISTER
+
+ _ASSERTE(((Frame*)&pFrameVptr)->GetGSCookiePtr() == PTR_GSCookie(PBYTE(&pFrameVptr) - sizeof(GSCookie)));
+ X86EmitPushImmPtr((LPVOID)GetProcessGSCookie());
+
+ // sub rsp, 4*sizeof(void*) ;; allocate callee scratch area and ensure rsp is 16-byte-aligned
+ const INT32 padding = sizeof(ArgumentRegisters) + ((sizeof(FramedMethodFrame) % (2 * sizeof(LPVOID))) ? 0 : sizeof(LPVOID));
+ X86EmitSubEsp(padding);
+#endif // _TARGET_AMD64_
+
+#ifdef _TARGET_X86_
+ // push ebp ;; save callee-saved register
+ // mov ebp,esp
+ // push ebx ;; save callee-saved register
+ // push esi ;; save callee-saved register
+ // push edi ;; save callee-saved register
+ X86EmitPushEBPframe();
+
+ X86EmitPushReg(kEBX);
+ X86EmitPushReg(kESI);
+ X86EmitPushReg(kEDI);
+
+ // Push & initialize ArgumentRegisters
+ #define ARGUMENT_REGISTER(regname) X86EmitPushReg(k##regname);
+ ENUM_ARGUMENT_REGISTERS();
+ #undef ARGUMENT_REGISTER
+
+ // Push m_datum
+ X86EmitPushReg(kEAX);
+
+ // push edx ;leave room for m_next (edx is an arbitrary choice)
+ X86EmitPushReg(kEDX);
+
+ // push Frame vptr
+ X86EmitPushImmPtr((LPVOID) pFrameVptr);
+
+ // mov esi,esp
+ X86EmitMovRegSP(kESI);
+
+ X86EmitPushImmPtr((LPVOID)GetProcessGSCookie());
+#endif // _TARGET_X86_
+
+ // ebx <-- GetThread()
+ // Trashes X86TLSFetch_TRASHABLE_REGS
+ X86EmitCurrentThreadFetch(kEBX, 0);
+
+#if _DEBUG
+
+ // call ObjectRefFlush
+#ifdef _TARGET_AMD64_
+
+ // mov rcx, rbx
+ X86EmitR2ROp(0x8b, kECX, kEBX); // arg in reg
+
+#else // !_TARGET_AMD64_
+ X86EmitPushReg(kEBX); // arg on stack
+#endif // _TARGET_AMD64_
+
+ // Make the call
+ X86EmitCall(NewExternalCodeLabel((LPVOID) Thread::ObjectRefFlush), sizeof(void*));
+
+#endif // _DEBUG
+
+ // mov edi,[ebx + Thread.GetFrame()] ;; get previous frame
+ X86EmitIndexRegLoad(kEDI, kEBX, Thread::GetOffsetOfCurrentFrame());
+
+ // mov [esi + Frame.m_next], edi
+ X86EmitIndexRegStore(kESI, Frame::GetOffsetOfNextLink(), kEDI);
+
+ // mov [ebx + Thread.GetFrame()], esi
+ X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kESI);
+
+#if _DEBUG
+
+ if (Frame::ShouldLogTransitions())
+ {
+ // call LogTransition
+#ifdef _TARGET_AMD64_
+
+ // mov rcx, rsi
+ X86EmitR2ROp(0x8b, kECX, kESI); // arg in reg
+
+#else // !_TARGET_AMD64_
+ X86EmitPushReg(kESI); // arg on stack
+#endif // _TARGET_AMD64_
+
+ X86EmitCall(NewExternalCodeLabel((LPVOID) Frame::LogTransition), sizeof(void*));
+
+#ifdef _TARGET_AMD64_
+ // Reload parameter registers
+ // mov r, [esp+offs]
+ #define ARGUMENT_REGISTER(regname) X86EmitEspOffset(0x8b, k##regname, sizeof(ArgumentRegisters) + \
+ sizeof(TransitionFrame) + offsetof(ArgumentRegisters, regname));
+ ENUM_ARGUMENT_REGISTERS();
+ #undef ARGUMENT_REGISTER
+
+#endif // _TARGET_AMD64_
+ }
+
+#endif // _DEBUG
+
+
+#ifdef _TARGET_AMD64_
+ // OK for the debugger to examine the new frame now
+ // (Note that if it's not OK yet for some stub, another patch label
+ // can be emitted later which will override this one.)
+ EmitPatchLabel();
+#else
+ // For x86, the patch label can be specified only after the GSCookie is pushed
+ // Otherwise the debugger will see a Frame without a valid GSCookie
+#endif
+}
+
+/*==============================================================================
+ EmitMethodStubEpilog generates the part of the stub that will pop off the
+ Frame
+
+ restoreArgRegs - indicates whether the argument registers need to be
+ restored from m_argumentRegisters
+
+ At this point of the stub:
+ pFrame is in ESI/RSI.
+ the previous pFrame is in EDI/RDI
+ The current Thread* is in EBX/RBX.
+ For x86, ESP points to the FramedMethodFrame::NegInfo
+*/
+
+VOID StubLinkerCPU::EmitMethodStubEpilog(WORD numArgBytes, int transitionBlockOffset)
+{
+ STANDARD_VM_CONTRACT;
+
+ // mov [ebx + Thread.GetFrame()], edi ;; restore previous frame
+ X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kEDI);
+
+#ifdef _TARGET_X86_
+ // deallocate Frame
+ X86EmitAddEsp(sizeof(GSCookie) + transitionBlockOffset + TransitionBlock::GetOffsetOfCalleeSavedRegisters());
+
+#elif defined(_TARGET_AMD64_)
+ // lea rsp, [rsi + <offset of preserved registers>]
+ X86EmitOffsetModRM(0x8d, (X86Reg)4 /*kRSP*/, kRSI, transitionBlockOffset + TransitionBlock::GetOffsetOfCalleeSavedRegisters());
+#endif // _TARGET_AMD64_
+
+ // pop edi ; restore callee-saved registers
+ // pop esi
+ // pop ebx
+ // pop ebp
+ X86EmitPopReg(kEDI);
+ X86EmitPopReg(kESI);
+ X86EmitPopReg(kEBX);
+ X86EmitPopReg(kEBP);
+
+#ifdef _TARGET_AMD64_
+ X86EmitPopReg(kR12);
+ X86EmitPopReg(kR13);
+ X86EmitPopReg(kR14);
+ X86EmitPopReg(kR15);
+#endif
+
+#ifdef _TARGET_AMD64_
+ // Caller deallocates argument space. (Bypasses ASSERT in
+ // X86EmitReturn.)
+ numArgBytes = 0;
+#endif
+
+ X86EmitReturn(numArgBytes);
+}
+
+
+// On entry, ESI should be pointing to the Frame
+
+VOID StubLinkerCPU::EmitCheckGSCookie(X86Reg frameReg, int gsCookieOffset)
+{
+ STANDARD_VM_CONTRACT;
+
+#ifdef _DEBUG
+    // cmp dword ptr [frameReg + gsCookieOffset], gsCookie
+#ifdef _TARGET_X86_
+ X86EmitCmpRegIndexImm32(frameReg, gsCookieOffset, GetProcessGSCookie());
+#else
+ X64EmitCmp32RegIndexImm32(frameReg, gsCookieOffset, (INT32)GetProcessGSCookie());
+#endif
+
+ CodeLabel * pLabel = NewCodeLabel();
+ X86EmitCondJump(pLabel, X86CondCode::kJE);
+
+ X86EmitCall(NewExternalCodeLabel((LPVOID) JIT_FailFast), 0);
+
+ EmitLabel(pLabel);
+#endif
+}
+#endif // !FEATURE_STUBS_AS_IL
+
+
+// This method unboxes the THIS pointer and then calls pRealMD
+// If it's shared code for a method in a generic value class, then also extract the vtable pointer
+// and pass it as an extra argument. Thus this stub generator really covers both
+// - Unboxing, non-instantiating stubs
+// - Unboxing, method-table-instantiating stubs
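+//
+// For the common x86 case (no hidden method-table argument and a stable entry
+// point), the emitted stub is essentially just the following two instructions
+// (illustrative; THIS_kREG is ecx on x86):
+//
+//      add ecx, 4          ; step over the MethodTable* to reach the boxed data
+//      jmp <entrypoint>    ; tail-jump to the real method body
+//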
+VOID StubLinkerCPU::EmitUnboxMethodStub(MethodDesc* pUnboxMD)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+ PRECONDITION(!pUnboxMD->IsStatic());
+ }
+ CONTRACTL_END;
+
+#ifdef FEATURE_STUBS_AS_IL
+ _ASSERTE(!pUnboxMD->RequiresInstMethodTableArg());
+#else
+ if (pUnboxMD->RequiresInstMethodTableArg())
+ {
+ EmitInstantiatingMethodStub(pUnboxMD, NULL);
+ return;
+ }
+#endif
+
+ //
+ // unboxing a value class simply means adding sizeof(void*) to the THIS pointer
+ //
+#ifdef _TARGET_AMD64_
+ X86EmitAddReg(THIS_kREG, sizeof(void*));
+
+ // Use direct call if possible
+ if (pUnboxMD->HasStableEntryPoint())
+ {
+ X86EmitRegLoad(kRAX, pUnboxMD->GetStableEntryPoint());// MOV RAX, DWORD
+ }
+ else
+ {
+ X86EmitRegLoad(kRAX, (UINT_PTR)pUnboxMD->GetAddrOfSlot()); // MOV RAX, DWORD
+
+ X86EmitIndexRegLoad(kRAX, kRAX); // MOV RAX, [RAX]
+ }
+
+ Emit16(X86_INSTR_JMP_EAX); // JMP EAX
+#else // _TARGET_AMD64_
+ X86EmitAddReg(THIS_kREG, sizeof(void*));
+
+ // Use direct call if possible
+ if (pUnboxMD->HasStableEntryPoint())
+ {
+ X86EmitNearJump(NewExternalCodeLabel((LPVOID) pUnboxMD->GetStableEntryPoint()));
+ }
+ else
+ {
+ // jmp [slot]
+ Emit16(0x25ff);
+ Emit32((DWORD)(size_t)pUnboxMD->GetAddrOfSlot());
+ }
+#endif //_TARGET_AMD64_
+}
+
+
+#if defined(FEATURE_SHARE_GENERIC_CODE) && !defined(FEATURE_STUBS_AS_IL)
+// The stub generated by this method passes an extra dictionary argument before jumping to
+// shared-instantiation generic code.
+//
+// pMD is either
+// * An InstantiatedMethodDesc for a generic method whose code is shared across instantiations.
+// In this case, the extra argument is the InstantiatedMethodDesc for the instantiation-specific stub itself.
+// or * A MethodDesc for a static method in a generic class whose code is shared across instantiations.
+// In this case, the extra argument is the MethodTable pointer of the instantiated type.
+// or * A MethodDesc for an unboxing stub. In this case, the extra argument is null.
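+//
+// For example, on x86, when the hidden argument is passed in a register and "extra"
+// is supplied, the emitted stub boils down to (illustrative; the actual register is
+// whatever GetX86ArgumentRegisterFromOffset picks for this signature):
+//
+//      mov edx, <extra>    ; load the instantiation argument
+//      jmp <entrypoint>    ; tail-jump to the shared code
+//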
+VOID StubLinkerCPU::EmitInstantiatingMethodStub(MethodDesc* pMD, void* extra)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+ PRECONDITION(pMD->RequiresInstArg());
+ }
+ CONTRACTL_END;
+
+ MetaSig msig(pMD);
+ ArgIterator argit(&msig);
+
+#ifdef _TARGET_AMD64_
+ int paramTypeArgOffset = argit.GetParamTypeArgOffset();
+ int paramTypeArgIndex = TransitionBlock::GetArgumentIndexFromOffset(paramTypeArgOffset);
+
+ CorElementType argTypes[5];
+
+ int firstRealArg = paramTypeArgIndex + 1;
+ int argNum = firstRealArg;
+
+ //
+ // Compute types of the 4 register args and first stack arg
+ //
+
+ CorElementType sigType;
+ while ((sigType = msig.NextArgNormalized()) != ELEMENT_TYPE_END)
+ {
+ argTypes[argNum++] = sigType;
+ if (argNum > 4)
+ break;
+ }
+ msig.Reset();
+
+ BOOL fUseInstantiatingMethodStubWorker = FALSE;
+
+ if (argNum > 4)
+ {
+ //
+ // We will need to go through assembly helper.
+ //
+ fUseInstantiatingMethodStubWorker = TRUE;
+
+ // Allocate space for frame before pushing the arguments for the assembly helper
+ X86EmitSubEsp((INT32)(AlignUp(sizeof(void *) /* extra stack param */ + sizeof(GSCookie) + sizeof(StubHelperFrame), 16) - sizeof(void *) /* return address */));
+
+ //
+ // Store extra arg stack arg param for the helper.
+ //
+ CorElementType argType = argTypes[--argNum];
+ switch (argType)
+ {
+ case ELEMENT_TYPE_R4:
+ // movss dword ptr [rsp], xmm?
+ X64EmitMovSSToMem(kXMM3, (X86Reg)4 /*kRSP*/);
+ break;
+ case ELEMENT_TYPE_R8:
+ // movsd qword ptr [rsp], xmm?
+ X64EmitMovSDToMem(kXMM3, (X86Reg)4 /*kRSP*/);
+ break;
+ default:
+ X86EmitIndexRegStoreRSP(0, kR9);
+ break;
+ }
+ }
+
+ //
+ // Shuffle the register arguments
+ //
+ while (argNum > firstRealArg)
+ {
+ CorElementType argType = argTypes[--argNum];
+
+ switch (argType)
+ {
+ case ELEMENT_TYPE_R4:
+ case ELEMENT_TYPE_R8:
+ // mov xmm#, xmm#-1
+ X64EmitMovXmmXmm((X86Reg)argNum, (X86Reg)(argNum - 1));
+ break;
+ default:
+ //mov reg#, reg#-1
+ X86EmitMovRegReg(c_argRegs[argNum], c_argRegs[argNum-1]);
+ break;
+ }
+ }
+
+ //
+ // Setup the hidden instantiation argument
+ //
+ if (extra != NULL)
+ {
+ X86EmitRegLoad(c_argRegs[paramTypeArgIndex], (UINT_PTR)extra);
+ }
+ else
+ {
+ X86EmitIndexRegLoad(c_argRegs[paramTypeArgIndex], THIS_kREG);
+
+ X86EmitAddReg(THIS_kREG, sizeof(void*));
+ }
+
+ // Use direct call if possible
+ if (pMD->HasStableEntryPoint())
+ {
+ X86EmitRegLoad(kRAX, pMD->GetStableEntryPoint());// MOV RAX, DWORD
+ }
+ else
+ {
+ X86EmitRegLoad(kRAX, (UINT_PTR)pMD->GetAddrOfSlot()); // MOV RAX, DWORD
+
+ X86EmitIndexRegLoad(kRAX, kRAX); // MOV RAX, [RAX]
+ }
+
+ if (fUseInstantiatingMethodStubWorker)
+ {
+ X86EmitPushReg(kRAX);
+
+ UINT cbStack = argit.SizeOfArgStack();
+ _ASSERTE(cbStack > 0);
+
+ X86EmitPushImm32((AlignUp(cbStack, 16) / sizeof(void*)) - 1); // -1 for extra stack arg
+
+ X86EmitRegLoad(kRAX, GetEEFuncEntryPoint(InstantiatingMethodStubWorker));// MOV RAX, DWORD
+ }
+ else
+ {
+ _ASSERTE(argit.SizeOfArgStack() == 0);
+ }
+
+ Emit16(X86_INSTR_JMP_EAX);
+
+#else
+ int paramTypeArgOffset = argit.GetParamTypeArgOffset();
+
+ // It's on the stack
+ if (TransitionBlock::IsStackArgumentOffset(paramTypeArgOffset))
+ {
+ // Pop return address into AX
+ X86EmitPopReg(kEAX);
+
+ if (extra != NULL)
+ {
+ // Push extra dictionary argument
+ X86EmitPushImmPtr(extra);
+ }
+ else
+ {
+ // Push the vtable pointer from "this"
+ X86EmitIndexPush(THIS_kREG, 0);
+ }
+
+ // Put return address back
+ X86EmitPushReg(kEAX);
+ }
+ // It's in a register
+ else
+ {
+ X86Reg paramReg = GetX86ArgumentRegisterFromOffset(paramTypeArgOffset - TransitionBlock::GetOffsetOfArgumentRegisters());
+
+ if (extra != NULL)
+ {
+ X86EmitRegLoad(paramReg, (UINT_PTR)extra);
+ }
+ else
+ {
+ // Just extract the vtable pointer from "this"
+ X86EmitIndexRegLoad(paramReg, THIS_kREG);
+ }
+ }
+
+ if (extra == NULL)
+ {
+ // Unboxing stub case.
+ X86EmitAddReg(THIS_kREG, sizeof(void*));
+ }
+
+ // Use direct call if possible
+ if (pMD->HasStableEntryPoint())
+ {
+ X86EmitNearJump(NewExternalCodeLabel((LPVOID) pMD->GetStableEntryPoint()));
+ }
+ else
+ {
+ // jmp [slot]
+ Emit16(0x25ff);
+ Emit32((DWORD)(size_t)pMD->GetAddrOfSlot());
+ }
+#endif // _TARGET_AMD64_
+}
+#endif // FEATURE_SHARE_GENERIC_CODE && !FEATURE_STUBS_AS_IL
+
+
+#if defined(_DEBUG) && defined(STUBLINKER_GENERATES_UNWIND_INFO)
+
+typedef BOOL GetModuleInformationProc(
+ HANDLE hProcess,
+ HMODULE hModule,
+ LPMODULEINFO lpmodinfo,
+ DWORD cb
+);
+
+GetModuleInformationProc *g_pfnGetModuleInformation = NULL;
+
+extern "C" VOID __cdecl DebugCheckStubUnwindInfoWorker (CONTEXT *pStubContext)
+{
+ BEGIN_ENTRYPOINT_VOIDRET;
+
+ LOG((LF_STUBS, LL_INFO1000000, "checking stub unwind info:\n"));
+
+ //
+ // Make a copy of the CONTEXT. RtlVirtualUnwind will modify this copy.
+ // DebugCheckStubUnwindInfo will need to restore registers from the
+ // original CONTEXT.
+ //
+ CONTEXT ctx = *pStubContext;
+ ctx.ContextFlags = (CONTEXT_CONTROL | CONTEXT_INTEGER);
+
+ //
+ // Find the upper bound of the stack and address range of KERNEL32. This
+ // is where we expect the unwind to stop.
+ //
+ void *pvStackTop = GetThread()->GetCachedStackBase();
+
+ if (!g_pfnGetModuleInformation)
+ {
+ HMODULE hmodPSAPI = WszGetModuleHandle(W("PSAPI.DLL"));
+
+ if (!hmodPSAPI)
+ {
+ hmodPSAPI = WszLoadLibrary(W("PSAPI.DLL"));
+ if (!hmodPSAPI)
+ {
+ _ASSERTE(!"unable to load PSAPI.DLL");
+ goto ErrExit;
+ }
+ }
+
+ g_pfnGetModuleInformation = (GetModuleInformationProc*)GetProcAddress(hmodPSAPI, "GetModuleInformation");
+ if (!g_pfnGetModuleInformation)
+ {
+ _ASSERTE(!"can't find PSAPI!GetModuleInformation");
+ goto ErrExit;
+ }
+
+ // Intentionally leak hmodPSAPI. We don't want to
+ // LoadLibrary/FreeLibrary every time, this is slow + produces lots of
+ // debugger spew. This is just debugging code after all...
+ }
+
+ HMODULE hmodKERNEL32 = WszGetModuleHandle(W("KERNEL32"));
+ _ASSERTE(hmodKERNEL32);
+
+ MODULEINFO modinfoKERNEL32;
+ if (!g_pfnGetModuleInformation(GetCurrentProcess(), hmodKERNEL32, &modinfoKERNEL32, sizeof(modinfoKERNEL32)))
+ {
+ _ASSERTE(!"unable to get bounds of KERNEL32");
+ goto ErrExit;
+ }
+
+ //
+ // Unwind until IP is 0, sp is at the stack top, and callee IP is in kernel32.
+ //
+
+ for (;;)
+ {
+ ULONG64 ControlPc = (ULONG64)GetIP(&ctx);
+
+ LOG((LF_STUBS, LL_INFO1000000, "pc %p, sp %p\n", ControlPc, GetSP(&ctx)));
+
+ ULONG64 ImageBase;
+ T_RUNTIME_FUNCTION *pFunctionEntry = RtlLookupFunctionEntry(
+ ControlPc,
+ &ImageBase,
+ NULL);
+ if (pFunctionEntry)
+ {
+ PVOID HandlerData;
+ ULONG64 EstablisherFrame;
+
+ RtlVirtualUnwind(
+ 0,
+ ImageBase,
+ ControlPc,
+ pFunctionEntry,
+ &ctx,
+ &HandlerData,
+ &EstablisherFrame,
+ NULL);
+
+ ULONG64 NewControlPc = (ULONG64)GetIP(&ctx);
+
+ LOG((LF_STUBS, LL_INFO1000000, "function %p, image %p, new pc %p, new sp %p\n", pFunctionEntry, ImageBase, NewControlPc, GetSP(&ctx)));
+
+ if (!NewControlPc)
+ {
+ if (dac_cast<PTR_BYTE>(GetSP(&ctx)) < (BYTE*)pvStackTop - 0x100)
+ {
+ _ASSERTE(!"SP did not end up at top of stack");
+ goto ErrExit;
+ }
+
+ if (!( ControlPc > (ULONG64)modinfoKERNEL32.lpBaseOfDll
+ && ControlPc < (ULONG64)modinfoKERNEL32.lpBaseOfDll + modinfoKERNEL32.SizeOfImage))
+ {
+ _ASSERTE(!"PC did not end up in KERNEL32");
+ goto ErrExit;
+ }
+
+ break;
+ }
+ }
+ else
+ {
+ // Nested functions that do not use any stack space or nonvolatile
+ // registers are not required to have unwind info (ex.
+ // USER32!ZwUserCreateWindowEx).
+ ctx.Rip = *(ULONG64*)(ctx.Rsp);
+ ctx.Rsp += sizeof(ULONG64);
+ }
+ }
+ErrExit:
+
+ END_ENTRYPOINT_VOIDRET;
+ return;
+}
+
+//virtual
+VOID StubLinkerCPU::EmitUnwindInfoCheckWorker (CodeLabel *pCheckLabel)
+{
+ STANDARD_VM_CONTRACT;
+ X86EmitCall(pCheckLabel, 0);
+}
+
+//virtual
+VOID StubLinkerCPU::EmitUnwindInfoCheckSubfunction()
+{
+ STANDARD_VM_CONTRACT;
+
+#ifdef _TARGET_AMD64_
+ // X86EmitCall will generate "mov rax, target/jmp rax", so we have to save
+ // rax on the stack. DO NOT use X86EmitPushReg. That will induce infinite
+ // recursion, since the push may require more unwind info. This "push rax"
+ // will be accounted for by DebugCheckStubUnwindInfo's unwind info
+ // (considered part of its locals), so there doesn't have to be unwind
+ // info for it.
+ Emit8(0x50);
+#endif
+
+ X86EmitNearJump(NewExternalCodeLabel(DebugCheckStubUnwindInfo));
+}
+
+#endif // defined(_DEBUG) && defined(STUBLINKER_GENERATES_UNWIND_INFO)
+
+
+#ifdef _TARGET_X86_
+
+//-----------------------------------------------------------------------
+// Generates the inline portion of the code to enable preemptive GC. Hopefully,
+// the inline code is all that will execute most of the time. If this code
+// path is entered at certain times, however, it will need to jump out to
+// a separate out-of-line path which is more expensive. The "pForwardRef"
+// label indicates the start of the out-of-line path.
+//
+// Assumptions:
+// ebx = Thread
+// Preserves
+// all registers except ecx.
+//
+//-----------------------------------------------------------------------
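+//
+// The inline sequence emitted below is essentially (illustrative):
+//
+//      mov  byte ptr [ebx + Thread.m_fPreemptiveGCDisabled], 0
+//      test byte ptr [ebx + Thread.m_State], TS_CatchAtSafePoint
+//      jnz  RarePath
+//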
+VOID StubLinkerCPU::EmitEnable(CodeLabel *pForwardRef)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+
+ PRECONDITION(4 == sizeof( ((Thread*)0)->m_State ));
+ PRECONDITION(4 == sizeof( ((Thread*)0)->m_fPreemptiveGCDisabled ));
+ }
+ CONTRACTL_END;
+
+ // move byte ptr [ebx + Thread.m_fPreemptiveGCDisabled],0
+ X86EmitOffsetModRM(0xc6, (X86Reg)0, kEBX, Thread::GetOffsetOfGCFlag());
+ Emit8(0);
+
+ _ASSERTE(FitsInI1(Thread::TS_CatchAtSafePoint));
+
+ // test byte ptr [ebx + Thread.m_State], TS_CatchAtSafePoint
+ X86EmitOffsetModRM(0xf6, (X86Reg)0, kEBX, Thread::GetOffsetOfState());
+ Emit8(Thread::TS_CatchAtSafePoint);
+
+ // jnz RarePath
+ X86EmitCondJump(pForwardRef, X86CondCode::kJNZ);
+
+#ifdef _DEBUG
+ X86EmitDebugTrashReg(kECX);
+#endif
+
+}
+
+
+//-----------------------------------------------------------------------
+// Generates the out-of-line portion of the code to enable preemptive GC.
+// After the work is done, the code jumps back to the "pRejoinPoint"
+// which should be emitted right after the inline part is generated.
+//
+// Assumptions:
+// ebx = Thread
+// Preserves
+// all registers except ecx.
+//
+//-----------------------------------------------------------------------
+VOID StubLinkerCPU::EmitRareEnable(CodeLabel *pRejoinPoint)
+{
+ STANDARD_VM_CONTRACT;
+
+ X86EmitCall(NewExternalCodeLabel((LPVOID) StubRareEnable), 0);
+#ifdef _DEBUG
+ X86EmitDebugTrashReg(kECX);
+#endif
+ if (pRejoinPoint)
+ {
+ X86EmitNearJump(pRejoinPoint);
+ }
+
+}
+
+
+//-----------------------------------------------------------------------
+// Generates the inline portion of the code to disable preemptive GC. Hopefully,
+// the inline code is all that will execute most of the time. If this code
+// path is entered at certain times, however, it will need to jump out to
+// a separate out-of-line path which is more expensive. The "pForwardRef"
+// label indicates the start of the out-of-line path.
+//
+// Assumptions:
+// ebx = Thread
+// Preserves
+// all registers except ecx.
+//
+//-----------------------------------------------------------------------
+VOID StubLinkerCPU::EmitDisable(CodeLabel *pForwardRef, BOOL fCallIn, X86Reg ThreadReg)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+
+ PRECONDITION(4 == sizeof( ((Thread*)0)->m_fPreemptiveGCDisabled ));
+ PRECONDITION(4 == sizeof(g_TrapReturningThreads));
+ }
+ CONTRACTL_END;
+
+#if defined(FEATURE_COMINTEROP) && defined(MDA_SUPPORTED)
+    // If the reentrancy MDA is active, check whether the current thread is already in
+    // cooperative mode; if so, report the illegal reentrancy (via HasIllegalReentrancy
+    // below) before disabling preemptive GC.
+ if (fCallIn && (NULL != MDA_GET_ASSISTANT(Reentrancy)))
+ {
+ CodeLabel *pNotReentrantLabel = NewCodeLabel();
+
+ // test byte ptr [ebx + Thread.m_fPreemptiveGCDisabled],1
+ X86EmitOffsetModRM(0xf6, (X86Reg)0, ThreadReg, Thread::GetOffsetOfGCFlag());
+ Emit8(1);
+
+ // jz NotReentrant
+ X86EmitCondJump(pNotReentrantLabel, X86CondCode::kJZ);
+
+ X86EmitPushReg(kEAX);
+ X86EmitPushReg(kEDX);
+ X86EmitPushReg(kECX);
+
+ X86EmitCall(NewExternalCodeLabel((LPVOID) HasIllegalReentrancy), 0);
+
+ // If the probe fires, we go ahead and allow the call anyway. At this point, there could be
+ // GC heap corruptions. So the probe detects the illegal case, but doesn't prevent it.
+
+ X86EmitPopReg(kECX);
+ X86EmitPopReg(kEDX);
+ X86EmitPopReg(kEAX);
+
+ EmitLabel(pNotReentrantLabel);
+ }
+#endif
+
+ // move byte ptr [ebx + Thread.m_fPreemptiveGCDisabled],1
+ X86EmitOffsetModRM(0xc6, (X86Reg)0, ThreadReg, Thread::GetOffsetOfGCFlag());
+ Emit8(1);
+
+ // cmp dword ptr g_TrapReturningThreads, 0
+ Emit16(0x3d83);
+ EmitPtr((void *)&g_TrapReturningThreads);
+ Emit8(0);
+
+ // jnz RarePath
+ X86EmitCondJump(pForwardRef, X86CondCode::kJNZ);
+
+#if defined(FEATURE_COMINTEROP) && !defined(FEATURE_CORESYSTEM)
+ // If we are checking whether the current thread holds the loader lock, vector
+ // such cases to the rare disable pathway, where we can check again.
+ if (fCallIn && ShouldCheckLoaderLock())
+ {
+ X86EmitPushReg(kEAX);
+ X86EmitPushReg(kEDX);
+
+ if (ThreadReg == kECX)
+ X86EmitPushReg(kECX);
+
+ // BOOL AuxUlibIsDLLSynchronizationHeld(BOOL *IsHeld)
+ //
+    // So we need to be sure that the return value and the passed BOOL are both TRUE.
+ // If either is FALSE, then the call failed or the lock is not held. Either way, the
+ // probe should not fire.
+
+ X86EmitPushReg(kEDX); // BOOL temp
+ Emit8(0x54); // push ESP because arg is &temp
+ X86EmitCall(NewExternalCodeLabel((LPVOID) AuxUlibIsDLLSynchronizationHeld), 0);
+
+ // callee has popped.
+ X86EmitPopReg(kEDX); // recover temp
+
+ CodeLabel *pPopLabel = NewCodeLabel();
+
+ Emit16(0xc085); // test eax, eax
+ X86EmitCondJump(pPopLabel, X86CondCode::kJZ);
+
+ Emit16(0xd285); // test edx, edx
+
+ EmitLabel(pPopLabel); // retain the conditional flags across the pops
+
+ if (ThreadReg == kECX)
+ X86EmitPopReg(kECX);
+
+ X86EmitPopReg(kEDX);
+ X86EmitPopReg(kEAX);
+
+ X86EmitCondJump(pForwardRef, X86CondCode::kJNZ);
+ }
+#endif
+
+#ifdef _DEBUG
+ if (ThreadReg != kECX)
+ X86EmitDebugTrashReg(kECX);
+#endif
+
+}
+
+
+//-----------------------------------------------------------------------
+// Generates the out-of-line portion of the code to disable preemptive GC.
+// After the work is done, the code jumps back to the "pRejoinPoint"
+// which should be emitted right after the inline part is generated. However,
+// if we cannot execute managed code at this time, an exception is thrown
+// which cannot be caught by managed code.
+//
+// Assumptions:
+// ebx = Thread
+// Preserves
+// all registers except ecx, eax.
+//
+//-----------------------------------------------------------------------
+VOID StubLinkerCPU::EmitRareDisable(CodeLabel *pRejoinPoint)
+{
+ STANDARD_VM_CONTRACT;
+
+ X86EmitCall(NewExternalCodeLabel((LPVOID) StubRareDisableTHROW), 0);
+
+#ifdef _DEBUG
+ X86EmitDebugTrashReg(kECX);
+#endif
+ X86EmitNearJump(pRejoinPoint);
+}
+
+#ifdef FEATURE_COMINTEROP
+//-----------------------------------------------------------------------
+// Generates the out-of-line portion of the code to disable preemptive GC.
+// After the work is done, the code normally jumps back to the "pRejoinPoint"
+// which should be emitted right after the inline part is generated. However,
+// if we cannot execute managed code at this time, an HRESULT is returned
+// via the ExitPoint.
+//
+// Assumptions:
+// ebx = Thread
+// Preserves
+// all registers except ecx, eax.
+//
+//-----------------------------------------------------------------------
+VOID StubLinkerCPU::EmitRareDisableHRESULT(CodeLabel *pRejoinPoint, CodeLabel *pExitPoint)
+{
+ STANDARD_VM_CONTRACT;
+
+ X86EmitCall(NewExternalCodeLabel((LPVOID) StubRareDisableHR), 0);
+
+#ifdef _DEBUG
+ X86EmitDebugTrashReg(kECX);
+#endif
+
+ // test eax, eax ;; test the result of StubRareDisableHR
+ Emit16(0xc085);
+
+ // JZ pRejoinPoint
+ X86EmitCondJump(pRejoinPoint, X86CondCode::kJZ);
+
+ X86EmitNearJump(pExitPoint);
+}
+#endif // FEATURE_COMINTEROP
+
+#endif // _TARGET_X86_
+
+#endif // CROSSGEN_COMPILE
+
+
+VOID StubLinkerCPU::EmitShuffleThunk(ShuffleEntry *pShuffleEntryArray)
+{
+ STANDARD_VM_CONTRACT;
+
+#ifdef _TARGET_AMD64_
+
+ // mov SCRATCHREG,rsp
+ X86_64BitOperands();
+ Emit8(0x8b);
+ Emit8(0304 | (SCRATCH_REGISTER_X86REG << 3));
+
+ // save the real target in r11, will jump to it later. r10 is used below.
+ // Windows: mov r11, rcx
+ // Unix: mov r11, rdi
+ X86EmitMovRegReg(kR11, THIS_kREG);
+
+#ifdef UNIX_AMD64_ABI
+ for (ShuffleEntry* pEntry = pShuffleEntryArray; pEntry->srcofs != ShuffleEntry::SENTINEL; pEntry++)
+ {
+ if (pEntry->srcofs & ShuffleEntry::REGMASK)
+ {
+ // If source is present in register then destination must also be a register
+ _ASSERTE(pEntry->dstofs & ShuffleEntry::REGMASK);
+ // Both the srcofs and dstofs must be of the same kind of registers - float or general purpose.
+ _ASSERTE((pEntry->dstofs & ShuffleEntry::FPREGMASK) == (pEntry->srcofs & ShuffleEntry::FPREGMASK));
+
+ int dstRegIndex = pEntry->dstofs & ShuffleEntry::OFSREGMASK;
+ int srcRegIndex = pEntry->srcofs & ShuffleEntry::OFSREGMASK;
+
+ if (pEntry->srcofs & ShuffleEntry::FPREGMASK)
+ {
+ // movdqa dstReg, srcReg
+ X64EmitMovXmmXmm((X86Reg)(kXMM0 + dstRegIndex), (X86Reg)(kXMM0 + srcRegIndex));
+ }
+ else
+ {
+ // mov dstReg, srcReg
+ X86EmitMovRegReg(c_argRegs[dstRegIndex], c_argRegs[srcRegIndex]);
+ }
+ }
+ else if (pEntry->dstofs & ShuffleEntry::REGMASK)
+ {
+ // source must be on the stack
+ _ASSERTE(!(pEntry->srcofs & ShuffleEntry::REGMASK));
+
+ int dstRegIndex = pEntry->dstofs & ShuffleEntry::OFSREGMASK;
+ int srcOffset = (pEntry->srcofs + 1) * sizeof(void*);
+
+ if (pEntry->dstofs & ShuffleEntry::FPREGMASK)
+ {
+ if (pEntry->dstofs & ShuffleEntry::FPSINGLEMASK)
+ {
+ // movss dstReg, [rax + src]
+ X64EmitMovSSFromMem((X86Reg)(kXMM0 + dstRegIndex), SCRATCH_REGISTER_X86REG, srcOffset);
+ }
+ else
+ {
+ // movsd dstReg, [rax + src]
+ X64EmitMovSDFromMem((X86Reg)(kXMM0 + dstRegIndex), SCRATCH_REGISTER_X86REG, srcOffset);
+ }
+ }
+ else
+ {
+ // mov dstreg, [rax + src]
+ X86EmitIndexRegLoad(c_argRegs[dstRegIndex], SCRATCH_REGISTER_X86REG, srcOffset);
+ }
+ }
+ else
+ {
+ // source must be on the stack
+ _ASSERTE(!(pEntry->srcofs & ShuffleEntry::REGMASK));
+
+ // dest must be on the stack
+ _ASSERTE(!(pEntry->dstofs & ShuffleEntry::REGMASK));
+
+ // mov r10, [rax + src]
+ X86EmitIndexRegLoad (kR10, SCRATCH_REGISTER_X86REG, (pEntry->srcofs + 1) * sizeof(void*));
+
+ // mov [rax + dst], r10
+ X86EmitIndexRegStore (SCRATCH_REGISTER_X86REG, (pEntry->dstofs + 1) * sizeof(void*), kR10);
+ }
+ }
+#else // UNIX_AMD64_ABI
+ UINT step = 1;
+
+ if (pShuffleEntryArray->argtype == ELEMENT_TYPE_END)
+ {
+ // Special handling of open instance methods with return buffer. Move "this"
+ // by two slots, and leave the "retbufptr" between the two slots intact.
+
+ // mov rcx, r8
+ X86EmitMovRegReg(kRCX, kR8);
+
+ // Skip this entry
+ pShuffleEntryArray++;
+
+ // Skip this entry and leave retbufptr intact
+ step += 2;
+ }
+
+ // Now shuffle the args by one position:
+ // steps 1-3 : reg args (rcx, rdx, r8)
+ // step 4 : stack->reg arg (r9)
+ // step >4 : stack args
+
+ for(;
+ pShuffleEntryArray->srcofs != ShuffleEntry::SENTINEL;
+ step++, pShuffleEntryArray++)
+ {
+ switch (step)
+ {
+ case 1:
+ case 2:
+ case 3:
+ switch (pShuffleEntryArray->argtype)
+ {
+ case ELEMENT_TYPE_R4:
+ case ELEMENT_TYPE_R8:
+ // mov xmm-1#, xmm#
+ X64EmitMovXmmXmm((X86Reg)(step - 1), (X86Reg)(step));
+ break;
+ default:
+ // mov argRegs[step-1], argRegs[step]
+ X86EmitMovRegReg(c_argRegs[step-1], c_argRegs[step]);
+ break;
+ }
+ break;
+
+ case 4:
+ {
+ switch (pShuffleEntryArray->argtype)
+ {
+ case ELEMENT_TYPE_R4:
+ X64EmitMovSSFromMem(kXMM3, kRAX, 0x28);
+ break;
+
+ case ELEMENT_TYPE_R8:
+ X64EmitMovSDFromMem(kXMM3, kRAX, 0x28);
+ break;
+
+ default:
+ // mov r9, [rax + 28h]
+ X86EmitIndexRegLoad (kR9, SCRATCH_REGISTER_X86REG, 5*sizeof(void*));
+ }
+ break;
+ }
+ default:
+
+ // mov r10, [rax + (step+1)*sizeof(void*)]
+ X86EmitIndexRegLoad (kR10, SCRATCH_REGISTER_X86REG, (step+1)*sizeof(void*));
+
+ // mov [rax + step*sizeof(void*)], r10
+ X86EmitIndexRegStore (SCRATCH_REGISTER_X86REG, step*sizeof(void*), kR10);
+ }
+ }
+#endif // UNIX_AMD64_ABI
+
+ // mov r10, [r11 + Delegate._methodptraux]
+ X86EmitIndexRegLoad(kR10, kR11, DelegateObject::GetOffsetOfMethodPtrAux());
+ // add r11, DelegateObject::GetOffsetOfMethodPtrAux() - load the indirection cell into r11
+ X86EmitAddReg(kR11, DelegateObject::GetOffsetOfMethodPtrAux());
+ // Now jump to real target
+ // jmp r10
+ X86EmitR2ROp(0xff, (X86Reg)4, kR10);
+
+#else // _TARGET_AMD64_
+
+ UINT espadjust = 0;
+ BOOL haveMemMemMove = FALSE;
+
+ ShuffleEntry *pWalk = NULL;
+ for (pWalk = pShuffleEntryArray; pWalk->srcofs != ShuffleEntry::SENTINEL; pWalk++)
+ {
+ if (!(pWalk->dstofs & ShuffleEntry::REGMASK) &&
+ !(pWalk->srcofs & ShuffleEntry::REGMASK) &&
+ pWalk->srcofs != pWalk->dstofs)
+ {
+ haveMemMemMove = TRUE;
+ espadjust = sizeof(void*);
+ break;
+ }
+ }
+
+ if (haveMemMemMove)
+ {
+ // push ecx
+ X86EmitPushReg(THIS_kREG);
+ }
+ else
+ {
+ // mov eax, ecx
+ Emit8(0x8b);
+ Emit8(0300 | SCRATCH_REGISTER_X86REG << 3 | THIS_kREG);
+ }
+
+ UINT16 emptySpot = 0x4 | ShuffleEntry::REGMASK;
+
+ while (true)
+ {
+ for (pWalk = pShuffleEntryArray; pWalk->srcofs != ShuffleEntry::SENTINEL; pWalk++)
+ if (pWalk->dstofs == emptySpot)
+ break;
+
+ if (pWalk->srcofs == ShuffleEntry::SENTINEL)
+ break;
+
+ if ((pWalk->dstofs & ShuffleEntry::REGMASK))
+ {
+ if (pWalk->srcofs & ShuffleEntry::REGMASK)
+ {
+ // mov <dstReg>,<srcReg>
+ Emit8(0x8b);
+ Emit8(static_cast<UINT8>(0300 |
+ (GetX86ArgumentRegisterFromOffset( pWalk->dstofs & ShuffleEntry::OFSMASK ) << 3) |
+ (GetX86ArgumentRegisterFromOffset( pWalk->srcofs & ShuffleEntry::OFSMASK ))));
+ }
+ else
+ {
+ X86EmitEspOffset(0x8b, GetX86ArgumentRegisterFromOffset( pWalk->dstofs & ShuffleEntry::OFSMASK ), pWalk->srcofs+espadjust);
+ }
+ }
+ else
+ {
+ // if the destination is not a register, the source shouldn't be either.
+ _ASSERTE(!(pWalk->srcofs & ShuffleEntry::REGMASK));
+ if (pWalk->srcofs != pWalk->dstofs)
+ {
+ X86EmitEspOffset(0x8b, kEAX, pWalk->srcofs+espadjust);
+ X86EmitEspOffset(0x89, kEAX, pWalk->dstofs+espadjust);
+ }
+ }
+ emptySpot = pWalk->srcofs;
+ }
+
+ // Capture the stacksizedelta while we're at the end of the list.
+ _ASSERTE(pWalk->srcofs == ShuffleEntry::SENTINEL);
+
+ if (haveMemMemMove)
+ X86EmitPopReg(SCRATCH_REGISTER_X86REG);
+
+ if (pWalk->stacksizedelta)
+ X86EmitAddEsp(pWalk->stacksizedelta);
+
+ // Now jump to real target
+ // JMP [SCRATCHREG]
+ // we need to jump indirect so that for virtual delegates eax contains a pointer to the indirection cell
+ X86EmitAddReg(SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtrAux());
+ static const BYTE bjmpeax[] = { 0xff, 0x20 };
+ EmitBytes(bjmpeax, sizeof(bjmpeax));
+
+#endif // _TARGET_AMD64_
+}
+
+
+#if !defined(CROSSGEN_COMPILE) && !defined(FEATURE_STUBS_AS_IL)
+
+//===========================================================================
+// Computes hash code for MulticastDelegate.Invoke()
+UINT_PTR StubLinkerCPU::HashMulticastInvoke(MetaSig* pSig)
+{
+ CONTRACTL
+ {
+ THROWS;
+ GC_TRIGGERS;
+ }
+ CONTRACTL_END;
+
+ ArgIterator argit(pSig);
+
+ UINT numStackBytes = argit.SizeOfArgStack();
+
+ if (numStackBytes > 0x7FFF)
+ COMPlusThrow(kNotSupportedException, W("NotSupported_TooManyArgs"));
+
+#ifdef _TARGET_AMD64_
+ // Generate a hash key as follows:
+ // UINT Arg0Type:2; // R4 (1), R8 (2), other (3)
+ // UINT Arg1Type:2; // R4 (1), R8 (2), other (3)
+ // UINT Arg2Type:2; // R4 (1), R8 (2), other (3)
+ // UINT Arg3Type:2; // R4 (1), R8 (2), other (3)
+    //          UINT NumStackSlots:24;  // number of stack argument slots (numStackBytes / sizeof(void*))
+ // (This should cover all the prestub variations)
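+    //
+    // Worked example (hypothetical signature): an instance method taking (int, double)
+    // with no stack arguments hashes to 0x2F:
+    //      'this'          -> 3 << 0
+    //      int             -> 3 << 2
+    //      double          -> 2 << 4
+    //      0 stack slots   -> 0 << 8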
+
+ _ASSERTE(!(numStackBytes & 7));
+ UINT hash = (numStackBytes / sizeof(void*)) << 8;
+
+ UINT argNum = 0;
+
+ // NextArg() doesn't take into account the "this" pointer.
+ // That's why we have to special case it here.
+ if (argit.HasThis())
+ {
+ hash |= 3 << (2*argNum);
+ argNum++;
+ }
+
+ if (argit.HasRetBuffArg())
+ {
+ hash |= 3 << (2*argNum);
+ argNum++;
+ }
+
+ for (; argNum < 4; argNum++)
+ {
+ switch (pSig->NextArgNormalized())
+ {
+ case ELEMENT_TYPE_END:
+ argNum = 4;
+ break;
+ case ELEMENT_TYPE_R4:
+ hash |= 1 << (2*argNum);
+ break;
+ case ELEMENT_TYPE_R8:
+ hash |= 2 << (2*argNum);
+ break;
+ default:
+ hash |= 3 << (2*argNum);
+ break;
+ }
+ }
+
+#else // _TARGET_AMD64_
+
+ // check if the function is returning a float, in which case the stub has to take
+ // care of popping the floating point stack except for the last invocation
+
+ _ASSERTE(!(numStackBytes & 3));
+
+ UINT hash = numStackBytes;
+
+ if (CorTypeInfo::IsFloat(pSig->GetReturnType()))
+ {
+ hash |= 2;
+ }
+#endif // _TARGET_AMD64_
+
+ return hash;
+}
+
+#ifdef _TARGET_X86_
+//===========================================================================
+// Emits code for MulticastDelegate.Invoke()
+VOID StubLinkerCPU::EmitDelegateInvoke()
+{
+ STANDARD_VM_CONTRACT;
+
+ CodeLabel *pNullLabel = NewCodeLabel();
+
+ // test THISREG, THISREG
+ X86EmitR2ROp(0x85, THIS_kREG, THIS_kREG);
+
+ // jz null
+ X86EmitCondJump(pNullLabel, X86CondCode::kJZ);
+
+ // mov SCRATCHREG, [THISREG + Delegate.FP] ; Save target stub in register
+ X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfMethodPtr());
+
+ // mov THISREG, [THISREG + Delegate.OR] ; replace "this" pointer
+ X86EmitIndexRegLoad(THIS_kREG, THIS_kREG, DelegateObject::GetOffsetOfTarget());
+
+ // jmp SCRATCHREG
+ Emit16(0xe0ff | (SCRATCH_REGISTER_X86REG<<8));
+
+ // Do a null throw
+ EmitLabel(pNullLabel);
+
+ // mov ECX, CORINFO_NullReferenceException
+ Emit8(0xb8+kECX);
+ Emit32(CORINFO_NullReferenceException);
+
+ X86EmitCall(NewExternalCodeLabel(GetEEFuncEntryPoint(JIT_InternalThrowFromHelper)), 0);
+
+ X86EmitReturn(0);
+}
+#endif // _TARGET_X86_
+
+VOID StubLinkerCPU::EmitMulticastInvoke(UINT_PTR hash)
+{
+ STANDARD_VM_CONTRACT;
+
+ int thisRegOffset = MulticastFrame::GetOffsetOfTransitionBlock() +
+ TransitionBlock::GetOffsetOfArgumentRegisters() + offsetof(ArgumentRegisters, THIS_REG);
+
+ // push the methoddesc on the stack
+ // mov eax, [ecx + offsetof(_methodAuxPtr)]
+ X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfMethodPtrAux());
+
+ // Push a MulticastFrame on the stack.
+ EmitMethodStubProlog(MulticastFrame::GetMethodFrameVPtr(), MulticastFrame::GetOffsetOfTransitionBlock());
+
+#ifdef _TARGET_X86_
+ // Frame is ready to be inspected by debugger for patch location
+ EmitPatchLabel();
+#else // _TARGET_AMD64_
+
+ // Save register arguments in their home locations.
+ // Non-FP registers are already saved by EmitMethodStubProlog.
+ // (Assumes Sig.NextArg() does not enum RetBuffArg or "this".)
+
+ int argNum = 0;
+ __int32 argOfs = MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs();
+ CorElementType argTypes[4];
+ CorElementType argType;
+
+ // 'this'
+ argOfs += sizeof(void*);
+ argTypes[argNum] = ELEMENT_TYPE_I8;
+ argNum++;
+
+ do
+ {
+ argType = ELEMENT_TYPE_END;
+
+ switch ((hash >> (2 * argNum)) & 3)
+ {
+ case 0:
+ argType = ELEMENT_TYPE_END;
+ break;
+ case 1:
+ argType = ELEMENT_TYPE_R4;
+
+ // movss dword ptr [rsp + argOfs], xmm?
+ X64EmitMovSSToMem((X86Reg)argNum, kRSI, argOfs);
+ break;
+ case 2:
+ argType = ELEMENT_TYPE_R8;
+
+ // movsd qword ptr [rsp + argOfs], xmm?
+ X64EmitMovSDToMem((X86Reg)argNum, kRSI, argOfs);
+ break;
+ default:
+ argType = ELEMENT_TYPE_I;
+ break;
+ }
+
+ argOfs += sizeof(void*);
+ argTypes[argNum] = argType;
+ argNum++;
+ }
+ while (argNum < 4 && ELEMENT_TYPE_END != argType);
+
+ _ASSERTE(4 == argNum || ELEMENT_TYPE_END == argTypes[argNum-1]);
+
+#endif // _TARGET_AMD64_
+
+    // TODO: on AMD64, pick different regs for locals so we don't need the pushes
+
+ // push edi ;; Save EDI (want to use it as loop index)
+ X86EmitPushReg(kEDI);
+
+ // xor edi,edi ;; Loop counter: EDI=0,1,2...
+ X86EmitZeroOutReg(kEDI);
+
+ CodeLabel *pLoopLabel = NewCodeLabel();
+ CodeLabel *pEndLoopLabel = NewCodeLabel();
+
+ EmitLabel(pLoopLabel);
+
+ // Entry:
+ // EDI == iteration counter
+
+ // mov ecx, [esi + this] ;; get delegate
+ X86EmitIndexRegLoad(THIS_kREG, kESI, thisRegOffset);
+
+ // cmp edi,[ecx]._invocationCount
+ X86EmitOp(0x3b, kEDI, THIS_kREG, DelegateObject::GetOffsetOfInvocationCount());
+
+ // je ENDLOOP
+ X86EmitCondJump(pEndLoopLabel, X86CondCode::kJZ);
+
+#ifdef _TARGET_AMD64_
+
+ INT32 numStackBytes = (INT32)((hash >> 8) * sizeof(void *));
+
+ INT32 stackUsed, numStackArgs, ofs;
+
+ // Push any stack args, plus an extra location
+ // for rsp alignment if needed
+
+ numStackArgs = numStackBytes / sizeof(void*);
+
+ // 1 push above, so stack is currently misaligned
+ const unsigned STACK_ALIGN_ADJUST = 8;
+
+ if (!numStackArgs)
+ {
+ // sub rsp, 28h ;; 4 reg arg home locs + rsp alignment
+ stackUsed = 0x20 + STACK_ALIGN_ADJUST;
+ X86EmitSubEsp(stackUsed);
+ }
+ else
+ {
+ stackUsed = numStackArgs * sizeof(void*);
+
+ // If the stack is misaligned, then an odd number of arguments
+ // will naturally align the stack.
+ if ( ((numStackArgs & 1) == 0)
+ != (STACK_ALIGN_ADJUST == 0))
+ {
+ X86EmitPushReg(kRAX);
+ stackUsed += sizeof(void*);
+ }
+
+ ofs = MulticastFrame::GetOffsetOfTransitionBlock() +
+ TransitionBlock::GetOffsetOfArgs() + sizeof(ArgumentRegisters) + numStackBytes;
+
+ while (numStackArgs--)
+ {
+ ofs -= sizeof(void*);
+
+ // push [rsi + ofs] ;; Push stack args
+ X86EmitIndexPush(kESI, ofs);
+ }
+
+ // sub rsp, 20h ;; Create 4 reg arg home locations
+ X86EmitSubEsp(0x20);
+
+ stackUsed += 0x20;
+ }
+
+ for(
+ argNum = 0, argOfs = MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs();
+ argNum < 4 && argTypes[argNum] != ELEMENT_TYPE_END;
+ argNum++, argOfs += sizeof(void*)
+ )
+ {
+ switch (argTypes[argNum])
+ {
+ case ELEMENT_TYPE_R4:
+ // movss xmm?, dword ptr [rsi + argOfs]
+ X64EmitMovSSFromMem((X86Reg)argNum, kRSI, argOfs);
+ break;
+ case ELEMENT_TYPE_R8:
+ // movsd xmm?, qword ptr [rsi + argOfs]
+ X64EmitMovSDFromMem((X86Reg)argNum, kRSI, argOfs);
+ break;
+ default:
+ if (c_argRegs[argNum] != THIS_kREG)
+ {
+ // mov r*, [rsi + dstOfs]
+ X86EmitIndexRegLoad(c_argRegs[argNum], kESI,argOfs);
+ }
+ break;
+ } // switch
+ }
+
+ // mov SCRATCHREG, [rcx+Delegate._invocationList] ;;fetch invocation list
+ X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationList());
+
+ // mov SCRATCHREG, [SCRATCHREG+m_Array+rdi*8] ;; index into invocation list
+ X86EmitOp(0x8b, kEAX, SCRATCH_REGISTER_X86REG, static_cast<int>(PtrArray::GetDataOffset()), kEDI, sizeof(void*), k64BitOp);
+
+ // mov THISREG, [SCRATCHREG+Delegate.object] ;;replace "this" pointer
+ X86EmitIndexRegLoad(THIS_kREG, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfTarget());
+
+ // call [SCRATCHREG+Delegate.target] ;; call current subscriber
+ X86EmitOffsetModRM(0xff, (X86Reg)2, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtr());
+
+ // add rsp, stackUsed ;; Clean up stack
+ X86EmitAddEsp(stackUsed);
+
+ // inc edi
+ Emit16(0xC7FF);
+
+#else // _TARGET_AMD64_
+
+ UINT16 numStackBytes = static_cast<UINT16>(hash & ~3);
+
+ // ..repush & reenregister args..
+ INT32 ofs = numStackBytes + MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs();
+ while (ofs != MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs())
+ {
+ ofs -= sizeof(void*);
+ X86EmitIndexPush(kESI, ofs);
+ }
+
+ #define ARGUMENT_REGISTER(regname) if (k##regname != THIS_kREG) { X86EmitIndexRegLoad(k##regname, kESI, \
+ offsetof(ArgumentRegisters, regname) + MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgumentRegisters()); }
+
+ ENUM_ARGUMENT_REGISTERS_BACKWARD();
+
+ #undef ARGUMENT_REGISTER
+
+ // mov SCRATCHREG, [ecx+Delegate._invocationList] ;;fetch invocation list
+ X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationList());
+
+ // mov SCRATCHREG, [SCRATCHREG+m_Array+edi*4] ;; index into invocation list
+ X86EmitOp(0x8b, kEAX, SCRATCH_REGISTER_X86REG, PtrArray::GetDataOffset(), kEDI, sizeof(void*));
+
+ // mov THISREG, [SCRATCHREG+Delegate.object] ;;replace "this" pointer
+ X86EmitIndexRegLoad(THIS_kREG, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfTarget());
+
+ // call [SCRATCHREG+Delegate.target] ;; call current subscriber
+ X86EmitOffsetModRM(0xff, (X86Reg)2, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtr());
+ INDEBUG(Emit8(0x90)); // Emit a nop after the call in debug so that
+ // we know that this is a call that can directly call
+ // managed code
+
+ // inc edi
+ Emit8(0x47);
+
+ if (hash & 2) // CorTypeInfo::IsFloat(pSig->GetReturnType())
+ {
+ // if the return value is a float/double check if we just did the last call - if not,
+ // emit the pop of the float stack
+
+ // mov SCRATCHREG, [esi + this] ;; get delegate
+ X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, kESI, thisRegOffset);
+
+ // cmp edi,[SCRATCHREG]._invocationCount
+ X86EmitOffsetModRM(0x3b, kEDI, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfInvocationCount());
+
+ CodeLabel *pNoFloatStackPopLabel = NewCodeLabel();
+
+ // je NOFLOATSTACKPOP
+ X86EmitCondJump(pNoFloatStackPopLabel, X86CondCode::kJZ);
+
+ // fstp 0
+ Emit16(0xd8dd);
+
+ // NoFloatStackPopLabel:
+ EmitLabel(pNoFloatStackPopLabel);
+ }
+
+#endif // _TARGET_AMD64_
+
+ // The debugger may need to stop here, so grab the offset of this code.
+ EmitPatchLabel();
+
+ // jmp LOOP
+ X86EmitNearJump(pLoopLabel);
+
+ //ENDLOOP:
+ EmitLabel(pEndLoopLabel);
+
+ // pop edi ;; Restore edi
+ X86EmitPopReg(kEDI);
+
+ EmitCheckGSCookie(kESI, MulticastFrame::GetOffsetOfGSCookie());
+
+ // Epilog
+ EmitMethodStubEpilog(numStackBytes, MulticastFrame::GetOffsetOfTransitionBlock());
+}
+
+VOID StubLinkerCPU::EmitSecureDelegateInvoke(UINT_PTR hash)
+{
+ STANDARD_VM_CONTRACT;
+
+ int thisRegOffset = SecureDelegateFrame::GetOffsetOfTransitionBlock() +
+ TransitionBlock::GetOffsetOfArgumentRegisters() + offsetof(ArgumentRegisters, THIS_REG);
+
+ // push the methoddesc on the stack
+ // mov eax, [ecx + offsetof(_invocationCount)]
+ X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationCount());
+
+ // Push a SecureDelegateFrame on the stack.
+ EmitMethodStubProlog(SecureDelegateFrame::GetMethodFrameVPtr(), SecureDelegateFrame::GetOffsetOfTransitionBlock());
+
+#ifdef _TARGET_X86_
+ // Frame is ready to be inspected by debugger for patch location
+ EmitPatchLabel();
+#else // _TARGET_AMD64_
+
+ // Save register arguments in their home locations.
+ // Non-FP registers are already saved by EmitMethodStubProlog.
+ // (Assumes Sig.NextArg() does not enum RetBuffArg or "this".)
+
+ int argNum = 0;
+ __int32 argOfs = SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs();
+ CorElementType argTypes[4];
+ CorElementType argType;
+
+ // 'this'
+ argOfs += sizeof(void*);
+ argTypes[argNum] = ELEMENT_TYPE_I8;
+ argNum++;
+
+ do
+ {
+ argType = ELEMENT_TYPE_END;
+
+ switch ((hash >> (2 * argNum)) & 3)
+ {
+ case 0:
+ argType = ELEMENT_TYPE_END;
+ break;
+ case 1:
+ argType = ELEMENT_TYPE_R4;
+
+ // movss dword ptr [rsp + argOfs], xmm?
+ X64EmitMovSSToMem((X86Reg)argNum, kRSI, argOfs);
+ break;
+ case 2:
+ argType = ELEMENT_TYPE_R8;
+
+ // movsd qword ptr [rsp + argOfs], xmm?
+ X64EmitMovSSToMem((X86Reg)argNum, kRSI, argOfs);
+ break;
+ default:
+ argType = ELEMENT_TYPE_I;
+ break;
+ }
+
+ argOfs += sizeof(void*);
+ argTypes[argNum] = argType;
+ argNum++;
+ }
+ while (argNum < 4 && ELEMENT_TYPE_END != argType);
+
+ _ASSERTE(4 == argNum || ELEMENT_TYPE_END == argTypes[argNum-1]);
+
+#endif // _TARGET_AMD64_
+
+ // mov ecx, [esi + this] ;; get delegate
+ X86EmitIndexRegLoad(THIS_kREG, kESI, thisRegOffset);
+
+#ifdef _TARGET_AMD64_
+
+ INT32 numStackBytes = (INT32)((hash >> 8) * sizeof(void *));
+
+ INT32 stackUsed, numStackArgs, ofs;
+
+ // Push any stack args, plus an extra location
+ // for rsp alignment if needed
+
+ numStackArgs = numStackBytes / sizeof(void*);
+
+ // 1 push above, so stack is currently misaligned
+ const unsigned STACK_ALIGN_ADJUST = 0;
+
+ if (!numStackArgs)
+ {
+ // sub rsp, 20h ;; 4 reg arg home locs (+ STACK_ALIGN_ADJUST, which is 0 here)
+ stackUsed = 0x20 + STACK_ALIGN_ADJUST;
+ X86EmitSubEsp(stackUsed);
+ }
+ else
+ {
+ stackUsed = numStackArgs * sizeof(void*);
+
+ // If the stack is misaligned, then an odd number of arguments
+ // will naturally align the stack.
+ if ( ((numStackArgs & 1) == 0)
+ != (STACK_ALIGN_ADJUST == 0))
+ {
+ X86EmitPushReg(kRAX);
+ stackUsed += sizeof(void*);
+ }
+
+ ofs = SecureDelegateFrame::GetOffsetOfTransitionBlock() +
+ TransitionBlock::GetOffsetOfArgs() + sizeof(ArgumentRegisters) + numStackBytes;
+
+ while (numStackArgs--)
+ {
+ ofs -= sizeof(void*);
+
+ // push [rsi + ofs] ;; Push stack args
+ X86EmitIndexPush(kESI, ofs);
+ }
+
+ // sub rsp, 20h ;; Create 4 reg arg home locations
+ X86EmitSubEsp(0x20);
+
+ stackUsed += 0x20;
+ }
+
+ int thisArgNum = 0;
+
+ for(
+ argNum = 0, argOfs = SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs();
+ argNum < 4 && argTypes[argNum] != ELEMENT_TYPE_END;
+ argNum++, argOfs += sizeof(void*)
+ )
+ {
+ switch (argTypes[argNum])
+ {
+ case ELEMENT_TYPE_R4:
+ // movss xmm?, dword ptr [rsi + argOfs]
+ X64EmitMovSSFromMem((X86Reg)argNum, kRSI, argOfs);
+ break;
+ case ELEMENT_TYPE_R8:
+ // movsd xmm?, qword ptr [rsi + argOfs]
+ X64EmitMovSDFromMem((X86Reg)argNum, kRSI, argOfs);
+ break;
+ default:
+ if (c_argRegs[argNum] != THIS_kREG)
+ {
+ // mov r*, [rsi + dstOfs]
+ X86EmitIndexRegLoad(c_argRegs[argNum], kESI,argOfs);
+ }
+ break;
+ } // switch
+ }
+
+ // mov SCRATCHREG, [rcx+Delegate._invocationList] ;;fetch the inner delegate
+ X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationList());
+
+ // mov THISREG, [SCRATCHREG+Delegate.object] ;;replace "this" pointer
+ X86EmitIndexRegLoad(c_argRegs[thisArgNum], SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfTarget());
+
+ // call [SCRATCHREG+Delegate.target] ;; call current subscriber
+ X86EmitOffsetModRM(0xff, (X86Reg)2, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtr());
+
+ // add rsp, stackUsed ;; Clean up stack
+ X86EmitAddEsp(stackUsed);
+
+#else // _TARGET_AMD64_
+
+ UINT16 numStackBytes = static_cast<UINT16>(hash & ~3);
+
+ // ..repush & reenregister args..
+ INT32 ofs = numStackBytes + SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs();
+ while (ofs != SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs())
+ {
+ ofs -= sizeof(void*);
+ X86EmitIndexPush(kESI, ofs);
+ }
+
+ #define ARGUMENT_REGISTER(regname) if (k##regname != THIS_kREG) { X86EmitIndexRegLoad(k##regname, kESI, \
+ offsetof(ArgumentRegisters, regname) + SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgumentRegisters()); }
+
+ ENUM_ARGUMENT_REGISTERS_BACKWARD();
+
+ #undef ARGUMENT_REGISTER
+
+ // mov SCRATCHREG, [ecx+Delegate._invocationList] ;;fetch the inner delegate
+ X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationList());
+
+ // mov THISREG, [SCRATCHREG+Delegate.object] ;;replace "this" pointer
+ X86EmitIndexRegLoad(THIS_kREG, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfTarget());
+
+ // call [SCRATCHREG+Delegate.target] ;; call current subscriber
+ X86EmitOffsetModRM(0xff, (X86Reg)2, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtr());
+ INDEBUG(Emit8(0x90)); // Emit a nop after the call in debug so that
+ // we know that this is a call that can directly call
+ // managed code
+
+#endif // _TARGET_AMD64_
+
+ // The debugger may need to stop here, so grab the offset of this code.
+ EmitPatchLabel();
+
+ EmitCheckGSCookie(kESI, SecureDelegateFrame::GetOffsetOfGSCookie());
+
+ // Epilog
+ EmitMethodStubEpilog(numStackBytes, SecureDelegateFrame::GetOffsetOfTransitionBlock());
+}
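+
+#ifdef _TARGET_AMD64_
+// Illustrative sketch only (never called, and the helper name is made up): this is how
+// the delegate-invoke stubs above read the 'hash' signature cookie on AMD64 -- two bits
+// per register argument after 'this', and the stack argument slot count in bits 8 and up.
+// (On x86 the same cookie instead carries the stack argument byte count in hash & ~3 and
+// a float-return flag in bit 1.)
+static void DecodeDelegateInvokeHashSketch(UINT_PTR hash, CorElementType (&argKinds)[4], INT32 *pNumStackBytes)
+{
+ argKinds[0] = ELEMENT_TYPE_I8; // 'this'
+ argKinds[1] = argKinds[2] = argKinds[3] = ELEMENT_TYPE_END;
+
+ for (int i = 1; i < 4; i++)
+ {
+ UINT_PTR kind = (hash >> (2 * i)) & 3;
+ if (kind == 0)
+ break; // no more register arguments
+ argKinds[i] = (kind == 1) ? ELEMENT_TYPE_R4 // float in xmm
+ : (kind == 2) ? ELEMENT_TYPE_R8 // double in xmm
+ : ELEMENT_TYPE_I; // integer/pointer
+ }
+
+ *pNumStackBytes = (INT32)((hash >> 8) * sizeof(void *));
+}
+#endif // _TARGET_AMD64_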
+
+#ifndef FEATURE_ARRAYSTUB_AS_IL
+
+// Little helper to generate code to move nbytes bytes of non-ref memory
+
+void generate_noref_copy (unsigned nbytes, StubLinkerCPU* sl)
+{
+ CONTRACTL
+ {
+ THROWS;
+ GC_NOTRIGGER;
+ INJECT_FAULT(COMPlusThrowOM(););
+ }
+ CONTRACTL_END;
+
+ // If the size is pointer-aligned, we'll use movsd
+ if (IS_ALIGNED(nbytes, sizeof(void*)))
+ {
+ // If there are less than 4 pointers to copy, "unroll" the "rep movsd"
+ if (nbytes <= 3*sizeof(void*))
+ {
+ while (nbytes > 0)
+ {
+ // movsd
+ sl->X86_64BitOperands();
+ sl->Emit8(0xa5);
+
+ nbytes -= sizeof(void*);
+ }
+ }
+ else
+ {
+ // mov ECX, size / sizeof(void*)
+ sl->Emit8(0xb8+kECX);
+ sl->Emit32(nbytes / sizeof(void*));
+
+ // rep movsd
+ sl->Emit8(0xf3);
+ sl->X86_64BitOperands();
+ sl->Emit8(0xa5);
+ }
+ }
+ else
+ {
+ // mov ECX, size
+ sl->Emit8(0xb8+kECX);
+ sl->Emit32(nbytes);
+
+ // rep movsb
+ sl->Emit16(0xa4f3);
+ }
+}
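+
+// Illustrative sketch only (not emitted and never called): at run time the sequence
+// produced by generate_noref_copy behaves like the loop below, with ESI/RSI as the
+// source and EDI/RDI as the destination, both advancing as the copy proceeds.
+static void NorefCopyEquivalentSketch(BYTE *dst, const BYTE *src, unsigned nbytes)
+{
+ if (nbytes % sizeof(void *) == 0)
+ {
+ // movsd / rep movsd (movsq on AMD64): one pointer-sized word per iteration
+ for (unsigned i = 0; i < nbytes; i += sizeof(void *))
+ *(void **)(dst + i) = *(void * const *)(src + i);
+ }
+ else
+ {
+ // rep movsb: one byte per iteration
+ for (unsigned i = 0; i < nbytes; i++)
+ dst[i] = src[i];
+ }
+}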
+
+
+X86Reg LoadArrayOpArg (
+ UINT32 idxloc,
+ StubLinkerCPU *psl,
+ X86Reg kRegIfFromMem,
+ UINT ofsadjust
+ AMD64_ARG(StubLinkerCPU::X86OperandSize OperandSize = StubLinkerCPU::k64BitOp)
+ )
+{
+ STANDARD_VM_CONTRACT;
+
+ if (!TransitionBlock::IsStackArgumentOffset(idxloc))
+ return GetX86ArgumentRegisterFromOffset(idxloc - TransitionBlock::GetOffsetOfArgumentRegisters());
+
+ psl->X86EmitEspOffset(0x8b, kRegIfFromMem, idxloc + ofsadjust AMD64_ARG(OperandSize));
+ return kRegIfFromMem;
+}
+
+VOID StubLinkerCPU::EmitArrayOpStubThrow(unsigned exConst, unsigned cbRetArg)
+{
+ STANDARD_VM_CONTRACT;
+
+ //ArrayOpStub*Exception
+ X86EmitPopReg(kESI);
+ X86EmitPopReg(kEDI);
+
+ // mov ecx, exConst ; e.g. CORINFO_NullReferenceException_ASM
+ Emit8(0xb8 | kECX);
+ Emit32(exConst);
+ //InternalExceptionWorker
+
+ X86EmitPopReg(kEDX);
+ // add esp, cbRetArg ; pArrayOpScript->m_cbretpop (was add esp, eax)
+ Emit8(0x81);
+ Emit8(0xc0 | 0x4);
+ Emit32(cbRetArg);
+ X86EmitPushReg(kEDX);
+ X86EmitNearJump(NewExternalCodeLabel((PVOID)JIT_InternalThrow));
+}
+
+//===========================================================================
+// Emits code to do an array operation.
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable:21000) // Suppress PREFast warning about overly large function
+#endif
+VOID StubLinkerCPU::EmitArrayOpStub(const ArrayOpScript* pArrayOpScript)
+{
+ STANDARD_VM_CONTRACT;
+
+ // This is the offset to the parameters/what's already pushed on the stack:
+ // return address.
+ const INT locsize = sizeof(void*);
+
+ // ArrayOpScript's stack offsets are built using ArgIterator, which
+ // assumes a TransitionBlock has been pushed, which is not the case
+ // here. rsp + ofsadjust should point at the first argument. Any further
+ // stack modifications below need to adjust ofsadjust appropriately.
+ // baseofsadjust needs to be the stack adjustment at the entry point -
+ // this is used further below to compute how much stack space was used.
+
+ INT ofsadjust = locsize - (INT)sizeof(TransitionBlock);
+
+ // Register usage
+ //
+ // x86 AMD64
+ // Inputs:
+ // managed array THIS_kREG (ecx) THIS_kREG (rcx)
+ // index 0 edx rdx
+ // index 1/value <stack> r8
+ // index 2/value <stack> r9
+ // expected element type for LOADADDR eax rax rdx
+ // Working registers:
+ // total (accumulates unscaled offset) edi r10
+ // factor (accumulates the slice factor) esi r11
+ X86Reg kArrayRefReg = THIS_kREG;
+#ifdef _TARGET_AMD64_
+ const X86Reg kArrayMTReg = kR10;
+ const X86Reg kTotalReg = kR10;
+ const X86Reg kFactorReg = kR11;
+#else
+ const X86Reg kArrayMTReg = kESI;
+ const X86Reg kTotalReg = kEDI;
+ const X86Reg kFactorReg = kESI;
+#endif
+
+#ifdef _TARGET_AMD64_
+ // Simplifying assumption for fNeedPrologue.
+ _ASSERTE(!pArrayOpScript->m_gcDesc || (pArrayOpScript->m_flags & ArrayOpScript::NEEDSWRITEBARRIER));
+ // Simplifying assumption for saving rsi and rdi.
+ _ASSERTE(!(pArrayOpScript->m_flags & ArrayOpScript::HASRETVALBUFFER) || ArgIterator::IsArgPassedByRef(pArrayOpScript->m_elemsize));
+
+ // Cases where we need to make calls
+ BOOL fNeedScratchArea = ( (pArrayOpScript->m_flags & (ArrayOpScript::NEEDSTYPECHECK | ArrayOpScript::NEEDSWRITEBARRIER))
+ && ( pArrayOpScript->m_op == ArrayOpScript::STORE
+ || ( pArrayOpScript->m_op == ArrayOpScript::LOAD
+ && (pArrayOpScript->m_flags & ArrayOpScript::HASRETVALBUFFER))));
+
+ // Cases where we need to copy large values
+ BOOL fNeedRSIRDI = ( ArgIterator::IsArgPassedByRef(pArrayOpScript->m_elemsize)
+ && ArrayOpScript::LOADADDR != pArrayOpScript->m_op);
+
+ BOOL fNeedPrologue = ( fNeedScratchArea
+ || fNeedRSIRDI);
+#endif
+
+ X86Reg kValueReg;
+
+ CodeLabel *Epilog = NewCodeLabel();
+ CodeLabel *Inner_nullexception = NewCodeLabel();
+ CodeLabel *Inner_rangeexception = NewCodeLabel();
+ CodeLabel *Inner_typeMismatchexception = NULL;
+
+ //
+ // Set up the stack frame.
+ //
+ //
+ // x86:
+ // value
+ // <index n-1>
+ // ...
+ // <index 1>
+ // return address
+ // saved edi
+ // esp -> saved esi
+ //
+ //
+ // AMD64:
+ // value, if rank > 2
+ // ...
+ // + 0x48 more indices
+ // + 0x40 r9 home
+ // + 0x38 r8 home
+ // + 0x30 rdx home
+ // + 0x28 rcx home
+ // + 0x20 return address
+ // + 0x18 scratch area (callee's r9)
+ // + 0x10 scratch area (callee's r8)
+ // + 8 scratch area (callee's rdx)
+ // rsp -> scratch area (callee's rcx)
+ //
+ // If the element type is a value class w/ object references, then rsi
+ // and rdi will also be saved above the scratch area:
+ //
+ // ...
+ // + 0x28 saved rsi
+ // + 0x20 saved rdi
+ // + 0x18 scratch area (callee's r9)
+ // + 0x10 scratch area (callee's r8)
+ // + 8 scratch area (callee's rdx)
+ // rsp -> scratch area (callee's rcx)
+ //
+ // And if no call or movsb is necessary, then the scratch area sits
+ // directly under the MethodDesc*.
+
+ BOOL fSavedESI = FALSE;
+ BOOL fSavedEDI = FALSE;
+
+#ifdef _TARGET_AMD64_
+ if (fNeedPrologue)
+ {
+ // Save argument registers if we'll be making a call before using
+ // them. Note that in this case the element value will always be an
+ // object type, and never be in an xmm register.
+
+ if ( (pArrayOpScript->m_flags & ArrayOpScript::NEEDSTYPECHECK)
+ && ArrayOpScript::STORE == pArrayOpScript->m_op)
+ {
+ // mov [rsp+0x08], rcx
+ X86EmitEspOffset(0x89, kRCX, 0x08);
+ X86EmitEspOffset(0x89, kRDX, 0x10);
+ X86EmitEspOffset(0x89, kR8, 0x18);
+
+ if (pArrayOpScript->m_rank >= 2)
+ X86EmitEspOffset(0x89, kR9, 0x20);
+ }
+
+ if (fNeedRSIRDI)
+ {
+ X86EmitPushReg(kRSI);
+ X86EmitPushReg(kRDI);
+
+ fSavedESI = fSavedEDI = TRUE;
+
+ ofsadjust += 0x10;
+ }
+
+ if (fNeedScratchArea)
+ {
+ // Callee scratch area (0x8 for aligned esp)
+ X86EmitSubEsp(sizeof(ArgumentRegisters) + 0x8);
+ ofsadjust += sizeof(ArgumentRegisters) + 0x8;
+ }
+ }
+#else
+ // Preserve the callee-saved registers
+ // NOTE: if you change the sequence of these pushes, you must also update:
+ // ArrayOpStubNullException
+ // ArrayOpStubRangeException
+ // ArrayOpStubTypeMismatchException
+ _ASSERTE( kTotalReg == kEDI);
+ X86EmitPushReg(kTotalReg);
+ _ASSERTE( kFactorReg == kESI);
+ X86EmitPushReg(kFactorReg);
+
+ fSavedESI = fSavedEDI = TRUE;
+
+ ofsadjust += 2*sizeof(void*);
+#endif
+
+ // Check for null.
+ X86EmitR2ROp(0x85, kArrayRefReg, kArrayRefReg); // TEST ECX, ECX
+ X86EmitCondJump(Inner_nullexception, X86CondCode::kJZ); // jz Inner_nullexception
+
+ // Do Type Check if needed
+ if (pArrayOpScript->m_flags & ArrayOpScript::NEEDSTYPECHECK)
+ {
+ if (pArrayOpScript->m_op == ArrayOpScript::STORE)
+ {
+ // Get the value to be stored.
+ kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, kEAX, ofsadjust);
+
+ X86EmitR2ROp(0x85, kValueReg, kValueReg); // TEST kValueReg, kValueReg
+ CodeLabel *CheckPassed = NewCodeLabel();
+ X86EmitCondJump(CheckPassed, X86CondCode::kJZ); // storing NULL is OK
+
+ // mov EAX, element type ; possibly trashes kValueReg
+ X86EmitOp(0x8b, kArrayMTReg, kArrayRefReg, 0 AMD64_ARG(k64BitOp)); // mov ESI/R10, [kArrayRefReg]
+
+ X86EmitOp(0x8b, kEAX, kValueReg, 0 AMD64_ARG(k64BitOp)); // mov EAX, [kValueReg] ; possibly trashes kValueReg
+ // cmp EAX, [ESI/R10+m_ElementType]
+
+ X86EmitOp(0x3b, kEAX, kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle() AMD64_ARG(k64BitOp));
+ X86EmitCondJump(CheckPassed, X86CondCode::kJZ); // Exact match is OK
+
+ X86EmitRegLoad(kEAX, (UINT_PTR)g_pObjectClass); // mov EAX, g_pObjectMethodTable
+ // cmp EAX, [ESI/R10+m_ElementType]
+
+ X86EmitOp(0x3b, kEAX, kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle() AMD64_ARG(k64BitOp));
+ X86EmitCondJump(CheckPassed, X86CondCode::kJZ); // Assigning to array of object is OK
+
+ // Try to call the fast helper first ( ObjIsInstanceOfNoGC ).
+ // If that fails we will fall back to calling the slow helper ( ArrayStoreCheck ) that erects a frame.
+ // See also JitInterfaceX86::JIT_Stelem_Ref
+
+#ifdef _TARGET_AMD64_
+ // RCX contains pointer to object to check (Object*)
+ // RDX contains array type handle
+
+ // mov RCX, [rsp+offsetToObject] ; RCX = Object*
+ X86EmitEspOffset(0x8b, kRCX, ofsadjust + pArrayOpScript->m_fValLoc);
+
+ // get Array TypeHandle
+ // mov RDX, [RSP+offsetOfTypeHandle]
+
+ X86EmitEspOffset(0x8b, kRDX, ofsadjust
+ + TransitionBlock::GetOffsetOfArgumentRegisters()
+ + FIELD_OFFSET(ArgumentRegisters, THIS_REG));
+
+ // mov RDX, [kArrayMTReg+offsetof(MethodTable, m_ElementType)]
+ X86EmitIndexRegLoad(kRDX, kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle());
+
+#else
+ X86EmitPushReg(kEDX); // Save EDX
+ X86EmitPushReg(kECX); // Pass array object
+
+ X86EmitIndexPush(kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle()); // push [kArrayMTReg + m_ElementType] ; Array element type handle
+
+ // get address of value to store
+ _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc)); // on x86, value will never get a register
+ X86EmitSPIndexPush(pArrayOpScript->m_fValLoc + ofsadjust + 3*sizeof(void*)); // push [ESP+offset] ; the object pointer
+
+#endif // _TARGET_AMD64_
+
+
+ // emit a call to the fast helper
+ // One side effect of this is that we are going to generate a "jnz Epilog" and we DON'T need it
+ // in the fast path, however there are no side effects in emitting
+ // it in the fast path anyway. the reason for that is that it makes
+ // the cleanup code much easier ( we have only 1 place to cleanup the stack and
+ // restore it to the original state )
+ X86EmitCall(NewExternalCodeLabel((LPVOID)ObjIsInstanceOfNoGC), 0);
+ X86EmitCmpRegImm32( kEAX, TypeHandle::CanCast); // CMP EAX, CanCast ; if ObjIsInstanceOfNoGC returns CanCast, we will go the fast path
+ CodeLabel * Cleanup = NewCodeLabel();
+ X86EmitCondJump(Cleanup, X86CondCode::kJZ);
+
+#ifdef _TARGET_AMD64_
+ // get address of value to store
+ // lea rcx, [rsp+offs]
+ X86EmitEspOffset(0x8d, kRCX, ofsadjust + pArrayOpScript->m_fValLoc);
+
+ // get address of 'this'/rcx
+ // lea rdx, [rsp+offs]
+ X86EmitEspOffset(0x8d, kRDX, ofsadjust
+ + TransitionBlock::GetOffsetOfArgumentRegisters()
+ + FIELD_OFFSET(ArgumentRegisters, THIS_REG));
+
+#else
+ // The stack is already set up correctly for the slow helper.
+ _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc)); // on x86, value will never get a register
+ X86EmitEspOffset(0x8d, kECX, pArrayOpScript->m_fValLoc + ofsadjust + 2*sizeof(void*)); // lea ECX, [ESP+offset]
+
+ // get address of 'this'
+ X86EmitEspOffset(0x8d, kEDX, 0); // lea EDX, [ESP] ; (address of ECX)
+
+
+#endif
+ AMD64_ONLY(_ASSERTE(fNeedScratchArea));
+ X86EmitCall(NewExternalCodeLabel((LPVOID)ArrayStoreCheck), 0);
+
+ EmitLabel(Cleanup);
+#ifdef _TARGET_AMD64_
+ X86EmitEspOffset(0x8b, kRCX, 0x00 + ofsadjust + TransitionBlock::GetOffsetOfArgumentRegisters());
+ X86EmitEspOffset(0x8b, kRDX, 0x08 + ofsadjust + TransitionBlock::GetOffsetOfArgumentRegisters());
+ X86EmitEspOffset(0x8b, kR8, 0x10 + ofsadjust + TransitionBlock::GetOffsetOfArgumentRegisters());
+
+ if (pArrayOpScript->m_rank >= 2)
+ X86EmitEspOffset(0x8b, kR9, 0x18 + ofsadjust + TransitionBlock::GetOffsetOfArgumentRegisters());
+#else
+ X86EmitPopReg(kECX); // restore regs
+ X86EmitPopReg(kEDX);
+
+
+ X86EmitR2ROp(0x3B, kEAX, kEAX); // CMP EAX, EAX
+ X86EmitCondJump(Epilog, X86CondCode::kJNZ); // This branch never taken, but epilog walker uses it
+#endif
+
+ EmitLabel(CheckPassed);
+ }
+ else
+ {
+ _ASSERTE(pArrayOpScript->m_op == ArrayOpScript::LOADADDR);
+
+ // Load up the hidden type parameter into 'typeReg'
+ X86Reg typeReg = LoadArrayOpArg(pArrayOpScript->m_typeParamOffs, this, kEAX, ofsadjust);
+
+ // 'typeReg' holds the typeHandle for the ARRAY. This must be an ArrayTypeDesc*, so
+ // mask off the low two bits to get the TypeDesc*
+ X86EmitR2ROp(0x83, (X86Reg)4, typeReg); // AND typeReg, 0xFFFFFFFC
+ Emit8(0xFC);
+
+ // If 'typeReg' is NULL then we're executing the readonly ::Address and no type check is
+ // needed.
+ CodeLabel *Inner_passedTypeCheck = NewCodeLabel();
+
+ X86EmitCondJump(Inner_passedTypeCheck, X86CondCode::kJZ);
+
+ // Get the type parameter of the parameterized type
+ // mov typeReg, [typeReg.m_Arg]
+ X86EmitOp(0x8b, typeReg, typeReg, offsetof(ParamTypeDesc, m_Arg) AMD64_ARG(k64BitOp));
+
+ // Compare this against the element type of the array.
+ // mov ESI/R10, [kArrayRefReg]
+ X86EmitOp(0x8b, kArrayMTReg, kArrayRefReg, 0 AMD64_ARG(k64BitOp));
+ // cmp typeReg, [ESI/R10+m_ElementType];
+ X86EmitOp(0x3b, typeReg, kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle() AMD64_ARG(k64BitOp));
+
+ // Throw error if not equal
+ Inner_typeMismatchexception = NewCodeLabel();
+ X86EmitCondJump(Inner_typeMismatchexception, X86CondCode::kJNZ);
+ EmitLabel(Inner_passedTypeCheck);
+ }
+ }
+
+ CodeLabel* DoneCheckLabel = 0;
+ if (pArrayOpScript->m_rank == 1 && pArrayOpScript->m_fHasLowerBounds)
+ {
+ DoneCheckLabel = NewCodeLabel();
+ CodeLabel* NotSZArrayLabel = NewCodeLabel();
+
+ // for rank-1 arrays, we might actually have two different layouts depending on
+ // whether we are ELEMENT_TYPE_ARRAY or ELEMENT_TYPE_SZARRAY.
+
+ // mov EAX, [ARRAY] // EAX holds the method table
+ X86_64BitOperands();
+ X86EmitOp(0x8b, kEAX, kArrayRefReg);
+
+ // test [EAX + m_dwFlags], enum_flag_Category_IfArrayThenSzArray
+ X86_64BitOperands();
+ X86EmitOffsetModRM(0xf7, (X86Reg)0, kEAX, MethodTable::GetOffsetOfFlags());
+ Emit32(MethodTable::GetIfArrayThenSzArrayFlag());
+
+ // jz NotSZArrayLabel
+ X86EmitCondJump(NotSZArrayLabel, X86CondCode::kJZ);
+
+ //Load the passed-in index into the scratch register.
+ const ArrayOpIndexSpec *pai = pArrayOpScript->GetArrayOpIndexSpecs();
+ X86Reg idxReg = LoadArrayOpArg(pai->m_idxloc, this, SCRATCH_REGISTER_X86REG, ofsadjust);
+
+ // cmp idxReg, [kArrayRefReg + LENGTH]
+ X86EmitOp(0x3b, idxReg, kArrayRefReg, ArrayBase::GetOffsetOfNumComponents());
+
+ // jae Inner_rangeexception
+ X86EmitCondJump(Inner_rangeexception, X86CondCode::kJAE);
+
+ // <TODO> if we cared about the efficiency of this, this move could be optimized</TODO>
+ X86EmitR2ROp(0x8b, kTotalReg, idxReg AMD64_ARG(k32BitOp));
+
+ // sub ARRAY, 8 // 8 accounts for the lower bound and dim count in the ARRAY
+ X86EmitSubReg(kArrayRefReg, 8); // adjust this pointer so that indexing works out for SZARRAY
+
+ X86EmitNearJump(DoneCheckLabel);
+ EmitLabel(NotSZArrayLabel);
+ }
+
+ // For each index, range-check and mix into accumulated total.
+ UINT idx = pArrayOpScript->m_rank;
+ BOOL firstTime = TRUE;
+ while (idx--)
+ {
+ const ArrayOpIndexSpec *pai = pArrayOpScript->GetArrayOpIndexSpecs() + idx;
+
+ //Load the passed-in index into the scratch register.
+ X86Reg srcreg = LoadArrayOpArg(pai->m_idxloc, this, SCRATCH_REGISTER_X86REG, ofsadjust AMD64_ARG(k32BitOp));
+ if (SCRATCH_REGISTER_X86REG != srcreg)
+ X86EmitR2ROp(0x8b, SCRATCH_REGISTER_X86REG, srcreg AMD64_ARG(k32BitOp));
+
+ // sub SCRATCH, dword ptr [kArrayRefReg + LOWERBOUND]
+ if (pArrayOpScript->m_fHasLowerBounds)
+ {
+ X86EmitOp(0x2b, SCRATCH_REGISTER_X86REG, kArrayRefReg, pai->m_lboundofs);
+ }
+
+ // cmp SCRATCH, dword ptr [kArrayRefReg + LENGTH]
+ X86EmitOp(0x3b, SCRATCH_REGISTER_X86REG, kArrayRefReg, pai->m_lengthofs);
+
+ // jae Inner_rangeexception
+ X86EmitCondJump(Inner_rangeexception, X86CondCode::kJAE);
+
+
+ // SCRATCH == idx - LOWERBOUND
+ //
+ // imul SCRATCH, FACTOR
+ if (!firstTime)
+ {
+ //Can skip the first time since FACTOR==1
+ X86EmitR2ROp(0xaf0f, SCRATCH_REGISTER_X86REG, kFactorReg AMD64_ARG(k32BitOp));
+ }
+
+ // TOTAL += SCRATCH
+ if (firstTime)
+ {
+ // First time, we must zero-init TOTAL. Since
+ // zero-initing and then adding is just equivalent to a
+ // "mov", emit a "mov"
+ // mov TOTAL, SCRATCH
+ X86EmitR2ROp(0x8b, kTotalReg, SCRATCH_REGISTER_X86REG AMD64_ARG(k32BitOp));
+ }
+ else
+ {
+ // add TOTAL, SCRATCH
+ X86EmitR2ROp(0x03, kTotalReg, SCRATCH_REGISTER_X86REG AMD64_ARG(k32BitOp));
+ }
+
+ // FACTOR *= [kArrayRefReg + LENGTH]
+ if (idx != 0)
+ {
+ // No need to update FACTOR on the last iteration
+ // since we won't use it again
+
+ if (firstTime)
+ {
+ // must init FACTOR to 1 first: hence,
+ // the "imul" becomes a "mov"
+ // mov FACTOR, [kArrayRefReg + LENGTH]
+ X86EmitOp(0x8b, kFactorReg, kArrayRefReg, pai->m_lengthofs);
+ }
+ else
+ {
+ // imul FACTOR, [kArrayRefReg + LENGTH]
+ X86EmitOp(0xaf0f, kFactorReg, kArrayRefReg, pai->m_lengthofs);
+ }
+ }
+
+ firstTime = FALSE;
+ }
+
+ if (DoneCheckLabel != 0)
+ EmitLabel(DoneCheckLabel);
+
+ // Pass these values to X86EmitArrayOp() to generate the element address.
+ X86Reg elemBaseReg = kArrayRefReg;
+ X86Reg elemScaledReg = kTotalReg;
+ UINT32 elemSize = pArrayOpScript->m_elemsize;
+ UINT32 elemOfs = pArrayOpScript->m_ofsoffirst;
+
+ if (!(elemSize == 1 || elemSize == 2 || elemSize == 4 || elemSize == 8))
+ {
+ switch (elemSize)
+ {
+ // No way to express this as a SIB byte. Fold the scale
+ // into TOTAL.
+
+ case 16:
+ // shl TOTAL,4
+ X86EmitR2ROp(0xc1, (X86Reg)4, kTotalReg AMD64_ARG(k32BitOp));
+ Emit8(4);
+ break;
+
+ case 32:
+ // shl TOTAL,5
+ X86EmitR2ROp(0xc1, (X86Reg)4, kTotalReg AMD64_ARG(k32BitOp));
+ Emit8(5);
+ break;
+
+ case 64:
+ // shl TOTAL,6
+ X86EmitR2ROp(0xc1, (X86Reg)4, kTotalReg AMD64_ARG(k32BitOp));
+ Emit8(6);
+ break;
+
+ default:
+ // imul TOTAL, elemSize
+ X86EmitR2ROp(0x69, kTotalReg, kTotalReg AMD64_ARG(k32BitOp));
+ Emit32(elemSize);
+ break;
+ }
+ elemSize = 1;
+ }
+
+ _ASSERTE(FitsInU1(elemSize));
+ BYTE elemScale = static_cast<BYTE>(elemSize);
+
+ // Now, do the operation:
+
+ switch (pArrayOpScript->m_op)
+ {
+ case ArrayOpScript::LOADADDR:
+ // lea eax, ELEMADDR
+ X86EmitOp(0x8d, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale AMD64_ARG(k64BitOp));
+ break;
+
+ case ArrayOpScript::LOAD:
+ if (pArrayOpScript->m_flags & ArrayOpScript::HASRETVALBUFFER)
+ {
+ // Ensure that these registers have been saved!
+ _ASSERTE(fSavedESI && fSavedEDI);
+
+ //lea esi, ELEMADDR
+ X86EmitOp(0x8d, kESI, elemBaseReg, elemOfs, elemScaledReg, elemScale AMD64_ARG(k64BitOp));
+
+ _ASSERTE(!TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fRetBufLoc));
+ // mov edi, retbufptr
+ X86EmitR2ROp(0x8b, kEDI, GetX86ArgumentRegisterFromOffset(pArrayOpScript->m_fRetBufLoc - TransitionBlock::GetOffsetOfArgumentRegisters()));
+
+COPY_VALUE_CLASS:
+ {
+ size_t size = pArrayOpScript->m_elemsize;
+ size_t total = 0;
+ if(pArrayOpScript->m_gcDesc)
+ {
+ CGCDescSeries* cur = pArrayOpScript->m_gcDesc->GetHighestSeries();
+ if ((cur->startoffset-elemOfs) > 0)
+ generate_noref_copy ((unsigned) (cur->startoffset - elemOfs), this);
+ total += cur->startoffset - elemOfs;
+
+ SSIZE_T cnt = (SSIZE_T) pArrayOpScript->m_gcDesc->GetNumSeries();
+ // special array encoding
+ _ASSERTE(cnt < 0);
+
+ for (SSIZE_T __i = 0; __i > cnt; __i--)
+ {
+ HALF_SIZE_T skip = cur->val_serie[__i].skip;
+ HALF_SIZE_T nptrs = cur->val_serie[__i].nptrs;
+ total += nptrs*sizeof (DWORD*);
+ do
+ {
+ AMD64_ONLY(_ASSERTE(fNeedScratchArea));
+
+ X86EmitCall(NewExternalCodeLabel((LPVOID) JIT_ByRefWriteBarrier), 0);
+ } while (--nptrs);
+ if (skip > 0)
+ {
+ //check if we are at the end of the series
+ if (__i == (cnt + 1))
+ skip = skip - (HALF_SIZE_T)(cur->startoffset - elemOfs);
+ if (skip > 0)
+ generate_noref_copy (skip, this);
+ }
+ total += skip;
+ }
+
+ _ASSERTE (size == total);
+ }
+ else
+ {
+ // no ref anywhere, just copy the bytes.
+ _ASSERTE (size);
+ generate_noref_copy ((unsigned)size, this);
+ }
+ }
+ }
+ else
+ {
+ switch (pArrayOpScript->m_elemsize)
+ {
+ case 1:
+ // mov[zs]x eax, byte ptr ELEMADDR
+ X86EmitOp(pArrayOpScript->m_signed ? 0xbe0f : 0xb60f, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+ break;
+
+ case 2:
+ // mov[zs]x eax, word ptr ELEMADDR
+ X86EmitOp(pArrayOpScript->m_signed ? 0xbf0f : 0xb70f, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+ break;
+
+ case 4:
+ if (pArrayOpScript->m_flags & ArrayOpScript::ISFPUTYPE)
+ {
+#ifdef _TARGET_AMD64_
+ // movss xmm0, dword ptr ELEMADDR
+ Emit8(0xf3);
+ X86EmitOp(0x100f, (X86Reg)0, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+#else // !_TARGET_AMD64_
+ // fld dword ptr ELEMADDR
+ X86EmitOp(0xd9, (X86Reg)0, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+#endif // !_TARGET_AMD64_
+ }
+ else
+ {
+ // mov eax, ELEMADDR
+ X86EmitOp(0x8b, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+ }
+ break;
+
+ case 8:
+ if (pArrayOpScript->m_flags & ArrayOpScript::ISFPUTYPE)
+ {
+#ifdef _TARGET_AMD64_
+ // movsd xmm0, qword ptr ELEMADDR
+ Emit8(0xf2);
+ X86EmitOp(0x100f, (X86Reg)0, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+#else // !_TARGET_AMD64_
+ // fld qword ptr ELEMADDR
+ X86EmitOp(0xdd, (X86Reg)0, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+#endif // !_TARGET_AMD64_
+ }
+ else
+ {
+ // mov eax, ELEMADDR
+ X86EmitOp(0x8b, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale AMD64_ARG(k64BitOp));
+#ifdef _TARGET_X86_
+ // mov edx, ELEMADDR + 4
+ X86EmitOp(0x8b, kEDX, elemBaseReg, elemOfs + 4, elemScaledReg, elemScale);
+#endif
+ }
+ break;
+
+ default:
+ _ASSERTE(0);
+ }
+ }
+
+ break;
+
+ case ArrayOpScript::STORE:
+
+ switch (pArrayOpScript->m_elemsize)
+ {
+ case 1:
+ // mov SCRATCH, [esp + valoffset]
+ kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust);
+ // mov byte ptr ELEMADDR, SCRATCH.b
+ X86EmitOp(0x88, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+ break;
+ case 2:
+ // mov SCRATCH, [esp + valoffset]
+ kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust);
+ // mov word ptr ELEMADDR, SCRATCH.w
+ Emit8(0x66);
+ X86EmitOp(0x89, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+ break;
+ case 4:
+#ifndef _TARGET_AMD64_
+ if (pArrayOpScript->m_flags & ArrayOpScript::NEEDSWRITEBARRIER)
+ {
+ // mov SCRATCH, [esp + valoffset]
+ kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust);
+
+ _ASSERTE(SCRATCH_REGISTER_X86REG == kEAX); // value to store is already in EAX where we want it.
+ // lea edx, ELEMADDR
+ X86EmitOp(0x8d, kEDX, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+
+ // call JIT_Writeable_Thunks_Buf.WriteBarrierReg[0] (== EAX)
+ X86EmitCall(NewExternalCodeLabel((LPVOID) &JIT_WriteBarrierEAX), 0);
+ }
+ else
+#else // _TARGET_AMD64_
+ if (pArrayOpScript->m_flags & ArrayOpScript::ISFPUTYPE)
+ {
+ if (!TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc))
+ {
+ kValueReg = (X86Reg)TransitionBlock::GetArgumentIndexFromOffset(pArrayOpScript->m_fValLoc);
+ }
+ else
+ {
+ kValueReg = (X86Reg)0; // xmm0
+
+ // movss xmm0, dword ptr [rsp+??]
+ Emit8(0xf3);
+ X86EmitOp(0x100f, kValueReg, (X86Reg)4 /*rsp*/, ofsadjust + pArrayOpScript->m_fValLoc);
+ }
+
+ // movss dword ptr ELEMADDR, xmm?
+ Emit8(0xf3);
+ X86EmitOp(0x110f, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+ }
+ else
+#endif // _TARGET_AMD64_
+ {
+ // mov SCRATCH, [esp + valoffset]
+ kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust AMD64_ARG(k32BitOp));
+
+ // mov ELEMADDR, SCRATCH
+ X86EmitOp(0x89, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+ }
+ break;
+
+ case 8:
+
+ if (!(pArrayOpScript->m_flags & ArrayOpScript::NEEDSWRITEBARRIER))
+ {
+#ifdef _TARGET_AMD64_
+ if (pArrayOpScript->m_flags & ArrayOpScript::ISFPUTYPE)
+ {
+ if (!TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc))
+ {
+ kValueReg = (X86Reg)TransitionBlock::GetArgumentIndexFromOffset(pArrayOpScript->m_fValLoc);
+ }
+ else
+ {
+ kValueReg = (X86Reg)0; // xmm0
+
+ // movsd xmm0, qword ptr [rsp+??]
+ Emit8(0xf2);
+ X86EmitOp(0x100f, kValueReg, (X86Reg)4 /*rsp*/, ofsadjust + pArrayOpScript->m_fValLoc);
+ }
+
+ // movsd qword ptr ELEMADDR, xmm?
+ Emit8(0xf2);
+ X86EmitOp(0x110f, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+ }
+ else
+ {
+ // mov SCRATCH, [esp + valoffset]
+ kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust);
+
+ // mov ELEMADDR, SCRATCH
+ X86EmitOp(0x89, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale, k64BitOp);
+ }
+#else // !_TARGET_AMD64_
+ _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc)); // on x86, the value will never be enregistered, so that case is not implemented
+ // mov SCRATCH, [esp + valoffset]
+ X86EmitEspOffset(0x8b, SCRATCH_REGISTER_X86REG, pArrayOpScript->m_fValLoc + ofsadjust);
+ // mov ELEMADDR, SCRATCH
+ X86EmitOp(0x89, SCRATCH_REGISTER_X86REG, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+
+ _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc)); // on x86, the value will never be enregistered, so that case is not implemented
+ // mov SCRATCH, [esp + valoffset + 4]
+ X86EmitEspOffset(0x8b, SCRATCH_REGISTER_X86REG, pArrayOpScript->m_fValLoc + ofsadjust + 4);
+ // mov ELEMADDR+4, SCRATCH
+ X86EmitOp(0x89, SCRATCH_REGISTER_X86REG, elemBaseReg, elemOfs+4, elemScaledReg, elemScale);
+#endif // !_TARGET_AMD64_
+ break;
+ }
+#ifdef _TARGET_AMD64_
+ else
+ {
+ _ASSERTE(SCRATCH_REGISTER_X86REG == kEAX); // value to store is already in EAX where we want it.
+ // lea rcx, ELEMADDR
+ X86EmitOp(0x8d, kRCX, elemBaseReg, elemOfs, elemScaledReg, elemScale, k64BitOp);
+
+ // mov rdx, [rsp + valoffset]
+ kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, kRDX, ofsadjust);
+ _ASSERT(kRCX != kValueReg);
+ if (kRDX != kValueReg)
+ X86EmitR2ROp(0x8b, kRDX, kValueReg);
+
+ _ASSERTE(fNeedScratchArea);
+ X86EmitCall(NewExternalCodeLabel((PVOID)JIT_WriteBarrier), 0);
+ break;
+ }
+#endif // _TARGET_AMD64_
+ // FALL THROUGH (on x86)
+ default:
+ // Ensure that these registers have been saved!
+ _ASSERTE(fSavedESI && fSavedEDI);
+
+#ifdef _TARGET_AMD64_
+ // mov rsi, [rsp + valoffset]
+ kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, kRSI, ofsadjust);
+ if (kRSI != kValueReg)
+ X86EmitR2ROp(0x8b, kRSI, kValueReg);
+#else // !_TARGET_AMD64_
+ _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc));
+ // lea esi, [esp + valoffset]
+ X86EmitEspOffset(0x8d, kESI, pArrayOpScript->m_fValLoc + ofsadjust);
+#endif // !_TARGET_AMD64_
+
+ // lea edi, ELEMADDR
+ X86EmitOp(0x8d, kEDI, elemBaseReg, elemOfs, elemScaledReg, elemScale AMD64_ARG(k64BitOp));
+ goto COPY_VALUE_CLASS;
+ }
+ break;
+
+ default:
+ _ASSERTE(0);
+ }
+
+ EmitLabel(Epilog);
+
+#ifdef _TARGET_AMD64_
+ if (fNeedPrologue)
+ {
+ if (fNeedScratchArea)
+ {
+ // Throw away scratch area
+ X86EmitAddEsp(sizeof(ArgumentRegisters) + 0x8);
+ }
+
+ if (fSavedEDI)
+ X86EmitPopReg(kRDI);
+
+ if (fSavedESI)
+ X86EmitPopReg(kRSI);
+ }
+
+ X86EmitReturn(0);
+#else // !_TARGET_AMD64_
+ // Restore the callee-saved registers
+ X86EmitPopReg(kFactorReg);
+ X86EmitPopReg(kTotalReg);
+
+ // ret N
+ X86EmitReturn(pArrayOpScript->m_cbretpop);
+#endif // !_TARGET_AMD64_
+
+ // Exception points must clean up the stack for all those extra args.
+ // kFactorReg and kTotalReg will be popped by the jump targets.
+
+ void *pvExceptionThrowFn;
+
+#if defined(_TARGET_AMD64_)
+#define ARRAYOP_EXCEPTION_HELPERS(base) { (PVOID)base, (PVOID)base##_RSIRDI, (PVOID)base##_ScratchArea, (PVOID)base##_RSIRDI_ScratchArea }
+ static void *rgNullExceptionHelpers[] = ARRAYOP_EXCEPTION_HELPERS(ArrayOpStubNullException);
+ static void *rgRangeExceptionHelpers[] = ARRAYOP_EXCEPTION_HELPERS(ArrayOpStubRangeException);
+ static void *rgTypeMismatchExceptionHelpers[] = ARRAYOP_EXCEPTION_HELPERS(ArrayOpStubTypeMismatchException);
+#undef ARRAYOP_EXCEPTION_HELPERS
+
+ UINT iExceptionHelper = (fNeedRSIRDI ? 1 : 0) + (fNeedScratchArea ? 2 : 0);
+#endif // defined(_TARGET_AMD64_)
+
+ EmitLabel(Inner_nullexception);
+
+#ifndef _TARGET_AMD64_
+ pvExceptionThrowFn = (LPVOID)ArrayOpStubNullException;
+
+ Emit8(0xb8); // mov EAX, <stack cleanup>
+ Emit32(pArrayOpScript->m_cbretpop);
+#else //_TARGET_AMD64_
+ pvExceptionThrowFn = rgNullExceptionHelpers[iExceptionHelper];
+#endif //!_TARGET_AMD64_
+ X86EmitNearJump(NewExternalCodeLabel(pvExceptionThrowFn));
+
+ EmitLabel(Inner_rangeexception);
+#ifndef _TARGET_AMD64_
+ pvExceptionThrowFn = (LPVOID)ArrayOpStubRangeException;
+ Emit8(0xb8); // mov EAX, <stack cleanup>
+ Emit32(pArrayOpScript->m_cbretpop);
+#else //_TARGET_AMD64_
+ pvExceptionThrowFn = rgRangeExceptionHelpers[iExceptionHelper];
+#endif //!_TARGET_AMD64_
+ X86EmitNearJump(NewExternalCodeLabel(pvExceptionThrowFn));
+
+ if (Inner_typeMismatchexception != NULL)
+ {
+ EmitLabel(Inner_typeMismatchexception);
+#ifndef _TARGET_AMD64_
+ pvExceptionThrowFn = (LPVOID)ArrayOpStubTypeMismatchException;
+ Emit8(0xb8); // mov EAX, <stack cleanup>
+ Emit32(pArrayOpScript->m_cbretpop);
+#else //_TARGET_AMD64_
+ pvExceptionThrowFn = rgTypeMismatchExceptionHelpers[iExceptionHelper];
+#endif //!_TARGET_AMD64_
+ X86EmitNearJump(NewExternalCodeLabel(pvExceptionThrowFn));
+ }
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
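+
+// A minimal illustrative sketch (not used by the stub): the range-check-and-accumulate
+// loop in EmitArrayOpStub flattens a multi-dimensional index exactly like this, walking
+// from the last dimension to the first with TOTAL and FACTOR as the two accumulators.
+static SIZE_T FlattenArrayIndexSketch(const INT32 *idx, const INT32 *lowerBound, const UINT32 *length, UINT rank)
+{
+ SIZE_T total = 0; // unscaled element offset (kTotalReg)
+ SIZE_T factor = 1; // product of the lengths already visited (kFactorReg)
+ for (UINT k = rank; k-- > 0; )
+ {
+ total += (SIZE_T)(UINT32)(idx[k] - lowerBound[k]) * factor;
+ factor *= length[k];
+ }
+ return total; // the caller scales this by the element size
+}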
+
+#endif // FEATURE_ARRAYSTUB_AS_IL
+
+//===========================================================================
+// Emits code to break into debugger
+VOID StubLinkerCPU::EmitDebugBreak()
+{
+ STANDARD_VM_CONTRACT;
+
+ // int3
+ Emit8(0xCC);
+}
+
+#if defined(FEATURE_COMINTEROP) && defined(_TARGET_X86_)
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning (disable : 4740) // There is inline asm code in this function, which disables
+ // global optimizations.
+#pragma warning (disable : 4731)
+#endif // _MSC_VER
+Thread* __stdcall CreateThreadBlockReturnHr(ComMethodFrame *pFrame)
+{
+
+ WRAPPER_NO_CONTRACT;
+
+ Thread *pThread = NULL;
+
+ HRESULT hr = S_OK;
+
+ // This means that a thread is FIRST coming in from outside the EE.
+ BEGIN_ENTRYPOINT_THROWS;
+ pThread = SetupThreadNoThrow(&hr);
+ END_ENTRYPOINT_THROWS;
+
+ if (pThread == NULL) {
+ // Unwind stack, and return hr
+ // NOTE: assumes __stdcall
+ // Note that this code does not handle the rare COM signatures that do not return HRESULT
+ // compute the callee pop stack bytes
+ UINT numArgStackBytes = pFrame->GetNumCallerStackBytes();
+ unsigned frameSize = sizeof(Frame) + sizeof(LPVOID);
+ LPBYTE iEsp = ((LPBYTE)pFrame) + ComMethodFrame::GetOffsetOfCalleeSavedRegisters();
+ __asm
+ {
+ mov eax, hr
+ mov edx, numArgStackBytes
+ //*****************************************
+ // reset the stack pointer
+ // none of the locals above can be used in the asm below
+ // if we whack the stack pointer
+ mov esp, iEsp
+ // pop callee saved registers
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ pop ecx ; //return address
+ // pop the callee cleanup stack args
+ add esp, edx ;// callee cleanup of args
+ jmp ecx; // jump to the address to continue execution
+
+ // We will never get here. This "ret" is just so that code-disassembling
+ // profilers know to stop disassembling any further
+ ret
+ }
+ }
+
+ return pThread;
+}
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+#endif // defined(FEATURE_COMINTEROP) && defined(_TARGET_X86_)
+
+#endif // !defined(CROSSGEN_COMPILE) && !defined(FEATURE_STUBS_AS_IL)
+
+#endif // !DACCESS_COMPILE
+
+
+#ifdef _TARGET_AMD64_
+
+//
+// TailCallFrame Object Scanning
+//
+// This handles scanning/promotion of GC objects that were
+// protected by the TailCallHelper routine. Note that the set of objects
+// being protected is somewhat dynamic and depends on the callee...
+//
+
+void TailCallFrame::GcScanRoots(promote_func *fn, ScanContext* sc)
+{
+ WRAPPER_NO_CONTRACT;
+
+ if (m_pGCLayout != NULL)
+ {
+ struct FrameOffsetDecoder {
+ private:
+ TADDR prevOffset;
+ TADDR rangeEnd;
+ BOOL maybeInterior;
+ BOOL atEnd;
+ PTR_SBYTE pbOffsets;
+
+ DWORD ReadNumber() {
+ signed char i;
+ DWORD offset = 0;
+ while ((i = *pbOffsets++) >= 0)
+ {
+ offset = (offset << 7) | i;
+ }
+ offset = (offset << 7) | (i & 0x7F);
+ return offset;
+ }
+
+ public:
+ FrameOffsetDecoder(PTR_GSCookie _base, TADDR offsets)
+ : prevOffset(dac_cast<TADDR>(_base)), rangeEnd(~0LL), atEnd(FALSE), pbOffsets(dac_cast<PTR_SBYTE>(offsets)) { maybeInterior = FALSE;}
+
+ bool MoveNext() {
+ LIMITED_METHOD_CONTRACT;
+
+ if (rangeEnd < prevOffset)
+ {
+ prevOffset -= sizeof(void*);
+ return true;
+ }
+ if (atEnd) return false;
+ DWORD offset = ReadNumber();
+ atEnd = (offset & 1);
+ BOOL range = (offset & 2);
+ maybeInterior = (offset & 0x80000000);
+
+ offset &= 0x7FFFFFFC;
+
+#ifdef _WIN64
+ offset <<= 1;
+#endif
+ offset += sizeof(void*);
+ _ASSERTE(prevOffset > offset);
+ prevOffset -= offset;
+
+ if (range)
+ {
+ _ASSERTE(!atEnd);
+ _ASSERTE(!maybeInterior);
+ DWORD offsetEnd = ReadNumber();
+ atEnd = (offsetEnd & 1);
+ offsetEnd = (offsetEnd & ~1) << 1;
+ // range encoding starts with a range of 3 (2 is better to encode as
+ // 2 offsets), so 0 == 2 (the last offset in the range)
+ offsetEnd += sizeof(void*) * 2;
+ rangeEnd = prevOffset - offsetEnd;
+ }
+
+ return true;
+ }
+
+ BOOL MaybeInterior() const { return maybeInterior; }
+
+ PTR_PTR_Object Current() const { return PTR_PTR_Object(prevOffset); }
+
+ } decoder(GetGSCookiePtr(), m_pGCLayout);
+
+ while (decoder.MoveNext())
+ {
+ PTR_PTR_Object ppRef = decoder.Current();
+
+ LOG((LF_GC, INFO3, "Tail Call Frame Promoting" FMT_ADDR "to",
+ DBG_ADDR(OBJECTREF_TO_UNCHECKED_OBJECTREF(*ppRef)) ));
+ if (decoder.MaybeInterior())
+ PromoteCarefully(fn, ppRef, sc, GC_CALL_INTERIOR|CHECK_APP_DOMAIN);
+ else
+ (*fn)(ppRef, sc, 0);
+ LOG((LF_GC, INFO3, FMT_ADDR "\n", DBG_ADDR(OBJECTREF_TO_UNCHECKED_OBJECTREF(*ppRef)) ));
+ }
+ }
+}
+
+#ifndef DACCESS_COMPILE
+static void EncodeOneGCOffset(CPUSTUBLINKER *pSl, ULONG delta, BOOL maybeInterior, BOOL range, BOOL last)
+{
+ CONTRACTL
+ {
+ THROWS; // From the stublinker
+ MODE_ANY;
+ GC_NOTRIGGER;
+ }
+ CONTRACTL_END;
+
+ // Everything should be pointer aligned
+ // but we use a high bit for interior, and the 0 bit to denote the end of the list
+ // we use the 1 bit to denote a range
+ _ASSERTE((delta % sizeof(void*)) == 0);
+
+#if defined(_WIN64)
+ // For 64-bit, we have 3 bits of alignment, so we allow larger frames
+ // by shifting and gaining a free high-bit.
+ ULONG encodedDelta = delta >> 1;
+#else
+ // For 32-bit, we just limit our frame size to <2GB. (I know, such a bummer!)
+ ULONG encodedDelta = delta;
+#endif
+ _ASSERTE((encodedDelta & 0x80000003) == 0);
+ if (last)
+ {
+ encodedDelta |= 1;
+ }
+
+ if (range)
+ {
+ encodedDelta |= 2;
+ }
+ else if (maybeInterior)
+ {
+ _ASSERTE(!range);
+ encodedDelta |= 0x80000000;
+ }
+
+ BYTE bytes[5];
+ UINT index = 5;
+ bytes[--index] = (BYTE)((encodedDelta & 0x7F) | 0x80);
+ encodedDelta >>= 7;
+ while (encodedDelta > 0)
+ {
+ bytes[--index] = (BYTE)(encodedDelta & 0x7F);
+ encodedDelta >>= 7;
+ }
+ pSl->EmitBytes(&bytes[index], 5 - index);
+}
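+
+// Illustrative decoder sketch (the real consumer is FrameOffsetDecoder::ReadNumber in
+// TailCallFrame::GcScanRoots above, and this helper is never called): values are
+// emitted most-significant 7 bits first, and only the final byte has its high bit set.
+static ULONG DecodeOneGCOffsetSketch(const BYTE **ppb)
+{
+ ULONG value = 0;
+ BYTE b;
+ do
+ {
+ b = *(*ppb)++;
+ value = (value << 7) | (b & 0x7F); // accumulate 7 bits per byte
+ } while ((b & 0x80) == 0); // the terminating byte has its high bit set
+ return value;
+}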
+
+static void EncodeGCOffsets(CPUSTUBLINKER *pSl, /* const */ ULONGARRAY & gcOffsets)
+{
+ CONTRACTL
+ {
+ THROWS;
+ MODE_ANY;
+ GC_NOTRIGGER;
+ }
+ CONTRACTL_END;
+
+ _ASSERTE(gcOffsets.Count() > 0);
+
+ ULONG prevOffset = 0;
+ int i = 0;
+ BOOL last = FALSE;
+ do {
+ ULONG offset = gcOffsets[i];
+ // Everything should be pointer aligned
+ // but we use the 0-bit to mean maybeInterior, for byrefs.
+ _ASSERTE(((offset % sizeof(void*)) == 0) || ((offset % sizeof(void*)) == 1));
+ BOOL maybeInterior = (offset & 1);
+ offset &= ~1;
+
+ // Encode just deltas because they're smaller (and the list should be sorted)
+ _ASSERTE(offset >= (prevOffset + sizeof(void*)));
+ ULONG delta = offset - (prevOffset + sizeof(void*));
+ if (!maybeInterior && gcOffsets.Count() > i + 2)
+ {
+ // Check for a potential range.
+ // Only do it if we have 3 or more pointers in a row
+ ULONG rangeOffset = offset;
+ int j = i + 1;
+ do {
+ ULONG nextOffset = gcOffsets[j];
+ // interior pointers can't be in ranges
+ if (nextOffset & 1)
+ break;
+ // ranges must be saturated
+ if (nextOffset != (rangeOffset + sizeof(void*)))
+ break;
+ j++;
+ rangeOffset = nextOffset;
+ } while(j < gcOffsets.Count());
+
+ if (j > (i + 2))
+ {
+ EncodeOneGCOffset(pSl, delta, FALSE, TRUE, last);
+ i = j - 1;
+ _ASSERTE(rangeOffset >= (offset + (sizeof(void*) * 2)));
+ delta = rangeOffset - (offset + (sizeof(void*) * 2));
+ offset = rangeOffset;
+ }
+ }
+ last = (++i == gcOffsets.Count());
+
+
+ EncodeOneGCOffset(pSl, delta, maybeInterior, FALSE, last);
+
+ prevOffset = offset;
+ } while (!last);
+}
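+
+// Worked example (illustrative, assuming _WIN64 and no interior pointers): for
+// gcOffsets == { 0x20, 0x40 } the deltas are 0x20 - 0x08 == 0x18 and 0x40 - 0x28 == 0x18,
+// each encodedDelta is 0x18 >> 1 == 0x0C, and the second entry also gets the "last" bit,
+// so the emitted stream is the two bytes 0x8C 0x8D.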
+
+static void AppendGCLayout(ULONGARRAY &gcLayout, size_t baseOffset, BOOL fIsTypedRef, TypeHandle VMClsHnd)
+{
+ STANDARD_VM_CONTRACT;
+
+ _ASSERTE((baseOffset % 16) == 0);
+ _ASSERTE(FitsInU4(baseOffset));
+
+ if (fIsTypedRef)
+ {
+ *gcLayout.AppendThrowing() = (ULONG)(baseOffset | 1); // "| 1" to mark it as an interior pointer
+ }
+ else if (!VMClsHnd.IsNativeValueType())
+ {
+ MethodTable* pMT = VMClsHnd.GetMethodTable();
+ _ASSERTE(pMT);
+ _ASSERTE(pMT->IsValueType());
+
+ // walk the GC descriptors, reporting the correct offsets
+ if (pMT->ContainsPointers())
+ {
+ // size of instance when unboxed must be adjusted for the syncblock
+ // index and the VTable pointer.
+ DWORD size = pMT->GetBaseSize();
+
+ // we don't include this term in our 'ppstop' calculation below.
+ _ASSERTE(pMT->GetComponentSize() == 0);
+
+ CGCDesc* map = CGCDesc::GetCGCDescFromMT(pMT);
+ CGCDescSeries* cur = map->GetLowestSeries();
+ CGCDescSeries* last = map->GetHighestSeries();
+
+ _ASSERTE(cur <= last);
+ do
+ {
+ // offset to embedded references in this series must be
+ // adjusted by the VTable pointer, when in the unboxed state.
+ size_t adjustOffset = cur->GetSeriesOffset() - sizeof(void *);
+
+ _ASSERTE(baseOffset >= adjustOffset);
+ size_t start = baseOffset - adjustOffset;
+ size_t stop = start - (cur->GetSeriesSize() + size);
+ for (size_t off = stop + sizeof(void*); off <= start; off += sizeof(void*))
+ {
+ _ASSERTE(gcLayout.Count() == 0 || off > gcLayout[gcLayout.Count() - 1]);
+ _ASSERTE(FitsInU4(off));
+ *gcLayout.AppendThrowing() = (ULONG)off;
+ }
+ cur++;
+
+ } while (cur <= last);
+ }
+ }
+}
+
+Stub * StubLinkerCPU::CreateTailCallCopyArgsThunk(CORINFO_SIG_INFO * pSig,
+ CorInfoHelperTailCallSpecialHandling flags)
+{
+ STANDARD_VM_CONTRACT;
+
+ CPUSTUBLINKER sl;
+ CPUSTUBLINKER* pSl = &sl;
+
+ // Generates a function that looks like this:
+ // size_t CopyArguments(va_list args, (RCX)
+ // CONTEXT *pCtx, (RDX)
+ // DWORD64 *pvStack, (R8)
+ // size_t cbStack) (R9)
+ // {
+ // if (pCtx != NULL) {
+ // foreach (arg in args) {
+ // copy into pCtx or pvStack
+ // }
+ // }
+ // return <size of stack needed>;
+ // }
+ //
+
+ CodeLabel *pNullLabel = pSl->NewCodeLabel();
+
+ // test rdx, rdx
+ pSl->X86EmitR2ROp(0x85, kRDX, kRDX);
+
+ // jz NullLabel
+ pSl->X86EmitCondJump(pNullLabel, X86CondCode::kJZ);
+
+ UINT nArgSlot = 0;
+ UINT totalArgs = pSig->totalILArgs() + ((pSig->isVarArg() || pSig->hasTypeArg()) ? 1 : 0);
+ bool fR10Loaded = false;
+ UINT cbArg;
+ static const UINT rgcbArgRegCtxtOffsets[4] = { offsetof(CONTEXT, Rcx), offsetof(CONTEXT, Rdx),
+ offsetof(CONTEXT, R8), offsetof(CONTEXT, R9) };
+ static const UINT rgcbFpArgRegCtxtOffsets[4] = { offsetof(CONTEXT, Xmm0.Low), offsetof(CONTEXT, Xmm1.Low),
+ offsetof(CONTEXT, Xmm2.Low), offsetof(CONTEXT, Xmm3.Low) };
+
+ ULONGARRAY gcLayout;
+
+ // On input to the function R9 contains the size of the buffer
+ // The first time this macro runs, R10 is loaded with the 'top' of the Frame
+ // and R9 is changed to point to the 'top' of the copy buffer.
+ // Then both R9 and R10 are decremented by the size of the struct we're copying
+ // So R10 is the value to put in the argument slot, and R9 is where the data
+ // should be copied to (or zeroed out in the case of the return buffer).
+#define LOAD_STRUCT_OFFSET_IF_NEEDED(cbSize) \
+ { \
+ _ASSERTE(cbSize > 0); \
+ _ASSERTE(FitsInI4(cbSize)); \
+ __int32 offset = (__int32)cbSize; \
+ if (!fR10Loaded) { \
+ /* mov r10, [rdx + offset of RSP] */ \
+ pSl->X86EmitIndexRegLoad(kR10, kRDX, offsetof(CONTEXT, Rsp)); \
+ /* add an extra 8 because RSP is pointing at the return address */ \
+ offset -= 8; \
+ /* add r10, r9 */ \
+ pSl->X86EmitAddRegReg(kR10, kR9); \
+ /* add r9, r8 */ \
+ pSl->X86EmitAddRegReg(kR9, kR8); \
+ fR10Loaded = true; \
+ } \
+ /* sub r10, offset */ \
+ pSl->X86EmitSubReg(kR10, offset); \
+ /* sub r9, cbSize */ \
+ pSl->X86EmitSubReg(kR9, cbSize); \
+ }
+
+
+ if (flags & CORINFO_TAILCALL_STUB_DISPATCH_ARG) {
+ // This is set for stub dispatch
+ // The JIT placed an extra argument in the list that needs to
+ // get shoved into R11, and not counted.
+ // pCtx->R11 = va_arg(args, DWORD64);
+
+ // mov rax, [rcx]
+ pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0);
+ // add rcx, 8
+ pSl->X86EmitAddReg(kRCX, 8);
+ // mov [rdx + offset of R11], rax
+ pSl->X86EmitIndexRegStore(kRDX, offsetof(CONTEXT, R11), kRAX);
+ }
+
+ ULONG cbStructOffset = 0;
+
+ // First comes the 'this' pointer
+ if (pSig->hasThis()) {
+ // mov rax, [rcx]
+ pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0);
+ // add rcx, 8
+ pSl->X86EmitAddReg(kRCX, 8);
+ // mov [rdx + offset of RCX/RDX], rax
+ pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot++], kRAX);
+ }
+
+ // Next the return buffer
+ cbArg = 0;
+ TypeHandle th(pSig->retTypeClass);
+ if ((pSig->retType == CORINFO_TYPE_REFANY) || (pSig->retType == CORINFO_TYPE_VALUECLASS)) {
+ cbArg = th.GetSize();
+ }
+
+ if (ArgIterator::IsArgPassedByRef(cbArg)) {
+ totalArgs++;
+
+ // We always reserve space for the return buffer, and we always zero it out,
+ // so the GC won't complain, but if it's already pointing above the frame,
+ // then we need to pass it in (so it will get passed out).
+ // Otherwise we assume the caller is returning void, so we just pass in
+ // dummy space to be overwritten.
+ UINT cbUsed = (cbArg + 0xF) & ~0xF;
+ LOAD_STRUCT_OFFSET_IF_NEEDED(cbUsed);
+ // now emit a 'memset(r9, 0, cbUsed)'
+ {
+ // xorps xmm0, xmm0
+ pSl->X86EmitR2ROp(X86_INSTR_XORPS, kXMM0, kXMM0);
+ if (cbUsed <= 4 * 16) {
+ // movaps [r9], xmm0
+ pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 0);
+ if (16 < cbUsed) {
+ // movaps [r9 + 16], xmm0
+ pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 16);
+ if (32 < cbUsed) {
+ // movaps [r9 + 32], xmm0
+ pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 32);
+ if (48 < cbUsed) {
+ // movaps [r9 + 48], xmm0
+ pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 48);
+ }
+ }
+ }
+ }
+ else {
+ // a loop (one double-quadword at a time)
+ pSl->X86EmitZeroOutReg(kR11);
+ // LoopLabel:
+ CodeLabel *pLoopLabel = pSl->NewCodeLabel();
+ pSl->EmitLabel(pLoopLabel);
+ // movaps [r9 + r11], xmm0
+ pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 0, kR11, 1);
+ // add r11, 16
+ pSl->X86EmitAddReg(kR11, 16);
+ // cmp r11, cbUsed
+ pSl->X86EmitCmpRegImm32(kR11, cbUsed);
+ // jl LoopLabel
+ pSl->X86EmitCondJump(pLoopLabel, X86CondCode::kJL);
+ }
+ }
+ cbStructOffset += cbUsed;
+ AppendGCLayout(gcLayout, cbStructOffset, pSig->retType == CORINFO_TYPE_REFANY, th);
+
+ // mov rax, [rcx]
+ pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0);
+ // add rcx, 8
+ pSl->X86EmitAddReg(kRCX, 8);
+ // cmp rax, [rdx + offset of R12]
+ pSl->X86EmitOffsetModRM(0x3B, kRAX, kRDX, offsetof(CONTEXT, R12));
+
+ CodeLabel *pSkipLabel = pSl->NewCodeLabel();
+ // jnb SkipLabel
+ pSl->X86EmitCondJump(pSkipLabel, X86CondCode::kJNB);
+
+ // Also check the lower bound of the stack in case the return buffer is on the GC heap
+ // and the GC heap is below the stack
+ // cmp rax, rsp
+ pSl->X86EmitR2ROp(0x3B, kRAX, (X86Reg)4 /*kRSP*/);
+ // jb SkipLabel
+ pSl->X86EmitCondJump(pSkipLabel, X86CondCode::kJB);
+ // mov rax, r10
+ pSl->X86EmitMovRegReg(kRAX, kR10);
+ // SkipLabel:
+ pSl->EmitLabel(pSkipLabel);
+ // mov [rdx + offset of RCX], rax
+ pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot++], kRAX);
+ }
+
+ // VarArgs Cookie *or* Generics Instantiation Parameter
+ if (pSig->hasTypeArg() || pSig->isVarArg()) {
+ // mov rax, [rcx]
+ pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0);
+ // add rcx, 8
+ pSl->X86EmitAddReg(kRCX, 8);
+ // mov [rdx + offset of RCX/RDX], rax
+ pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot++], kRAX);
+ }
+
+ _ASSERTE(nArgSlot <= 4);
+
+ // Now for *all* the 'real' arguments
+ SigPointer ptr((PCCOR_SIGNATURE)pSig->args);
+ Module * module = GetModule(pSig->scope);
+ Instantiation classInst((TypeHandle*)pSig->sigInst.classInst, pSig->sigInst.classInstCount);
+ Instantiation methodInst((TypeHandle*)pSig->sigInst.methInst, pSig->sigInst.methInstCount);
+ SigTypeContext typeCtxt(classInst, methodInst);
+
+ for( ;nArgSlot < totalArgs; ptr.SkipExactlyOne()) {
+ CorElementType et = ptr.PeekElemTypeNormalized(module, &typeCtxt);
+ if (et == ELEMENT_TYPE_SENTINEL)
+ continue;
+
+ // mov rax, [rcx]
+ pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0);
+ // add rcx, 8
+ pSl->X86EmitAddReg(kRCX, 8);
+ switch (et) {
+ case ELEMENT_TYPE_INTERNAL:
+ // TODO
+ _ASSERTE(!"Shouldn't see ELEMENT_TYPE_INTERNAL");
+ break;
+ case ELEMENT_TYPE_TYPEDBYREF:
+ case ELEMENT_TYPE_VALUETYPE:
+ th = ptr.GetTypeHandleThrowing(module, &typeCtxt, ClassLoader::LoadTypes, CLASS_LOAD_UNRESTOREDTYPEKEY);
+ _ASSERTE(!th.IsNull());
+ g_IBCLogger.LogEEClassAndMethodTableAccess(th.GetMethodTable());
+ cbArg = (UINT)th.GetSize();
+ if (ArgIterator::IsArgPassedByRef(cbArg)) {
+ UINT cbUsed = (cbArg + 0xF) & ~0xF;
+ LOAD_STRUCT_OFFSET_IF_NEEDED(cbUsed);
+ // rax has the source pointer
+ // r9 has the intermediate copy location
+ // r10 has the final destination
+ if (nArgSlot < 4) {
+ pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot++], kR10);
+ }
+ else {
+ pSl->X86EmitIndexRegStore(kR8, 8 * nArgSlot++, kR10);
+ }
+ // now emit a 'memcpy(rax, r9, cbUsed)'
+ // These structs are supposed to be 16-byte aligned, but
+ // Reflection puts them on the GC heap, which is only 8-byte
+ // aligned. It also means we have to be careful about not
+ // copying too much (because we might cross a page boundary)
+ UINT cbUsed16 = (cbArg + 7) & ~0xF;
+ _ASSERTE((cbUsed16 == cbUsed) || ((cbUsed16 + 16) == cbUsed));
+
+ if (cbArg <= 192) {
+ // Unrolled version (6 x 16 bytes in parallel)
+ UINT offset = 0;
+ while (offset < cbUsed16) {
+ // movups xmm0, [rax + offset]
+ pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM0, kRAX, offset);
+ if (offset + 16 < cbUsed16) {
+ // movups xmm1, [rax + offset + 16]
+ pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM1, kRAX, offset + 16);
+ if (offset + 32 < cbUsed16) {
+ // movups xmm2, [rax + offset + 32]
+ pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM2, kRAX, offset + 32);
+ if (offset + 48 < cbUsed16) {
+ // movups xmm3, [rax + offset + 48]
+ pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM3, kRAX, offset + 48);
+ if (offset + 64 < cbUsed16) {
+ // movups xmm4, [rax + offset + 64]
+ pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM4, kRAX, offset + 64);
+ if (offset + 80 < cbUsed16) {
+ // movups xmm5, [rax + offset + 80]
+ pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM5, kRAX, offset + 80);
+ }
+ }
+ }
+ }
+ }
+ // movaps [r9 + offset], xmm0
+ pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, offset);
+ offset += 16;
+ if (offset < cbUsed16) {
+ // movaps [r9 + 16], xmm1
+ pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM1, kR9, offset);
+ offset += 16;
+ if (offset < cbUsed16) {
+ // movaps [r9 + 32], xmm2
+ pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM2, kR9, offset);
+ offset += 16;
+ if (offset < cbUsed16) {
+ // movaps [r9 + 48], xmm3
+ pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM3, kR9, offset);
+ offset += 16;
+ if (offset < cbUsed16) {
+ // movaps [r9 + 64], xmm4
+ pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM4, kR9, offset);
+ offset += 16;
+ if (offset < cbUsed16) {
+ // movaps [r9 + 80], xmm5
+ pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM5, kR9, offset);
+ offset += 16;
+ }
+ }
+ }
+ }
+ }
+ }
+ // Copy the last 8 bytes if needed
+ if (cbUsed > cbUsed16) {
+ _ASSERTE(cbUsed16 < cbArg);
+ // movlps xmm0, [rax + offset]
+ pSl->X86EmitOp(X86_INSTR_MOVLPS_R_RM, kXMM0, kRAX, offset);
+ // movlps [r9 + offset], xmm0
+ pSl->X86EmitOp(X86_INSTR_MOVLPS_RM_R, kXMM0, kR9, offset);
+ }
+ }
+ else {
+ // a loop (one double-quadword at a time)
+ pSl->X86EmitZeroOutReg(kR11);
+ // LoopLabel:
+ CodeLabel *pLoopLabel = pSl->NewCodeLabel();
+ pSl->EmitLabel(pLoopLabel);
+ // movups xmm0, [rax + r11]
+ pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM0, kRAX, 0, kR11, 1);
+ // movaps [r9 + r11], xmm0
+ pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 0, kR11, 1);
+ // add r11, 16
+ pSl->X86EmitAddReg(kR11, 16);
+ // cmp r11, cbUsed16
+ pSl->X86EmitCmpRegImm32(kR11, cbUsed16);
+ // jl LoopLabel
+ pSl->X86EmitCondJump(pLoopLabel, X86CondCode::kJL);
+ if (cbArg > cbUsed16) {
+ _ASSERTE(cbUsed16 + 8 >= cbArg);
+ // movlps xmm0, [rax + r11]
+ pSl->X86EmitOp(X86_INSTR_MOVLPS_R_RM, kXMM0, kRAX, 0, kR11, 1);
+ // movlps [r9 + r11], xmm0
+ pSl->X86EmitOp(X86_INSTR_MOVLPS_RM_R, kXMM0, kR9, 0, kR11, 1);
+ }
+ }
+ cbStructOffset += cbUsed;
+ AppendGCLayout(gcLayout, cbStructOffset, et == ELEMENT_TYPE_TYPEDBYREF, th);
+ break;
+ }
+
+ //
+ // Explicit Fall-Through for non-IsArgPassedByRef
+ //
+
+ default:
+ if (nArgSlot < 4) {
+ pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot], kRAX);
+ if ((et == ELEMENT_TYPE_R4) || (et == ELEMENT_TYPE_R8)) {
+ pSl->X86EmitIndexRegStore(kRDX, rgcbFpArgRegCtxtOffsets[nArgSlot], kRAX);
+ }
+ }
+ else {
+ pSl->X86EmitIndexRegStore(kR8, 8 * nArgSlot, kRAX);
+ }
+ nArgSlot++;
+ break;
+ }
+ }
+
+#undef LOAD_STRUCT_OFFSET_IF_NEEDED
+
+ // Keep our 4 shadow slots and even number of slots (to keep 16-byte aligned)
+ if (nArgSlot < 4)
+ nArgSlot = 4;
+ else if (nArgSlot & 1)
+ nArgSlot++;
+
+ _ASSERTE((cbStructOffset % 16) == 0);
+
+ // xor eax, eax
+ pSl->X86EmitZeroOutReg(kRAX);
+ // ret
+ pSl->X86EmitReturn(0);
+
+ // NullLabel:
+ pSl->EmitLabel(pNullLabel);
+
+ CodeLabel *pGCLayoutLabel = NULL;
+ if (gcLayout.Count() == 0) {
+ // xor eax, eax
+ pSl->X86EmitZeroOutReg(kRAX);
+ }
+ else {
+ // lea rax, [rip + offset to gclayout]
+ pGCLayoutLabel = pSl->NewCodeLabel();
+ pSl->X86EmitLeaRIP(pGCLayoutLabel, kRAX);
+ }
+ // mov [r9], rax
+ pSl->X86EmitIndexRegStore(kR9, 0, kRAX);
+ // mov rax, cbStackNeeded
+ pSl->X86EmitRegLoad(kRAX, cbStructOffset + nArgSlot * 8);
+ // ret
+ pSl->X86EmitReturn(0);
+
+ if (gcLayout.Count() > 0) {
+ // GCLayout:
+ pSl->EmitLabel(pGCLayoutLabel);
+ EncodeGCOffsets(pSl, gcLayout);
+ }
+
+ return pSl->Link();
+}
+#endif // DACCESS_COMPILE
+
+#endif // _TARGET_AMD64_
+
+
+#ifdef HAS_FIXUP_PRECODE
+
+#ifdef HAS_FIXUP_PRECODE_CHUNKS
+TADDR FixupPrecode::GetMethodDesc()
+{
+ LIMITED_METHOD_CONTRACT;
+ SUPPORTS_DAC;
+
+ // This lookup is also manually inlined in PrecodeFixupThunk assembly code
+ TADDR base = *PTR_TADDR(GetBase());
+ if (base == NULL)
+ return NULL;
+ return base + (m_MethodDescChunkIndex * MethodDesc::ALIGNMENT);
+}
+#endif
+
+#ifdef DACCESS_COMPILE
+void FixupPrecode::EnumMemoryRegions(CLRDataEnumMemoryFlags flags)
+{
+ SUPPORTS_DAC;
+ DacEnumMemoryRegion(dac_cast<TADDR>(this), sizeof(FixupPrecode));
+
+ DacEnumMemoryRegion(GetBase(), sizeof(TADDR));
+}
+#endif // DACCESS_COMPILE
+
+#endif // HAS_FIXUP_PRECODE
+
+#ifndef DACCESS_COMPILE
+
+BOOL rel32SetInterlocked(/*PINT32*/ PVOID pRel32, TADDR target, TADDR expected, MethodDesc* pMD)
+{
+ CONTRACTL
+ {
+ THROWS; // Creating a JumpStub could throw OutOfMemory
+ GC_TRIGGERS;
+ }
+ CONTRACTL_END;
+
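+ // The rel32 displacement is relative to the first byte after the 4-byte field,
+ // so the raw value we expect to find at pRel32 is (expected - (pRel32 + 4)).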
+ BYTE* callAddrAdj = (BYTE*)pRel32 + 4;
+ INT32 expectedRel32 = static_cast<INT32>((BYTE*)expected - callAddrAdj);
+
+ INT32 targetRel32 = rel32UsingJumpStub((INT32*)pRel32, target, pMD);
+
+ _ASSERTE(IS_ALIGNED(pRel32, sizeof(INT32)));
+ return FastInterlockCompareExchange((LONG*)pRel32, (LONG)targetRel32, (LONG)expectedRel32) == (LONG)expectedRel32;
+}
+
+void StubPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator /* = NULL */,
+ BYTE type /* = StubPrecode::Type */, TADDR target /* = NULL */)
+{
+ WRAPPER_NO_CONTRACT;
+
+ IN_WIN64(m_movR10 = X86_INSTR_MOV_R10_IMM64); // mov r10, pMethodDesc
+ IN_WIN32(m_movEAX = X86_INSTR_MOV_EAX_IMM32); // mov eax, pMethodDesc
+ m_pMethodDesc = (TADDR)pMD;
+ IN_WIN32(m_mov_rm_r = X86_INSTR_MOV_RM_R); // mov reg,reg
+ m_type = type;
+ m_jmp = X86_INSTR_JMP_REL32; // jmp rel32
+
+ if (pLoaderAllocator != NULL)
+ {
+ // Use pMD == NULL in all precode initialization methods to allocate the initial jump stub in non-dynamic heap
+ // that has the same lifetime as the precode itself
+ if (target == NULL)
+ target = GetPreStubEntryPoint();
+ m_rel32 = rel32UsingJumpStub(&m_rel32, target, NULL /* pMD */, pLoaderAllocator);
+ }
+}
+
+#ifdef HAS_NDIRECT_IMPORT_PRECODE
+
+void NDirectImportPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator)
+{
+ WRAPPER_NO_CONTRACT;
+ StubPrecode::Init(pMD, pLoaderAllocator, NDirectImportPrecode::Type, GetEEFuncEntryPoint(NDirectImportThunk));
+}
+
+#endif // HAS_NDIRECT_IMPORT_PRECODE
+
+
+#ifdef HAS_REMOTING_PRECODE
+
+void RemotingPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator /* = NULL */)
+{
+ WRAPPER_NO_CONTRACT;
+
+ IN_WIN64(m_movR10 = X86_INSTR_MOV_R10_IMM64); // mov r10, pMethodDesc
+ IN_WIN32(m_movEAX = X86_INSTR_MOV_EAX_IMM32); // mov eax, pMethodDesc
+ m_pMethodDesc = (TADDR)pMD;
+ m_type = PRECODE_REMOTING; // nop
+ m_call = X86_INSTR_CALL_REL32;
+ m_jmp = X86_INSTR_JMP_REL32; // jmp rel32
+
+ if (pLoaderAllocator != NULL)
+ {
+ m_callRel32 = rel32UsingJumpStub(&m_callRel32,
+ GetEEFuncEntryPoint(PrecodeRemotingThunk), NULL /* pMD */, pLoaderAllocator);
+ m_rel32 = rel32UsingJumpStub(&m_rel32,
+ GetPreStubEntryPoint(), NULL /* pMD */, pLoaderAllocator);
+ }
+}
+
+#endif // HAS_REMOTING_PRECODE
+
+
+#ifdef HAS_FIXUP_PRECODE
+void FixupPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator, int iMethodDescChunkIndex /*=0*/, int iPrecodeChunkIndex /*=0*/)
+{
+ WRAPPER_NO_CONTRACT;
+
+ m_op = X86_INSTR_CALL_REL32; // call PrecodeFixupThunk
+ m_type = FixupPrecode::TypePrestub;
+
+ // Initialize chunk indices only if they are not initialized yet. This is necessary to make MethodDesc::Reset work.
+ if (m_PrecodeChunkIndex == 0)
+ {
+ _ASSERTE(FitsInU1(iPrecodeChunkIndex));
+ m_PrecodeChunkIndex = static_cast<BYTE>(iPrecodeChunkIndex);
+ }
+
+ if (iMethodDescChunkIndex != -1)
+ {
+ if (m_MethodDescChunkIndex == 0)
+ {
+ _ASSERTE(FitsInU1(iMethodDescChunkIndex));
+ m_MethodDescChunkIndex = static_cast<BYTE>(iMethodDescChunkIndex);
+ }
+
+ if (*(void**)GetBase() == NULL)
+ *(void**)GetBase() = (BYTE*)pMD - (iMethodDescChunkIndex * MethodDesc::ALIGNMENT);
+ }
+
+ _ASSERTE(GetMethodDesc() == (TADDR)pMD);
+
+ if (pLoaderAllocator != NULL)
+ {
+ m_rel32 = rel32UsingJumpStub(&m_rel32,
+ GetEEFuncEntryPoint(PrecodeFixupThunk), NULL /* pMD */, pLoaderAllocator);
+ }
+}
+
+BOOL FixupPrecode::SetTargetInterlocked(TADDR target, TADDR expected)
+{
+ CONTRACTL
+ {
+ THROWS; // Creating a JumpStub could throw OutOfMemory
+ GC_TRIGGERS;
+ }
+ CONTRACTL_END;
+
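+ // The first 8 bytes of the precode (opcode, rel32 and type byte) are updated with a single
+ // interlocked 64-bit compare-exchange below, so the patch from "call PrecodeFixupThunk" to
+ // "jmp NativeCode" is atomic (hence the 8-byte alignment requirement noted in stublinkerx86.h).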
+ INT64 oldValue = *(INT64*)this;
+ BYTE* pOldValue = (BYTE*)&oldValue;
+
+ if (pOldValue[OFFSETOF_PRECODE_TYPE_CALL_OR_JMP] != FixupPrecode::TypePrestub)
+ return FALSE;
+
+ MethodDesc * pMD = (MethodDesc*)GetMethodDesc();
+ g_IBCLogger.LogMethodPrecodeWriteAccess(pMD);
+
+ INT64 newValue = oldValue;
+ BYTE* pNewValue = (BYTE*)&newValue;
+
+ pNewValue[OFFSETOF_PRECODE_TYPE_CALL_OR_JMP] = FixupPrecode::Type;
+
+ pOldValue[offsetof(FixupPrecode,m_op)] = X86_INSTR_CALL_REL32;
+ pNewValue[offsetof(FixupPrecode,m_op)] = X86_INSTR_JMP_REL32;
+
+ *(INT32*)(&pNewValue[offsetof(FixupPrecode,m_rel32)]) = rel32UsingJumpStub(&m_rel32, target, pMD);
+
+ _ASSERTE(IS_ALIGNED(this, sizeof(INT64)));
+ EnsureWritableExecutablePages(this, sizeof(INT64));
+ return FastInterlockCompareExchangeLong((INT64*) this, newValue, oldValue) == oldValue;
+}
+
+#ifdef FEATURE_NATIVE_IMAGE_GENERATION
+// Partial initialization. Used to save regrouped chunks.
+void FixupPrecode::InitForSave(int iPrecodeChunkIndex)
+{
+ m_op = X86_INSTR_CALL_REL32; // call PrecodeFixupThunk
+ m_type = FixupPrecode::TypePrestub;
+
+ _ASSERTE(FitsInU1(iPrecodeChunkIndex));
+ m_PrecodeChunkIndex = static_cast<BYTE>(iPrecodeChunkIndex);
+
+ // The rest is initialized in code:FixupPrecode::Fixup
+}
+
+void FixupPrecode::Fixup(DataImage *image, MethodDesc * pMD)
+{
+ STANDARD_VM_CONTRACT;
+
+ // Note that GetMethodDesc() does not return the correct value because of
+ // regrouping of MethodDescs into hot and cold blocks. That's why the caller
+ // has to supply the actual MethodDesc
+
+ SSIZE_T mdChunkOffset;
+ ZapNode * pMDChunkNode = image->GetNodeForStructure(pMD, &mdChunkOffset);
+ ZapNode * pHelperThunk = image->GetHelperThunk(CORINFO_HELP_EE_PRECODE_FIXUP);
+
+ image->FixupFieldToNode(this, offsetof(FixupPrecode, m_rel32),
+ pHelperThunk, 0, IMAGE_REL_BASED_REL32);
+
+ // Set the actual chunk index
+ FixupPrecode * pNewPrecode = (FixupPrecode *)image->GetImagePointer(this);
+
+ size_t mdOffset = mdChunkOffset - sizeof(MethodDescChunk);
+ size_t chunkIndex = mdOffset / MethodDesc::ALIGNMENT;
+ _ASSERTE(FitsInU1(chunkIndex));
+ pNewPrecode->m_MethodDescChunkIndex = (BYTE) chunkIndex;
+
+ // Fixup the base of MethodDescChunk
+ if (m_PrecodeChunkIndex == 0)
+ {
+ image->FixupFieldToNode(this, (BYTE *)GetBase() - (BYTE *)this,
+ pMDChunkNode, sizeof(MethodDescChunk));
+ }
+}
+#endif // FEATURE_NATIVE_IMAGE_GENERATION
+
+#endif // HAS_FIXUP_PRECODE
+
+#endif // !DACCESS_COMPILE
+
+
+#ifdef HAS_THISPTR_RETBUF_PRECODE
+
+// rel32 jmp target that points back to the jump (infinite loop).
+// Used to mark uninitialized ThisPtrRetBufPrecode target
+#define REL32_JMP_SELF (-5)
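+// (A jmp rel32 instruction is 5 bytes - opcode E9 plus a 4-byte displacement - and the displacement
+// is relative to the end of the instruction, so a displacement of -5 makes the jump target itself.)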
+
+#ifndef DACCESS_COMPILE
+void ThisPtrRetBufPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator)
+{
+ WRAPPER_NO_CONTRACT;
+
+ IN_WIN64(m_nop1 = X86_INSTR_NOP;) // nop
+#ifdef UNIX_AMD64_ABI
+ m_prefix1 = 0x48;
+ m_movScratchArg0 = 0xC78B; // mov rax,rdi
+ m_prefix2 = 0x48;
+ m_movArg0Arg1 = 0xFE8B; // mov rdi,rsi
+ m_prefix3 = 0x48;
+ m_movArg1Scratch = 0xF08B; // mov rsi,rax
+#else
+ IN_WIN64(m_prefix1 = 0x48;)
+ m_movScratchArg0 = 0xC889; // mov r/eax,r/ecx
+ IN_WIN64(m_prefix2 = 0x48;)
+ m_movArg0Arg1 = 0xD189; // mov r/ecx,r/edx
+ IN_WIN64(m_prefix3 = 0x48;)
+ m_movArg1Scratch = 0xC289; // mov r/edx,r/eax
+#endif
+ m_nop2 = X86_INSTR_NOP; // nop
+ m_jmp = X86_INSTR_JMP_REL32; // jmp rel32
+ m_pMethodDesc = (TADDR)pMD;
+
+ // This precode is never patched lazily - avoid unnecessary jump stub allocation
+ m_rel32 = REL32_JMP_SELF;
+}
+
+BOOL ThisPtrRetBufPrecode::SetTargetInterlocked(TADDR target, TADDR expected)
+{
+ CONTRACTL
+ {
+ THROWS;
+ GC_TRIGGERS;
+ }
+ CONTRACTL_END;
+
+ // This precode is never patched lazily - the interlocked semantics is not required.
+ _ASSERTE(m_rel32 == REL32_JMP_SELF);
+
+ // Use pMD == NULL to allocate the jump stub in non-dynamic heap that has the same lifetime as the precode itself
+ m_rel32 = rel32UsingJumpStub(&m_rel32, target, NULL /* pMD */, ((MethodDesc *)GetMethodDesc())->GetLoaderAllocatorForCode());
+
+ return TRUE;
+}
+#endif // !DACCESS_COMPILE
+
+PCODE ThisPtrRetBufPrecode::GetTarget()
+{
+ LIMITED_METHOD_DAC_CONTRACT;
+
+ // This precode is never patched lazily - pretend that the uninitialized m_rel32 points to prestub
+ if (m_rel32 == REL32_JMP_SELF)
+ return GetPreStubEntryPoint();
+
+ return rel32Decode(PTR_HOST_MEMBER_TADDR(ThisPtrRetBufPrecode, this, m_rel32));
+}
+
+#endif // HAS_THISPTR_RETBUF_PRECODE
diff --git a/src/vm/i386/stublinkerx86.h b/src/vm/i386/stublinkerx86.h
new file mode 100644
index 0000000000..237fc794d4
--- /dev/null
+++ b/src/vm/i386/stublinkerx86.h
@@ -0,0 +1,781 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef STUBLINKERX86_H_
+#define STUBLINKERX86_H_
+
+#include "stublink.h"
+
+struct ArrayOpScript;
+class MetaSig;
+
+//=======================================================================
+
+#define X86_INSTR_CALL_REL32 0xE8 // call rel32
+#define X86_INSTR_CALL_IND 0x15FF // call dword ptr[addr32]
+#define X86_INSTR_CALL_IND_EAX 0x10FF // call dword ptr[eax]
+#define X86_INSTR_CALL_IND_EAX_OFFSET 0x50FF // call dword ptr[eax + offset] ; where offset follows these 2 bytes
+#define X86_INSTR_CALL_EAX 0xD0FF // call eax
+#define X86_INSTR_JMP_REL32 0xE9 // jmp rel32
+#define X86_INSTR_JMP_IND 0x25FF // jmp dword ptr[addr32]
+#define X86_INSTR_JMP_EAX 0xE0FF // jmp eax
+#define X86_INSTR_MOV_EAX_IMM32 0xB8 // mov eax, imm32
+#define X86_INSTR_MOV_EAX_ECX_IND 0x018b // mov eax, [ecx]
+#define X86_INSTR_CMP_IND_ECX_IMM32 0x3981 // cmp [ecx], imm32
+#define X86_INSTR_MOV_RM_R 0x89 // mov r/m,reg
+
+#define X86_INSTR_MOV_AL 0xB0 // mov al, imm8
+#define X86_INSTR_JMP_REL8 0xEB // jmp short rel8
+
+#define X86_INSTR_NOP 0x90 // nop
+#define X86_INSTR_NOP3_1 0x9090 // 1st word of 3-byte nop
+#define X86_INSTR_NOP3_3 0x90 // 3rd byte of 3-byte nop
+#define X86_INSTR_INT3 0xCC // int 3
+#define X86_INSTR_HLT 0xF4 // hlt
+
+#define X86_INSTR_MOVAPS_R_RM 0x280F // movaps xmm1, xmm2/mem128
+#define X86_INSTR_MOVAPS_RM_R 0x290F // movaps xmm1/mem128, xmm2
+#define X86_INSTR_MOVLPS_R_RM 0x120F // movlps xmm1, xmm2/mem128
+#define X86_INSTR_MOVLPS_RM_R 0x130F // movlps xmm1/mem128, xmm2
+#define X86_INSTR_MOVUPS_R_RM 0x100F // movups xmm1, xmm2/mem128
+#define X86_INSTR_MOVUPS_RM_R 0x110F // movups xmm1/mem128, xmm2
+#define X86_INSTR_XORPS 0x570F // xorps xmm1, xmm2/mem128
+
+#ifdef _TARGET_AMD64_
+#define X86_INSTR_MOV_R10_IMM64 0xBA49 // mov r10, imm64
+#endif
+
+//----------------------------------------------------------------------
+// Encodes X86 registers. The numbers are chosen to match Intel's opcode
+// encoding.
+//----------------------------------------------------------------------
+enum X86Reg
+{
+ kEAX = 0,
+ kECX = 1,
+ kEDX = 2,
+ kEBX = 3,
+ // kESP intentionally omitted because of its irregular treatment in MOD/RM
+ kEBP = 5,
+ kESI = 6,
+ kEDI = 7,
+
+#ifdef _TARGET_X86_
+ NumX86Regs = 8,
+#endif // _TARGET_X86_
+
+ kXMM0 = 0,
+ kXMM1 = 1,
+ kXMM2 = 2,
+ kXMM3 = 3,
+ kXMM4 = 4,
+ kXMM5 = 5,
+#if defined(_TARGET_AMD64_)
+ kXMM6 = 6,
+ kXMM7 = 7,
+ kXMM8 = 8,
+ kXMM9 = 9,
+ kXMM10 = 10,
+ kXMM11 = 11,
+ kXMM12 = 12,
+ kXMM13 = 13,
+ kXMM14 = 14,
+ kXMM15 = 15,
+ // Integer registers commence here
+ kRAX = 0,
+ kRCX = 1,
+ kRDX = 2,
+ kRBX = 3,
+ // kRSP intentionally omitted because of its irregular treatment in MOD/RM
+ kRBP = 5,
+ kRSI = 6,
+ kRDI = 7,
+ kR8 = 8,
+ kR9 = 9,
+ kR10 = 10,
+ kR11 = 11,
+ kR12 = 12,
+ kR13 = 13,
+ kR14 = 14,
+ kR15 = 15,
+ NumX86Regs = 16,
+
+#endif // _TARGET_AMD64_
+
+ // We use "push ecx" instead of "sub esp, sizeof(LPVOID)"
+ kDummyPushReg = kECX
+};
+
+
+// Use this only if you are absolutely sure that the instruction format
+// handles it. This is not declared as X86Reg so that users are forced
+// to add a cast and think about what exactly they are doing.
+const int kESP_Unsafe = 4;
+
+//----------------------------------------------------------------------
+// Encodes X86 conditional jumps. The numbers are chosen to match
+// Intel's opcode encoding.
+//----------------------------------------------------------------------
+class X86CondCode {
+ public:
+ enum cc {
+ kJA = 0x7,
+ kJAE = 0x3,
+ kJB = 0x2,
+ kJBE = 0x6,
+ kJC = 0x2,
+ kJE = 0x4,
+ kJZ = 0x4,
+ kJG = 0xf,
+ kJGE = 0xd,
+ kJL = 0xc,
+ kJLE = 0xe,
+ kJNA = 0x6,
+ kJNAE = 0x2,
+ kJNB = 0x3,
+ kJNBE = 0x7,
+ kJNC = 0x3,
+ kJNE = 0x5,
+ kJNG = 0xe,
+ kJNGE = 0xc,
+ kJNL = 0xd,
+ kJNLE = 0xf,
+ kJNO = 0x1,
+ kJNP = 0xb,
+ kJNS = 0x9,
+ kJNZ = 0x5,
+ kJO = 0x0,
+ kJP = 0xa,
+ kJPE = 0xa,
+ kJPO = 0xb,
+ kJS = 0x8,
+ };
+};
+
+//----------------------------------------------------------------------
+// StubLinker with extensions for generating X86 code.
+//----------------------------------------------------------------------
+class StubLinkerCPU : public StubLinker
+{
+ public:
+
+#ifdef _TARGET_AMD64_
+ enum X86OperandSize
+ {
+ k32BitOp,
+ k64BitOp,
+ };
+#endif
+
+ VOID X86EmitAddReg(X86Reg reg, INT32 imm32);
+ VOID X86EmitAddRegReg(X86Reg destreg, X86Reg srcReg);
+ VOID X86EmitSubReg(X86Reg reg, INT32 imm32);
+ VOID X86EmitSubRegReg(X86Reg destreg, X86Reg srcReg);
+
+ VOID X86EmitMovRegReg(X86Reg destReg, X86Reg srcReg);
+ VOID X86EmitMovSPReg(X86Reg srcReg);
+ VOID X86EmitMovRegSP(X86Reg destReg);
+
+ VOID X86EmitPushReg(X86Reg reg);
+ VOID X86EmitPopReg(X86Reg reg);
+ VOID X86EmitPushRegs(unsigned regSet);
+ VOID X86EmitPopRegs(unsigned regSet);
+ VOID X86EmitPushImm32(UINT value);
+ VOID X86EmitPushImm32(CodeLabel &pTarget);
+ VOID X86EmitPushImm8(BYTE value);
+ VOID X86EmitPushImmPtr(LPVOID value WIN64_ARG(X86Reg tmpReg = kR10));
+
+ VOID X86EmitCmpRegImm32(X86Reg reg, INT32 imm32); // cmp reg, imm32
+ VOID X86EmitCmpRegIndexImm32(X86Reg reg, INT32 offs, INT32 imm32); // cmp [reg+offs], imm32
+#ifdef _TARGET_AMD64_
+ VOID X64EmitCmp32RegIndexImm32(X86Reg reg, INT32 offs, INT32 imm32); // cmp dword ptr [reg+offs], imm32
+
+ VOID X64EmitMovXmmXmm(X86Reg destXmmreg, X86Reg srcXmmReg);
+ VOID X64EmitMovdqaFromMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs = 0);
+ VOID X64EmitMovdqaToMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs = 0);
+ VOID X64EmitMovSDFromMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs = 0);
+ VOID X64EmitMovSDToMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs = 0);
+ VOID X64EmitMovSSFromMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs = 0);
+ VOID X64EmitMovSSToMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs = 0);
+
+ VOID X64EmitMovXmmWorker(BYTE prefix, BYTE opcode, X86Reg Xmmreg, X86Reg baseReg, __int32 ofs = 0);
+#endif
+
+ VOID X86EmitZeroOutReg(X86Reg reg);
+ VOID X86EmitJumpReg(X86Reg reg);
+
+ VOID X86EmitOffsetModRM(BYTE opcode, X86Reg altreg, X86Reg indexreg, __int32 ofs);
+ VOID X86EmitOffsetModRmSIB(BYTE opcode, X86Reg opcodeOrReg, X86Reg baseReg, X86Reg indexReg, __int32 scale, __int32 ofs);
+
+ VOID X86EmitTailcallWithESPAdjust(CodeLabel *pTarget, INT32 imm32);
+ VOID X86EmitTailcallWithSinglePop(CodeLabel *pTarget, X86Reg reg);
+
+ VOID X86EmitNearJump(CodeLabel *pTarget);
+ VOID X86EmitCondJump(CodeLabel *pTarget, X86CondCode::cc condcode);
+ VOID X86EmitCall(CodeLabel *target, int iArgBytes);
+ VOID X86EmitReturn(WORD wArgBytes);
+#ifdef _TARGET_AMD64_
+ VOID X86EmitLeaRIP(CodeLabel *target, X86Reg reg);
+#endif
+
+ static const unsigned X86TLSFetch_TRASHABLE_REGS = (1<<kEAX) | (1<<kEDX) | (1<<kECX);
+ VOID X86EmitTLSFetch(DWORD idx, X86Reg dstreg, unsigned preservedRegSet);
+
+ VOID X86EmitCurrentThreadFetch(X86Reg dstreg, unsigned preservedRegSet);
+ VOID X86EmitCurrentAppDomainFetch(X86Reg dstreg, unsigned preservedRegSet);
+
+ VOID X86EmitIndexRegLoad(X86Reg dstreg, X86Reg srcreg, __int32 ofs = 0);
+ VOID X86EmitIndexRegStore(X86Reg dstreg, __int32 ofs, X86Reg srcreg);
+#if defined(_TARGET_AMD64_)
+ VOID X86EmitIndexRegStoreRSP(__int32 ofs, X86Reg srcreg);
+ VOID X86EmitIndexRegStoreR12(__int32 ofs, X86Reg srcreg);
+#endif // defined(_TARGET_AMD64_)
+
+ VOID X86EmitIndexPush(X86Reg srcreg, __int32 ofs);
+ VOID X86EmitBaseIndexPush(X86Reg baseReg, X86Reg indexReg, __int32 scale, __int32 ofs);
+ VOID X86EmitIndexPop(X86Reg srcreg, __int32 ofs);
+ VOID X86EmitIndexLea(X86Reg dstreg, X86Reg srcreg, __int32 ofs);
+#if defined(_TARGET_AMD64_)
+ VOID X86EmitIndexLeaRSP(X86Reg dstreg, X86Reg srcreg, __int32 ofs);
+#endif // defined(_TARGET_AMD64_)
+
+ VOID X86EmitSPIndexPush(__int32 ofs);
+ VOID X86EmitSubEsp(INT32 imm32);
+ VOID X86EmitAddEsp(INT32 imm32);
+ VOID X86EmitEspOffset(BYTE opcode,
+ X86Reg altreg,
+ __int32 ofs
+ AMD64_ARG(X86OperandSize OperandSize = k64BitOp)
+ );
+ VOID X86EmitPushEBPframe();
+
+ // These are used to emit calls to notify the profiler of transitions in and out of
+ // managed code through COM->COM+ interop or N/Direct
+ VOID EmitProfilerComCallProlog(TADDR pFrameVptr, X86Reg regFrame);
+ VOID EmitProfilerComCallEpilog(TADDR pFrameVptr, X86Reg regFrame);
+
+
+
+ // Emits the most efficient form of the operation:
+ //
+ // opcode altreg, [basereg + scaledreg*scale + ofs]
+ //
+ // or
+ //
+ // opcode [basereg + scaledreg*scale + ofs], altreg
+ //
+ // (the opcode determines which comes first.)
+ //
+ //
+ // Limitations:
+ //
+ // scale must be 0,1,2,4 or 8.
+ // if scale == 0, scaledreg is ignored.
+ // basereg and altreg may be equal to 4 (ESP) but scaledreg cannot
+ // for some opcodes, "altreg" may actually select an operation
+ // rather than a second register argument.
+ //
+
+ VOID X86EmitOp(WORD opcode,
+ X86Reg altreg,
+ X86Reg basereg,
+ __int32 ofs = 0,
+ X86Reg scaledreg = (X86Reg)0,
+ BYTE scale = 0
+ AMD64_ARG(X86OperandSize OperandSize = k32BitOp)
+ );
+
+#ifdef _TARGET_AMD64_
+ FORCEINLINE
+ VOID X86EmitOp(WORD opcode,
+ X86Reg altreg,
+ X86Reg basereg,
+ __int32 ofs,
+ X86OperandSize OperandSize
+ )
+ {
+ X86EmitOp(opcode, altreg, basereg, ofs, (X86Reg)0, 0, OperandSize);
+ }
+#endif // _TARGET_AMD64_
+
+ // Emits
+ //
+ // opcode altreg, modrmreg
+ //
+ // or
+ //
+ // opcode modrmreg, altreg
+ //
+ // (the opcode determines which one comes first)
+ //
+ // For single-operand opcodes, "altreg" actually selects
+ // an operation rather than a register.
+
+ VOID X86EmitR2ROp(WORD opcode,
+ X86Reg altreg,
+ X86Reg modrmreg
+ AMD64_ARG(X86OperandSize OperandSize = k64BitOp)
+ );
+
+ VOID X86EmitRegLoad(X86Reg reg, UINT_PTR imm);
+
+ VOID X86EmitRegSave(X86Reg altreg, __int32 ofs)
+ {
+ LIMITED_METHOD_CONTRACT;
+ X86EmitEspOffset(0x89, altreg, ofs);
+ // X86Reg values never are outside a byte.
+ UnwindSavedReg(static_cast<UCHAR>(altreg), ofs);
+ }
+
+ VOID X86_64BitOperands ()
+ {
+ WRAPPER_NO_CONTRACT;
+#ifdef _TARGET_AMD64_
+ Emit8(0x48);
+#endif
+ }
+
+ VOID EmitEnable(CodeLabel *pForwardRef);
+ VOID EmitRareEnable(CodeLabel *pRejoinPoint);
+
+ VOID EmitDisable(CodeLabel *pForwardRef, BOOL fCallIn, X86Reg ThreadReg);
+ VOID EmitRareDisable(CodeLabel *pRejoinPoint);
+ VOID EmitRareDisableHRESULT(CodeLabel *pRejoinPoint, CodeLabel *pExitPoint);
+
+ VOID EmitSetup(CodeLabel *pForwardRef);
+ VOID EmitRareSetup(CodeLabel* pRejoinPoint, BOOL fThrow);
+ VOID EmitCheckGSCookie(X86Reg frameReg, int gsCookieOffset);
+
+#ifdef _TARGET_X86_
+ void EmitComMethodStubProlog(TADDR pFrameVptr, CodeLabel** rgRareLabels,
+ CodeLabel** rgRejoinLabels, BOOL bShouldProfile);
+
+ void EmitComMethodStubEpilog(TADDR pFrameVptr, CodeLabel** rgRareLabels,
+ CodeLabel** rgRejoinLabels, BOOL bShouldProfile);
+#endif
+
+ VOID EmitMethodStubProlog(TADDR pFrameVptr, int transitionBlockOffset);
+ VOID EmitMethodStubEpilog(WORD numArgBytes, int transitionBlockOffset);
+
+ VOID EmitUnboxMethodStub(MethodDesc* pRealMD);
+#if defined(FEATURE_SHARE_GENERIC_CODE)
+ VOID EmitInstantiatingMethodStub(MethodDesc* pSharedMD, void* extra);
+#endif // FEATURE_SHARE_GENERIC_CODE
+
+#if defined(FEATURE_COMINTEROP) && defined(_TARGET_X86_)
+ //========================================================================
+ // shared Epilog for stubs that enter managed code from COM
+ // uses a return thunk within the method desc
+ void EmitSharedComMethodStubEpilog(TADDR pFrameVptr,
+ CodeLabel** rgRareLabels,
+ CodeLabel** rgRejoinLabels,
+ unsigned offsetReturnThunk,
+ BOOL bShouldProfile);
+#endif // FEATURE_COMINTEROP && _TARGET_X86_
+
+ //===========================================================================
+ // Computes hash code for MulticastDelegate.Invoke()
+ static UINT_PTR HashMulticastInvoke(MetaSig* pSig);
+
+ //===========================================================================
+ // Emits code for Delegate.Invoke() any delegate type
+ VOID EmitDelegateInvoke();
+
+ //===========================================================================
+ // Emits code for MulticastDelegate.Invoke() - sig specific
+ VOID EmitMulticastInvoke(UINT_PTR hash);
+
+ //===========================================================================
+ // Emits code for Delegate.Invoke() on delegates that recorded creator assembly
+ VOID EmitSecureDelegateInvoke(UINT_PTR hash);
+
+ //===========================================================================
+ // Emits code to adjust for a static delegate target.
+ VOID EmitShuffleThunk(struct ShuffleEntry *pShuffleEntryArray);
+
+
+ //===========================================================================
+ // Emits code to do an array operation.
+ VOID EmitArrayOpStub(const ArrayOpScript*);
+
+ //Worker function to emit throw helpers for array ops.
+ VOID EmitArrayOpStubThrow(unsigned exConst, unsigned cbRetArg);
+
+ //===========================================================================
+ // Emits code to break into debugger
+ VOID EmitDebugBreak();
+
+#if defined(_DEBUG) && (defined(_TARGET_AMD64_) || defined(_TARGET_X86_)) && !defined(FEATURE_PAL)
+ //===========================================================================
+ // Emits code to log JITHelper access
+ void EmitJITHelperLoggingThunk(PCODE pJitHelper, LPVOID helperFuncCount);
+#endif
+
+#ifdef _DEBUG
+ VOID X86EmitDebugTrashReg(X86Reg reg);
+#endif
+
+#if defined(_DEBUG) && defined(STUBLINKER_GENERATES_UNWIND_INFO) && !defined(CROSSGEN_COMPILE)
+ virtual VOID EmitUnwindInfoCheckWorker (CodeLabel *pCheckLabel);
+ virtual VOID EmitUnwindInfoCheckSubfunction();
+#endif
+
+#ifdef _TARGET_AMD64_
+
+ static Stub * CreateTailCallCopyArgsThunk(CORINFO_SIG_INFO * pSig,
+ CorInfoHelperTailCallSpecialHandling flags);
+
+#endif // _TARGET_AMD64_
+
+ private:
+ VOID X86EmitSubEspWorker(INT32 imm32);
+
+ public:
+ static void Init();
+
+};
+
+inline TADDR rel32Decode(/*PTR_INT32*/ TADDR pRel32)
+{
+ LIMITED_METHOD_CONTRACT;
+ SUPPORTS_DAC;
+ return pRel32 + 4 + *PTR_INT32(pRel32);
+}
+
+BOOL rel32SetInterlocked(/*PINT32*/ PVOID pRel32, TADDR target, TADDR expected, MethodDesc* pMD);
+
+//------------------------------------------------------------------------
+//
+// Precode definitions
+//
+//------------------------------------------------------------------------
+
+EXTERN_C VOID STDCALL PrecodeFixupThunk();
+
+#ifdef _WIN64
+
+#define OFFSETOF_PRECODE_TYPE 0
+#define OFFSETOF_PRECODE_TYPE_CALL_OR_JMP 5
+#define OFFSETOF_PRECODE_TYPE_MOV_R10 10
+
+#define SIZEOF_PRECODE_BASE 16
+
+#else
+
+EXTERN_C VOID STDCALL PrecodeRemotingThunk();
+
+#define OFFSETOF_PRECODE_TYPE 5
+#define OFFSETOF_PRECODE_TYPE_CALL_OR_JMP 5
+#define OFFSETOF_PRECODE_TYPE_MOV_RM_R 6
+
+#define SIZEOF_PRECODE_BASE 8
+
+#endif // _WIN64
+
+
+#include <pshpack1.h>
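+// Byte packing (no padding) so that the precode struct layouts below match the raw instruction
+// encodings they describe.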
+
+// Invalid precode type
+struct InvalidPrecode {
+ // int3
+ static const int Type = 0xCC;
+};
+
+
+// Regular precode
+struct StubPrecode {
+
+#ifdef _WIN64
+ static const BYTE Type = 0x40;
+ // mov r10,pMethodDesc
+ // inc eax
+ // jmp Stub
+#else
+ static const BYTE Type = 0xED;
+ // mov eax,pMethodDesc
+ // mov ebp,ebp
+ // jmp Stub
+#endif // _WIN64
+
+ IN_WIN64(USHORT m_movR10;)
+ IN_WIN32(BYTE m_movEAX;)
+ TADDR m_pMethodDesc;
+ IN_WIN32(BYTE m_mov_rm_r;)
+ BYTE m_type;
+ BYTE m_jmp;
+ INT32 m_rel32;
+
+ void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator = NULL, BYTE type = StubPrecode::Type, TADDR target = NULL);
+
+ TADDR GetMethodDesc()
+ {
+ LIMITED_METHOD_DAC_CONTRACT;
+
+ return m_pMethodDesc;
+ }
+
+ PCODE GetTarget()
+ {
+ LIMITED_METHOD_DAC_CONTRACT;
+
+ return rel32Decode(PTR_HOST_MEMBER_TADDR(StubPrecode, this, m_rel32));
+ }
+
+ BOOL SetTargetInterlocked(TADDR target, TADDR expected)
+ {
+ CONTRACTL
+ {
+ THROWS;
+ GC_TRIGGERS;
+ }
+ CONTRACTL_END;
+
+ EnsureWritableExecutablePages(&m_rel32);
+ return rel32SetInterlocked(&m_rel32, target, expected, (MethodDesc*)GetMethodDesc());
+ }
+};
+IN_WIN64(static_assert_no_msg(offsetof(StubPrecode, m_movR10) == OFFSETOF_PRECODE_TYPE);)
+IN_WIN64(static_assert_no_msg(offsetof(StubPrecode, m_type) == OFFSETOF_PRECODE_TYPE_MOV_R10);)
+IN_WIN32(static_assert_no_msg(offsetof(StubPrecode, m_mov_rm_r) == OFFSETOF_PRECODE_TYPE);)
+IN_WIN32(static_assert_no_msg(offsetof(StubPrecode, m_type) == OFFSETOF_PRECODE_TYPE_MOV_RM_R);)
+typedef DPTR(StubPrecode) PTR_StubPrecode;
+
+
+#ifdef HAS_NDIRECT_IMPORT_PRECODE
+
+// NDirect import precode
+// (This is fake precode. VTable slot does not point to it.)
+struct NDirectImportPrecode : StubPrecode {
+
+#ifdef _WIN64
+ static const int Type = 0x48;
+ // mov r10,pMethodDesc
+ // dec eax
+ // jmp NDirectImportThunk
+#else
+ static const int Type = 0xC0;
+ // mov eax,pMethodDesc
+ // mov eax,eax
+ // jmp NDirectImportThunk
+#endif // _WIN64
+
+ void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator);
+
+ LPVOID GetEntrypoint()
+ {
+ LIMITED_METHOD_CONTRACT;
+ return this;
+ }
+};
+typedef DPTR(NDirectImportPrecode) PTR_NDirectImportPrecode;
+
+#endif // HAS_NDIRECT_IMPORT_PRECODE
+
+
+#ifdef HAS_REMOTING_PRECODE
+
+// Precode with embedded remoting interceptor
+struct RemotingPrecode {
+
+#ifdef _WIN64
+ static const int Type = XXX; // NYI
+ // mov r10,pMethodDesc
+ // call PrecodeRemotingThunk
+ // jmp Prestub/Stub/NativeCode
+#else
+ static const int Type = 0x90;
+ // mov eax,pMethodDesc
+ // nop
+ // call PrecodeRemotingThunk
+ // jmp Prestub/Stub/NativeCode
+#endif // _WIN64
+
+ IN_WIN64(USHORT m_movR10;)
+ IN_WIN32(BYTE m_movEAX;)
+ TADDR m_pMethodDesc;
+ BYTE m_type;
+ BYTE m_call;
+ INT32 m_callRel32;
+ BYTE m_jmp;
+ INT32 m_rel32;
+
+ void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator = NULL);
+
+ TADDR GetMethodDesc()
+ {
+ LIMITED_METHOD_CONTRACT;
+ SUPPORTS_DAC;
+
+ return m_pMethodDesc;
+ }
+
+ PCODE GetTarget()
+ {
+ LIMITED_METHOD_DAC_CONTRACT;
+
+ return rel32Decode(PTR_HOST_MEMBER_TADDR(RemotingPrecode, this, m_rel32));
+ }
+
+ BOOL SetTargetInterlocked(TADDR target, TADDR expected)
+ {
+ CONTRACTL
+ {
+ THROWS;
+ GC_TRIGGERS;
+ }
+ CONTRACTL_END;
+
+ EnsureWritableExecutablePages(&m_rel32);
+ return rel32SetInterlocked(&m_rel32, target, expected, (MethodDesc*)GetMethodDesc());
+ }
+};
+IN_WIN64(static_assert_no_msg(offsetof(RemotingPrecode, m_movR10) == OFFSETOF_PRECODE_TYPE);)
+IN_WIN64(static_assert_no_msg(offsetof(RemotingPrecode, m_type) == OFFSETOF_PRECODE_TYPE_MOV_R10);)
+IN_WIN32(static_assert_no_msg(offsetof(RemotingPrecode, m_type) == OFFSETOF_PRECODE_TYPE);)
+typedef DPTR(RemotingPrecode) PTR_RemotingPrecode;
+
+#endif // HAS_REMOTING_PRECODE
+
+
+#ifdef HAS_FIXUP_PRECODE
+
+// Fixup precode is used in ngen images when the prestub does just a one-time fixup.
+// The fixup precode is a simple jump once patched. It does not have the two-instruction overhead of a regular precode.
+struct FixupPrecode {
+
+ static const int TypePrestub = 0x5E;
+ // The entrypoint has to be 8-byte aligned so that the "call PrecodeFixupThunk" can be patched to "jmp NativeCode" atomically.
+ // call PrecodeFixupThunk
+ // db TypePrestub (pop esi)
+ // db MethodDescChunkIndex
+ // db PrecodeChunkIndex
+
+ static const int Type = 0x5F;
+ // After it has been patched to point to native code
+ // jmp NativeCode
+ // db Type (pop edi)
+
+ BYTE m_op;
+ INT32 m_rel32;
+ BYTE m_type;
+ BYTE m_MethodDescChunkIndex;
+ BYTE m_PrecodeChunkIndex;
+#ifdef HAS_FIXUP_PRECODE_CHUNKS
+ // Fixup precode chunk is associated with MethodDescChunk. The layout of the fixup precode chunk is:
+ //
+ // FixupPrecode Entrypoint PrecodeChunkIndex = 2
+ // FixupPrecode Entrypoint PrecodeChunkIndex = 1
+ // FixupPrecode Entrypoint PrecodeChunkIndex = 0
+ // TADDR Base of MethodDescChunk
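+ //
+ // For example, for the precode with PrecodeChunkIndex = 1 above, GetBase() returns
+ // this + (1 + 1) * sizeof(FixupPrecode), i.e. the TADDR slot at the end of the chunk, and
+ // GetMethodDesc() adds m_MethodDescChunkIndex * MethodDesc::ALIGNMENT to the value stored there.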
+#else
+ TADDR m_pMethodDesc;
+#endif
+
+ void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator, int iMethodDescChunkIndex = 0, int iPrecodeChunkIndex = 0);
+
+#ifdef HAS_FIXUP_PRECODE_CHUNKS
+ TADDR GetBase()
+ {
+ LIMITED_METHOD_CONTRACT;
+ SUPPORTS_DAC;
+
+ return dac_cast<TADDR>(this) + (m_PrecodeChunkIndex + 1) * sizeof(FixupPrecode);
+ }
+
+ TADDR GetMethodDesc();
+#else // HAS_FIXUP_PRECODE_CHUNKS
+ TADDR GetMethodDesc()
+ {
+ LIMITED_METHOD_CONTRACT;
+ return m_pMethodDesc;
+ }
+#endif // HAS_FIXUP_PRECODE_CHUNKS
+
+ PCODE GetTarget()
+ {
+ LIMITED_METHOD_DAC_CONTRACT;
+
+ return rel32Decode(PTR_HOST_MEMBER_TADDR(FixupPrecode, this, m_rel32));
+ }
+
+ BOOL SetTargetInterlocked(TADDR target, TADDR expected);
+
+ static BOOL IsFixupPrecodeByASM(TADDR addr)
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ return *dac_cast<PTR_BYTE>(addr) == X86_INSTR_JMP_REL32;
+ }
+
+#ifdef FEATURE_PREJIT
+ // Partial initialization. Used to save regrouped chunks.
+ void InitForSave(int iPrecodeChunkIndex);
+
+ void Fixup(DataImage *image, MethodDesc * pMD);
+#endif
+
+#ifdef DACCESS_COMPILE
+ void EnumMemoryRegions(CLRDataEnumMemoryFlags flags);
+#endif
+};
+IN_WIN32(static_assert_no_msg(offsetof(FixupPrecode, m_type) == OFFSETOF_PRECODE_TYPE));
+IN_WIN64(static_assert_no_msg(offsetof(FixupPrecode, m_op) == OFFSETOF_PRECODE_TYPE);)
+IN_WIN64(static_assert_no_msg(offsetof(FixupPrecode, m_type) == OFFSETOF_PRECODE_TYPE_CALL_OR_JMP);)
+
+typedef DPTR(FixupPrecode) PTR_FixupPrecode;
+
+#endif // HAS_FIXUP_PRECODE
+
+#ifdef HAS_THISPTR_RETBUF_PRECODE
+
+// Precode to shuffle 'this' and retbuf for closed delegates over static methods with a return buffer
+struct ThisPtrRetBufPrecode {
+
+#ifdef _WIN64
+ static const int Type = 0x90;
+#else
+ static const int Type = 0xC2;
+#endif // _WIN64
+
+ // mov regScratch,regArg0
+ // mov regArg0,regArg1
+ // mov regArg1,regScratch
+ // nop
+ // jmp EntryPoint
+ // dw pMethodDesc
+
+ IN_WIN64(BYTE m_nop1;)
+ IN_WIN64(BYTE m_prefix1;)
+ WORD m_movScratchArg0;
+ IN_WIN64(BYTE m_prefix2;)
+ WORD m_movArg0Arg1;
+ IN_WIN64(BYTE m_prefix3;)
+ WORD m_movArg1Scratch;
+ BYTE m_nop2;
+ BYTE m_jmp;
+ INT32 m_rel32;
+ TADDR m_pMethodDesc;
+
+ void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator);
+
+ TADDR GetMethodDesc()
+ {
+ LIMITED_METHOD_CONTRACT;
+ SUPPORTS_DAC;
+
+ return m_pMethodDesc;
+ }
+
+ PCODE GetTarget();
+
+ BOOL SetTargetInterlocked(TADDR target, TADDR expected);
+};
+IN_WIN32(static_assert_no_msg(offsetof(ThisPtrRetBufPrecode, m_movArg1Scratch) + 1 == OFFSETOF_PRECODE_TYPE);)
+typedef DPTR(ThisPtrRetBufPrecode) PTR_ThisPtrRetBufPrecode;
+
+#endif // HAS_THISPTR_RETBUF_PRECODE
+
+#include <poppack.h>
+
+#endif // STUBLINKERX86_H_
diff --git a/src/vm/i386/virtualcallstubcpu.hpp b/src/vm/i386/virtualcallstubcpu.hpp
new file mode 100644
index 0000000000..33ce8199b9
--- /dev/null
+++ b/src/vm/i386/virtualcallstubcpu.hpp
@@ -0,0 +1,1077 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+// File: virtualcallstubcpu.hpp
+//
+
+
+//
+
+//
+// ============================================================================
+
+#ifndef _VIRTUAL_CALL_STUB_X86_H
+#define _VIRTUAL_CALL_STUB_X86_H
+
+#ifdef DECLARE_DATA
+#include "asmconstants.h"
+#ifdef FEATURE_REMOTING
+#include "remoting.h"
+#endif
+#endif
+
+#include <pshpack1.h> // Since we are placing code, we want byte packing of the structs
+
+#define USES_LOOKUP_STUBS 1
+
+/*********************************************************************************************
+Stubs that contain code are all part of larger structs called Holders. There is a
+Holder for each kind of stub, i.e. XXXStub is contained within XXXHolder. Holders are
+essentially an implementation trick that allowed rearranging the code sequences more
+easily while trying out different alternatives, and for dealing with any alignment
+issues in a way that was mostly immune to the actual code sequences. These Holders
+should be revisited when the stub code sequences are fixed, since in many cases they
+add extra space to a stub that is not really needed.
+
+Stubs are placed in cache and hash tables. Since unaligned access of data in memory
+is very slow, the keys used in those tables should be aligned. The things used as keys
+typically also occur in the generated code, e.g. a token as an immediate part of an instruction.
+For now, to avoid alignment computations as different code strategies are tried out, the key
+fields are all in the Holders. Eventually, many of these fields should be dropped, and the instruction
+streams aligned so that the immediate fields fall on aligned boundaries.
+*/
+
+#if USES_LOOKUP_STUBS
+
+struct LookupStub;
+struct LookupHolder;
+
+/*LookupStub**************************************************************************************
+Virtual and interface call sites are initially setup to point at LookupStubs.
+This is because the runtime type of the <this> pointer is not yet known,
+so the target cannot be resolved. Note: if the jit is able to determine the runtime type
+of the <this> pointer, it should be generating a direct call not a virtual or interface call.
+This stub pushes a lookup token onto the stack to identify the sought after method, and then
+jumps into the EE (VirtualCallStubManager::ResolveWorkerStub) to effectuate the lookup and
+transfer of control to the appropriate target method implementation, perhaps patching of the call site
+along the way to point to a more appropriate stub. Hence callsites that point to LookupStubs
+get quickly changed to point to another kind of stub.
+*/
+struct LookupStub
+{
+ inline PCODE entryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; }
+ inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; }
+ inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(LookupStub); }
+
+private:
+ friend struct LookupHolder;
+
+ // LookupStub:: _entryPoint expects:
+ // ecx: object (the "this" pointer)
+ // eax: siteAddrForRegisterIndirect if this is a RegisterIndirect dispatch call
+ BYTE _entryPoint [2]; // 50 push eax ;save siteAddrForRegisterIndirect - this may be an indirect call
+ // 68 push
+ size_t _token; // xx xx xx xx 32-bit constant
+#ifdef STUB_LOGGING
+ BYTE cntr2[2]; // ff 05 inc
+ size_t* c_lookup; // xx xx xx xx [call_lookup_counter]
+#endif //STUB_LOGGING
+ BYTE part2 [1]; // e9 jmp
+ DISPL _resolveWorkerDispl;// xx xx xx xx pc-rel displ
+};
+
+/* LookupHolders are the containers for LookupStubs, they provide for any alignment of
+stubs as necessary. In the case of LookupStubs, alignment is necessary since
+LookupStubs are placed in a hash table keyed by token. */
+struct LookupHolder
+{
+ static void InitializeStatic();
+
+ void Initialize(PCODE resolveWorkerTarget, size_t dispatchToken);
+
+ LookupStub* stub() { LIMITED_METHOD_CONTRACT; return &_stub; }
+
+ static LookupHolder* FromLookupEntry(PCODE lookupEntry);
+
+private:
+ friend struct LookupStub;
+
+ BYTE align[(sizeof(void*)-(offsetof(LookupStub,_token)%sizeof(void*)))%sizeof(void*)];
+ LookupStub _stub;
+ BYTE pad[sizeof(void*) -
+ ((sizeof(void*)-(offsetof(LookupStub,_token)%sizeof(void*))) +
+ (sizeof(LookupStub))
+ ) % sizeof(void*)]; //complete DWORD
+
+ static_assert_no_msg((sizeof(void*) -
+ ((sizeof(void*)-(offsetof(LookupStub,_token)%sizeof(void*))) +
+ (sizeof(LookupStub))
+ ) % sizeof(void*)) != 0);
+};
+
+#endif // USES_LOOKUP_STUBS
+
+struct DispatchStub;
+struct DispatchHolder;
+
+/*DispatchStub**************************************************************************************
+Monomorphic and mostly monomorphic call sites eventually point to DispatchStubs.
+A dispatch stub has an expected type (expectedMT), target address (target) and fail address (failure).
+If the <this> object in the calling frame is in fact of the expected type, then
+control is transferred to the target address, the method implementation. If not,
+then control is transferred to the fail address, a fail stub (see below) where a polymorphic
+lookup is done to find the correct address to go to.
+
+implementation note: Order, choice of instructions, and branch directions
+should be carefully tuned since it can have an inordinate effect on performance. Particular
+attention needs to be paid to the effects on the BTB and branch prediction, both in the small
+and in the large, i.e. it needs to run well in the face of BTB overflow--using static predictions.
+Note that since this stub is only used for mostly monomorphic callsites (ones that are not get patched
+to something else), the conditional jump "jne failure" is mostly not taken, and hence it is important
+that branch prediction statically predicts this, which means it must be a forward jump. The alternative
+is to reverse the order of the jumps and make sure that the resulting conditional jump "je implTarget"
+is statically predicted as taken, i.e. a backward jump. The current choice was taken since it was easier
+to control the placement of the stubs than control the placement of the jitted code and the stubs. */
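+// In rough pseudocode, the non-logging DispatchStub below does:
+//
+//     if (*(size_t*)ecx == _expectedMT)   // ecx holds 'this'; this is where a null 'this' faults
+//         goto implTarget;
+//     else
+//         goto failTarget;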
+struct DispatchStub
+{
+ inline PCODE entryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; }
+
+ inline size_t expectedMT() { LIMITED_METHOD_CONTRACT; return _expectedMT; }
+ inline PCODE implTarget() { LIMITED_METHOD_CONTRACT; return (PCODE) &_implDispl + sizeof(DISPL) + _implDispl; }
+ inline PCODE failTarget() { LIMITED_METHOD_CONTRACT; return (PCODE) &_failDispl + sizeof(DISPL) + _failDispl; }
+ inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(DispatchStub); }
+
+private:
+ friend struct DispatchHolder;
+
+ // DispatchStub:: _entryPoint expects:
+ // ecx: object (the "this" pointer)
+ // eax: siteAddrForRegisterIndirect if this is a RegisterIndirect dispatch call
+#ifndef STUB_LOGGING
+ BYTE _entryPoint [2]; // 81 39 cmp [ecx], ; This is the place where we are going to fault on null this.
+ size_t _expectedMT; // xx xx xx xx expectedMT ; If you change it, change also AdjustContextForVirtualStub in excep.cpp!!!
+ BYTE jmpOp1[2]; // 0f 85 jne
+ DISPL _failDispl; // xx xx xx xx failEntry ;must be forward jmp for perf reasons
+ BYTE jmpOp2; // e9 jmp
+ DISPL _implDispl; // xx xx xx xx implTarget
+#else //STUB_LOGGING
+ BYTE _entryPoint [2]; // ff 05 inc
+ size_t* d_call; // xx xx xx xx [call_mono_counter]
+ BYTE cmpOp [2]; // 81 39 cmp [ecx],
+ size_t _expectedMT; // xx xx xx xx expectedMT
+ BYTE jmpOp1[2]; // 0f 84 je
+ DISPL _implDispl; // xx xx xx xx implTarget ;during logging, perf is not so important
+ BYTE fail [2]; // ff 05 inc
+ size_t* d_miss; // xx xx xx xx [miss_mono_counter]
+ BYTE jmpFail; // e9 jmp
+ DISPL _failDispl; // xx xx xx xx failEntry
+#endif //STUB_LOGGING
+};
+
+/* DispatchHolders are the containers for DispatchStubs, they provide for any alignment of
+stubs as necessary. DispatchStubs are placed in a hashtable and in a cache. The keys for both
+are the pair expectedMT and token. Efficiency of the hash table is not a big issue,
+since lookups in it are fairly rare. Efficiency of the cache is paramount since it is accessed frequently
+(see ResolveStub below). Currently we are storing both of these fields in the DispatchHolder to simplify
+alignment issues. If inlineMT in the stub itself was aligned, then it could be the expectedMT field.
+While the token field can be logically gotten by following the failure target to the failEntryPoint
+of the ResolveStub and then to the token over there, for perf reasons of cache access, it is duplicated here.
+This allows us to use DispatchStubs in the cache. The alternative is to provide some other immutable struct
+for the cache composed of the triplet (expectedMT, token, target) and some sort of reclamation scheme when
+they are thrown out of the cache via overwrites (since concurrency will make the obvious approaches invalid).
+*/
+
+/* @workaround for ee resolution - Since the EE does not currently have a resolver function that
+does what we want, see notes in implementation of VirtualCallStubManager::Resolver, we are
+using dispatch stubs to simulate what we want. That means that inlineTarget, which should be immutable,
+is in fact written. Hence we have moved target out into the holder and aligned it so we can
+atomically update it. When we get a resolver function that does what we want, we can drop this field,
+and live with just the inlineTarget field in the stub itself, since immutability will hold.*/
+struct DispatchHolder
+{
+ static void InitializeStatic();
+
+ void Initialize(PCODE implTarget, PCODE failTarget, size_t expectedMT);
+
+ DispatchStub* stub() { LIMITED_METHOD_CONTRACT; return &_stub; }
+
+ static DispatchHolder* FromDispatchEntry(PCODE dispatchEntry);
+
+private:
+ //force expectedMT to be aligned since used as key in hash tables.
+#ifndef STUB_LOGGING
+ BYTE align[(sizeof(void*)-(offsetof(DispatchStub,_expectedMT)%sizeof(void*)))%sizeof(void*)];
+#endif
+ DispatchStub _stub;
+ BYTE pad[(sizeof(void*)-(sizeof(DispatchStub)%sizeof(void*))+offsetof(DispatchStub,_expectedMT))%sizeof(void*)]; //complete DWORD
+};
+
+struct ResolveStub;
+struct ResolveHolder;
+
+/*ResolveStub**************************************************************************************
+Polymorphic call sites and monomorphic calls that fail end up in a ResolveStub. There is only
+one resolver stub built for any given token, even though there may be many call sites that
+use that token and many distinct <this> types that are used in the calling call frames. A resolver stub
+actually has two entry points, one for polymorphic call sites and one for dispatch stubs that fail on their
+expectedMT test. There is a third part of the resolver stub that enters the ee when a decision should
+be made about changing the callsite. Therefore, we have defined the resolver stub as three distinct pieces,
+even though they are actually allocated as a single contiguous block of memory. These pieces are:
+
+A ResolveStub has two entry points:
+
+FailEntry - where the dispatch stub goes if the expected MT test fails. This piece of the stub does
+a check to see how often we are actually failing. If failures are frequent, control transfers to the
+patch piece to cause the call site to be changed from a mostly monomorphic callsite
+(calls dispatch stub) to a polymorphic callsite (calls resolve stub). If failures are rare, control
+transfers to the resolve piece (see ResolveStub). The failEntryPoint decrements a counter
+every time it is entered. The ee at various times will add a large chunk to the counter.
+
+ResolveEntry - does a lookup in a cache by hashing the actual type of the calling frame's
+<this> and the token identifying the (contract,method) pair desired. If found, control is transferred
+to the method implementation. If not found in the cache, the token is pushed and the ee is entered via
+the ResolveWorkerStub to do a full lookup and eventual transfer to the correct method implementation. Since
+there is a different resolve stub for every token, the token can be inlined and the token can be pre-hashed.
+The effectiveness of this approach is highly sensitive to the effectiveness of the hashing algorithm used,
+as well as its speed. It turns out it is very important to make the hash function sensitive to all
+of the bits of the method table, as method tables are laid out in memory in a very non-random way. Before
+making any changes to the code sequences here, it is very important to measure and tune them as perf
+can vary greatly, in unexpected ways, with seeming minor changes.
+
+Implementation note - Order, choice of instructions, and branch directions
+should be carefully tuned since it can have an inordinate effect on performance. Particular
+attention needs to be paid to the effects on the BTB and branch prediction, both in the small
+and in the large, i.e. it needs to run well in the face of BTB overflow--using static predictions.
+Note that this stub is called in highly polymorphic cases, but the cache should have been sized
+and the hash function chosen to maximize the cache hit case. Hence the cmp/jcc instructions should
+mostly be going down the cache hit route, and it is important that this be statically predicted as so.
+Hence the 3 jcc instrs need to be forward jumps. As structured, there is only one jmp/jcc that typically
+gets put in the BTB since all the others typically fall straight thru. Minimizing potential BTB entries
+is important. */
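+// In rough pseudocode, the cache probe in the resolve piece below does (the exact scaling of the
+// hashed token and mask is taken care of when the stub is initialized):
+//
+//     mt   = *(size_t*)this_ptr;                                         // method table of the object
+//     elem = *(void**)(_cacheAddress + ((((mt >> 12) + mt) ^ _hashedToken) & mask));
+//     if (elem->pMT == mt && elem->token == _token) jmp elem->target;    // cache hit
+//     else fall into the slow path, which pushes the token and enters the EE via the resolve worker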
+
+struct ResolveStub
+{
+ inline PCODE failEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_failEntryPoint[0]; }
+ inline PCODE resolveEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_resolveEntryPoint[0]; }
+ inline PCODE slowEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_slowEntryPoint[0]; }
+
+ inline INT32* pCounter() { LIMITED_METHOD_CONTRACT; return _pCounter; }
+ inline UINT32 hashedToken() { LIMITED_METHOD_CONTRACT; return _hashedToken >> LOG2_PTRSIZE; }
+ inline size_t cacheAddress() { LIMITED_METHOD_CONTRACT; return _cacheAddress; }
+ inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; }
+ inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(ResolveStub); }
+
+private:
+ friend struct ResolveHolder;
+
+ // ResolveStub::_failEntryPoint expects:
+ // ecx: object (the "this" pointer)
+ // eax: siteAddrForRegisterIndirect if this is a RegisterIndirect dispatch call
+ BYTE _failEntryPoint [2]; // 83 2d sub
+ INT32* _pCounter; // xx xx xx xx [counter],
+ BYTE part0 [2]; // 01 01
+ // 7c jl
+ BYTE toPatcher; // xx backpatcher ;must be forward jump, for perf reasons
+ // ;fall into the resolver stub
+
+ // ResolveStub::_resolveEntryPoint expects:
+ // ecx: object (the "this" pointer)
+ // eax: siteAddrForRegisterIndirect if this is a RegisterIndirect dispatch call
+ BYTE _resolveEntryPoint[6]; // 50 push eax ;save siteAddrForRegisterIndirect - this may be an indirect call
+ // 8b 01 mov eax,[ecx] ;get the method table from the "this" pointer. This is the place
+ // ; where we are going to fault on null this. If you change it,
+ // ; change also AdjustContextForVirtualStub in excep.cpp!!!
+ // 52 push edx
+ // 8b d0 mov edx, eax
+ BYTE part1 [6]; // c1 e8 0C shr eax,12 ;we are adding upper bits into lower bits of mt
+ // 03 c2 add eax,edx
+ // 35 xor eax,
+ UINT32 _hashedToken; // xx xx xx xx hashedToken ;along with pre-hashed token
+ BYTE part2 [1]; // 25 and eax,
+ size_t mask; // xx xx xx xx cache_mask
+ BYTE part3 [2]; // 8b 80 mov eax, [eax+
+ size_t _cacheAddress; // xx xx xx xx lookupCache]
+#ifdef STUB_LOGGING
+ BYTE cntr1[2]; // ff 05 inc
+ size_t* c_call; // xx xx xx xx [call_cache_counter]
+#endif //STUB_LOGGING
+ BYTE part4 [2]; // 3b 10 cmp edx,[eax+
+ // BYTE mtOffset; // ResolverCacheElem.pMT]
+ BYTE part5 [1]; // 75 jne
+ BYTE toMiss1; // xx miss ;must be forward jump, for perf reasons
+ BYTE part6 [2]; // 81 78 cmp [eax+
+ BYTE tokenOffset; // xx ResolverCacheElem.token],
+ size_t _token; // xx xx xx xx token
+ BYTE part7 [1]; // 75 jne
+ BYTE toMiss2; // xx miss ;must be forward jump, for perf reasons
+ BYTE part8 [2]; // 8B 40 xx mov eax,[eax+
+ BYTE targetOffset; // ResolverCacheElem.target]
+ BYTE part9 [6]; // 5a pop edx
+ // 83 c4 04 add esp,4 ;throw away siteAddrForRegisterIndirect - we don't need it now
+ // ff e0 jmp eax
+ // miss:
+ BYTE miss [1]; // 5a pop edx ; don't pop siteAddrForRegisterIndirect - leave it on the stack for use by ResolveWorkerChainLookupAsmStub and/or ResolveWorkerAsmStub
+ BYTE _slowEntryPoint[1]; // 68 push
+ size_t _tokenPush; // xx xx xx xx token
+#ifdef STUB_LOGGING
+ BYTE cntr2[2]; // ff 05 inc
+ size_t* c_miss; // xx xx xx xx [miss_cache_counter]
+#endif //STUB_LOGGING
+ BYTE part10 [1]; // e9 jmp
+ DISPL _resolveWorkerDispl; // xx xx xx xx resolveWorker == ResolveWorkerChainLookupAsmStub or ResolveWorkerAsmStub
+ BYTE patch[1]; // e8 call
+ DISPL _backpatcherDispl; // xx xx xx xx backpatcherWorker == BackPatchWorkerAsmStub
+ BYTE part11 [1]; // eb jmp
+ BYTE toResolveStub; // xx resolveStub, i.e. go back to _resolveEntryPoint
+};
+
+/* ResolveHolders are the containers for ResolveStubs, They provide
+for any alignment of the stubs as necessary. The stubs are placed in a hash table keyed by
+the token for which they are built. Efficiency of access requires that this token be aligned.
+For now, we have copied that field into the ResolveHolder itself. If the resolve stub is arranged such that
+any of its inlined tokens (non-prehashed) is aligned, then the token field in the ResolveHolder
+is not needed. */
+struct ResolveHolder
+{
+ static void InitializeStatic();
+
+ void Initialize(PCODE resolveWorkerTarget, PCODE patcherTarget,
+ size_t dispatchToken, UINT32 hashedToken,
+ void * cacheAddr, INT32 * counterAddr);
+
+ ResolveStub* stub() { LIMITED_METHOD_CONTRACT; return &_stub; }
+
+ static ResolveHolder* FromFailEntry(PCODE failEntry);
+ static ResolveHolder* FromResolveEntry(PCODE resolveEntry);
+
+private:
+ //align _token in resolve stub
+
+ BYTE align[(sizeof(void*)-((offsetof(ResolveStub,_token))%sizeof(void*)))%sizeof(void*)
+#ifdef STUB_LOGGING // This turns out to be zero-sized in stub_logging case, and is an error. So round up.
+ +sizeof(void*)
+#endif
+ ];
+
+ ResolveStub _stub;
+
+//#ifdef STUB_LOGGING // This turns out to be zero-sized in non stub_logging case, and is an error. So remove
+ BYTE pad[(sizeof(void*)-((sizeof(ResolveStub))%sizeof(void*))+offsetof(ResolveStub,_token))%sizeof(void*)]; //fill out DWORD
+//#endif
+};
+#include <poppack.h>
+
+
+#ifdef DECLARE_DATA
+
+#ifndef DACCESS_COMPILE
+
+#ifdef _MSC_VER
+
+#ifdef CHAIN_LOOKUP
+/* This will perform a chained lookup of the entry if the initial cache lookup fails
+
+ Entry stack:
+ dispatch token
+ siteAddrForRegisterIndirect (used only if this is a RegisterIndirect dispatch call)
+ return address of caller to stub
+ Also, EAX contains the pointer to the first ResolveCacheElem pointer for the calculated
+ bucket in the cache table.
+*/
+__declspec (naked) void ResolveWorkerChainLookupAsmStub()
+{
+ enum
+ {
+ e_token_size = 4,
+ e_indirect_addr_size = 4,
+ e_caller_ret_addr_size = 4,
+ };
+ enum
+ {
+ // this is the part of the stack that is present as we enter this function:
+ e_token = 0,
+ e_indirect_addr = e_token + e_token_size,
+ e_caller_ret_addr = e_indirect_addr + e_indirect_addr_size,
+ e_ret_esp = e_caller_ret_addr + e_caller_ret_addr_size,
+ };
+ enum
+ {
+ e_spilled_reg_size = 8,
+ };
+
+ // main loop setup
+ __asm {
+#ifdef STUB_LOGGING
+ inc g_chained_lookup_call_counter
+#endif
+ // spill regs
+ push edx
+ push ecx
+ // move the token into edx
+ mov edx,[esp+e_spilled_reg_size+e_token]
+ // move the MT into ecx
+ mov ecx,[ecx]
+ }
+ main_loop:
+ __asm {
+ // get the next entry in the chain (don't bother checking the first entry again)
+ mov eax,[eax+e_resolveCacheElem_offset_next]
+ // test if we hit a terminating NULL
+ test eax,eax
+ jz fail
+ // compare the MT of the ResolveCacheElem
+ cmp ecx,[eax+e_resolveCacheElem_offset_mt]
+ jne main_loop
+ // compare the token of the ResolveCacheElem
+ cmp edx,[eax+e_resolveCacheElem_offset_token]
+ jne main_loop
+ // success
+ // decrement success counter and move entry to start if necessary
+ sub g_dispatch_cache_chain_success_counter,1
+ //@TODO: Perhaps this should be a jl for better branch prediction?
+ jge nopromote
+ // be quick to reset the counter so we don't get a bunch of contending threads
+ add g_dispatch_cache_chain_success_counter,CALL_STUB_CACHE_INITIAL_SUCCESS_COUNT
+ // promote the entry to the beginning of the chain
+ mov ecx,eax
+ call VirtualCallStubManager::PromoteChainEntry
+ }
+ nopromote:
+ __asm {
+ // clean up the stack and jump to the target
+ pop ecx
+ pop edx
+ add esp,(e_caller_ret_addr - e_token)
+ mov eax,[eax+e_resolveCacheElem_offset_target]
+ jmp eax
+ }
+ fail:
+ __asm {
+#ifdef STUB_LOGGING
+ inc g_chained_lookup_miss_counter
+#endif
+ // restore registers
+ pop ecx
+ pop edx
+ jmp ResolveWorkerAsmStub
+ }
+}
+#endif
+
+/* Call the resolver, it will return where we are supposed to go.
+ There is a little stack magic here, in that we are entered with one
+ of the arguments for the resolver (the token) on the stack already.
+ We just push the other arguments, <this> in the call frame and the call site pointer,
+ and call the resolver.
+
+ On return we have the stack frame restored to the way it was when the ResolveStub
+ was called, i.e. as it was at the actual call site. The return value from
+ the resolver is the address we need to transfer control to, simulating a direct
+ call from the original call site. If we get passed back NULL, it means that the
+ resolution failed, an unimplemented method is being called.
+
+ Entry stack:
+ dispatch token
+ siteAddrForRegisterIndirect (used only if this is a RegisterIndirect dispatch call)
+ return address of caller to stub
+
+ Call stack:
+ pointer to TransitionBlock
+ call site
+ dispatch token
+ TransitionBlock
+ ArgumentRegisters (ecx, edx)
+ CalleeSavedRegisters (ebp, ebx, esi, edi)
+ return address of caller to stub
+ */
+__declspec (naked) void ResolveWorkerAsmStub()
+{
+ CANNOT_HAVE_CONTRACT;
+
+ __asm {
+ //
+ // The stub arguments are where we want to setup the TransitionBlock. We will
+ // setup the TransitionBlock later once we can trash them
+ //
+ // push ebp-frame
+ // push ebp
+ // mov ebp,esp
+
+ // save CalleeSavedRegisters
+ // push ebx
+
+ push esi
+ push edi
+
+ // push ArgumentRegisters
+ push ecx
+ push edx
+
+ mov esi, esp
+
+ push [esi + 4*4] // dispatch token
+ push [esi + 5*4] // siteAddrForRegisterIndirect
+ push esi // pTransitionBlock
+
+ // Setup up proper EBP frame now that the stub arguments can be trashed
+ mov [esi + 4*4],ebx
+ mov [esi + 5*4],ebp
+ lea ebp, [esi + 5*4]
+
+ // Make the call
+ call VSD_ResolveWorker
+
+ // From here on, mustn't trash eax
+
+ // pop ArgumentRegisters
+ pop edx
+ pop ecx
+
+ // pop CalleeSavedRegisters
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+
+ // Now jump to the target
+ jmp eax // continue on into the method
+ }
+}
+
+#ifdef FEATURE_REMOTING
+/* For an in-context dispatch, we will find the target. This
+ is the slow path, and erects a MachState structure for
+ creating a HelperMethodFrame
+
+ Entry stack:
+ dispatch token
+ return address of caller to stub
+
+ Call stack:
+ pointer to StubDispatchFrame
+ call site
+ dispatch token
+ StubDispatchFrame
+ GSCookie
+ negspace
+ vptr
+ datum
+ ArgumentRegisters (ecx, edx)
+ CalleeSavedRegisters (ebp, ebx, esi, edi)
+ return address of caller to stub
+*/
+__declspec (naked) void InContextTPDispatchAsmStub()
+{
+ CANNOT_HAVE_CONTRACT;
+
+ __asm {
+ // Pop dispatch token
+ pop eax
+
+ // push ebp-frame
+ push ebp
+ mov ebp,esp
+
+ // save CalleeSavedRegisters
+ push ebx
+ push esi
+ push edi
+
+ // push ArgumentRegisters
+ push ecx
+ push edx
+
+ mov esi, esp
+
+ push eax // token
+ push esi // pTransitionContext
+
+ // Make the call
+ call VSD_GetTargetForTPWorker
+
+ // From here on, mustn't trash eax
+
+ // pop ArgumentRegisters
+ pop edx
+ pop ecx
+
+ // pop CalleeSavedRegisters
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+
+ // Now jump to the target
+ jmp eax // continue on into the method
+ }
+}
+
+/* For an in-context dispatch, we will try to find the target in
+ the resolve cache. If this fails, we will jump to the full
+ version of InContextTPDispatchAsmStub
+
+ Entry stack:
+ dispatch slot number of interface MD
+ caller return address
+ ECX: this object
+*/
+__declspec (naked) void InContextTPQuickDispatchAsmStub()
+{
+ CANNOT_HAVE_CONTRACT;
+
+ __asm {
+ // Spill registers
+ push ecx
+ push edx
+
+ // Arg 2 - token
+ mov eax, [esp + 8]
+ push eax
+
+ // Arg 1 - this
+ push ecx
+
+ // Make the call
+ call VSD_GetTargetForTPWorkerQuick
+
+ // Restore registers
+ pop edx
+ pop ecx
+
+ // Test to see if we found a target
+ test eax, eax
+ jnz TargetFound
+
+ // If no target, jump to the slow worker
+ jmp InContextTPDispatchAsmStub
+
+ TargetFound:
+ // We got a target, so pop off the token and jump to it
+ add esp,4
+ jmp eax
+ }
+}
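The control flow of the quick stub is a standard fast-path/slow-path split: probe a cheap cache first, and only on a miss fall through to the full dispatcher. A hedged C++ restatement of that shape, with invented names, looks like this:

    #include <cstddef>

    using Target = void (*)();

    // Invented stand-ins for VSD_GetTargetForTPWorkerQuick / the slow stub.
    Target QuickLookup(void* pThis, size_t token);   // may return nullptr on a miss
    Target SlowResolve(void* pThis, size_t token);   // full resolution

    Target Dispatch(void* pThis, size_t token)
    {
        if (Target t = QuickLookup(pThis, token))    // "TargetFound" path
            return t;
        return SlowResolve(pThis, token);            // jmp InContextTPDispatchAsmStub
    }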
+#endif // FEATURE_REMOTING
+
+/* Call the call site back patcher. The fail stub piece of the resolver is being
+called too often, i.e. dispatch stubs are failing the expected MT test too often.
+This stub wraps the call to the BackPatchWorker to take care of any stack magic
+needed.
+*/
+__declspec (naked) void BackPatchWorkerAsmStub()
+{
+ CANNOT_HAVE_CONTRACT;
+
+ __asm {
+ push EBP
+ mov ebp,esp
+ push EAX // it may contain siteAddrForRegisterIndirect
+ push ECX
+ push EDX
+ push EAX // push any indirect call address as the second arg to BackPatchWorker
+ push [EBP+8] // and push return address as the first arg to BackPatchWorker
+ call VirtualCallStubManager::BackPatchWorkerStatic
+ pop EDX
+ pop ECX
+ pop EAX
+ mov esp,ebp
+ pop ebp
+ ret
+ }
+}
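From the push order one can infer the argument order the worker sees: arguments are pushed right to left, so the return address pushed last arrives first and the possible register-indirect site address second. Treat the following signature as an inference for illustration, not the VM's actual declaration:

    #include <cstdint>

    // Assumed shape only, inferred from the pushes in BackPatchWorkerAsmStub.
    void BackPatchWorkerSketch(uintptr_t returnAddrOfCallSite,
                               uintptr_t siteAddrForRegisterIndirect)
    {
        // The real worker rewrites a call site whose dispatch stub keeps
        // failing its expected-MT check so that it targets the resolve stub.
    }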
+
+#endif // _MSC_VER
+
+#ifdef _DEBUG
+//
+// This function verifies that a pointer to an indirection cell lives inside a delegate object.
+// In the delegate case the indirection cell is held by the delegate itself in _methodPtrAux. When the delegate's Invoke is
+// called, the shuffle thunk is invoked first and then calls into the virtual dispatch stub.
+// Before control is given to the virtual dispatch stub, a pointer to the indirection cell (thus an interior pointer to the delegate)
+// is pushed in EAX.
+//
+BOOL isDelegateCall(BYTE *interiorPtr)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ if (GCHeap::GetGCHeap()->IsHeapPointer((void*)interiorPtr))
+ {
+ Object *delegate = (Object*)(interiorPtr - DelegateObject::GetOffsetOfMethodPtrAux());
+ VALIDATEOBJECTREF(ObjectToOBJECTREF(delegate));
+ _ASSERTE(delegate->GetMethodTable()->IsDelegate());
+
+ return TRUE;
+ }
+ return FALSE;
+}
+#endif
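The recovery of the delegate from the interior pointer is plain offset arithmetic: EAX holds the address of the delegate's _methodPtrAux field, so stepping back by that field's offset yields the start of the object. A hedged restatement with invented types:

    #include <cstdint>
    #include <cstddef>

    // Invented layout standing in for the managed delegate object.
    struct SketchDelegate
    {
        void*     methodTable;
        uintptr_t methodPtrAux;   // the indirection cell lives here
    };

    SketchDelegate* DelegateFromInteriorPtr(uint8_t* interiorPtr)
    {
        return reinterpret_cast<SketchDelegate*>(
            interiorPtr - offsetof(SketchDelegate, methodPtrAux));
    }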
+
+StubCallSite::StubCallSite(TADDR siteAddrForRegisterIndirect, PCODE returnAddr)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ // Not used
+ // if (isCallRelative(returnAddr))
+ // {
+ // m_siteAddr = returnAddr - sizeof(DISPL);
+ // }
+ // else
+ if (isCallRelativeIndirect((BYTE *)returnAddr))
+ {
+ m_siteAddr = *dac_cast<PTR_PTR_PCODE>(returnAddr - sizeof(PCODE));
+ }
+ else
+ {
+ _ASSERTE(isCallRegisterIndirect((BYTE *)returnAddr) || isDelegateCall((BYTE *)siteAddrForRegisterIndirect));
+ m_siteAddr = dac_cast<PTR_PCODE>(siteAddrForRegisterIndirect);
+ }
+}
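The constructor's arithmetic relies on how an x86 relative-indirect call is encoded: "call dword ptr [addr32]" is FF 15 followed by a 4-byte absolute address, so the four bytes immediately before the return address are the address of the indirection cell. A hedged, self-contained restatement (not the VM's helpers):

    #include <cstdint>
    #include <cstring>

    // Reads the disp32 of an FF 15 call that ends at returnAddr and returns it
    // as a pointer to the indirection cell. Illustration only.
    uint32_t* IndirectionCellFromReturnAddr(const uint8_t* returnAddr)
    {
        uint32_t cellAddr;
        std::memcpy(&cellAddr, returnAddr - sizeof(uint32_t), sizeof(cellAddr));
        return reinterpret_cast<uint32_t*>(static_cast<uintptr_t>(cellAddr));
    }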
+
+// the special return address for VSD tailcalls
+extern "C" void STDCALL JIT_TailCallReturnFromVSD();
+
+PCODE StubCallSite::GetCallerAddress()
+{
+ LIMITED_METHOD_CONTRACT;
+ if (m_returnAddr != (PCODE)JIT_TailCallReturnFromVSD)
+ return m_returnAddr;
+
+ // Find the tailcallframe in the frame chain and get the actual caller from the first TailCallFrame
+ return TailCallFrame::FindTailCallFrame(GetThread()->GetFrame())->GetCallerAddress();
+}
+
+#ifdef STUB_LOGGING
+extern size_t g_lookup_inline_counter;
+extern size_t g_mono_call_counter;
+extern size_t g_mono_miss_counter;
+extern size_t g_poly_call_counter;
+extern size_t g_poly_miss_counter;
+#endif
+
+/* Template used to generate the stub. We generate a stub by allocating a block of
+ memory, copying the template over it, and updating only the fields that need
+ to be changed.
+*/
+LookupStub lookupInit;
+
+void LookupHolder::InitializeStatic()
+{
+ static_assert_no_msg(((offsetof(LookupStub, _token)+offsetof(LookupHolder, _stub)) % sizeof(void*)) == 0);
+ static_assert_no_msg((sizeof(LookupHolder) % sizeof(void*)) == 0);
+
+ lookupInit._entryPoint [0] = 0x50;
+ lookupInit._entryPoint [1] = 0x68;
+ static_assert_no_msg(sizeof(lookupInit._entryPoint) == 2);
+ lookupInit._token = 0xcccccccc;
+#ifdef STUB_LOGGING
+ lookupInit.cntr2 [0] = 0xff;
+ lookupInit.cntr2 [1] = 0x05;
+ static_assert_no_msg(sizeof(lookupInit.cntr2) == 2);
+ lookupInit.c_lookup = &g_call_lookup_counter;
+#endif //STUB_LOGGING
+ lookupInit.part2 [0] = 0xe9;
+ static_assert_no_msg(sizeof(lookupInit.part2) == 1);
+ lookupInit._resolveWorkerDispl = 0xcccccccc;
+}
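Decoded, the byte groups written above form a very small stub; the sketch below spells that out as data so it compiles, with the intended x86 meaning in comments (non-STUB_LOGGING layout, 0xcc runs standing for the fields patched in Initialize). This is a reading of the bytes, not code taken from the VM:

    #include <cstdint>

    const uint8_t kLookupStubSketch[] = {
        0x50,                          // push eax     ; siteAddrForRegisterIndirect slot
        0x68, 0xcc,0xcc,0xcc,0xcc,     // push imm32   ; _token
        0xe9, 0xcc,0xcc,0xcc,0xcc,     // jmp  rel32   ; _resolveWorkerDispl
    };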
+
+void LookupHolder::Initialize(PCODE resolveWorkerTarget, size_t dispatchToken)
+{
+ _stub = lookupInit;
+
+ //fill in the stub specific fields
+ //@TODO: Get rid of this duplication of data.
+ _stub._token = dispatchToken;
+ _stub._resolveWorkerDispl = resolveWorkerTarget - ((PCODE) &_stub._resolveWorkerDispl + sizeof(DISPL));
+}
+
+LookupHolder* LookupHolder::FromLookupEntry(PCODE lookupEntry)
+{
+ LIMITED_METHOD_CONTRACT;
+ LookupHolder* lookupHolder = (LookupHolder*) ( lookupEntry - offsetof(LookupHolder, _stub) - offsetof(LookupStub, _entryPoint) );
+ // _ASSERTE(lookupHolder->_stub._entryPoint[0] == lookupInit._entryPoint[0]);
+ return lookupHolder;
+}
+
+
+/* Template used to generate the stub. We generate a stub by allocating a block of
+ memory, copying the template over it, and updating only the fields that need
+ to be changed.
+*/
+DispatchStub dispatchInit;
+
+void DispatchHolder::InitializeStatic()
+{
+ // Check that _expectedMT is aligned in the DispatchHolder
+ static_assert_no_msg(((offsetof(DispatchHolder, _stub) + offsetof(DispatchStub,_expectedMT)) % sizeof(void*)) == 0);
+ static_assert_no_msg((sizeof(DispatchHolder) % sizeof(void*)) == 0);
+
+#ifndef STUB_LOGGING
+ dispatchInit._entryPoint [0] = 0x81;
+ dispatchInit._entryPoint [1] = 0x39;
+ static_assert_no_msg(sizeof(dispatchInit._entryPoint) == 2);
+
+ dispatchInit._expectedMT = 0xcccccccc;
+ dispatchInit.jmpOp1 [0] = 0x0f;
+ dispatchInit.jmpOp1 [1] = 0x85;
+ static_assert_no_msg(sizeof(dispatchInit.jmpOp1) == 2);
+
+ dispatchInit._failDispl = 0xcccccccc;
+ dispatchInit.jmpOp2 = 0xe9;
+ dispatchInit._implDispl = 0xcccccccc;
+#else //STUB_LOGGING
+ dispatchInit._entryPoint [0] = 0xff;
+ dispatchInit._entryPoint [1] = 0x05;
+ static_assert_no_msg(sizeof(dispatchInit._entryPoint) == 2);
+
+ dispatchInit.d_call = &g_mono_call_counter;
+ dispatchInit.cmpOp [0] = 0x81;
+ dispatchInit.cmpOp [1] = 0x39;
+ static_assert_no_msg(sizeof(dispatchInit.cmpOp) == 2);
+
+ dispatchInit._expectedMT = 0xcccccccc;
+ dispatchInit.jmpOp1 [0] = 0x0f;
+ dispatchInit.jmpOp1 [1] = 0x84;
+ static_assert_no_msg(sizeof(dispatchInit.jmpOp1) == 2);
+
+ dispatchInit._implDispl = 0xcccccccc;
+ dispatchInit.fail [0] = 0xff;
+ dispatchInit.fail [1] = 0x05;
+ static_assert_no_msg(sizeof(dispatchInit.fail) == 2);
+
+ dispatchInit.d_miss = &g_mono_miss_counter;
+ dispatchInit.jmpFail = 0xe9;
+ dispatchInit._failDispl = 0xcccccccc;
+#endif //STUB_LOGGING
+}
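The non-STUB_LOGGING dispatch stub decodes to a compare of the MethodTable in [ecx] against the expected one, a jne to the fail path, and a jmp to the implementation. As with the lookup stub, the following is a hedged reading of the bytes, written as data so it compiles:

    #include <cstdint>

    const uint8_t kDispatchStubSketch[] = {
        0x81, 0x39, 0xcc,0xcc,0xcc,0xcc,   // cmp dword ptr [ecx], _expectedMT
        0x0f, 0x85, 0xcc,0xcc,0xcc,0xcc,   // jne _failDispl   ; wrong type
        0xe9, 0xcc,0xcc,0xcc,0xcc,         // jmp _implDispl   ; expected type
    };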
+
+void DispatchHolder::Initialize(PCODE implTarget, PCODE failTarget, size_t expectedMT)
+{
+ _stub = dispatchInit;
+
+ //fill in the stub specific fields
+ _stub._expectedMT = (size_t) expectedMT;
+ _stub._failDispl = failTarget - ((PCODE) &_stub._failDispl + sizeof(DISPL));
+ _stub._implDispl = implTarget - ((PCODE) &_stub._implDispl + sizeof(DISPL));
+}
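Both displacement fields use the usual x86 rel32 convention: the displacement is measured from the end of the field being patched. A small helper making that arithmetic explicit (illustrative, not the VM's code):

    #include <cstdint>

    int32_t Rel32Displ(uintptr_t target, uintptr_t displFieldAddr)
    {
        // e.g. _failDispl = Rel32Displ(failTarget, (uintptr_t)&_stub._failDispl);
        return static_cast<int32_t>(target - (displFieldAddr + sizeof(int32_t)));
    }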
+
+DispatchHolder* DispatchHolder::FromDispatchEntry(PCODE dispatchEntry)
+{
+ LIMITED_METHOD_CONTRACT;
+ DispatchHolder* dispatchHolder = (DispatchHolder*) ( dispatchEntry - offsetof(DispatchHolder, _stub) - offsetof(DispatchStub, _entryPoint) );
+ // _ASSERTE(dispatchHolder->_stub._entryPoint[0] == dispatchInit._entryPoint[0]);
+ return dispatchHolder;
+}
+
+
+/* Template used to generate the stub. We generate a stub by allocating a block of
+ memory, copying the template over it, and updating only the fields that need
+ to be changed.
+*/
+
+ResolveStub resolveInit;
+
+void ResolveHolder::InitializeStatic()
+{
+ //Check that _token is aligned in ResolveHolder
+ static_assert_no_msg(((offsetof(ResolveHolder, _stub) + offsetof(ResolveStub, _token)) % sizeof(void*)) == 0);
+ static_assert_no_msg((sizeof(ResolveHolder) % sizeof(void*)) == 0);
+
+ resolveInit._failEntryPoint [0] = 0x83;
+ resolveInit._failEntryPoint [1] = 0x2d;
+ static_assert_no_msg(sizeof(resolveInit._failEntryPoint) == 2);
+
+ resolveInit._pCounter = (INT32 *) (size_t) 0xcccccccc;
+ resolveInit.part0 [0] = 0x01;
+ resolveInit.part0 [1] = 0x7c;
+ static_assert_no_msg(sizeof(resolveInit.part0) == 2);
+
+ resolveInit.toPatcher = (offsetof(ResolveStub, patch) - (offsetof(ResolveStub, toPatcher) + 1)) & 0xFF;
+
+ resolveInit._resolveEntryPoint [0] = 0x50;
+ resolveInit._resolveEntryPoint [1] = 0x8b;
+ resolveInit._resolveEntryPoint [2] = 0x01;
+ resolveInit._resolveEntryPoint [3] = 0x52;
+ resolveInit._resolveEntryPoint [4] = 0x8b;
+ resolveInit._resolveEntryPoint [5] = 0xd0;
+ static_assert_no_msg(sizeof(resolveInit._resolveEntryPoint) == 6);
+
+ resolveInit.part1 [0] = 0xc1;
+ resolveInit.part1 [1] = 0xe8;
+ resolveInit.part1 [2] = CALL_STUB_CACHE_NUM_BITS;
+ resolveInit.part1 [3] = 0x03;
+ resolveInit.part1 [4] = 0xc2;
+ resolveInit.part1 [5] = 0x35;
+ static_assert_no_msg(sizeof(resolveInit.part1) == 6);
+
+ resolveInit._hashedToken = 0xcccccccc;
+ resolveInit.part2 [0] = 0x25;
+ static_assert_no_msg(sizeof(resolveInit.part2) == 1);
+
+ resolveInit.mask = (CALL_STUB_CACHE_MASK << LOG2_PTRSIZE);
+ resolveInit.part3 [0] = 0x8b;
+ resolveInit.part3 [1] = 0x80;
+ static_assert_no_msg(sizeof(resolveInit.part3) == 2);
+
+ resolveInit._cacheAddress = 0xcccccccc;
+#ifdef STUB_LOGGING
+ resolveInit.cntr1 [0] = 0xff;
+ resolveInit.cntr1 [1] = 0x05;
+ static_assert_no_msg(sizeof(resolveInit.cntr1) == 2);
+
+ resolveInit.c_call = &g_poly_call_counter;
+#endif //STUB_LOGGING
+ resolveInit.part4 [0] = 0x3b;
+ resolveInit.part4 [1] = 0x10;
+ static_assert_no_msg(sizeof(resolveInit.part4) == 2);
+
+ // resolveInit.mtOffset = offsetof(ResolveCacheElem,pMT) & 0xFF;
+ static_assert_no_msg(offsetof(ResolveCacheElem,pMT) == 0);
+
+ resolveInit.part5 [0] = 0x75;
+ static_assert_no_msg(sizeof(resolveInit.part5) == 1);
+
+ resolveInit.toMiss1 = offsetof(ResolveStub,miss)-(offsetof(ResolveStub,toMiss1)+1);
+
+ resolveInit.part6 [0] = 0x81;
+ resolveInit.part6 [1] = 0x78;
+ static_assert_no_msg(sizeof(resolveInit.part6) == 2);
+
+ resolveInit.tokenOffset = offsetof(ResolveCacheElem,token) & 0xFF;
+
+ resolveInit._token = 0xcccccccc;
+
+ resolveInit.part7 [0] = 0x75;
+ static_assert_no_msg(sizeof(resolveInit.part7) == 1);
+
+ resolveInit.part8 [0] = 0x8b;
+ resolveInit.part8 [1] = 0x40;
+ static_assert_no_msg(sizeof(resolveInit.part8) == 2);
+
+ resolveInit.targetOffset = offsetof(ResolveCacheElem,target) & 0xFF;
+
+ resolveInit.toMiss2 = offsetof(ResolveStub,miss)-(offsetof(ResolveStub,toMiss2)+1);
+
+ resolveInit.part9 [0] = 0x5a;
+ resolveInit.part9 [1] = 0x83;
+ resolveInit.part9 [2] = 0xc4;
+ resolveInit.part9 [3] = 0x04;
+ resolveInit.part9 [4] = 0xff;
+ resolveInit.part9 [5] = 0xe0;
+ static_assert_no_msg(sizeof(resolveInit.part9) == 6);
+
+ resolveInit.miss [0] = 0x5a;
+// resolveInit.miss [1] = 0xb8;
+// resolveInit._hashedTokenMov = 0xcccccccc;
+ resolveInit._slowEntryPoint [0] = 0x68;
+ resolveInit._tokenPush = 0xcccccccc;
+#ifdef STUB_LOGGING
+ resolveInit.cntr2 [0] = 0xff;
+ resolveInit.cntr2 [1] = 0x05;
+ resolveInit.c_miss = &g_poly_miss_counter;
+#endif //STUB_LOGGING
+ resolveInit.part10 [0] = 0xe9;
+ resolveInit._resolveWorkerDispl = 0xcccccccc;
+
+ resolveInit.patch [0] = 0xe8;
+ resolveInit._backpatcherDispl = 0xcccccccc;
+ resolveInit.part11 [0] = 0xeb;
+ resolveInit.toResolveStub = (offsetof(ResolveStub, _resolveEntryPoint) - (offsetof(ResolveStub, toResolveStub) + 1)) & 0xFF;
+}
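Read back as instructions, the bytes above implement the resolve stub's three entry points: the fail entry decrements a counter and, once it goes negative, detours through the back patcher; the resolve entry hashes the MethodTable with the token, probes the cache, and tail-jumps to the cached target on a hit; the slow entry pushes the token and jumps to ResolveWorkerAsmStub (the EAX spilled at the resolve entry is left on the stack as siteAddrForRegisterIndirect). The sketch below is a hedged decoding written as data (non-STUB_LOGGING layout, 0xcc runs standing for patched fields), not code taken from the VM:

    #include <cstdint>

    const uint8_t kResolveStubSketch[] = {
        // _failEntryPoint
        0x83, 0x2d, 0xcc,0xcc,0xcc,0xcc, 0x01, // sub dword ptr [_pCounter], 1
        0x7c, 0xcc,                            // jl  patch
        // _resolveEntryPoint
        0x50,                                  // push eax
        0x8b, 0x01,                            // mov  eax, [ecx]       ; MethodTable of 'this'
        0x52,                                  // push edx
        0x8b, 0xd0,                            // mov  edx, eax
        0xc1, 0xe8, 0xcc,                      // shr  eax, CALL_STUB_CACHE_NUM_BITS
        0x03, 0xc2,                            // add  eax, edx
        0x35, 0xcc,0xcc,0xcc,0xcc,             // xor  eax, _hashedToken
        0x25, 0xcc,0xcc,0xcc,0xcc,             // and  eax, mask
        0x8b, 0x80, 0xcc,0xcc,0xcc,0xcc,       // mov  eax, [eax + _cacheAddress] ; ResolveCacheElem*
        0x3b, 0x10,                            // cmp  edx, [eax]       ; elem->pMT
        0x75, 0xcc,                            // jne  miss
        0x81, 0x78, 0xcc, 0xcc,0xcc,0xcc,0xcc, // cmp  dword ptr [eax + tokenOffset], _token
        0x75, 0xcc,                            // jne  miss
        0x8b, 0x40, 0xcc,                      // mov  eax, [eax + targetOffset]
        0x5a,                                  // pop  edx
        0x83, 0xc4, 0x04,                      // add  esp, 4           ; drop the spilled eax
        0xff, 0xe0,                            // jmp  eax              ; cache hit
        // miss / _slowEntryPoint
        0x5a,                                  // pop  edx
        0x68, 0xcc,0xcc,0xcc,0xcc,             // push _tokenPush
        0xe9, 0xcc,0xcc,0xcc,0xcc,             // jmp  _resolveWorkerDispl
        // patch
        0xe8, 0xcc,0xcc,0xcc,0xcc,             // call _backpatcherDispl
        0xeb, 0xcc,                            // jmp  _resolveEntryPoint
    };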
+
+void ResolveHolder::Initialize(PCODE resolveWorkerTarget, PCODE patcherTarget,
+ size_t dispatchToken, UINT32 hashedToken,
+ void * cacheAddr, INT32 * counterAddr)
+{
+ _stub = resolveInit;
+
+ //fill in the stub specific fields
+ _stub._pCounter = counterAddr;
+ _stub._hashedToken = hashedToken << LOG2_PTRSIZE;
+ _stub._cacheAddress = (size_t) cacheAddr;
+ _stub._token = dispatchToken;
+// _stub._hashedTokenMov = hashedToken;
+ _stub._tokenPush = dispatchToken;
+ _stub._resolveWorkerDispl = resolveWorkerTarget - ((PCODE) &_stub._resolveWorkerDispl + sizeof(DISPL));
+ _stub._backpatcherDispl = patcherTarget - ((PCODE) &_stub._backpatcherDispl + sizeof(DISPL));
+}
+
+ResolveHolder* ResolveHolder::FromFailEntry(PCODE failEntry)
+{
+ LIMITED_METHOD_CONTRACT;
+ ResolveHolder* resolveHolder = (ResolveHolder*) ( failEntry - offsetof(ResolveHolder, _stub) - offsetof(ResolveStub, _failEntryPoint) );
+ // _ASSERTE(resolveHolder->_stub._resolveEntryPoint[0] == resolveInit._resolveEntryPoint[0]);
+ return resolveHolder;
+}
+
+ResolveHolder* ResolveHolder::FromResolveEntry(PCODE resolveEntry)
+{
+ LIMITED_METHOD_CONTRACT;
+ ResolveHolder* resolveHolder = (ResolveHolder*) ( resolveEntry - offsetof(ResolveHolder, _stub) - offsetof(ResolveStub, _resolveEntryPoint) );
+ // _ASSERTE(resolveHolder->_stub._resolveEntryPoint[0] == resolveInit._resolveEntryPoint[0]);
+ return resolveHolder;
+}
+
+#endif // DACCESS_COMPILE
+
+VirtualCallStubManager::StubKind VirtualCallStubManager::predictStubKind(PCODE stubStartAddress)
+{
+ SUPPORTS_DAC;
+#ifdef DACCESS_COMPILE
+
+ return SK_BREAKPOINT; // Dac always uses the slower lookup
+
+#else
+
+ StubKind stubKind = SK_UNKNOWN;
+
+ EX_TRY
+ {
+ // If stubStartAddress is completely bogus, then this might AV,
+ // so we protect it with SEH. An AV here is OK.
+ AVInRuntimeImplOkayHolder AVOkay;
+
+ WORD firstWord = *((WORD*) stubStartAddress);
+
+#ifndef STUB_LOGGING
+ if (firstWord == 0x3981)
+#else //STUB_LOGGING
+ if (firstWord == 0x05ff)
+#endif
+ {
+ stubKind = SK_DISPATCH;
+ }
+ else if (firstWord == 0x6850)
+ {
+ stubKind = SK_LOOKUP;
+ }
+ else if (firstWord == 0x8b50)
+ {
+ stubKind = SK_RESOLVE;
+ }
+ else
+ {
+ BYTE firstByte = ((BYTE*) stubStartAddress)[0];
+ BYTE secondByte = ((BYTE*) stubStartAddress)[1];
+
+ if ((firstByte == X86_INSTR_INT3) ||
+ (secondByte == X86_INSTR_INT3))
+ {
+ stubKind = SK_BREAKPOINT;
+ }
+ }
+ }
+ EX_CATCH
+ {
+ stubKind = SK_UNKNOWN;
+ }
+ EX_END_CATCH(SwallowAllExceptions);
+
+ return stubKind;
+
+#endif // DACCESS_COMPILE
+}
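The firstWord constants are simply the first two entry-point bytes of each stub read as a little-endian WORD: in the non-STUB_LOGGING layout the dispatch stub begins 0x81 0x39 (0x3981), the lookup stub 0x50 0x68 (0x6850), and the resolve stub 0x50 0x8b (0x8b50). A tiny illustration of that read (not the VM's code):

    #include <cstdint>
    #include <cstring>

    uint16_t FirstWord(const uint8_t* stubStart)
    {
        uint16_t w;
        std::memcpy(&w, stubStart, sizeof(w));
        return w;   // 0x3981 dispatch, 0x6850 lookup, 0x8b50 resolve (little-endian)
    }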
+
+#endif //DECLARE_DATA
+
+#endif // _VIRTUAL_CALL_STUB_X86_H