From 4b4aad7217d3292650e77eec2cf4c198ea9c3b4b Mon Sep 17 00:00:00 2001 From: Jiyoung Yun Date: Wed, 23 Nov 2016 19:09:09 +0900 Subject: Imported Upstream version 1.1.0 --- src/vm/i386/.gitmirror | 1 + src/vm/i386/CLRErrorReporting.vrg | 5 + src/vm/i386/RedirectedHandledJITCase.asm | 136 + src/vm/i386/asmconstants.h | 485 +++ src/vm/i386/asmhelpers.asm | 2400 +++++++++++ src/vm/i386/cgencpu.h | 573 +++ src/vm/i386/cgenx86.cpp | 2257 ++++++++++ src/vm/i386/excepcpu.h | 87 + src/vm/i386/excepx86.cpp | 3734 ++++++++++++++++ src/vm/i386/fptext.asm | 277 ++ src/vm/i386/gmsasm.asm | 37 + src/vm/i386/gmscpu.h | 140 + src/vm/i386/gmsx86.cpp | 1245 ++++++ src/vm/i386/jithelp.asm | 2574 +++++++++++ src/vm/i386/jitinterfacex86.cpp | 1922 +++++++++ src/vm/i386/profiler.cpp | 336 ++ src/vm/i386/remotingx86.cpp | 225 + src/vm/i386/stublinkerx86.cpp | 6806 ++++++++++++++++++++++++++++++ src/vm/i386/stublinkerx86.h | 781 ++++ src/vm/i386/virtualcallstubcpu.hpp | 1077 +++++ 20 files changed, 25098 insertions(+) create mode 100644 src/vm/i386/.gitmirror create mode 100644 src/vm/i386/CLRErrorReporting.vrg create mode 100644 src/vm/i386/RedirectedHandledJITCase.asm create mode 100644 src/vm/i386/asmconstants.h create mode 100644 src/vm/i386/asmhelpers.asm create mode 100644 src/vm/i386/cgencpu.h create mode 100644 src/vm/i386/cgenx86.cpp create mode 100644 src/vm/i386/excepcpu.h create mode 100644 src/vm/i386/excepx86.cpp create mode 100644 src/vm/i386/fptext.asm create mode 100644 src/vm/i386/gmsasm.asm create mode 100644 src/vm/i386/gmscpu.h create mode 100644 src/vm/i386/gmsx86.cpp create mode 100644 src/vm/i386/jithelp.asm create mode 100644 src/vm/i386/jitinterfacex86.cpp create mode 100644 src/vm/i386/profiler.cpp create mode 100644 src/vm/i386/remotingx86.cpp create mode 100644 src/vm/i386/stublinkerx86.cpp create mode 100644 src/vm/i386/stublinkerx86.h create mode 100644 src/vm/i386/virtualcallstubcpu.hpp (limited to 'src/vm/i386') diff --git a/src/vm/i386/.gitmirror b/src/vm/i386/.gitmirror new file mode 100644 index 0000000000..f507630f94 --- /dev/null +++ b/src/vm/i386/.gitmirror @@ -0,0 +1 @@ +Only contents of this folder, excluding subfolders, will be mirrored by the Git-TFS Mirror. \ No newline at end of file diff --git a/src/vm/i386/CLRErrorReporting.vrg b/src/vm/i386/CLRErrorReporting.vrg new file mode 100644 index 0000000000..6e45ba967c --- /dev/null +++ b/src/vm/i386/CLRErrorReporting.vrg @@ -0,0 +1,5 @@ +VSREG 7 + +[HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services\Eventlog\Application\.NET Runtime 4.0 Error Reporting] +"EventMessageFile"="[DWFolder.D0DF3458_A845_11D3_8D0A_0050046416B9]DW20.EXE" +"TypesSupported"=dword:00000007 diff --git a/src/vm/i386/RedirectedHandledJITCase.asm b/src/vm/i386/RedirectedHandledJITCase.asm new file mode 100644 index 0000000000..80345623e7 --- /dev/null +++ b/src/vm/i386/RedirectedHandledJITCase.asm @@ -0,0 +1,136 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. +; See the LICENSE file in the project root for more information. 
+ +; ==++== +; + +; +; ==--== +; *********************************************************************** +; File: RedirectedHandledJITCase.asm +; +; *********************************************************************** +; + +; This contains thread-redirecting helper routines that are 100% x86 assembly + + .586 + .model flat + + include asmconstants.inc + + option casemap:none + .code + +EXTERN _GetCurrentSavedRedirectContext@0:PROC + +; +; WARNING!! These functions immediately ruin thread unwindability. This is +; WARNING!! OK as long as there is a mechanism for saving the thread context +; WARNING!! prior to running these functions as well as a mechanism for +; WARNING!! restoring the context prior to any stackwalk. This means that +; WARNING!! we need to ensure that no GC can occur while the stack is +; WARNING!! unwalkable. This further means that we cannot allow any exception +; WARNING!! to occure when the stack is unwalkable +; + + +; If you edit this macro, make sure you update GetCONTEXTFromRedirectedStubStackFrame. +; This function is used by both the personality routine and the debugger to retrieve the original CONTEXT. +GenerateRedirectedHandledJITCaseStub MACRO reason + +EXTERN ?RedirectedHandledJITCaseFor&reason&@Thread@@CGXXZ:proc + + ALIGN 4 +_RedirectedHandledJITCaseFor&reason&_Stub@0 PROC PUBLIC + + push eax ; where to stuff the fake return address + push ebp ; save interrupted ebp for stack walk + mov ebp, esp + sub esp, 4 ; stack slot to save the CONTEXT * + + ; + ; Save a copy of the redirect CONTEXT*. + ; This is needed for the debugger to unwind the stack. + ; + call _GetCurrentSavedRedirectContext@0 + + mov [ebp-4], eax +.errnz REDIRECTSTUB_EBP_OFFSET_CONTEXT + 4, REDIRECTSTUB_EBP_OFFSET_CONTEXT has changed - update asm stubs + + ; + ; Fetch the interrupted eip and save it as our return address. + ; + mov eax, [eax + CONTEXT_Eip] + mov [ebp+4], eax + + ; + ; Call target, which will do whatever we needed to do in the context + ; of the target thread, and will RtlRestoreContext when it is done. + ; + call ?RedirectedHandledJITCaseFor&reason&@Thread@@CGXXZ + + int 3 ; target shouldn't return. + +; Put a label here to tell the debugger where the end of this function is. +PUBLIC _RedirectedHandledJITCaseFor&reason&_StubEnd@0 +_RedirectedHandledJITCaseFor&reason&_StubEnd@0: + +_RedirectedHandledJITCaseFor&reason&_Stub@0 ENDP + +ENDM + +; HijackFunctionStart and HijackFunctionEnd are used to tell BBT to keep the hijacking functions together. +; Debugger uses range to check whether IP falls into one of them (see code:Debugger::s_hijackFunction). + +_HijackFunctionStart@0 proc public +ret +_HijackFunctionStart@0 endp + +GenerateRedirectedHandledJITCaseStub +GenerateRedirectedHandledJITCaseStub +GenerateRedirectedHandledJITCaseStub +GenerateRedirectedHandledJITCaseStub + +; Hijack for exceptions. +; This can be used to hijack at a 2nd-chance exception and execute the UEF + +EXTERN _ExceptionHijackWorker@16:PROC + +_ExceptionHijack@0 PROC PUBLIC + + ; This is where we land when we're hijacked from an IP by the debugger. + ; The debugger has already pushed the args: + ; - a CONTEXT + ; - a EXCEPTION_RECORD onto the stack + ; - an DWORD to use to mulitplex the hijack + ; - an arbitrary void* data parameter + call _ExceptionHijackWorker@16 + + ; Don't expect to return from here. Debugger will unhijack us. It has the full + ; context and can properly restore us. + int 3 + +; Put a label here to tell the debugger where the end of this function is. 
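The worker called above is ordinary C++; the @16 decoration on _ExceptionHijackWorker@16 means the __stdcall callee pops 16 bytes of arguments, i.e. the four values the comment says the debugger pushes. A rough C-level sketch of that contract, assuming the arguments arrive in the order the comment lists them (parameter names here are illustrative, not taken from the runtime headers):

#include <windows.h>

// Sketch only: the four stack arguments set up by the debugger before it
// redirects the thread to _ExceptionHijack@0.
extern "C" void __stdcall ExceptionHijackWorker(
    CONTEXT          *pContext,         // thread context at the hijack point
    EXCEPTION_RECORD *pExceptionRecord, // the 2nd-chance exception record
    DWORD             hijackCode,       // multiplexes which kind of hijack this is
    void             *pData);           // arbitrary data supplied by the debugger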
+public _ExceptionHijackEnd@0 +_ExceptionHijackEnd@0: + +_ExceptionHijack@0 ENDP + +; It is very important to have a dummy function here. +; Without it, the image has two labels without any instruction in between: +; One for the last label in this function, and one for the first function in the image following this asm file. +; Then the linker is free to remove from PDB the function symbol for the function +; immediately following this, and replace the reference with the last label in this file. +; When this happens, BBT loses info about function, moves pieces within the function to random place, and generates bad code. +_HijackFunctionLast@0 proc public +ret +_HijackFunctionLast@0 endp + +; This is the first function outside the "keep together range". Used by BBT scripts. +_HijackFunctionEnd@0 proc public +ret +_HijackFunctionEnd@0 endp + +END diff --git a/src/vm/i386/asmconstants.h b/src/vm/i386/asmconstants.h new file mode 100644 index 0000000000..5fd39d6897 --- /dev/null +++ b/src/vm/i386/asmconstants.h @@ -0,0 +1,485 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// asmconstants.h - +// +// This header defines field offsets and constants used by assembly code +// Be sure to rebuild clr/src/vm/ceemain.cpp after changing this file, to +// ensure that the constants match the expected C/C++ values + +// +// If you need to figure out a constant that has changed and is causing +// a compile-time assert, check out USE_COMPILE_TIME_CONSTANT_FINDER. +// TODO: put the constant finder in a common place so other platforms can use it. + +#ifndef _TARGET_X86_ +#error this file should only be used on an X86 platform +#endif + +#include "../../inc/switches.h" + +#ifndef ASMCONSTANTS_C_ASSERT +#define ASMCONSTANTS_C_ASSERT(cond) +#endif + +#ifndef ASMCONSTANTS_RUNTIME_ASSERT +#define ASMCONSTANTS_RUNTIME_ASSERT(cond) +#endif + +// Some contants are different in _DEBUG builds. This macro factors out ifdefs from below. 
+#ifdef _DEBUG +#define DBG_FRE(dbg,fre) dbg +#else +#define DBG_FRE(dbg,fre) fre +#endif + +//*************************************************************************** +#if defined(_DEBUG) && defined(_TARGET_X86_) && !defined(FEATURE_CORECLR) + #define HAS_TRACK_CXX_EXCEPTION_CODE_HACK 1 + #define TRACK_CXX_EXCEPTION_CODE_HACK +#else + #define HAS_TRACK_CXX_EXCEPTION_CODE_HACK 0 +#endif + +#define INITIAL_SUCCESS_COUNT 0x100 + +#define DynamicHelperFrameFlags_Default 0 +#define DynamicHelperFrameFlags_ObjectArg 1 +#define DynamicHelperFrameFlags_ObjectArg2 2 + +#ifdef FEATURE_REMOTING +#define TransparentProxyObject___stubData 0x8 +ASMCONSTANTS_C_ASSERT(TransparentProxyObject___stubData == offsetof(TransparentProxyObject, _stubData)) + +#define TransparentProxyObject___stub 0x14 +ASMCONSTANTS_C_ASSERT(TransparentProxyObject___stub == offsetof(TransparentProxyObject, _stub)) + +#define TransparentProxyObject___pMT 0xc +ASMCONSTANTS_C_ASSERT(TransparentProxyObject___pMT == offsetof(TransparentProxyObject, _pMT)) +#endif // FEATURE_REMOTING + +// CONTEXT from rotor_pal.h +#define CONTEXT_Edi 0x9c +ASMCONSTANTS_C_ASSERT(CONTEXT_Edi == offsetof(CONTEXT,Edi)) + +#define CONTEXT_Esi 0xa0 +ASMCONSTANTS_C_ASSERT(CONTEXT_Esi == offsetof(CONTEXT,Esi)) + +#define CONTEXT_Ebx 0xa4 +ASMCONSTANTS_C_ASSERT(CONTEXT_Ebx == offsetof(CONTEXT,Ebx)) + +#define CONTEXT_Edx 0xa8 +ASMCONSTANTS_C_ASSERT(CONTEXT_Edx == offsetof(CONTEXT,Edx)) + +#define CONTEXT_Eax 0xb0 +ASMCONSTANTS_C_ASSERT(CONTEXT_Eax == offsetof(CONTEXT,Eax)) + +#define CONTEXT_Ebp 0xb4 +ASMCONSTANTS_C_ASSERT(CONTEXT_Ebp == offsetof(CONTEXT,Ebp)) + +#define CONTEXT_Eip 0xb8 +ASMCONSTANTS_C_ASSERT(CONTEXT_Eip == offsetof(CONTEXT,Eip)) + +#define CONTEXT_Esp 0xc4 +ASMCONSTANTS_C_ASSERT(CONTEXT_Esp == offsetof(CONTEXT,Esp)) + +// SYSTEM_INFO from rotor_pal.h +#define SYSTEM_INFO_dwNumberOfProcessors 20 +ASMCONSTANTS_C_ASSERT(SYSTEM_INFO_dwNumberOfProcessors == offsetof(SYSTEM_INFO,dwNumberOfProcessors)) + +// SpinConstants from clr/src/vars.h +#define SpinConstants_dwInitialDuration 0 +ASMCONSTANTS_C_ASSERT(SpinConstants_dwInitialDuration == offsetof(SpinConstants,dwInitialDuration)) + +#define SpinConstants_dwMaximumDuration 4 +ASMCONSTANTS_C_ASSERT(SpinConstants_dwMaximumDuration == offsetof(SpinConstants,dwMaximumDuration)) + +#define SpinConstants_dwBackoffFactor 8 +ASMCONSTANTS_C_ASSERT(SpinConstants_dwBackoffFactor == offsetof(SpinConstants,dwBackoffFactor)) + +// EHContext from clr/src/vm/i386/cgencpu.h +#define EHContext_Eax 0x00 +ASMCONSTANTS_C_ASSERT(EHContext_Eax == offsetof(EHContext,Eax)) + +#define EHContext_Ebx 0x04 +ASMCONSTANTS_C_ASSERT(EHContext_Ebx == offsetof(EHContext,Ebx)) + +#define EHContext_Ecx 0x08 +ASMCONSTANTS_C_ASSERT(EHContext_Ecx == offsetof(EHContext,Ecx)) + +#define EHContext_Edx 0x0c +ASMCONSTANTS_C_ASSERT(EHContext_Edx == offsetof(EHContext,Edx)) + +#define EHContext_Esi 0x10 +ASMCONSTANTS_C_ASSERT(EHContext_Esi == offsetof(EHContext,Esi)) + +#define EHContext_Edi 0x14 +ASMCONSTANTS_C_ASSERT(EHContext_Edi == offsetof(EHContext,Edi)) + +#define EHContext_Ebp 0x18 +ASMCONSTANTS_C_ASSERT(EHContext_Ebp == offsetof(EHContext,Ebp)) + +#define EHContext_Esp 0x1c +ASMCONSTANTS_C_ASSERT(EHContext_Esp == offsetof(EHContext,Esp)) + +#define EHContext_Eip 0x20 +ASMCONSTANTS_C_ASSERT(EHContext_Eip == offsetof(EHContext,Eip)) + + +// from clr/src/fjit/helperframe.h +#define SIZEOF_MachState 40 +ASMCONSTANTS_C_ASSERT(SIZEOF_MachState == sizeof(MachState)) + +#define MachState__pEdi 0 +ASMCONSTANTS_C_ASSERT(MachState__pEdi == 
offsetof(MachState, _pEdi)) + +#define MachState__edi 4 +ASMCONSTANTS_C_ASSERT(MachState__edi == offsetof(MachState, _edi)) + +#define MachState__pEsi 8 +ASMCONSTANTS_C_ASSERT(MachState__pEsi == offsetof(MachState, _pEsi)) + +#define MachState__esi 12 +ASMCONSTANTS_C_ASSERT(MachState__esi == offsetof(MachState, _esi)) + +#define MachState__pEbx 16 +ASMCONSTANTS_C_ASSERT(MachState__pEbx == offsetof(MachState, _pEbx)) + +#define MachState__ebx 20 +ASMCONSTANTS_C_ASSERT(MachState__ebx == offsetof(MachState, _ebx)) + +#define MachState__pEbp 24 +ASMCONSTANTS_C_ASSERT(MachState__pEbp == offsetof(MachState, _pEbp)) + +#define MachState__ebp 28 +ASMCONSTANTS_C_ASSERT(MachState__ebp == offsetof(MachState, _ebp)) + +#define MachState__esp 32 +ASMCONSTANTS_C_ASSERT(MachState__esp == offsetof(MachState, _esp)) + +#define MachState__pRetAddr 36 +ASMCONSTANTS_C_ASSERT(MachState__pRetAddr == offsetof(MachState, _pRetAddr)) + +#define LazyMachState_captureEbp 40 +ASMCONSTANTS_C_ASSERT(LazyMachState_captureEbp == offsetof(LazyMachState, captureEbp)) + +#define LazyMachState_captureEsp 44 +ASMCONSTANTS_C_ASSERT(LazyMachState_captureEsp == offsetof(LazyMachState, captureEsp)) + +#define LazyMachState_captureEip 48 +ASMCONSTANTS_C_ASSERT(LazyMachState_captureEip == offsetof(LazyMachState, captureEip)) + + +#define VASigCookie__StubOffset 4 +ASMCONSTANTS_C_ASSERT(VASigCookie__StubOffset == offsetof(VASigCookie, pNDirectILStub)) + +#define SIZEOF_TailCallFrame 32 +ASMCONSTANTS_C_ASSERT(SIZEOF_TailCallFrame == sizeof(TailCallFrame)) + +#define SIZEOF_GSCookie 4 + +// ICodeManager::SHADOW_SP_IN_FILTER from clr/src/inc/eetwain.h +#define SHADOW_SP_IN_FILTER_ASM 0x1 +ASMCONSTANTS_C_ASSERT(SHADOW_SP_IN_FILTER_ASM == ICodeManager::SHADOW_SP_IN_FILTER) + +// from clr/src/inc/corinfo.h +#define CORINFO_NullReferenceException_ASM 0 +ASMCONSTANTS_C_ASSERT(CORINFO_NullReferenceException_ASM == CORINFO_NullReferenceException) + +#define CORINFO_IndexOutOfRangeException_ASM 3 +ASMCONSTANTS_C_ASSERT(CORINFO_IndexOutOfRangeException_ASM == CORINFO_IndexOutOfRangeException) + +#define CORINFO_OverflowException_ASM 4 +ASMCONSTANTS_C_ASSERT(CORINFO_OverflowException_ASM == CORINFO_OverflowException) + +#define CORINFO_SynchronizationLockException_ASM 5 +ASMCONSTANTS_C_ASSERT(CORINFO_SynchronizationLockException_ASM == CORINFO_SynchronizationLockException) + +#define CORINFO_ArrayTypeMismatchException_ASM 6 +ASMCONSTANTS_C_ASSERT(CORINFO_ArrayTypeMismatchException_ASM == CORINFO_ArrayTypeMismatchException) + +#define CORINFO_ArgumentNullException_ASM 8 +ASMCONSTANTS_C_ASSERT(CORINFO_ArgumentNullException_ASM == CORINFO_ArgumentNullException) + +#define CORINFO_ArgumentException_ASM 9 +ASMCONSTANTS_C_ASSERT(CORINFO_ArgumentException_ASM == CORINFO_ArgumentException) + + +#ifndef CROSSGEN_COMPILE + +// from clr/src/vm/threads.h +#if defined(TRACK_CXX_EXCEPTION_CODE_HACK) // Is C++ exception code tracking turned on? 
+ #define Thread_m_LastCxxSEHExceptionCode 0x20 + ASMCONSTANTS_C_ASSERT(Thread_m_LastCxxSEHExceptionCode == offsetof(Thread, m_LastCxxSEHExceptionCode)) + + #define Thread_m_Context 0x3C +#else + #define Thread_m_Context 0x38 +#endif // TRACK_CXX_EXCEPTION_CODE_HACK +ASMCONSTANTS_C_ASSERT(Thread_m_Context == offsetof(Thread, m_Context)) + +#define Thread_m_State 0x04 +ASMCONSTANTS_C_ASSERT(Thread_m_State == offsetof(Thread, m_State)) +#endif // CROSSGEN_COMPILE + +#define Thread_m_fPreemptiveGCDisabled 0x08 +#ifndef CROSSGEN_COMPILE +ASMCONSTANTS_C_ASSERT(Thread_m_fPreemptiveGCDisabled == offsetof(Thread, m_fPreemptiveGCDisabled)) +#endif // CROSSGEN_COMPILE + +#define Thread_m_pFrame 0x0C +#ifndef CROSSGEN_COMPILE +ASMCONSTANTS_C_ASSERT(Thread_m_pFrame == offsetof(Thread, m_pFrame)) +#endif // CROSSGEN_COMPILE + +#ifndef CROSSGEN_COMPILE +#define Thread_m_dwLockCount 0x18 +ASMCONSTANTS_C_ASSERT(Thread_m_dwLockCount == offsetof(Thread, m_dwLockCount)) + +#define Thread_m_ThreadId 0x1C +ASMCONSTANTS_C_ASSERT(Thread_m_ThreadId == offsetof(Thread, m_ThreadId)) + +#define TS_CatchAtSafePoint_ASM 0x5F +ASMCONSTANTS_C_ASSERT(Thread::TS_CatchAtSafePoint == TS_CatchAtSafePoint_ASM) + +#ifdef FEATURE_HIJACK +#define TS_Hijacked_ASM 0x80 +ASMCONSTANTS_C_ASSERT(Thread::TS_Hijacked == TS_Hijacked_ASM) +#endif + +#endif // CROSSGEN_COMPILE + + +// from clr/src/vm/appdomain.hpp + +#define AppDomain__m_dwId 0x4 +ASMCONSTANTS_C_ASSERT(AppDomain__m_dwId == offsetof(AppDomain, m_dwId)); + +// from clr/src/vm/ceeload.cpp +#ifdef FEATURE_MIXEDMODE +#define IJWNOADThunk__m_cache 0x1C +ASMCONSTANTS_C_ASSERT(IJWNOADThunk__m_cache == offsetof(IJWNOADThunk, m_cache)) + +#define IJWNOADThunk__NextCacheOffset 0x8 +ASMCONSTANTS_C_ASSERT(IJWNOADThunk__NextCacheOffset == sizeof(IJWNOADThunkStubCache)) + +#define IJWNOADThunk__CodeAddrOffsetFromADID 0x4 +ASMCONSTANTS_C_ASSERT(IJWNOADThunk__CodeAddrOffsetFromADID == offsetof(IJWNOADThunkStubCache, m_CodeAddr)) +#endif //FEATURE_MIXEDMODE + +// from clr/src/vm/syncblk.h +#define SizeOfSyncTableEntry_ASM 8 +ASMCONSTANTS_C_ASSERT(sizeof(SyncTableEntry) == SizeOfSyncTableEntry_ASM) + +#define SyncBlockIndexOffset_ASM 4 +ASMCONSTANTS_C_ASSERT(sizeof(ObjHeader) - offsetof(ObjHeader, m_SyncBlockValue) == SyncBlockIndexOffset_ASM) + +#ifndef __GNUC__ +#define SyncTableEntry_m_SyncBlock 0 +ASMCONSTANTS_C_ASSERT(offsetof(SyncTableEntry, m_SyncBlock) == SyncTableEntry_m_SyncBlock) + +#define SyncBlock_m_Monitor 0 +ASMCONSTANTS_C_ASSERT(offsetof(SyncBlock, m_Monitor) == SyncBlock_m_Monitor) + +#define AwareLock_m_MonitorHeld 0 +ASMCONSTANTS_C_ASSERT(offsetof(AwareLock, m_MonitorHeld) == AwareLock_m_MonitorHeld) +#else +// The following 3 offsets have value of 0, and must be +// defined to be an empty string. Otherwise, gas may generate assembly +// code with 0 displacement if 0 is left in the displacement field +// of an instruction. 
+#define SyncTableEntry_m_SyncBlock // 0 +ASMCONSTANTS_C_ASSERT(offsetof(SyncTableEntry, m_SyncBlock) == 0) + +#define SyncBlock_m_Monitor // 0 +ASMCONSTANTS_C_ASSERT(offsetof(SyncBlock, m_Monitor) == 0) + +#define AwareLock_m_MonitorHeld // 0 +ASMCONSTANTS_C_ASSERT(offsetof(AwareLock, m_MonitorHeld) == 0) +#endif // !__GNUC__ + +#define AwareLock_m_HoldingThread 8 +ASMCONSTANTS_C_ASSERT(offsetof(AwareLock, m_HoldingThread) == AwareLock_m_HoldingThread) + +#define AwareLock_m_Recursion 4 +ASMCONSTANTS_C_ASSERT(offsetof(AwareLock, m_Recursion) == AwareLock_m_Recursion) + +#define BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX_ASM 0x08000000 +ASMCONSTANTS_C_ASSERT(BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX_ASM == BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX) + +#define BIT_SBLK_SPIN_LOCK_ASM 0x10000000 +ASMCONSTANTS_C_ASSERT(BIT_SBLK_SPIN_LOCK_ASM == BIT_SBLK_SPIN_LOCK) + +#define SBLK_MASK_LOCK_THREADID_ASM 0x000003FF // special value of 0 + 1023 thread ids +ASMCONSTANTS_C_ASSERT(SBLK_MASK_LOCK_THREADID_ASM == SBLK_MASK_LOCK_THREADID) + +#define SBLK_MASK_LOCK_RECLEVEL_ASM 0x0000FC00 // 64 recursion levels +ASMCONSTANTS_C_ASSERT(SBLK_MASK_LOCK_RECLEVEL_ASM == SBLK_MASK_LOCK_RECLEVEL) + +#define SBLK_LOCK_RECLEVEL_INC_ASM 0x00000400 // each level is this much higher than the previous one +ASMCONSTANTS_C_ASSERT(SBLK_LOCK_RECLEVEL_INC_ASM == SBLK_LOCK_RECLEVEL_INC) + +#define BIT_SBLK_IS_HASHCODE_ASM 0x04000000 +ASMCONSTANTS_C_ASSERT(BIT_SBLK_IS_HASHCODE_ASM == BIT_SBLK_IS_HASHCODE) + +#define MASK_SYNCBLOCKINDEX_ASM 0x03ffffff // ((1<::FindCompileTimeConstant' : cannot access private member declared in class 'FindCompileTimeConstant' +// with +// [ +// N=1520 +// ] +// d:\dd\clr\src\ndp\clr\src\vm\i386\asmconstants.h(321) : see declaration of 'FindCompileTimeConstant::FindCompileTimeConstant' +// with +// [ +// N=1520 +// ] +template +class FindCompileTimeConstant +{ +private: + FindCompileTimeConstant(); +}; + +void BogusFunction() +{ + // Sample usage to generate the error + FindCompileTimeConstant bogus_variable; +} +#endif // defined(__cplusplus) && defined(USE_COMPILE_TIME_CONSTANT_FINDER) diff --git a/src/vm/i386/asmhelpers.asm b/src/vm/i386/asmhelpers.asm new file mode 100644 index 0000000000..66a22b7962 --- /dev/null +++ b/src/vm/i386/asmhelpers.asm @@ -0,0 +1,2400 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. +; See the LICENSE file in the project root for more information. 
+ +; ==++== +; + +; +; ==--== +; +; FILE: asmhelpers.asm +; +; *** NOTE: If you make changes to this file, propagate the changes to +; asmhelpers.s in this directory +; + +; +; ====================================================================================== + + .586 + .model flat + +include asmconstants.inc + + assume fs: nothing + option casemap:none + .code + +EXTERN __imp__RtlUnwind@16:DWORD +ifdef _DEBUG +EXTERN _HelperMethodFrameConfirmState@20:PROC +endif +ifdef FEATURE_MIXEDMODE +EXTERN _IJWNOADThunkJumpTargetHelper@4:PROC +endif +EXTERN _StubRareEnableWorker@4:PROC +ifdef FEATURE_COMINTEROP +EXTERN _StubRareDisableHRWorker@4:PROC +endif ; FEATURE_COMINTEROP +EXTERN _StubRareDisableTHROWWorker@4:PROC +EXTERN __imp__TlsGetValue@4:DWORD +TlsGetValue PROTO stdcall +ifdef FEATURE_HIJACK +EXTERN _OnHijackWorker@4:PROC +endif ;FEATURE_HIJACK +EXTERN _COMPlusEndCatch@20:PROC +EXTERN _COMPlusFrameHandler:PROC +ifdef FEATURE_COMINTEROP +EXTERN _COMPlusFrameHandlerRevCom:PROC +endif ; FEATURE_COMINTEROP +EXTERN __alloca_probe:PROC +EXTERN _NDirectImportWorker@4:PROC +EXTERN _UMThunkStubRareDisableWorker@8:PROC +ifndef FEATURE_IMPLICIT_TLS +ifdef ENABLE_GET_THREAD_GENERIC_FULL_CHECK +; This is defined in C (threads.cpp) and enforces EE_THREAD_NOT_REQUIRED contracts +GetThreadGenericFullCheck EQU ?GetThreadGenericFullCheck@@YGPAVThread@@XZ +EXTERN GetThreadGenericFullCheck:PROC +endif ; ENABLE_GET_THREAD_GENERIC_FULL_CHECK + +EXTERN _gThreadTLSIndex:DWORD +EXTERN _gAppDomainTLSIndex:DWORD +endif ; FEATURE_IMPLICIT_TLS + +EXTERN _VarargPInvokeStubWorker@12:PROC +EXTERN _GenericPInvokeCalliStubWorker@12:PROC + +; To debug that LastThrownObjectException really is EXCEPTION_COMPLUS +ifdef TRACK_CXX_EXCEPTION_CODE_HACK +EXTERN __imp____CxxFrameHandler:PROC +endif + +EXTERN _GetThread@0:PROC +EXTERN _GetAppDomain@0:PROC + +ifdef MDA_SUPPORTED +EXTERN _PInvokeStackImbalanceWorker@8:PROC +endif + +ifndef FEATURE_CORECLR +EXTERN _CopyCtorCallStubWorker@4:PROC +endif + +EXTERN _PreStubWorker@8:PROC + +ifdef FEATURE_COMINTEROP +EXTERN _CLRToCOMWorker@8:PROC +endif + +ifdef FEATURE_REMOTING +EXTERN _TransparentProxyStubWorker@8:PROC +endif + +ifdef FEATURE_PREJIT +EXTERN _ExternalMethodFixupWorker@16:PROC +EXTERN _VirtualMethodFixupWorker@8:PROC +EXTERN _StubDispatchFixupWorker@16:PROC +endif + +ifdef FEATURE_COMINTEROP +EXTERN _ComPreStubWorker@8:PROC +endif + +ifdef FEATURE_READYTORUN +EXTERN _DynamicHelperWorker@20:PROC +endif + +ifdef FEATURE_REMOTING +EXTERN _InContextTPQuickDispatchAsmStub@0:PROC +endif + +EXTERN @JIT_InternalThrow@4:PROC + +EXTERN @ProfileEnter@8:PROC +EXTERN @ProfileLeave@8:PROC +EXTERN @ProfileTailcall@8:PROC + +UNREFERENCED macro arg + local unref + unref equ size arg +endm + +FASTCALL_FUNC macro FuncName,cbArgs +FuncNameReal EQU @&FuncName&@&cbArgs +FuncNameReal proc public +endm + +FASTCALL_ENDFUNC macro +FuncNameReal endp +endm + +ifdef FEATURE_COMINTEROP +ifdef _DEBUG + CPFH_STACK_SIZE equ SIZEOF_FrameHandlerExRecord + STACK_OVERWRITE_BARRIER_SIZE*4 +else ; _DEBUG + CPFH_STACK_SIZE equ SIZEOF_FrameHandlerExRecord +endif ; _DEBUG + +PUSH_CPFH_FOR_COM macro trashReg, pFrameBaseReg, pFrameOffset + + ; + ; Setup the FrameHandlerExRecord + ; + push dword ptr [pFrameBaseReg + pFrameOffset] + push _COMPlusFrameHandlerRevCom + mov trashReg, fs:[0] + push trashReg + mov fs:[0], esp + +ifdef _DEBUG + mov trashReg, STACK_OVERWRITE_BARRIER_SIZE +@@: + push STACK_OVERWRITE_BARRIER_VALUE + dec trashReg + jnz @B +endif ; _DEBUG + +endm ; PUSH_CPFH_FOR_COM + + +POP_CPFH_FOR_COM macro 
trashReg + + ; + ; Unlink FrameHandlerExRecord from FS:0 chain + ; +ifdef _DEBUG + add esp, STACK_OVERWRITE_BARRIER_SIZE*4 +endif + mov trashReg, [esp + OFFSETOF__FrameHandlerExRecord__m_ExReg__Next] + mov fs:[0], trashReg + add esp, SIZEOF_FrameHandlerExRecord + +endm ; POP_CPFH_FOR_COM +endif ; FEATURE_COMINTEROP + +; +; FramedMethodFrame prolog +; +STUB_PROLOG macro + ; push ebp-frame + push ebp + mov ebp,esp + + ; save CalleeSavedRegisters + push ebx + push esi + push edi + + ; push ArgumentRegisters + push ecx + push edx +endm + +; +; FramedMethodFrame epilog +; +STUB_EPILOG macro + ; pop ArgumentRegisters + pop edx + pop ecx + + ; pop CalleeSavedRegisters + pop edi + pop esi + pop ebx + pop ebp +endm + +; +; FramedMethodFrame epilog +; +STUB_EPILOG_RETURN macro + ; pop ArgumentRegisters + add esp, 8 + + ; pop CalleeSavedRegisters + pop edi + pop esi + pop ebx + pop ebp +endm + +STUB_PROLOG_2_HIDDEN_ARGS macro + + ; + ; The stub arguments are where we want to setup the TransitionBlock. We will + ; setup the TransitionBlock later once we can trash them + ; + ; push ebp-frame + ; push ebp + ; mov ebp,esp + + ; save CalleeSavedRegisters + ; push ebx + + push esi + push edi + + ; push ArgumentRegisters + push ecx + push edx + + mov ecx, [esp + 4*4] + mov edx, [esp + 5*4] + + ; Setup up proper EBP frame now that the stub arguments can be trashed + mov [esp + 4*4],ebx + mov [esp + 5*4],ebp + lea ebp, [esp + 5*4] +endm + +ResetCurrentContext PROC stdcall public + LOCAL ctrlWord:WORD + + ; Clear the direction flag (used for rep instructions) + cld + + fnstcw ctrlWord + fninit ; reset FPU + and ctrlWord, 0f00h ; preserve precision and rounding control + or ctrlWord, 007fh ; mask all exceptions + fldcw ctrlWord ; preserve precision control + RET +ResetCurrentContext ENDP + +;Incoming: +; ESP+4: Pointer to buffer to which FPU state should be saved +_CaptureFPUContext@4 PROC public + + mov ecx, [esp+4] + fnstenv [ecx] + retn 4 + +_CaptureFPUContext@4 ENDP + +; Incoming: +; ESP+4: Pointer to buffer from which FPU state should be restored +_RestoreFPUContext@4 PROC public + + mov ecx, [esp+4] + fldenv [ecx] + retn 4 + +_RestoreFPUContext@4 ENDP + +ifndef FEATURE_CORECLR +ifdef _DEBUG +; For C++ exceptions, we desperately need to know the SEH code. This allows us to properly +; distinguish managed exceptions from C++ exceptions from standard SEH like hard stack overflow. +; We do this by providing our own handler that squirrels away the exception code and then +; defers to the C++ service. Fortunately, two symbols exist for the C++ symbol. +___CxxFrameHandler3 PROC public + + ; We don't know what arguments are passed to us (except for the first arg on stack) + ; It turns out that EAX is part of the non-standard calling convention of this + ; function. + + push eax + push edx + + cmp dword ptr [_gThreadTLSIndex], -1 + je Chain ; CLR is not initialized yet + + call _GetThread@0 + + test eax, eax ; not a managed thread + jz Chain + + mov edx, [esp + 0ch] ; grab the first argument + mov edx, [edx] ; grab the SEH exception code + + mov dword ptr [eax + Thread_m_LastCxxSEHExceptionCode], edx + +Chain: + + pop edx + + ; [esp] contains the value of EAX we must restore. We would like + ; [esp] to contain the address of the real imported CxxFrameHandler + ; so we can chain to it. + + mov eax, [__imp____CxxFrameHandler] + mov eax, [eax] + xchg [esp], eax + + ret + +___CxxFrameHandler3 ENDP +endif ; _DEBUG +endif ; FEATURE_CORECLR + +; Register CLR exception handlers defined on the C++ side with SAFESEH. 
+; Note that these directives must be in a file that defines symbols that will be used during linking, +; otherwise it's possible that the resulting .obj will completly be ignored by the linker and these +; directives will have no effect. +COMPlusFrameHandler proto c +.safeseh COMPlusFrameHandler + +COMPlusNestedExceptionHandler proto c +.safeseh COMPlusNestedExceptionHandler + +FastNExportExceptHandler proto c +.safeseh FastNExportExceptHandler + +UMThunkPrestubHandler proto c +.safeseh UMThunkPrestubHandler + +ifdef FEATURE_COMINTEROP +COMPlusFrameHandlerRevCom proto c +.safeseh COMPlusFrameHandlerRevCom +endif + +; Note that RtlUnwind trashes EBX, ESI and EDI, so this wrapper preserves them +CallRtlUnwind PROC stdcall public USES ebx esi edi, pEstablisherFrame :DWORD, callback :DWORD, pExceptionRecord :DWORD, retVal :DWORD + + push retVal + push pExceptionRecord + push callback + push pEstablisherFrame + call dword ptr [__imp__RtlUnwind@16] + + ; return 1 + push 1 + pop eax + + RET +CallRtlUnwind ENDP + +_ResumeAtJitEHHelper@4 PROC public + mov edx, [esp+4] ; edx = pContext (EHContext*) + + mov ebx, [edx+EHContext_Ebx] + mov esi, [edx+EHContext_Esi] + mov edi, [edx+EHContext_Edi] + mov ebp, [edx+EHContext_Ebp] + mov ecx, [edx+EHContext_Esp] + mov eax, [edx+EHContext_Eip] + mov [ecx-4], eax + mov eax, [edx+EHContext_Eax] + mov [ecx-8], eax + mov eax, [edx+EHContext_Ecx] + mov [ecx-0Ch], eax + mov eax, [edx+EHContext_Edx] + mov [ecx-10h], eax + lea esp, [ecx-10h] + pop edx + pop ecx + pop eax + ret +_ResumeAtJitEHHelper@4 ENDP + +; int __stdcall CallJitEHFilterHelper(size_t *pShadowSP, EHContext *pContext); +; on entry, only the pContext->Esp, Ebx, Esi, Edi, Ebp, and Eip are initialized +_CallJitEHFilterHelper@8 PROC public + push ebp + mov ebp, esp + push ebx + push esi + push edi + + pShadowSP equ [ebp+8] + pContext equ [ebp+12] + + mov eax, pShadowSP ; Write esp-4 to the shadowSP slot + test eax, eax + jz DONE_SHADOWSP_FILTER + mov ebx, esp + sub ebx, 4 + or ebx, SHADOW_SP_IN_FILTER_ASM + mov [eax], ebx + DONE_SHADOWSP_FILTER: + + mov edx, [pContext] + mov eax, [edx+EHContext_Eax] + mov ebx, [edx+EHContext_Ebx] + mov esi, [edx+EHContext_Esi] + mov edi, [edx+EHContext_Edi] + mov ebp, [edx+EHContext_Ebp] + + call dword ptr [edx+EHContext_Eip] +ifdef _DEBUG + nop ; Indicate that it is OK to call managed code directly from here +endif + + pop edi + pop esi + pop ebx + pop ebp ; don't use 'leave' here, as ebp as been trashed + retn 8 +_CallJitEHFilterHelper@8 ENDP + + +; void __stdcall CallJITEHFinallyHelper(size_t *pShadowSP, EHContext *pContext); +; on entry, only the pContext->Esp, Ebx, Esi, Edi, Ebp, and Eip are initialized +_CallJitEHFinallyHelper@8 PROC public + push ebp + mov ebp, esp + push ebx + push esi + push edi + + pShadowSP equ [ebp+8] + pContext equ [ebp+12] + + mov eax, pShadowSP ; Write esp-4 to the shadowSP slot + test eax, eax + jz DONE_SHADOWSP_FINALLY + mov ebx, esp + sub ebx, 4 + mov [eax], ebx + DONE_SHADOWSP_FINALLY: + + mov edx, [pContext] + mov eax, [edx+EHContext_Eax] + mov ebx, [edx+EHContext_Ebx] + mov esi, [edx+EHContext_Esi] + mov edi, [edx+EHContext_Edi] + mov ebp, [edx+EHContext_Ebp] + call dword ptr [edx+EHContext_Eip] +ifdef _DEBUG + nop ; Indicate that it is OK to call managed code directly from here +endif + + ; Reflect the changes to the context and only update non-volatile registers. 
+ ; This will be used later to update REGDISPLAY + mov edx, [esp+12+12] + mov [edx+EHContext_Ebx], ebx + mov [edx+EHContext_Esi], esi + mov [edx+EHContext_Edi], edi + mov [edx+EHContext_Ebp], ebp + + pop edi + pop esi + pop ebx + pop ebp ; don't use 'leave' here, as ebp as been trashed + retn 8 +_CallJitEHFinallyHelper@8 ENDP + + +_GetSpecificCpuTypeAsm@0 PROC public + push ebx ; ebx is trashed by the cpuid calls + + ; See if the chip supports CPUID + pushfd + pop ecx ; Get the EFLAGS + mov eax, ecx ; Save for later testing + xor ecx, 200000h ; Invert the ID bit. + push ecx + popfd ; Save the updated flags. + pushfd + pop ecx ; Retrieve the updated flags + xor ecx, eax ; Test if it actually changed (bit set means yes) + push eax + popfd ; Restore the flags + + test ecx, 200000h + jz Assume486 + + xor eax, eax + cpuid + + test eax, eax + jz Assume486 ; brif CPUID1 not allowed + + mov eax, 1 + cpuid + + ; filter out everything except family and model + ; Note that some multi-procs have different stepping number for each proc + and eax, 0ff0h + + jmp CpuTypeDone + +Assume486: + mov eax, 0400h ; report 486 +CpuTypeDone: + pop ebx + retn +_GetSpecificCpuTypeAsm@0 ENDP + +; DWORD __stdcall GetSpecificCpuFeaturesAsm(DWORD *pInfo); +_GetSpecificCpuFeaturesAsm@4 PROC public + push ebx ; ebx is trashed by the cpuid calls + + ; See if the chip supports CPUID + pushfd + pop ecx ; Get the EFLAGS + mov eax, ecx ; Save for later testing + xor ecx, 200000h ; Invert the ID bit. + push ecx + popfd ; Save the updated flags. + pushfd + pop ecx ; Retrieve the updated flags + xor ecx, eax ; Test if it actually changed (bit set means yes) + push eax + popfd ; Restore the flags + + test ecx, 200000h + jz CpuFeaturesFail + + xor eax, eax + cpuid + + test eax, eax + jz CpuFeaturesDone ; br if CPUID1 not allowed + + mov eax, 1 + cpuid + mov eax, edx ; return all feature flags + mov edx, [esp+8] + test edx, edx + jz CpuFeaturesDone + mov [edx],ebx ; return additional useful information + jmp CpuFeaturesDone + +CpuFeaturesFail: + xor eax, eax ; Nothing to report +CpuFeaturesDone: + pop ebx + retn 4 +_GetSpecificCpuFeaturesAsm@4 ENDP + + +;----------------------------------------------------------------------- +; The out-of-line portion of the code to enable preemptive GC. +; After the work is done, the code jumps back to the "pRejoinPoint" +; which should be emitted right after the inline part is generated. +; +; Assumptions: +; ebx = Thread +; Preserves +; all registers except ecx. +; +;----------------------------------------------------------------------- +_StubRareEnable proc public + push eax + push edx + + push ebx + call _StubRareEnableWorker@4 + + pop edx + pop eax + retn +_StubRareEnable ENDP + +ifdef FEATURE_COMINTEROP +_StubRareDisableHR proc public + push edx + + push ebx ; Thread + call _StubRareDisableHRWorker@4 + + pop edx + retn +_StubRareDisableHR ENDP +endif ; FEATURE_COMINTEROP + +_StubRareDisableTHROW proc public + push eax + push edx + + push ebx ; Thread + call _StubRareDisableTHROWWorker@4 + + pop edx + pop eax + retn +_StubRareDisableTHROW endp + + +ifdef FEATURE_MIXEDMODE +; VOID __stdcall IJWNOADThunkJumpTarget(void); +; This routine is used by the IJWNOADThunk to determine the callsite of the domain-specific stub to call. +_IJWNOADThunkJumpTarget@0 proc public + + push ebp + mov ebp, esp + + ; EAX contains IJWNOADThunk* + ; Must retain ebx, ecx, edx, esi, edi. + + ; save ebx - holds the IJWNOADThunk* + ; save ecx - holds the current AppDomain ID. + ; save edx - holds the cached AppDomain ID. 
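Expressed at C level, the cache probe performed by the body below looks roughly like the sketch that follows; the struct and function names are illustrative, with the entry size (8 bytes) and the code-address offset (4) taken from the IJWNOADThunk__* constants in asmconstants.h.

#include <windows.h>

// Fast path: probe a fixed four-slot cache mapping an AppDomain ID to the
// per-domain code address, matching the four compares in the assembly below.
// A miss (or a null code address) falls back to IJWNOADThunkJumpTargetHelper.
struct IJWNOADThunkStubCacheSketch { DWORD adid; void *codeAddr; };  // 8 bytes per entry

void *LookupTargetSketch(IJWNOADThunkStubCacheSketch *cache /* m_cache */,
                         DWORD currentAdid /* AppDomain::m_dwId */)
{
    for (int i = 0; i < 4; i++)
    {
        if (cache[i].adid == currentAdid && cache[i].codeAddr != NULL)
            return cache[i].codeAddr;   // cache hit: jump straight to the callsite
    }
    return NULL;                        // miss: take the helper path
}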
+ push ebx + push ecx + + ; put the IJWNOADThunk into ebx for safe keeping + mov ebx, eax + + ; get thread - assumes registers are preserved + call _GetThread@0 + + ; if thread is null, go down un-optimized path + test eax,eax + jz cachemiss + + ; get current domain - assumes registers are preserved + call _GetAppDomain@0 + + ; if domain is null, go down un-optimized path + test eax,eax + jz cachemiss + + ; get the current appdomain id + mov ecx, [eax + AppDomain__m_dwId] + + ; test it against each cache location + mov eax, ebx + add eax, IJWNOADThunk__m_cache + cmp ecx, [eax] + je cachehit + + add eax, IJWNOADThunk__NextCacheOffset + cmp ecx, [eax] + je cachehit + + add eax, IJWNOADThunk__NextCacheOffset + cmp ecx, [eax] + je cachehit + + add eax, IJWNOADThunk__NextCacheOffset + cmp ecx, [eax] + je cachehit + +cachemiss: + ; save extra registers + push edx + push esi + push edi + + ; call unoptimized path + push ebx ; only arg is IJWNOADThunk* + call _IJWNOADThunkJumpTargetHelper@4 + + ; restore extra registers + pop edi + pop esi + pop edx + + ; jump back up to the epilog + jmp complete + +cachehit: + ; found a matching ADID, get the code addr. + mov eax, [eax + IJWNOADThunk__CodeAddrOffsetFromADID] + + ; if the callsite is null, go down the un-optimized path + test eax, eax + jz cachemiss + +complete: + ; restore regs + pop ecx + pop ebx + + mov esp, ebp + pop ebp + + ; Jump to callsite + jmp eax + + ; This will never be executed. It is just to help out stack-walking logic + ; which disassembles the epilog to unwind the stack. + ret +_IJWNOADThunkJumpTarget@0 endp + +endif + +InternalExceptionWorker proc public + pop edx ; recover RETADDR + add esp, eax ; release caller's args + push edx ; restore RETADDR + jmp @JIT_InternalThrow@4 +InternalExceptionWorker endp + +; EAX -> number of caller arg bytes on the stack that we must remove before going +; to the throw helper, which assumes the stack is clean. +_ArrayOpStubNullException proc public + ; kFactorReg and kTotalReg could not have been modified, but let's pop + ; them anyway for consistency and to avoid future bugs. + pop esi + pop edi + mov ecx, CORINFO_NullReferenceException_ASM + jmp InternalExceptionWorker +_ArrayOpStubNullException endp + +; EAX -> number of caller arg bytes on the stack that we must remove before going +; to the throw helper, which assumes the stack is clean. +_ArrayOpStubRangeException proc public + ; kFactorReg and kTotalReg could not have been modified, but let's pop + ; them anyway for consistency and to avoid future bugs. + pop esi + pop edi + mov ecx, CORINFO_IndexOutOfRangeException_ASM + jmp InternalExceptionWorker +_ArrayOpStubRangeException endp + +; EAX -> number of caller arg bytes on the stack that we must remove before going +; to the throw helper, which assumes the stack is clean. +_ArrayOpStubTypeMismatchException proc public + ; kFactorReg and kTotalReg could not have been modified, but let's pop + ; them anyway for consistency and to avoid future bugs. + pop esi + pop edi + mov ecx, CORINFO_ArrayTypeMismatchException_ASM + jmp InternalExceptionWorker +_ArrayOpStubTypeMismatchException endp + +;------------------------------------------------------------------------------ +; This helper routine enregisters the appropriate arguments and makes the +; actual call. 
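The fields the stub reads are described by the CallDescrData__* offsets it uses; a sketch of that view of the descriptor follows. Only the field meanings are taken from the code, the ordering shown here is illustrative.

#include <windows.h>

// What CallDescrWorkerInternal consumes: copy numStackSlots DWORDs from pSrc
// onto the stack (last slot first), load EDX/ECX from pArgumentRegisters[0]/[1],
// call pTarget, then store the return value according to fpReturnSize.
struct CallDescrDataSketch
{
    const DWORD *pSrc;               // outgoing stack arguments
    DWORD        numStackSlots;      // number of 4-byte slots to copy
    const DWORD *pArgumentRegisters; // [0] -> EDX, [1] -> ECX
    DWORD        fpReturnSize;       // 0 = int/void, 4 = float, 8 = double
    void        *pTarget;            // code address to call
    UINT64       returnValue;        // EAX:EDX, or the spilled x87 result
};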
+;------------------------------------------------------------------------------ +; void STDCALL CallDescrWorkerInternal(CallDescrWorkerParams * pParams) +CallDescrWorkerInternal PROC stdcall public USES EBX, + pParams: DWORD + + mov ebx, pParams + + mov ecx, [ebx+CallDescrData__numStackSlots] + mov eax, [ebx+CallDescrData__pSrc] ; copy the stack + test ecx, ecx + jz donestack + lea eax, [eax+4*ecx-4] ; last argument + push dword ptr [eax] + dec ecx + jz donestack + sub eax, 4 + push dword ptr [eax] + dec ecx + jz donestack +stackloop: + sub eax, 4 + push dword ptr [eax] + dec ecx + jnz stackloop +donestack: + + ; now we must push each field of the ArgumentRegister structure + mov eax, [ebx+CallDescrData__pArgumentRegisters] + mov edx, dword ptr [eax] + mov ecx, dword ptr [eax+4] + + call [ebx+CallDescrData__pTarget] +ifdef _DEBUG + nop ; This is a tag that we use in an assert. Fcalls expect to + ; be called from Jitted code or from certain blessed call sites like + ; this one. (See HelperMethodFrame::InsureInit) +endif + + ; Save FP return value if necessary + mov ecx, [ebx+CallDescrData__fpReturnSize] + cmp ecx, 0 + je ReturnsInt + + cmp ecx, 4 + je ReturnsFloat + cmp ecx, 8 + je ReturnsDouble + ; unexpected + jmp Epilog + +ReturnsInt: + mov [ebx+CallDescrData__returnValue], eax + mov [ebx+CallDescrData__returnValue+4], edx + +Epilog: + RET + +ReturnsFloat: + fstp dword ptr [ebx+CallDescrData__returnValue] ; Spill the Float return value + jmp Epilog + +ReturnsDouble: + fstp qword ptr [ebx+CallDescrData__returnValue] ; Spill the Double return value + jmp Epilog + +CallDescrWorkerInternal endp + +ifdef _DEBUG +; int __fastcall HelperMethodFrameRestoreState(HelperMethodFrame*, struct MachState *) +FASTCALL_FUNC HelperMethodFrameRestoreState,8 + mov eax, edx ; eax = MachState* +else +; int __fastcall HelperMethodFrameRestoreState(struct MachState *) +FASTCALL_FUNC HelperMethodFrameRestoreState,4 + mov eax, ecx ; eax = MachState* +endif + ; restore the registers from the m_MachState stucture. Note that + ; we only do this for register that where not saved on the stack + ; at the time the machine state snapshot was taken. + + cmp [eax+MachState__pRetAddr], 0 + +ifdef _DEBUG + jnz noConfirm + push ebp + push ebx + push edi + push esi + push ecx ; HelperFrame* + call _HelperMethodFrameConfirmState@20 + ; on return, eax = MachState* + cmp [eax+MachState__pRetAddr], 0 +noConfirm: +endif + + jz doRet + + lea edx, [eax+MachState__esi] ; Did we have to spill ESI + cmp [eax+MachState__pEsi], edx + jnz SkipESI + mov esi, [edx] ; Then restore it +SkipESI: + + lea edx, [eax+MachState__edi] ; Did we have to spill EDI + cmp [eax+MachState__pEdi], edx + jnz SkipEDI + mov edi, [edx] ; Then restore it +SkipEDI: + + lea edx, [eax+MachState__ebx] ; Did we have to spill EBX + cmp [eax+MachState__pEbx], edx + jnz SkipEBX + mov ebx, [edx] ; Then restore it +SkipEBX: + + lea edx, [eax+MachState__ebp] ; Did we have to spill EBP + cmp [eax+MachState__pEbp], edx + jnz SkipEBP + mov ebp, [edx] ; Then restore it +SkipEBP: + +doRet: + xor eax, eax + retn +FASTCALL_ENDFUNC HelperMethodFrameRestoreState + + +ifndef FEATURE_IMPLICIT_TLS +;--------------------------------------------------------------------------- +; Portable GetThread() function: used if no platform-specific optimizations apply. +; This is in assembly code because we count on edx not getting trashed on calls +; to this function. 
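A C equivalent of the portable lookup below: the Thread* lives in an ordinary TLS slot whose index is published in gThreadTLSIndex, and the only reason this stays in assembly is the register-preservation guarantee noted above. A minimal sketch:

#include <windows.h>

extern "C" DWORD gThreadTLSIndex;   // set to -1 until the runtime initializes it

// The same TlsGetValue-based lookup the stub performs; preserving ECX/EDX
// across the call is the part that still has to be done in assembly.
void *GetThreadGenericSketch()
{
    return TlsGetValue(gThreadTLSIndex);
}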
+;--------------------------------------------------------------------------- +; Thread* __stdcall GetThreadGeneric(void); +GetThreadGeneric PROC stdcall public USES ecx edx + +ifdef _DEBUG + cmp dword ptr [_gThreadTLSIndex], -1 + jnz @F + int 3 +@@: +endif +ifdef ENABLE_GET_THREAD_GENERIC_FULL_CHECK + ; non-PAL, debug-only GetThreadGeneric should defer to GetThreadGenericFullCheck + ; to do extra contract enforcement. (See GetThreadGenericFullCheck for details.) + ; This code is intentionally not added to asmhelper.s, as this enforcement is only + ; implemented for non-PAL builds. + call GetThreadGenericFullCheck +else + push dword ptr [_gThreadTLSIndex] + call dword ptr [__imp__TlsGetValue@4] +endif + ret +GetThreadGeneric ENDP + +;--------------------------------------------------------------------------- +; Portable GetAppdomain() function: used if no platform-specific optimizations apply. +; This is in assembly code because we count on edx not getting trashed on calls +; to this function. +;--------------------------------------------------------------------------- +; Appdomain* __stdcall GetAppDomainGeneric(void); +GetAppDomainGeneric PROC stdcall public USES ecx edx + +ifdef _DEBUG + cmp dword ptr [_gAppDomainTLSIndex], -1 + jnz @F + int 3 +@@: +endif + + push dword ptr [_gAppDomainTLSIndex] + call dword ptr [__imp__TlsGetValue@4] + ret +GetAppDomainGeneric ENDP +endif + + +ifdef FEATURE_HIJACK + +; A JITted method's return address was hijacked to return to us here. +; VOID OnHijackTripThread() +OnHijackTripThread PROC stdcall public + + ; Don't fiddle with this unless you change HijackFrame::UpdateRegDisplay + ; and HijackArgs + push eax ; make room for the real return address (Eip) + push ebp + push eax + push ecx + push edx + push ebx + push esi + push edi + + ; unused space for floating point state + sub esp,12 + + push esp + call _OnHijackWorker@4 + + ; unused space for floating point state + add esp,12 + + pop edi + pop esi + pop ebx + pop edx + pop ecx + pop eax + pop ebp + retn ; return to the correct place, adjusted by our caller +OnHijackTripThread ENDP + +; VOID OnHijackFPTripThread() +OnHijackFPTripThread PROC stdcall public + + ; Don't fiddle with this unless you change HijackFrame::UpdateRegDisplay + ; and HijackArgs + push eax ; make room for the real return address (Eip) + push ebp + push eax + push ecx + push edx + push ebx + push esi + push edi + + sub esp,12 + + ; save top of the floating point stack (there is return value passed in it) + ; save full 10 bytes to avoid precision loss + fstp tbyte ptr [esp] + + push esp + call _OnHijackWorker@4 + + ; restore top of the floating point stack + fld tbyte ptr [esp] + + add esp,12 + + pop edi + pop esi + pop ebx + pop edx + pop ecx + pop eax + pop ebp + retn ; return to the correct place, adjusted by our caller +OnHijackFPTripThread ENDP + +endif ; FEATURE_HIJACK + + +; Note that the debugger skips this entirely when doing SetIP, +; since COMPlusCheckForAbort should always return 0. Excep.cpp:LeaveCatch +; asserts that to be true. If this ends up doing more work, then the +; debugger may need additional support. 
+; void __stdcall JIT_EndCatch(); +JIT_EndCatch PROC stdcall public + + ; make temp storage for return address, and push the address of that + ; as the last arg to COMPlusEndCatch + mov ecx, [esp] + push ecx; + push esp; + + ; push the rest of COMPlusEndCatch's args, right-to-left + push esi + push edi + push ebx + push ebp + + call _COMPlusEndCatch@20 ; returns old esp value in eax, stores jump address + ; now eax = new esp, [esp] = new eip + + pop edx ; edx = new eip + mov esp, eax ; esp = new esp + jmp edx ; eip = new eip + +JIT_EndCatch ENDP + +;========================================================================== +; This function is reached only via the embedded ImportThunkGlue code inside +; an NDirectMethodDesc. It's purpose is to load the DLL associated with an +; N/Direct method, then backpatch the DLL target into the methoddesc. +; +; Initial state: +; +; Preemptive GC is *enabled*: we are actually in an unmanaged state. +; +; +; [esp+...] - The *unmanaged* parameters to the DLL target. +; [esp+4] - Return address back into the JIT'ted code that made +; the DLL call. +; [esp] - Contains the "return address." Because we got here +; thru a call embedded inside a MD, this "return address" +; gives us an easy to way to find the MD (which was the +; whole purpose of the embedded call manuever.) +; +; +; +;========================================================================== +_NDirectImportThunk@0 proc public + + ; Preserve argument registers + push ecx + push edx + + ; Invoke the function that does the real work. + push eax + call _NDirectImportWorker@4 + + ; Restore argument registers + pop edx + pop ecx + + ; If we got back from NDirectImportWorker, the MD has been successfully + ; linked and "eax" contains the DLL target. Proceed to execute the + ; original DLL call. + jmp eax ; Jump to DLL target +_NDirectImportThunk@0 endp + +;========================================================================== +; The call in fixup precode initally points to this function. +; The pupose of this function is to load the MethodDesc and forward the call the prestub. +_PrecodeFixupThunk@0 proc public + + pop eax ; Pop the return address. It points right after the call instruction in the precode. 
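The address arithmetic in the next few instructions recovers the MethodDesc from the precode; transliterated into C (with retAddr being the value just popped into EAX) it reads roughly as follows. The helper name is illustrative; the byte offsets are exactly the ones used below.

#include <windows.h>

// retAddr points just past the call instruction inside the FixupPrecode.
BYTE *GetMethodDescFromPrecodeSketch(BYTE *retAddr)
{
    BYTE precodeChunkIndex    = retAddr[2];            // m_PrecodeChunkIndex
    BYTE methodDescChunkIndex = retAddr[1];            // m_MethodDescChunkIndex
    BYTE *chunkBase = *(BYTE **)(retAddr + precodeChunkIndex * 8 + 3);
    return chunkBase + methodDescChunkIndex * 4;       // the MethodDesc*
}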
+ push esi + push edi + + ; Inline computation done by FixupPrecode::GetMethodDesc() + movzx esi,byte ptr [eax+2] ; m_PrecodeChunkIndex + movzx edi,byte ptr [eax+1] ; m_MethodDescChunkIndex + mov eax,dword ptr [eax+esi*8+3] + lea eax,[eax+edi*4] + + pop edi + pop esi + jmp _ThePreStub@0 + +_PrecodeFixupThunk@0 endp + +; LPVOID __stdcall CTPMethodTable__CallTargetHelper2( +; const void *pTarget, +; LPVOID pvFirst, +; LPVOID pvSecond) +CTPMethodTable__CallTargetHelper2 proc stdcall public, + pTarget : DWORD, + pvFirst : DWORD, + pvSecond : DWORD + mov ecx, pvFirst + mov edx, pvSecond + + call pTarget +ifdef _DEBUG + nop ; Mark this as a special call site that can + ; directly call unmanaged code +endif + ret +CTPMethodTable__CallTargetHelper2 endp + +; LPVOID __stdcall CTPMethodTable__CallTargetHelper3( +; const void *pTarget, +; LPVOID pvFirst, +; LPVOID pvSecond, +; LPVOID pvThird) +CTPMethodTable__CallTargetHelper3 proc stdcall public, + pTarget : DWORD, + pvFirst : DWORD, + pvSecond : DWORD, + pvThird : DWORD + push pvThird + + mov ecx, pvFirst + mov edx, pvSecond + + call pTarget +ifdef _DEBUG + nop ; Mark this as a special call site that can + ; directly call unmanaged code +endif + ret +CTPMethodTable__CallTargetHelper3 endp + + +; void __stdcall setFPReturn(int fpSize, INT64 retVal) +_setFPReturn@12 proc public + mov ecx, [esp+4] + + ; leave the return value in eax:edx if it is not the floating point case + mov eax, [esp+8] + mov edx, [esp+12] + + cmp ecx, 4 + jz setFPReturn4 + + cmp ecx, 8 + jnz setFPReturnNot8 + fld qword ptr [esp+8] +setFPReturnNot8: + retn 12 + +setFPReturn4: + fld dword ptr [esp+8] + retn 12 +_setFPReturn@12 endp + +; void __stdcall getFPReturn(int fpSize, INT64 *pretVal) +_getFPReturn@8 proc public + mov ecx, [esp+4] + mov eax, [esp+8] + cmp ecx, 4 + jz getFPReturn4 + + cmp ecx, 8 + jnz getFPReturnNot8 + fstp qword ptr [eax] +getFPReturnNot8: + retn 8 + +getFPReturn4: + fstp dword ptr [eax] + retn 8 +_getFPReturn@8 endp + +; void __stdcall UM2MThunk_WrapperHelper(void *pThunkArgs, +; int argLen, +; void *pAddr, +; UMEntryThunk *pEntryThunk, +; Thread *pThread) +UM2MThunk_WrapperHelper proc stdcall public, + pThunkArgs : DWORD, + argLen : DWORD, + pAddr : DWORD, + pEntryThunk : DWORD, + pThread : DWORD + UNREFERENCED argLen + + push ebx + + mov eax, pEntryThunk + mov ecx, pThread + mov ebx, pThunkArgs + call pAddr + + pop ebx + + ret +UM2MThunk_WrapperHelper endp + +; VOID __cdecl UMThunkStubRareDisable() +; +; @todo: this is very similar to StubRareDisable +; +_UMThunkStubRareDisable proc public + push eax + push ecx + + push eax ; Push the UMEntryThunk + push ecx ; Push thread + call _UMThunkStubRareDisableWorker@8 + + pop ecx + pop eax + retn +_UMThunkStubRareDisable endp + + +;+---------------------------------------------------------------------------- +; +; Method: CRemotingServices::CheckForContextMatch public +; +; Synopsis: This code generates a check to see if the current context and +; the context of the proxy match. 
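In C terms the check that follows boils down to a subtraction, so a zero result means the proxy's context is the current thread's context. A sketch with illustrative names (the boxed stub data is read at offset 4, just past the object's method-table pointer, exactly as the assembly does):

#include <windows.h>

// pStubData: the boxed stub data passed in EAX.  pCurrentContext: the value of
// Thread::m_Context for the current thread.  Returns 0 iff the contexts match.
UINT_PTR CheckForContextMatchSketch(void *pStubData, void *pCurrentContext)
{
    UINT_PTR proxyContext = *(UINT_PTR *)((BYTE *)pStubData + 4);
    return (UINT_PTR)pCurrentContext - proxyContext;
}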
+; +;+---------------------------------------------------------------------------- +; +; returns zero if contexts match +; returns non-zero if contexts do not match +; +; UINT_PTR __stdcall CRemotingServices__CheckForContextMatch(Object* pStubData) +ifdef FEATURE_REMOTING +_CRemotingServices__CheckForContextMatch@4 proc public + push ebx ; spill ebx + mov ebx, [eax+4] ; Get the internal context id by unboxing + ; the stub data + call _GetThread@0 ; Get the current thread, assumes that the + ; registers are preserved + mov eax, [eax+Thread_m_Context] ; Get the current context from the + ; thread + sub eax, ebx ; Get the pointer to the context from the + ; proxy and compare with the current context + pop ebx ; restore the value of ebx + retn +_CRemotingServices__CheckForContextMatch@4 endp +endif ; FEATURE_REMOTING + +;+---------------------------------------------------------------------------- +; +; Method: CRemotingServices::DispatchInterfaceCall public +; +; Synopsis: +; Push that method desc on the stack and jump to the +; transparent proxy stub to execute the call. +; WARNING!! This MethodDesc is not the methoddesc in the vtable +; of the object instead it is the methoddesc in the vtable of +; the interface class. Since we use the MethodDesc only to probe +; the stack via the signature of the method call we are safe. +; If we want to get any object vtable/class specific +; information this is not safe. +; +; +;+---------------------------------------------------------------------------- +; void __stdcall CRemotingServices__DispatchInterfaceCall() +ifdef FEATURE_REMOTING +_CRemotingServices__DispatchInterfaceCall@0 proc public + ; push MethodDesc* passed in eax by precode and forward to the worker + push eax + + ; NOTE: At this point the stack looks like + ; + ; esp---> saved MethodDesc of Interface method + ; return addr of calling function + ; + mov eax, [ecx + TransparentProxyObject___stubData] + call [ecx + TransparentProxyObject___stub] +ifdef _DEBUG + nop ; Mark this as a special call site that can directly + ; call managed code +endif + test eax, eax + jnz CtxMismatch + jmp _InContextTPQuickDispatchAsmStub@0 + +CtxMismatch: + pop eax ; restore MethodDesc * + jmp _TransparentProxyStub_CrossContext@0 ; jump to slow TP stub +_CRemotingServices__DispatchInterfaceCall@0 endp +endif ; FEATURE_REMOTING + + +;+---------------------------------------------------------------------------- +; +; Method: CRemotingServices::CallFieldGetter private +; +; Synopsis: Calls the field getter function (Object::__FieldGetter) in +; managed code by setting up the stack and calling the target +; +; +;+---------------------------------------------------------------------------- +; void __stdcall CRemotingServices__CallFieldGetter( +; MethodDesc *pMD, +; LPVOID pThis, +; LPVOID pFirst, +; LPVOID pSecond, +; LPVOID pThird) +ifdef FEATURE_REMOTING +CRemotingServices__CallFieldGetter proc stdcall public, + pMD : DWORD, + pThis : DWORD, + pFirst : DWORD, + pSecond : DWORD, + pThird : DWORD + + push [pSecond] ; push the second argument on the stack + push [pThird] ; push the third argument on the stack + + mov ecx, [pThis] ; enregister pThis, the 'this' pointer + mov edx, [pFirst] ; enregister pFirst, the first argument + + mov eax, [pMD] ; load MethodDesc of object::__FieldGetter + call _TransparentProxyStub_CrossContext@0 ; call the TP stub + + ret +CRemotingServices__CallFieldGetter endp +endif ; FEATURE_REMOTING + +;+---------------------------------------------------------------------------- +; +; Method: 
CRemotingServices::CallFieldSetter private +; +; Synopsis: Calls the field setter function (Object::__FieldSetter) in +; managed code by setting up the stack and calling the target +; +; +;+---------------------------------------------------------------------------- +; void __stdcall CRemotingServices__CallFieldSetter( +; MethodDesc *pMD, +; LPVOID pThis, +; LPVOID pFirst, +; LPVOID pSecond, +; LPVOID pThird) +ifdef FEATURE_REMOTING +CRemotingServices__CallFieldSetter proc stdcall public, + pMD : DWORD, + pThis : DWORD, + pFirst : DWORD, + pSecond : DWORD, + pThird : DWORD + + push [pSecond] ; push the field name (second arg) + push [pThird] ; push the object (third arg) on the stack + + mov ecx, [pThis] ; enregister pThis, the 'this' pointer + mov edx, [pFirst] ; enregister the first argument + + mov eax, [pMD] ; load MethodDesc of object::__FieldGetter + call _TransparentProxyStub_CrossContext@0 ; call the TP stub + + ret +CRemotingServices__CallFieldSetter endp +endif ; FEATURE_REMOTING + +;+---------------------------------------------------------------------------- +; +; Method: CTPMethodTable::GenericCheckForContextMatch private +; +; Synopsis: Calls the stub in the TP & returns TRUE if the contexts +; match, FALSE otherwise. +; +; Note: 1. Called during FieldSet/Get, used for proxy extensibility +; +;+---------------------------------------------------------------------------- +; BOOL __stdcall CTPMethodTable__GenericCheckForContextMatch(Object* orTP) +ifdef FEATURE_REMOTING +CTPMethodTable__GenericCheckForContextMatch proc stdcall public uses ecx, tp : DWORD + + mov ecx, [tp] + mov eax, [ecx + TransparentProxyObject___stubData] + call [ecx + TransparentProxyObject___stub] +ifdef _DEBUG + nop ; Mark this as a special call site that can directly + ; call managed code +endif + test eax, eax + mov eax, 0 + setz al + ; NOTE: In the CheckForXXXMatch stubs (for URT ctx/ Ole32 ctx) eax is + ; non-zero if contexts *do not* match & zero if they do. + ret +CTPMethodTable__GenericCheckForContextMatch endp +endif ; FEATURE_REMOTING + + +; void __stdcall JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle) +_JIT_ProfilerEnterLeaveTailcallStub@4 proc public + ; this function must preserve all registers, including scratch + retn 4 +_JIT_ProfilerEnterLeaveTailcallStub@4 endp + +; +; Used to get the current instruction pointer value +; +; UINT_PTR __stdcall GetCurrentIP(void); +_GetCurrentIP@0 proc public + mov eax, [esp] + retn +_GetCurrentIP@0 endp + +; LPVOID __stdcall GetCurrentSP(void); +_GetCurrentSP@0 proc public + mov eax, esp + retn +_GetCurrentSP@0 endp + + +; void __stdcall ProfileEnterNaked(FunctionIDOrClientID functionIDOrClientID); +_ProfileEnterNaked@4 proc public + push esi + push edi + + ; + ; Push in reverse order the fields of ProfilePlatformSpecificData + ; + push dword ptr [esp+8] ; EIP of the managed code that we return to. -- struct ip field + push ebp ; Methods are always EBP framed + add [esp], 8 ; Skip past the return IP, straight to the stack args that were passed to our caller + ; Skip past saved EBP value: 4 bytes + ; - plus return address from caller's caller: 4 bytes + ; + ; Assuming Foo() calls Bar(), and Bar() calls ProfileEnterNake() as illustrated (stack + ; grows up). We want to get what Foo() passed on the stack to Bar(), so we need to pass + ; the return address from caller's caller which is Foo() in this example. + ; + ; ProfileEnterNaked() + ; Bar() + ; Foo() + ; + ; [ESP] is now the ESP of caller's caller pointing to the arguments to the caller. 
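The 40-byte block that this push sequence builds is what ProfileEnter/ProfileLeave/ProfileTailcall receive through EDX. Reading the pushes bottom-up (the last value pushed sits at the lowest address) gives the layout sketched here; field names are taken from the "-- struct ... field" annotations, and probeSp is an illustrative name for the one slot the annotations leave unnamed.

#include <windows.h>

// Layout of the ProfilePlatformSpecificData block as these stubs build it.
struct ProfilePlatformSpecificDataSketch
{
    UINT_PTR functionId;                 // filled in by the C++ side
    DWORD    doubleBuffer1;              // x87 return value as a double, low half
    DWORD    doubleBuffer2;              // x87 return value as a double, high half
    DWORD    floatBuffer;                // x87 return value as a float
    DWORD    floatingPointValuePresent;  // 1 if the FP buffers are valid
    UINT_PTR eax;                        // integer return value / argument registers
    UINT_PTR edx;
    UINT_PTR ecx;
    UINT_PTR probeSp;                    // caller's caller ESP (the adjusted EBP push)
    UINT_PTR ip;                         // EIP in the managed code we return to
};  // 10 * 4 = 40 bytes, matching the "Skip past ... 40 bytes" comments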
+ + push ecx ; -- struct ecx field + push edx ; -- struct edx field + push eax ; -- struct eax field + push 0 ; Create buffer space in the structure -- struct floatingPointValuePresent field + push 0 ; Create buffer space in the structure -- struct floatBuffer field + push 0 ; Create buffer space in the structure -- struct doubleBuffer2 field + push 0 ; Create buffer space in the structure -- struct doubleBuffer1 field + push 0 ; Create buffer space in the structure -- struct functionId field + + mov edx, esp ; the address of the Platform structure + mov ecx, [esp+52]; The functionIDOrClientID parameter that was pushed to FunctionEnter + ; Skip past ProfilePlatformSpecificData we pushed: 40 bytes + ; - plus saved edi, esi : 8 bytes + ; - plus return address from caller: 4 bytes + + call @ProfileEnter@8 + + add esp, 20 ; Remove buffer space + pop eax + pop edx + pop ecx + add esp, 8 ; Remove buffer space + pop edi + pop esi + + retn 4 +_ProfileEnterNaked@4 endp + +; void __stdcall ProfileLeaveNaked(FunctionIDOrClientID functionIDOrClientID); +_ProfileLeaveNaked@4 proc public + push ecx ; We do not strictly need to save ECX, however + ; emitNoGChelper(CORINFO_HELP_PROF_FCN_LEAVE) returns true in the JITcompiler + push edx ; Return value may be in EAX:EDX + + ; + ; Push in reverse order the fields of ProfilePlatformSpecificData + ; + push dword ptr [esp+8] ; EIP of the managed code that we return to. -- struct ip field + push ebp ; Methods are always EBP framed + add [esp], 8 ; Skip past the return IP, straight to the stack args that were passed to our caller + ; Skip past saved EBP value: 4 bytes + ; - plus return address from caller's caller: 4 bytes + ; + ; Assuming Foo() calls Bar(), and Bar() calls ProfileEnterNake() as illustrated (stack + ; grows up). We want to get what Foo() passed on the stack to Bar(), so we need to pass + ; the return address from caller's caller which is Foo() in this example. + ; + ; ProfileEnterNaked() + ; Bar() + ; Foo() + ; + ; [ESP] is now the ESP of caller's caller pointing to the arguments to the caller. 
+ + push ecx ; -- struct ecx field + push edx ; -- struct edx field + push eax ; -- struct eax field + + ; Check if we need to save off any floating point registers + fstsw ax + and ax, 3800h ; Check the top-of-fp-stack bits + cmp ax, 0 ; If non-zero, we have something to save + jnz SaveFPReg + + push 0 ; Create buffer space in the structure -- struct floatingPointValuePresent field + push 0 ; Create buffer space in the structure -- struct floatBuffer field + push 0 ; Create buffer space in the structure -- struct doubleBuffer2 field + push 0 ; Create buffer space in the structure -- struct doubleBuffer1 field + jmp Continue + +SaveFPReg: + push 1 ; mark that a float value is present -- struct floatingPointValuePresent field + sub esp, 4 ; Make room for the FP value + fst dword ptr [esp] ; Copy the FP value to the buffer as a float -- struct floatBuffer field + sub esp, 8 ; Make room for the FP value + fstp qword ptr [esp] ; Copy FP values to the buffer as a double -- struct doubleBuffer1 and doubleBuffer2 fields + +Continue: + push 0 ; Create buffer space in the structure -- struct functionId field + + mov edx, esp ; the address of the Platform structure + mov ecx, [esp+52]; The clientData that was pushed to FunctionEnter + ; Skip past ProfilePlatformSpecificData we pushed: 40 bytes + ; - plus saved edx, ecx : 8 bytes + ; - plus return address from caller: 4 bytes + + call @ProfileLeave@8 + + ; + ; Now see if we have to restore and floating point registers + ; + + cmp [esp + 16], 0 + jz NoRestore + + fld qword ptr [esp + 4] + +NoRestore: + + add esp, 20 ; Remove buffer space + pop eax + add esp, 16 ; Remove buffer space + pop edx + pop ecx + retn 4 +_ProfileLeaveNaked@4 endp + + +; void __stdcall ProfileTailcallNaked(FunctionIDOrClientID functionIDOrClientID); +_ProfileTailcallNaked@4 proc public + push ecx + push edx + + ; + ; Push in reverse order the fields of ProfilePlatformSpecificData + ; + push dword ptr [esp+8] ; EIP of the managed code that we return to. -- struct ip field + push ebp ; Methods are always EBP framed + add [esp], 8 ; Skip past the return IP, straight to the stack args that were passed to our caller + ; Skip past saved EBP value: 4 bytes + ; - plus return address from caller's caller: 4 bytes + ; + ; Assuming Foo() calls Bar(), and Bar() calls ProfileEnterNake() as illustrated (stack + ; grows up). We want to get what Foo() passed on the stack to Bar(), so we need to pass + ; the return address from caller's caller which is Foo() in this example. + ; + ; ProfileEnterNaked() + ; Bar() + ; Foo() + ; + ; [ESP] is now the ESP of caller's caller pointing to the arguments to the caller. 
+ + push ecx ; -- struct ecx field + push edx ; -- struct edx field + push eax ; -- struct eax field + push 0 ; Create buffer space in the structure -- struct floatingPointValuePresent field + push 0 ; Create buffer space in the structure -- struct floatBuffer field + push 0 ; Create buffer space in the structure -- struct doubleBuffer2 field + push 0 ; Create buffer space in the structure -- struct doubleBuffer1 field + push 0 ; Create buffer space in the structure -- struct functionId field + + mov edx, esp ; the address of the Platform structure + mov ecx, [esp+52]; The clientData that was pushed to FunctionEnter + ; Skip past ProfilePlatformSpecificData we pushed: 40 bytes + ; - plus saved edx, ecx : 8 bytes + ; - plus return address from caller: 4 bytes + + call @ProfileTailcall@8 + + add esp, 40 ; Remove buffer space + pop edx + pop ecx + retn 4 +_ProfileTailcallNaked@4 endp + +;========================================================================== +; Invoked for vararg forward P/Invoke calls as a stub. +; Except for secret return buffer, arguments come on the stack so EDX is available as scratch. +; EAX - the NDirectMethodDesc +; ECX - may be return buffer address +; [ESP + 4] - the VASigCookie +; +_VarargPInvokeStub@0 proc public + ; EDX <- VASigCookie + mov edx, [esp + 4] ; skip retaddr + + mov edx, [edx + VASigCookie__StubOffset] + test edx, edx + + jz GoCallVarargWorker + ; --------------------------------------- + + ; EAX contains MD ptr for the IL stub + jmp edx + +GoCallVarargWorker: + ; + ; MD ptr in EAX, VASigCookie ptr at [esp+4] + ; + + STUB_PROLOG + + mov esi, esp + + ; save pMD + push eax + + push eax ; pMD + push dword ptr [esi + 4*7] ; pVaSigCookie + push esi ; pTransitionBlock + + call _VarargPInvokeStubWorker@12 + + ; restore pMD + pop eax + + STUB_EPILOG + + ; jump back to the helper - this time it won't come back here as the stub already exists + jmp _VarargPInvokeStub@0 + +_VarargPInvokeStub@0 endp + +;========================================================================== +; Invoked for marshaling-required unmanaged CALLI calls as a stub. +; EAX - the unmanaged target +; ECX, EDX - arguments +; [ESP + 4] - the VASigCookie +; +_GenericPInvokeCalliHelper@0 proc public + ; save the target + push eax + + ; EAX <- VASigCookie + mov eax, [esp + 8] ; skip target and retaddr + + mov eax, [eax + VASigCookie__StubOffset] + test eax, eax + + jz GoCallCalliWorker + ; --------------------------------------- + + push eax + + ; stack layout at this point: + ; + ; | ... | + ; | stack arguments | ESP + 16 + ; +----------------------+ + ; | VASigCookie* | ESP + 12 + ; +----------------------+ + ; | return address | ESP + 8 + ; +----------------------+ + ; | CALLI target address | ESP + 4 + ; +----------------------+ + ; | stub entry point | ESP + 0 + ; ------------------------ + + ; remove VASigCookie from the stack + mov eax, [esp + 8] + mov [esp + 12], eax + + ; move stub entry point below the RA + mov eax, [esp] + mov [esp + 8], eax + + ; load EAX with the target address + pop eax + pop eax + + ; stack layout at this point: + ; + ; | ... 
| + ; | stack arguments | ESP + 8 + ; +----------------------+ + ; | return address | ESP + 4 + ; +----------------------+ + ; | stub entry point | ESP + 0 + ; ------------------------ + + ; CALLI target address is in EAX + ret + +GoCallCalliWorker: + ; the target is on the stack and will become m_Datum of PInvokeCalliFrame + ; call the stub generating worker + pop eax + + ; + ; target ptr in EAX, VASigCookie ptr in EDX + ; + + STUB_PROLOG + + mov esi, esp + + ; save target + push eax + + push eax ; unmanaged target + push dword ptr [esi + 4*7] ; pVaSigCookie (first stack argument) + push esi ; pTransitionBlock + + call _GenericPInvokeCalliStubWorker@12 + + ; restore target + pop eax + + STUB_EPILOG + + ; jump back to the helper - this time it won't come back here as the stub already exists + jmp _GenericPInvokeCalliHelper@0 + +_GenericPInvokeCalliHelper@0 endp + +ifdef MDA_SUPPORTED + +;========================================================================== +; Invoked from on-the-fly generated stubs when the stack imbalance MDA is +; enabled. The common low-level work for both direct P/Invoke and unmanaged +; delegate P/Invoke happens here. PInvokeStackImbalanceWorker is where the +; actual imbalance check is implemented. +; [ESP + 4] - the StackImbalanceCookie +; [EBP + 8] - stack arguments (EBP frame pushed by the calling stub) +; +_PInvokeStackImbalanceHelper@0 proc public + ; StackImbalanceCookie to EBX + push ebx + lea ebx, [esp + 8] + + push esi + push edi + + ; copy stack args + mov edx, ecx + mov ecx, [ebx + StackImbalanceCookie__m_dwStackArgSize] + sub esp, ecx + + shr ecx, 2 + lea edi, [esp] + lea esi, [ebp + 8] + + cld + rep movsd + + ; record pre-call ESP + mov [ebx + StackImbalanceCookie__m_dwSavedEsp], esp + + ; call the target (restore ECX in case it's a thiscall) + mov ecx, edx + call [ebx + StackImbalanceCookie__m_pTarget] + + ; record post-call ESP and restore ESP to pre-pushed state + mov ecx, esp + lea esp, [ebp - SIZEOF_StackImbalanceCookie - 16] ; 4 DWORDs and the cookie have been pushed + + ; save return value + push eax + push edx + sub esp, 12 + +.errnz (StackImbalanceCookie__HAS_FP_RETURN_VALUE AND 00ffffffh), HAS_FP_RETURN_VALUE has changed - update asm code + + ; save top of the floating point stack if the target has FP retval + test byte ptr [ebx + StackImbalanceCookie__m_callConv + 3], (StackImbalanceCookie__HAS_FP_RETURN_VALUE SHR 24) + jz noFPURetVal + fstp tbyte ptr [esp] ; save full 10 bytes to avoid precision loss +noFPURetVal: + + ; call PInvokeStackImbalanceWorker(StackImbalanceCookie *pSICookie, DWORD dwPostESP) + push ecx + push ebx + call _PInvokeStackImbalanceWorker@8 + + ; restore return value + test byte ptr [ebx + StackImbalanceCookie__m_callConv + 3], (StackImbalanceCookie__HAS_FP_RETURN_VALUE SHR 24) + jz noFPURetValToRestore + fld tbyte ptr [esp] +noFPURetValToRestore: + + add esp, 12 + pop edx + pop eax + + ; restore registers + pop edi + pop esi + + pop ebx + + ; EBP frame and original stack arguments will be removed by the caller + ret +_PInvokeStackImbalanceHelper@0 endp + +endif ; MDA_SUPPORTED + +ifdef FEATURE_COMINTEROP + +;========================================================================== +; This is a fast alternative to CallDescr* tailored specifically for +; COM to CLR calls. Stack arguments don't come in a continuous buffer +; and secret argument can be passed in EAX. 
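The non-trivial part of the helper below is CopyStackLoop: pOutputStackOffsets maps each outgoing stack slot to a byte offset in the incoming COM frame, and slots are pushed from the last one down to slot 0 so that slot 0 ends up nearest ESP, where the managed callee expects its first stack argument. A C++ sketch of that copy, using the Windows typedefs the surrounding code already relies on (illustration only, not the actual implementation):

// Sketch of the CopyStackLoop semantics in COMToCLRDispatchHelper.
void CopyStackArgsSketch(INT_PTR* pOutputStackTop,           // simulated ESP, grows downward
                         const INT_PTR* pInputStack,
                         const UINT16* pOutputStackOffsets,
                         WORD wOutputStackSlots)
{
    for (int i = (int)wOutputStackSlots - 1; i >= 0; i--)
    {
        // "movzx ebx, word ptr [edi + 2*eax]" -- load the byte offset for slot i
        const BYTE* pSrc = (const BYTE*)pInputStack + pOutputStackOffsets[i];
        // "push [esi + ebx]" -- copy the incoming value into the outgoing slot
        *--pOutputStackTop = *(const INT_PTR*)pSrc;
    }
}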
+; + +; extern "C" ARG_SLOT __fastcall COMToCLRDispatchHelper( +; INT_PTR dwArgECX, ; ecx +; INT_PTR dwArgEDX, ; edx +; PCODE pTarget, ; [esp + 4] +; PCODE pSecretArg, ; [esp + 8] +; INT_PTR *pInputStack, ; [esp + c] +; WORD wOutputStackSlots, ; [esp +10] +; UINT16 *pOutputStackOffsets, ; [esp +14] +; Frame *pCurFrame); ; [esp +18] + +FASTCALL_FUNC COMToCLRDispatchHelper, 32 + + ; ecx: dwArgECX + ; edx: dwArgEDX + + offset_pTarget equ 4 + offset_pSecretArg equ 8 + offset_pInputStack equ 0Ch + offset_wOutputStackSlots equ 10h + offset_pOutputStackOffsets equ 14h + offset_pCurFrame equ 18h + + movzx eax, word ptr [esp + offset_wOutputStackSlots] + test eax, eax + jnz CopyStackArgs + + ; There are no stack args to copy and ECX and EDX are already setup + ; with the correct arguments for the callee, so we just have to + ; push the CPFH and make the call. + + PUSH_CPFH_FOR_COM eax, esp, offset_pCurFrame ; trashes eax + + mov eax, [esp + offset_pSecretArg + CPFH_STACK_SIZE] + call [esp + offset_pTarget + CPFH_STACK_SIZE] +ifdef _DEBUG + nop ; This is a tag that we use in an assert. +endif + + POP_CPFH_FOR_COM ecx ; trashes ecx + + ret 18h + + +CopyStackArgs: + ; eax: num stack slots + ; ecx: dwArgECX + ; edx: dwArgEDX + + push ebp + mov ebp, esp + push ebx + push esi + push edi + + ebpFrame_adjust equ 4h + ebp_offset_pCurFrame equ ebpFrame_adjust + offset_pCurFrame + + PUSH_CPFH_FOR_COM ebx, ebp, ebp_offset_pCurFrame ; trashes ebx + + mov edi, [ebp + ebpFrame_adjust + offset_pOutputStackOffsets] + mov esi, [ebp + ebpFrame_adjust + offset_pInputStack] + + ; eax: num stack slots + ; ecx: dwArgECX + ; edx: dwArgEDX + ; edi: pOutputStackOffsets + ; esi: pInputStack + +CopyStackLoop: + dec eax + movzx ebx, word ptr [edi + 2 * eax] ; ebx <- input stack offset + push [esi + ebx] ; stack <- value on the input stack + jnz CopyStackLoop + + ; ECX and EDX are setup with the correct arguments for the callee, + ; and we've copied the stack arguments over as well, so now it's + ; time to make the call. + + mov eax, [ebp + ebpFrame_adjust + offset_pSecretArg] + call [ebp + ebpFrame_adjust + offset_pTarget] +ifdef _DEBUG + nop ; This is a tag that we use in an assert. +endif + + POP_CPFH_FOR_COM ecx ; trashes ecx + + pop edi + pop esi + pop ebx + pop ebp + + ret 18h + +FASTCALL_ENDFUNC + +endif ; FEATURE_COMINTEROP + +ifndef FEATURE_CORECLR + +;========================================================================== +; This is small stub whose purpose is to record current stack pointer and +; call CopyCtorCallStubWorker to invoke copy constructors and destructors +; as appropriate. This stub operates on arguments already pushed to the +; stack by JITted IL stub and must not create a new frame, i.e. it must tail +; call to the target for it to see the arguments that copy ctors have been +; called on. 
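Put differently, the worker receives a pointer to the argument area the caller already pushed, runs the pending copy constructors and destructors in place, and returns the real target, which the stub then reaches with a jmp so the target sees the untouched stack. A rough C++ rendering of that contract; the worker's signature here is inferred from the @4 stdcall decoration and the "jmp eax" below, not taken from its actual declaration:

// Inferred contract for the stub below (sketch, not the real declaration):
// fix up the already-pushed arguments, then return the address to tail-call.
extern "C" LPVOID __stdcall CopyCtorCallStubWorker(LPVOID pStackArgs);

// Pseudo-flow of _CopyCtorCallStub:
//   1. preserve ECX (it may carry a thiscall 'this' pointer)
//   2. LPVOID target = CopyCtorCallStubWorker(&first pushed argument);
//   3. restore ECX and jmp target   // no new frame, arguments left in place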
+; +_CopyCtorCallStub@0 proc public + ; there may be an argument in ecx - save it + push ecx + + ; push pointer to arguments + lea edx, [esp + 8] + push edx + + call _CopyCtorCallStubWorker@4 + + ; restore ecx and tail call to the target + pop ecx + jmp eax +_CopyCtorCallStub@0 endp + +endif ; !FEATURE_CORECLR + +ifdef FEATURE_PREJIT + +;========================================================================== +_StubDispatchFixupStub@0 proc public + + STUB_PROLOG + + mov esi, esp + + push 0 + push 0 + + push eax ; siteAddrForRegisterIndirect (for tailcalls) + push esi ; pTransitionBlock + + call _StubDispatchFixupWorker@16 + + STUB_EPILOG + +_StubDispatchFixupPatchLabel@0: +public _StubDispatchFixupPatchLabel@0 + + ; Tailcall target + jmp eax + + ; This will never be executed. It is just to help out stack-walking logic + ; which disassembles the epilog to unwind the stack. + ret + +_StubDispatchFixupStub@0 endp + +;========================================================================== +_ExternalMethodFixupStub@0 proc public + + pop eax ; pop off the return address to the stub + ; leaving the actual caller's return address on top of the stack + + STUB_PROLOG + + mov esi, esp + + ; EAX is return address into CORCOMPILE_EXTERNAL_METHOD_THUNK. Subtract 5 to get start address. + sub eax, 5 + + push 0 + push 0 + + push eax + + ; pTransitionBlock + push esi + + call _ExternalMethodFixupWorker@16 + + ; eax now contains replacement stub. PreStubWorker will never return + ; NULL (it throws an exception if stub creation fails.) + + ; From here on, mustn't trash eax + + STUB_EPILOG + +_ExternalMethodFixupPatchLabel@0: +public _ExternalMethodFixupPatchLabel@0 + + ; Tailcall target + jmp eax + + ; This will never be executed. It is just to help out stack-walking logic + ; which disassembles the epilog to unwind the stack. + ret + +_ExternalMethodFixupStub@0 endp + +ifdef FEATURE_READYTORUN +;========================================================================== +_DelayLoad_MethodCall@0 proc public + + STUB_PROLOG_2_HIDDEN_ARGS + + mov esi, esp + + push ecx + push edx + + push eax + + ; pTransitionBlock + push esi + + call _ExternalMethodFixupWorker@16 + + ; eax now contains replacement stub. PreStubWorker will never return + ; NULL (it throws an exception if stub creation fails.) + + ; From here on, mustn't trash eax + + STUB_EPILOG + + ; Share the patch label + jmp _ExternalMethodFixupPatchLabel@0 + + ; This will never be executed. It is just to help out stack-walking logic + ; which disassembles the epilog to unwind the stack. + ret + +_DelayLoad_MethodCall@0 endp +endif + +;======================================================================================= +; The call in softbound vtable slots initially points to this function. +; The pupose of this function is to transfer the control to right target and +; to optionally patch the target of the jump so that we do not take this slow path again. +; +_VirtualMethodFixupStub@0 proc public + + pop eax ; Pop the return address. It points right after the call instruction in the thunk. 
+ sub eax,5 ; Calculate the address of the thunk + + ; Push ebp frame to get good callstack under debugger + push ebp + mov ebp, esp + + ; Preserve argument registers + push ecx + push edx + + push eax ; address of the thunk + push ecx ; this ptr + call _VirtualMethodFixupWorker@8 + + ; Restore argument registers + pop edx + pop ecx + + ; Pop ebp frame + pop ebp + +_VirtualMethodFixupPatchLabel@0: +public _VirtualMethodFixupPatchLabel@0 + + ; Proceed to execute the actual method. + jmp eax + + ; This will never be executed. It is just to help out stack-walking logic + ; which disassembles the epilog to unwind the stack. + ret + +_VirtualMethodFixupStub@0 endp + +endif ; FEATURE_PREJIT + +;========================================================================== +; The prestub +_ThePreStub@0 proc public + + STUB_PROLOG + + mov esi, esp + + ; EAX contains MethodDesc* from the precode. Push it here as argument + ; for PreStubWorker + push eax + + push esi + + call _PreStubWorker@8 + + ; eax now contains replacement stub. PreStubWorker will never return + ; NULL (it throws an exception if stub creation fails.) + + ; From here on, mustn't trash eax + + STUB_EPILOG + + ; Tailcall target + jmp eax + + ; This will never be executed. It is just to help out stack-walking logic + ; which disassembles the epilog to unwind the stack. + ret + +_ThePreStub@0 endp + +; This method does nothing. It's just a fixed function for the debugger to put a breakpoint +; on so that it can trace a call target. +_ThePreStubPatch@0 proc public + ; make sure that the basic block is unique + test eax,34 +_ThePreStubPatchLabel@0: +public _ThePreStubPatchLabel@0 + ret +_ThePreStubPatch@0 endp + +ifdef FEATURE_COMINTEROP +;========================================================================== +; CLR -> COM generic or late-bound call +_GenericComPlusCallStub@0 proc public + + STUB_PROLOG + + ; pTransitionBlock + mov esi, esp + + ; return value + sub esp, 8 + + ; save pMD + mov ebx, eax + + push eax ; pMD + push esi ; pTransitionBlock + call _CLRToCOMWorker@8 + + push eax + call _setFPReturn@12 ; pop & set the return value + + ; From here on, mustn't trash eax:edx + + ; Get pComPlusCallInfo for return thunk + mov ecx, [ebx + ComPlusCallMethodDesc__m_pComPlusCallInfo] + + STUB_EPILOG_RETURN + + ; Tailcall return thunk + jmp [ecx + ComPlusCallInfo__m_pRetThunk] + + ; This will never be executed. It is just to help out stack-walking logic + ; which disassembles the epilog to unwind the stack. 
+ ret + +_GenericComPlusCallStub@0 endp +endif ; FEATURE_COMINTEROP + +ifdef FEATURE_REMOTING +_TransparentProxyStub@0 proc public + ; push slot passed in eax + push eax + + ; Move into eax the stub data and call the stub + mov eax, [ecx + TransparentProxyObject___stubData] + call [ecx + TransparentProxyObject___stub] +ifdef _DEBUG + nop ; Mark this as a special call site that can directly + ; call managed code +endif + test eax, eax + jnz CtxMismatch2 + + mov eax, [ecx + TransparentProxyObject___pMT] + + push ebx ; spill EBX + + ; Convert the slot number into the code address + ; See MethodTable.h for details on vtable layout + + mov ebx, [esp + 4] ; Reload the slot + shr ebx, ASM__VTABLE_SLOTS_PER_CHUNK_LOG2 ; indirectionSlotNumber + + mov eax,[eax + ebx*4 + SIZEOF_MethodTable] + + mov ebx, [esp + 4] ; use unchanged slot from above + and ebx, ASM__VTABLE_SLOTS_PER_CHUNK-1 ; offsetInChunk + mov eax, [eax + ebx*4] + + ; At this point, eax contains the code address + + ; Restore EBX + pop ebx + + ; Remove the slot number from the stack + lea esp, [esp+4] + + jmp eax + + ; CONTEXT MISMATCH CASE, call out to the real proxy to dispatch + +CtxMismatch2: + pop eax ; restore MethodDesc * + jmp _TransparentProxyStub_CrossContext@0 ; jump to slow TP stub + +_TransparentProxyStub@0 endp + +_TransparentProxyStub_CrossContext@0 proc public + + STUB_PROLOG + + ; pTransitionBlock + mov esi, esp + + ; return value + sub esp, 3*4 ; 64-bit return value + cb stack pop + + push eax ; pMD + push esi ; pTransitionBlock + call _TransparentProxyStubWorker@8 + + pop ebx ; cbStackPop + + push eax + call _setFPReturn@12 ; pop & set the return value + + ; From here on, mustn't trash eax:edx + mov ecx, ebx ; cbStackPop + + mov ebx, [esp+6*4] ; get retaddr + mov [esp+6*4+ecx], ebx ; put it where it belongs + + STUB_EPILOG_RETURN + + add esp, ecx ; pop all the args + ret + +_TransparentProxyStub_CrossContext@0 endp + +; This method does nothing. It's just a fixed function for the debugger to put a breakpoint +; on so that it can trace a call target. +_TransparentProxyStubPatch@0 proc public + ; make sure that the basic block is unique + test eax,12 +_TransparentProxyStubPatchLabel@0: +public _TransparentProxyStubPatchLabel@0 + ret +_TransparentProxyStubPatch@0 endp + +endif ; FEATURE_REMOTING + +ifdef FEATURE_COMINTEROP +;-------------------------------------------------------------------------- +; This is the code that all com call method stubs run initially. +; Most of the real work occurs in ComStubWorker(), a C++ routine. +; The template only does the part that absolutely has to be in assembly +; language. +;-------------------------------------------------------------------------- +_ComCallPreStub@0 proc public + pop eax ;ComCallMethodDesc* + + ; push ebp-frame + push ebp + mov ebp,esp + + ; save CalleeSavedRegisters + push ebx + push esi + push edi + + push eax ; ComCallMethodDesc* + sub esp, 5*4 ; next, vtable, gscookie, 64-bit error return + + lea edi, [esp] + lea esi, [esp+3*4] + + push edi ; pErrorReturn + push esi ; pFrame + call _ComPreStubWorker@8 + + ; eax now contains replacement stub. ComStubWorker will return NULL if stub creation fails + cmp eax, 0 + je nostub ; oops we could not create a stub + + add esp, 6*4 + + ; pop CalleeSavedRegisters + pop edi + pop esi + pop ebx + pop ebp + + jmp eax ; Reexecute with replacement stub. + ; We will never get here. 
This "ret" is just so that code-disassembling + ; profilers know to stop disassembling any further + ret + +nostub: + + ; Even though the ComPreStubWorker sets a 64 bit value as the error return code. + ; Only the lower 32 bits contain usefula data. The reason for this is that the + ; possible error return types are: failure HRESULT, 0 and floating point 0. + ; In each case, the data fits in 32 bits. Instead, we use the upper half of + ; the return value to store number of bytes to pop + mov eax, [edi] + mov edx, [edi+4] + + add esp, 6*4 + + ; pop CalleeSavedRegisters + pop edi + pop esi + pop ebx + pop ebp + + pop ecx ; return address + add esp, edx ; pop bytes of the stack + push ecx ; return address + + ; We need to deal with the case where the method is PreserveSig=true and has an 8 + ; byte return type. There are 2 types of 8 byte return types: integer and floating point. + ; For integer 8 byte return types, we always return 0 in case of failure. For floating + ; point return types, we return the value in the floating point register. In both cases + ; edx should be 0. + xor edx, edx ; edx <-- 0 + + ret + +_ComCallPreStub@0 endp +endif ; FEATURE_COMINTEROP + +ifdef FEATURE_READYTORUN +;========================================================================== +; Define helpers for delay loading of readytorun helpers + +DYNAMICHELPER macro frameFlags, suffix + +_DelayLoad_Helper&suffix&@0 proc public + + STUB_PROLOG_2_HIDDEN_ARGS + + mov esi, esp + + push frameFlags + push ecx ; module + push edx ; section index + + push eax ; indirection cell address. + push esi ; pTransitionBlock + + call _DynamicHelperWorker@20 + test eax,eax + jnz @F + + mov eax, [esi] ; The result is stored in the argument area of the transition block + STUB_EPILOG_RETURN + ret + +@@: + STUB_EPILOG + jmp eax + +_DelayLoad_Helper&suffix&@0 endp + + endm + +DYNAMICHELPER DynamicHelperFrameFlags_Default +DYNAMICHELPER DynamicHelperFrameFlags_ObjectArg, _Obj +DYNAMICHELPER , _ObjObj + +endif ; FEATURE_READYTORUN + + end diff --git a/src/vm/i386/cgencpu.h b/src/vm/i386/cgencpu.h new file mode 100644 index 0000000000..2da98821bc --- /dev/null +++ b/src/vm/i386/cgencpu.h @@ -0,0 +1,573 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// CGENX86.H - +// +// Various helper routines for generating x86 assembly code. +// +// DO NOT INCLUDE THIS FILE DIRECTLY - ALWAYS USE CGENSYS.H INSTEAD +// + + + +#ifndef _TARGET_X86_ +#error Should only include "cgenx86.h" for X86 builds +#endif // _TARGET_X86_ + +#ifndef __cgenx86_h__ +#define __cgenx86_h__ + +#include "utilcode.h" + +// Given a return address retrieved during stackwalk, +// this is the offset by which it should be decremented to lend somewhere in a call instruction. 
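A return address produced by a call points at the first byte after the call instruction, so subtracting a single byte is always enough to land back inside the call itself, which is what range-based lookups during a stack walk want. A sketch of how a stack walker applies the constant defined just below (the helper name is illustrative, not an actual VM function):

// Sketch only: map a return address back into the calling instruction so that
// address-range lookups attribute the frame to the call site rather than to
// whatever instruction happens to follow it.
inline TADDR AdjustControlPCForStackwalk(TADDR returnAddress)
{
    return returnAddress - STACKWALK_CONTROLPC_ADJUST_OFFSET;   // returnAddress - 1 on x86
}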
+#define STACKWALK_CONTROLPC_ADJUST_OFFSET 1 + +// preferred alignment for data +#define DATA_ALIGNMENT 4 + +class MethodDesc; +class FramedMethodFrame; +class Module; +class ComCallMethodDesc; +class BaseDomain; + +// CPU-dependent functions +Stub * GenerateInitPInvokeFrameHelper(); + +#ifdef MDA_SUPPORTED +EXTERN_C void STDCALL PInvokeStackImbalanceHelper(void); +#endif // MDA_SUPPORTED + +#ifndef FEATURE_CORECLR +EXTERN_C void STDCALL CopyCtorCallStub(void); +#endif // !FEATURE_CORECLR + +BOOL Runtime_Test_For_SSE2(); + +#ifdef CROSSGEN_COMPILE +#define GetEEFuncEntryPoint(pfn) 0x1001 +#else +#define GetEEFuncEntryPoint(pfn) GFN_TADDR(pfn) +#endif + +//********************************************************************** +// To be used with GetSpecificCpuInfo() + +#define CPU_X86_FAMILY(cpuType) (((cpuType) & 0x0F00) >> 8) +#define CPU_X86_MODEL(cpuType) (((cpuType) & 0x00F0) >> 4) +// Stepping is masked out by GetSpecificCpuInfo() +// #define CPU_X86_STEPPING(cpuType) (((cpuType) & 0x000F) ) + +#define CPU_X86_USE_CMOV(cpuFeat) ((cpuFeat & 0x00008001) == 0x00008001) +#define CPU_X86_USE_SSE2(cpuFeat) (((cpuFeat & 0x04000000) == 0x04000000) && Runtime_Test_For_SSE2()) + +// Values for CPU_X86_FAMILY(cpuType) +#define CPU_X86_486 4 +#define CPU_X86_PENTIUM 5 +#define CPU_X86_PENTIUM_PRO 6 +#define CPU_X86_PENTIUM_4 0xF + +// Values for CPU_X86_MODEL(cpuType) for CPU_X86_PENTIUM_PRO +#define CPU_X86_MODEL_PENTIUM_PRO_BANIAS 9 // Pentium M (Mobile PPro with P4 feautres) + +#define COMMETHOD_PREPAD 8 // # extra bytes to allocate in addition to sizeof(ComCallMethodDesc) +#ifdef FEATURE_COMINTEROP +#define COMMETHOD_CALL_PRESTUB_SIZE 5 // x86: CALL(E8) xx xx xx xx +#define COMMETHOD_CALL_PRESTUB_ADDRESS_OFFSET 1 // the offset of the call target address inside the prestub +#endif // FEATURE_COMINTEROP + +#define STACK_ALIGN_SIZE 4 + +#define JUMP_ALLOCATE_SIZE 8 // # bytes to allocate for a jump instruction +#define BACK_TO_BACK_JUMP_ALLOCATE_SIZE 8 // # bytes to allocate for a back to back jump instruction + +#define HAS_COMPACT_ENTRYPOINTS 1 + +// Needed for PInvoke inlining in ngened images +#define HAS_NDIRECT_IMPORT_PRECODE 1 + +#ifdef FEATURE_REMOTING +#define HAS_REMOTING_PRECODE 1 +#endif +#ifdef FEATURE_PREJIT +#define HAS_FIXUP_PRECODE 1 +#define HAS_FIXUP_PRECODE_CHUNKS 1 +#endif + +// ThisPtrRetBufPrecode one is necessary for closed delegates over static methods with return buffer +#define HAS_THISPTR_RETBUF_PRECODE 1 + +#define CODE_SIZE_ALIGN 4 +#define CACHE_LINE_SIZE 32 // As per Intel Optimization Manual the cache line size is 32 bytes +#define LOG2SLOT LOG2_PTRSIZE + +#define ENREGISTERED_RETURNTYPE_MAXSIZE 8 +#define ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE 4 +#define CALLDESCR_ARGREGS 1 // CallDescrWorker has ArgumentRegister parameter + +// Max size of patched TLS helpers +#ifdef _DEBUG +// Debug build needs extra space for last error trashing +#define TLS_GETTER_MAX_SIZE 0x20 +#else +#define TLS_GETTER_MAX_SIZE 0x10 +#endif + +//======================================================================= +// IMPORTANT: This value is used to figure out how much to allocate +// for a fixed array of FieldMarshaler's. That means it must be at least +// as large as the largest FieldMarshaler subclass. This requirement +// is guarded by an assert. 
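The guard takes roughly the following shape; the subclass name is only a stand-in, and the real checks sit next to the FieldMarshaler definitions rather than in this header:

// Illustrative only; "SomeFieldMarshalerSubclass" stands in for each concrete subclass.
static_assert(sizeof(SomeFieldMarshalerSubclass) <= MAXFIELDMARSHALERSIZE,
              "enlarge MAXFIELDMARSHALERSIZE if a FieldMarshaler subclass grows");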
+//======================================================================= +#define MAXFIELDMARSHALERSIZE 24 + +//********************************************************************** +// Parameter size +//********************************************************************** + +typedef INT32 StackElemType; +#define STACK_ELEM_SIZE sizeof(StackElemType) + + + +#include "stublinkerx86.h" + + + +// !! This expression assumes STACK_ELEM_SIZE is a power of 2. +#define StackElemSize(parmSize) (((parmSize) + STACK_ELEM_SIZE - 1) & ~((ULONG)(STACK_ELEM_SIZE - 1))) + + +//********************************************************************** +// Frames +//********************************************************************** +//-------------------------------------------------------------------- +// This represents some of the FramedMethodFrame fields that are +// stored at negative offsets. +//-------------------------------------------------------------------- +typedef DPTR(struct CalleeSavedRegisters) PTR_CalleeSavedRegisters; +struct CalleeSavedRegisters { + INT32 edi; + INT32 esi; + INT32 ebx; + INT32 ebp; +}; + +//-------------------------------------------------------------------- +// This represents the arguments that are stored in volatile registers. +// This should not overlap the CalleeSavedRegisters since those are already +// saved separately and it would be wasteful to save the same register twice. +// If we do use a non-volatile register as an argument, then the ArgIterator +// will probably have to communicate this back to the PromoteCallerStack +// routine to avoid a double promotion. +//-------------------------------------------------------------------- +#define ENUM_ARGUMENT_REGISTERS() \ + ARGUMENT_REGISTER(ECX) \ + ARGUMENT_REGISTER(EDX) + +#define ENUM_ARGUMENT_REGISTERS_BACKWARD() \ + ARGUMENT_REGISTER(EDX) \ + ARGUMENT_REGISTER(ECX) + +typedef DPTR(struct ArgumentRegisters) PTR_ArgumentRegisters; +struct ArgumentRegisters { + #define ARGUMENT_REGISTER(regname) INT32 regname; + ENUM_ARGUMENT_REGISTERS_BACKWARD() + #undef ARGUMENT_REGISTER +}; +#define NUM_ARGUMENT_REGISTERS 2 + +#define SCRATCH_REGISTER_X86REG kEAX + +#define THIS_REG ECX +#define THIS_kREG kECX + +#define ARGUMENT_REG1 ECX +#define ARGUMENT_REG2 EDX + +// forward decl +struct REGDISPLAY; +typedef REGDISPLAY *PREGDISPLAY; + +// Sufficient context for Try/Catch restoration. 
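Concretely, the structure that follows carries all eight general-purpose registers plus EIP, which is enough to resume execution in a catch or filter handler: Setup captures the state from a REGDISPLAY at the resume point, and SetArg stores a value (such as the exception object) in the EAX slot for the handler to pick up. A usage sketch with an illustrative function and variable names; the real call sites are in the x86 exception handling code:

// Sketch of how an EHContext is typically prepared before resuming into a handler.
inline void PrepareResumeContextSketch(EHContext& ctx, PCODE resumePC,
                                       PREGDISPLAY pRegDisplay, LPVOID pThrowable)
{
    ctx.Init();                        // zero all register slots
    ctx.Setup(resumePC, pRegDisplay);  // capture ESP/EBX/ESI/EDI/EBP, set EIP = resumePC
    ctx.SetArg(pThrowable);            // the handler finds this value in EAX
}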
+struct EHContext { + INT32 Eax; + INT32 Ebx; + INT32 Ecx; + INT32 Edx; + INT32 Esi; + INT32 Edi; + INT32 Ebp; + INT32 Esp; + INT32 Eip; + + void Setup(PCODE resumePC, PREGDISPLAY regs); + void UpdateFrame(PREGDISPLAY regs); + + inline TADDR GetSP() { + LIMITED_METHOD_CONTRACT; + return (TADDR)Esp; + } + inline void SetSP(LPVOID esp) { + LIMITED_METHOD_CONTRACT; + Esp = (INT32)(size_t)esp; + } + + inline LPVOID GetFP() { + LIMITED_METHOD_CONTRACT; + return (LPVOID)(UINT_PTR)Ebp; + } + + inline void SetArg(LPVOID arg) { + LIMITED_METHOD_CONTRACT; + Eax = (INT32)(size_t)arg; + } + + inline void Init() + { + LIMITED_METHOD_CONTRACT; + Eax = 0; + Ebx = 0; + Ecx = 0; + Edx = 0; + Esi = 0; + Edi = 0; + Ebp = 0; + Esp = 0; + Eip = 0; + } +}; + +#define ARGUMENTREGISTERS_SIZE sizeof(ArgumentRegisters) + +//********************************************************************** +// Exception handling +//********************************************************************** + +inline PCODE GetIP(const CONTEXT * context) { + LIMITED_METHOD_DAC_CONTRACT; + + return PCODE(context->Eip); +} + +inline void SetIP(CONTEXT *context, PCODE eip) { + LIMITED_METHOD_DAC_CONTRACT; + + context->Eip = (DWORD)eip; +} + +inline TADDR GetSP(const CONTEXT * context) { + LIMITED_METHOD_DAC_CONTRACT; + + return (TADDR)(context->Esp); +} + +EXTERN_C LPVOID STDCALL GetCurrentSP(); + +inline void SetSP(CONTEXT *context, TADDR esp) { + LIMITED_METHOD_DAC_CONTRACT; + + context->Esp = (DWORD)esp; +} + +inline void SetFP(CONTEXT *context, TADDR ebp) { + LIMITED_METHOD_DAC_CONTRACT; + + context->Ebp = (INT32)ebp; +} + +inline TADDR GetFP(const CONTEXT * context) +{ + LIMITED_METHOD_DAC_CONTRACT; + + return (TADDR)context->Ebp; +} + +// Get Rel32 destination, emit jumpStub if necessary +inline INT32 rel32UsingJumpStub(INT32 UNALIGNED * pRel32, PCODE target, MethodDesc *pMethod = NULL, LoaderAllocator *pLoaderAllocator = NULL) +{ + // We do not need jump stubs on i386 + LIMITED_METHOD_CONTRACT; + + TADDR baseAddr = (TADDR)pRel32 + 4; + return (INT32)(target - baseAddr); +} + +#ifdef FEATURE_COMINTEROP +inline void emitCOMStubCall (ComCallMethodDesc *pCOMMethod, PCODE target) +{ + WRAPPER_NO_CONTRACT; + + BYTE *pBuffer = (BYTE*)pCOMMethod - COMMETHOD_CALL_PRESTUB_SIZE; + + pBuffer[0] = X86_INSTR_CALL_REL32; //CALLNEAR32 + *((LPVOID*)(1+pBuffer)) = (LPVOID) (((LPBYTE)target) - (pBuffer+5)); + + _ASSERTE(IS_ALIGNED(pBuffer + COMMETHOD_CALL_PRESTUB_ADDRESS_OFFSET, sizeof(void*)) && + *((SSIZE_T*)(pBuffer + COMMETHOD_CALL_PRESTUB_ADDRESS_OFFSET)) == ((LPBYTE)target - (LPBYTE)pCOMMethod)); +} +#endif // FEATURE_COMINTEROP + +//------------------------------------------------------------------------ +WORD GetUnpatchedCodeData(LPCBYTE pAddr); + +//------------------------------------------------------------------------ +inline WORD GetUnpatchedOpcodeWORD(LPCBYTE pAddr) +{ + WRAPPER_NO_CONTRACT; + if (CORDebuggerAttached()) + { + return GetUnpatchedCodeData(pAddr); + } + else + { + return *((WORD *)pAddr); + } +} + +//------------------------------------------------------------------------ +inline BYTE GetUnpatchedOpcodeBYTE(LPCBYTE pAddr) +{ + WRAPPER_NO_CONTRACT; + if (CORDebuggerAttached()) + { + return (BYTE) GetUnpatchedCodeData(pAddr); + } + else + { + return *pAddr; + } +} + + //------------------------------------------------------------------------ +// The following must be a distinguishable set of instruction sequences for +// various stub dispatch calls. 
+// +// An x86 JIT which uses full stub dispatch must generate only +// the following stub dispatch calls: +// +// (1) isCallRelativeIndirect: +// call dword ptr [rel32] ; FF 15 ---rel32---- +// (2) isCallRelative: +// call abc ; E8 ---rel32---- +// (3) isCallRegisterIndirect: +// 3-byte nop ; +// call dword ptr [eax] ; FF 10 +// +// NOTE: You must be sure that pRetAddr is a true return address for +// a stub dispatch call. + +BOOL isCallRelativeIndirect(const BYTE *pRetAddr); +BOOL isCallRelative(const BYTE *pRetAddr); +BOOL isCallRegisterIndirect(const BYTE *pRetAddr); + +inline BOOL isCallRelativeIndirect(const BYTE *pRetAddr) +{ + LIMITED_METHOD_CONTRACT; + + BOOL fRet = (GetUnpatchedOpcodeWORD(&pRetAddr[-6]) == X86_INSTR_CALL_IND); + _ASSERTE(!fRet || !isCallRelative(pRetAddr)); + _ASSERTE(!fRet || !isCallRegisterIndirect(pRetAddr)); + return fRet; +} + +inline BOOL isCallRelative(const BYTE *pRetAddr) +{ + LIMITED_METHOD_CONTRACT; + + BOOL fRet = (GetUnpatchedOpcodeBYTE(&pRetAddr[-5]) == X86_INSTR_CALL_REL32); + _ASSERTE(!fRet || !isCallRelativeIndirect(pRetAddr)); + _ASSERTE(!fRet || !isCallRegisterIndirect(pRetAddr)); + return fRet; +} + +inline BOOL isCallRegisterIndirect(const BYTE *pRetAddr) +{ + LIMITED_METHOD_CONTRACT; + + BOOL fRet = (GetUnpatchedOpcodeWORD(&pRetAddr[-5]) == X86_INSTR_NOP3_1) + && (GetUnpatchedOpcodeBYTE(&pRetAddr[-3]) == X86_INSTR_NOP3_3) + && (GetUnpatchedOpcodeWORD(&pRetAddr[-2]) == X86_INSTR_CALL_IND_EAX); + _ASSERTE(!fRet || !isCallRelative(pRetAddr)); + _ASSERTE(!fRet || !isCallRelativeIndirect(pRetAddr)); + return fRet; +} + +//------------------------------------------------------------------------ +inline void emitJump(LPBYTE pBuffer, LPVOID target) +{ + LIMITED_METHOD_CONTRACT; + + pBuffer[0] = X86_INSTR_JMP_REL32; //JUMPNEAR32 + *((LPVOID*)(1+pBuffer)) = (LPVOID) (((LPBYTE)target) - (pBuffer+5)); +} + +//------------------------------------------------------------------------ +inline void emitJumpInd(LPBYTE pBuffer, LPVOID target) +{ + LIMITED_METHOD_CONTRACT; + + *((WORD*)pBuffer) = X86_INSTR_JMP_IND; // 0x25FF jmp dword ptr[addr32] + *((LPVOID*)(2+pBuffer)) = target; +} + +//------------------------------------------------------------------------ +inline PCODE isJump(PCODE pCode) +{ + LIMITED_METHOD_DAC_CONTRACT; + return *PTR_BYTE(pCode) == X86_INSTR_JMP_REL32; +} + +//------------------------------------------------------------------------ +// Given the same pBuffer that was used by emitJump this method +// decodes the instructions and returns the jump target +inline PCODE decodeJump(PCODE pCode) +{ + LIMITED_METHOD_DAC_CONTRACT; + CONSISTENCY_CHECK(*PTR_BYTE(pCode) == X86_INSTR_JMP_REL32); + return rel32Decode(pCode+1); +} + +// +// On IA64 back to back jumps should be separated by a nop bundle to get +// the best performance from the hardware's branch prediction logic. 
+// For all other platforms back to back jumps don't require anything special +// That is why we have these two wrapper functions that call emitJump and decodeJump +// + +//------------------------------------------------------------------------ +inline void emitBackToBackJump(LPBYTE pBuffer, LPVOID target) +{ + WRAPPER_NO_CONTRACT; + emitJump(pBuffer, target); +} + +//------------------------------------------------------------------------ +inline PCODE isBackToBackJump(PCODE pBuffer) +{ + WRAPPER_NO_CONTRACT; + SUPPORTS_DAC; + return isJump(pBuffer); +} + +//------------------------------------------------------------------------ +inline PCODE decodeBackToBackJump(PCODE pBuffer) +{ + WRAPPER_NO_CONTRACT; + SUPPORTS_DAC; + return decodeJump(pBuffer); +} + +EXTERN_C void __stdcall setFPReturn(int fpSize, INT64 retVal); +EXTERN_C void __stdcall getFPReturn(int fpSize, INT64 *pretval); + + +// SEH info forward declarations + +inline BOOL IsUnmanagedValueTypeReturnedByRef(UINT sizeofvaluetype) +{ + LIMITED_METHOD_CONTRACT; + + // odd-sized small structures are not + // enregistered e.g. struct { char a,b,c; } + return (sizeofvaluetype > 8) || + (sizeofvaluetype & (sizeofvaluetype - 1)); // check that the size is power of two +} + +#include +DECLSPEC_ALIGN(4) struct UMEntryThunkCode +{ + BYTE m_alignpad[2]; // used to guarantee alignment of backpactched portion + BYTE m_movEAX; //MOV EAX,imm32 + LPVOID m_uet; // pointer to start of this structure + BYTE m_jmp; //JMP NEAR32 + const BYTE * m_execstub; // pointer to destination code // make sure the backpatched portion is dword aligned. + + void Encode(BYTE* pTargetCode, void* pvSecretParam); + + LPCBYTE GetEntryPoint() const + { + LIMITED_METHOD_CONTRACT; + + return (LPCBYTE)&m_movEAX; + } + + static int GetEntryPointOffset() + { + LIMITED_METHOD_CONTRACT; + + return 2; + } +}; +#include + +struct HijackArgs +{ + DWORD FPUState[3]; // 12 bytes for FPU state (10 bytes for FP top-of-stack + 2 bytes padding) + DWORD Edi; + DWORD Esi; + DWORD Ebx; + DWORD Edx; + DWORD Ecx; + union + { + DWORD Eax; + size_t ReturnValue[1]; + }; + DWORD Ebp; + union + { + DWORD Eip; + size_t ReturnAddress; + }; +}; + +// ClrFlushInstructionCache is used when we want to call FlushInstructionCache +// for a specific architecture in the common code, but not for other architectures. +// On IA64 ClrFlushInstructionCache calls the Kernel FlushInstructionCache function +// to flush the instruction cache. +// We call ClrFlushInstructionCache whenever we create or modify code in the heap. +// Currently ClrFlushInstructionCache has no effect on X86 +// + +inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode) +{ + // FlushInstructionCache(GetCurrentProcess(), pCodeAddr, sizeOfCode); + MemoryBarrier(); + return TRUE; +} + +#ifndef FEATURE_IMPLICIT_TLS +// +// JIT HELPER ALIASING FOR PORTABILITY. 
+// +// Create alias for optimized implementations of helpers provided on this platform +// + +#define JIT_MonEnter JIT_MonEnterWorker +#define JIT_MonEnterWorker JIT_MonEnterWorker +#define JIT_MonReliableEnter JIT_MonReliableEnter +#define JIT_MonTryEnter JIT_MonTryEnter +#define JIT_MonExit JIT_MonExitWorker +#define JIT_MonExitWorker JIT_MonExitWorker +#define JIT_MonEnterStatic JIT_MonEnterStatic +#define JIT_MonExitStatic JIT_MonExitStatic + +#endif + +// optimized static helpers generated dynamically at runtime +// #define JIT_GetSharedGCStaticBase +// #define JIT_GetSharedNonGCStaticBase +// #define JIT_GetSharedGCStaticBaseNoCtor +// #define JIT_GetSharedNonGCStaticBaseNoCtor + +#define JIT_ChkCastClass JIT_ChkCastClass +#define JIT_ChkCastClassSpecial JIT_ChkCastClassSpecial +#define JIT_IsInstanceOfClass JIT_IsInstanceOfClass +#define JIT_ChkCastInterface JIT_ChkCastInterface +#define JIT_IsInstanceOfInterface JIT_IsInstanceOfInterface +#define JIT_NewCrossContext JIT_NewCrossContext +#define JIT_Stelem_Ref JIT_Stelem_Ref + +#endif // __cgenx86_h__ diff --git a/src/vm/i386/cgenx86.cpp b/src/vm/i386/cgenx86.cpp new file mode 100644 index 0000000000..ff2f2df5a3 --- /dev/null +++ b/src/vm/i386/cgenx86.cpp @@ -0,0 +1,2257 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// CGENX86.CPP - +// +// Various helper routines for generating x86 assembly code. +// +// + +// Precompiled Header + +#include "common.h" + +#include "field.h" +#include "stublink.h" +#include "cgensys.h" +#include "frames.h" +#include "excep.h" +#include "dllimport.h" +#include "comdelegate.h" +#include "log.h" +#include "security.h" +#include "comdelegate.h" +#include "array.h" +#include "jitinterface.h" +#include "codeman.h" +#ifdef FEATURE_REMOTING +#include "remoting.h" +#endif +#include "dbginterface.h" +#include "eeprofinterfaces.h" +#include "eeconfig.h" +#include "asmconstants.h" +#include "class.h" +#include "virtualcallstub.h" +#include "mdaassistants.h" +#include "jitinterface.h" + +#ifdef FEATURE_COMINTEROP +#include "comtoclrcall.h" +#include "runtimecallablewrapper.h" +#include "comcache.h" +#include "olevariant.h" +#endif // FEATURE_COMINTEROP + +#ifdef FEATURE_PREJIT +#include "compile.h" +#endif + +#include "stublink.inl" + +extern "C" DWORD STDCALL GetSpecificCpuTypeAsm(void); +extern "C" DWORD STDCALL GetSpecificCpuFeaturesAsm(DWORD *pInfo); + +// NOTE on Frame Size C_ASSERT usage in this file +// if the frame size changes then the stubs have to be revisited for correctness +// kindly revist the logic and then update the constants so that the C_ASSERT will again fire +// if someone changes the frame size. 
You are expected to keep this hard coded constant +// up to date so that changes in the frame size trigger errors at compile time if the code is not altered + +void generate_noref_copy (unsigned nbytes, StubLinkerCPU* sl); + +#ifndef DACCESS_COMPILE + +//============================================================================= +// Runtime test to see if the OS has enabled support for the SSE2 instructions +// +// +BOOL Runtime_Test_For_SSE2() +{ +#ifdef FEATURE_CORESYSTEM + return TRUE; +#else + + BOOL result = IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE); + + if (result == FALSE) + return FALSE; + + // ********************************************************************** + // *** *** + // *** IMPORTANT NOTE: *** + // *** *** + // *** All of these RunningOnXXX APIs return true when *** + // *** the OS that you are running on is that OS or later. *** + // *** For example RunningOnWin2003() will return true *** + // *** when you are running on Win2k3, Vista, Win7 or later. *** + // *** *** + // ********************************************************************** + + + // Windows 7 and later should alwys be using SSE2 instructions + // this is true for both for native and Wow64 + // + if (RunningOnWin7()) + return TRUE; + + if (RunningInWow64()) + { + // There is an issue with saving/restoring the SSE2 registers under wow64 + // So we figure out if we are running on an impacted OS and Service Pack level + // See DevDiv Bugs 89587 for the wow64 bug. + // + + _ASSERTE(ExOSInfoAvailable()); // This is always available on Vista and later + + // + // The issue is fixed in Windows Server 2008 or Vista/SP1 + // + // It is not fixed in Vista/RTM, so check for that case + // + if ((ExOSInfoRunningOnServer() == FALSE)) + { + OSVERSIONINFOEX osvi; + + ZeroMemory(&osvi, sizeof(OSVERSIONINFOEX)); + osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX); + osvi.wServicePackMajor = 0; + + DWORDLONG dwlConditionMask = 0; + VER_SET_CONDITION( dwlConditionMask, CLR_VER_SERVICEPACKMAJOR, VER_EQUAL); + + if (VerifyVersionInfo(&osvi, CLR_VER_SERVICEPACKMAJOR, dwlConditionMask)) + result = FALSE; + } + } + + return result; +#endif +} + +//--------------------------------------------------------------- +// Returns the type of CPU (the value of x of x86) +// (Please note, that it returns 6 for P5-II) +//--------------------------------------------------------------- +void GetSpecificCpuInfo(CORINFO_CPU * cpuInfo) +{ + LIMITED_METHOD_CONTRACT; + + static CORINFO_CPU val = { 0, 0, 0 }; + + if (val.dwCPUType) + { + *cpuInfo = val; + return; + } + + CORINFO_CPU tempVal; + tempVal.dwCPUType = GetSpecificCpuTypeAsm(); // written in ASM & doesn't participate in contracts + _ASSERTE(tempVal.dwCPUType); + +#ifdef _DEBUG + { + SO_NOT_MAINLINE_REGION(); + + /* Set Family+Model+Stepping string (eg., x690 for Banias, or xF30 for P4 Prescott) + * instead of Family only + */ + + const DWORD cpuDefault = 0xFFFFFFFF; + static ConfigDWORD cpuFamily; + DWORD configCpuFamily = cpuFamily.val_DontUse_(CLRConfig::INTERNAL_CPUFamily, cpuDefault); + if (configCpuFamily != cpuDefault) + { + assert((configCpuFamily & 0xFFF) == configCpuFamily); + tempVal.dwCPUType = (tempVal.dwCPUType & 0xFFFF0000) | configCpuFamily; + } + } +#endif + + tempVal.dwFeatures = GetSpecificCpuFeaturesAsm(&tempVal.dwExtendedFeatures); // written in ASM & doesn't participate in contracts + +#ifdef _DEBUG + { + SO_NOT_MAINLINE_REGION(); + + /* Set the 32-bit feature mask + */ + + const DWORD cpuFeaturesDefault = 0xFFFFFFFF; + static ConfigDWORD 
cpuFeatures; + DWORD configCpuFeatures = cpuFeatures.val_DontUse_(CLRConfig::INTERNAL_CPUFeatures, cpuFeaturesDefault); + if (configCpuFeatures != cpuFeaturesDefault) + { + tempVal.dwFeatures = configCpuFeatures; + } + } +#endif + + val = *cpuInfo = tempVal; +} + +#endif // #ifndef DACCESS_COMPILE + + +//--------------------------------------------------------------------------------------- +// +// Initialize the EHContext using the resume PC and the REGDISPLAY. The EHContext is currently used in two +// scenarios: to store the register state before calling an EH clause, and to retrieve the ambient SP of a +// particular stack frame. resumePC means different things in the two scenarios. In the former case, it +// is the IP at which we are going to resume execution when we call an EH clause. In the latter case, it +// is just the current IP. +// +// Arguments: +// resumePC - refer to the comment above +// regs - This is the REGDISPLAY obtained from the CrawlFrame used in the stackwalk. It represents the +// stack frame of the method containing the EH clause we are about to call. For getting the +// ambient SP, this is the stack frame we are interested in. +// + +void EHContext::Setup(PCODE resumePC, PREGDISPLAY regs) +{ + LIMITED_METHOD_DAC_CONTRACT; + + // EAX ECX EDX are scratch + this->Esp = regs->Esp; + this->Ebx = *regs->pEbx; + this->Esi = *regs->pEsi; + this->Edi = *regs->pEdi; + this->Ebp = *regs->pEbp; + + this->Eip = (ULONG)(size_t)resumePC; +} + +// +// Update the registers using new context +// +// This is necessary to reflect GC pointer changes during the middle of a unwind inside a +// finally clause, because: +// 1. GC won't see the part of stack inside try (which has thrown an exception) that is already +// unwinded and thus GC won't update GC pointers for this portion of the stack, but rather the +// call stack in finally. +// 2. upon return of finally, the unwind process continues and unwinds stack based on the part +// of stack inside try and won't see the updated values in finally. +// As a result, we need to manually update the context using register values upon return of finally +// +// Note that we only update the registers for finally clause because +// 1. For filter handlers, stack walker is able to see the whole stack (including the try part) +// with the help of ExceptionFilterFrame as filter handlers are called in first pass +// 2. For catch handlers, the current unwinding is already finished +// +void EHContext::UpdateFrame(PREGDISPLAY regs) +{ + LIMITED_METHOD_CONTRACT; + + // EAX ECX EDX are scratch. 
+ // No need to update ESP as unwinder takes care of that for us + + LOG((LF_EH, LL_INFO1000, "Updating saved EBX: *%p= %p\n", regs->pEbx, this->Ebx)); + LOG((LF_EH, LL_INFO1000, "Updating saved ESI: *%p= %p\n", regs->pEsi, this->Esi)); + LOG((LF_EH, LL_INFO1000, "Updating saved EDI: *%p= %p\n", regs->pEdi, this->Edi)); + LOG((LF_EH, LL_INFO1000, "Updating saved EBP: *%p= %p\n", regs->pEbp, this->Ebp)); + + *regs->pEbx = this->Ebx; + *regs->pEsi = this->Esi; + *regs->pEdi = this->Edi; + *regs->pEbp = this->Ebp; +} + +void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD) +{ + CONTRACT_VOID + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + HOST_NOCALLS; + SUPPORTS_DAC; + } + CONTRACT_END; + + ENABLE_FORBID_GC_LOADER_USE_IN_THIS_SCOPE(); + + MethodDesc * pFunc = GetFunction(); + _ASSERTE(pFunc != NULL); + UpdateRegDisplayHelper(pRD, pFunc->CbStackPop()); + + RETURN; +} + +void TransitionFrame::UpdateRegDisplayHelper(const PREGDISPLAY pRD, UINT cbStackPop) +{ + CONTRACT_VOID + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + HOST_NOCALLS; + SUPPORTS_DAC; + } + CONTRACT_END; + + CalleeSavedRegisters* regs = GetCalleeSavedRegisters(); + + // reset pContext; it's only valid for active (top-most) frame + + pRD->pContext = NULL; + + pRD->pEdi = (DWORD*) ®s->edi; + pRD->pEsi = (DWORD*) ®s->esi; + pRD->pEbx = (DWORD*) ®s->ebx; + pRD->pEbp = (DWORD*) ®s->ebp; + pRD->PCTAddr = GetReturnAddressPtr(); + pRD->ControlPC = *PTR_PCODE(pRD->PCTAddr); + pRD->Esp = (DWORD)(pRD->PCTAddr + sizeof(TADDR) + cbStackPop); + + RETURN; +} + +void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD) +{ + CONTRACT_VOID + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + HOST_NOCALLS; + PRECONDITION(m_MachState.isValid()); // InsureInit has been called + SUPPORTS_DAC; + } + CONTRACT_END; + + ENABLE_FORBID_GC_LOADER_USE_IN_THIS_SCOPE(); + + // reset pContext; it's only valid for active (top-most) frame + pRD->pContext = NULL; + +#ifdef DACCESS_COMPILE + + // + // In the dac case we may have gotten here + // without the frame being initialized, so + // try and initialize on the fly. + // + + if (!m_MachState.isValid()) + { + MachState unwindState; + + InsureInit(false, &unwindState); + pRD->PCTAddr = dac_cast(unwindState.pRetAddr()); + pRD->ControlPC = unwindState.GetRetAddr(); + pRD->Esp = unwindState._esp; + + // Get some special host instance memory + // so we have a place to point to. + // This host memory has no target address + // and so won't be looked up or used for + // anything else. + MachState* thisState = (MachState*) + DacAllocHostOnlyInstance(sizeof(*thisState), true); + + thisState->_edi = unwindState._edi; + pRD->pEdi = (DWORD *)&thisState->_edi; + thisState->_esi = unwindState._esi; + pRD->pEsi = (DWORD *)&thisState->_esi; + thisState->_ebx = unwindState._ebx; + pRD->pEbx = (DWORD *)&thisState->_ebx; + thisState->_ebp = unwindState._ebp; + pRD->pEbp = (DWORD *)&thisState->_ebp; + + // InsureInit always sets m_RegArgs to zero + // in the real code. I'm not sure exactly + // what should happen in the on-the-fly case, + // but go with what would happen from an InsureInit. + RETURN; + } + +#endif // #ifdef DACCESS_COMPILE + + // DACCESS: The MachState pointers are kept as PTR_TADDR so + // the host pointers here refer to the appropriate size and + // these casts are not a problem. 
+ pRD->pEdi = (DWORD*) m_MachState.pEdi(); + pRD->pEsi = (DWORD*) m_MachState.pEsi(); + pRD->pEbx = (DWORD*) m_MachState.pEbx(); + pRD->pEbp = (DWORD*) m_MachState.pEbp(); + pRD->PCTAddr = dac_cast(m_MachState.pRetAddr()); + pRD->ControlPC = m_MachState.GetRetAddr(); + pRD->Esp = (DWORD) m_MachState.esp(); + + RETURN; +} + +#ifdef _DEBUG_IMPL +// Confirm that if the machine state was not initialized, then +// any unspilled callee saved registers did not change +EXTERN_C MachState* STDCALL HelperMethodFrameConfirmState(HelperMethodFrame* frame, void* esiVal, void* ediVal, void* ebxVal, void* ebpVal) + { + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + DEBUG_ONLY; + } + CONTRACTL_END; + + MachState* state = frame->MachineState(); + + // if we've already executed this check once for this helper method frame then + // we don't do the check again because it is very expensive. + if (frame->HaveDoneConfirmStateCheck()) + { + return state; + } + + // probe to avoid a kazillion violations in the code that follows. + BEGIN_DEBUG_ONLY_CODE; + if (!state->isValid()) + { + frame->InsureInit(false, NULL); + _ASSERTE(state->_pEsi != &state->_esi || state->_esi == (TADDR)esiVal); + _ASSERTE(state->_pEdi != &state->_edi || state->_edi == (TADDR)ediVal); + _ASSERTE(state->_pEbx != &state->_ebx || state->_ebx == (TADDR)ebxVal); + _ASSERTE(state->_pEbp != &state->_ebp || state->_ebp == (TADDR)ebpVal); + } + END_DEBUG_ONLY_CODE; + + // set that we have executed this check once for this helper method frame. + frame->SetHaveDoneConfirmStateCheck(); + + return state; +} +#endif + +void ExternalMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD) +{ + CONTRACT_VOID + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + HOST_NOCALLS; + SUPPORTS_DAC; + } + CONTRACT_END; + + UpdateRegDisplayHelper(pRD, CbStackPopUsingGCRefMap(GetGCRefMap())); + + RETURN; +} + + +void StubDispatchFrame::UpdateRegDisplay(const PREGDISPLAY pRD) +{ + CONTRACT_VOID + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + HOST_NOCALLS; + SUPPORTS_DAC; + } + CONTRACT_END; + + PTR_BYTE pGCRefMap = GetGCRefMap(); + if (pGCRefMap != NULL) + { + UpdateRegDisplayHelper(pRD, CbStackPopUsingGCRefMap(pGCRefMap)); + } + else + if (GetFunction() != NULL) + { + FramedMethodFrame::UpdateRegDisplay(pRD); + } + else + { + UpdateRegDisplayHelper(pRD, 0); + + // If we do not have owning MethodDesc, we need to pretend that + // the call happened on the call instruction to get the ESP unwound properly. 
+ // + // This path is hit when we are throwing null reference exception from + // code:VSD_ResolveWorker or code:StubDispatchFixupWorker + pRD->ControlPC = GetAdjustedCallAddress(pRD->ControlPC); + } + + RETURN; +} + +PCODE StubDispatchFrame::GetReturnAddress() +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + + PCODE retAddress = FramedMethodFrame::GetReturnAddress(); + if (GetFunction() == NULL && GetGCRefMap() == NULL) + { + // See comment in code:StubDispatchFrame::UpdateRegDisplay + retAddress = GetAdjustedCallAddress(retAddress); + } + return retAddress; +} + +void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD) +{ + CONTRACT_VOID + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + HOST_NOCALLS; + SUPPORTS_DAC; + } + CONTRACT_END; + + CalleeSavedRegisters* regs = GetCalleeSavedRegisters(); + + // reset pContext; it's only valid for active (top-most) frame + pRD->pContext = NULL; + + pRD->pEdi = (DWORD*) ®s->edi; + pRD->pEsi = (DWORD*) ®s->esi; + pRD->pEbx = (DWORD*) ®s->ebx; + pRD->pEbp = (DWORD*) ®s->ebp; + pRD->PCTAddr = GetReturnAddressPtr(); + pRD->ControlPC = *PTR_PCODE(pRD->PCTAddr); + pRD->Esp = m_Esp; + RETURN; +} + +void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD) +{ + CONTRACT_VOID + { + NOTHROW; + GC_NOTRIGGER; + // We should skip over InlinedCallFrame if it is not active. + // It will be part of a JITed method's frame, and the stack-walker + // can handle such a case. +#ifdef PROFILING_SUPPORTED + PRECONDITION(CORProfilerStackSnapshotEnabled() || InlinedCallFrame::FrameHasActiveCall(this)); +#endif + HOST_NOCALLS; + MODE_ANY; + SUPPORTS_DAC; + } + CONTRACT_END; + + // @TODO: Remove this after the debugger is fixed to avoid stack-walks from bad places + // @TODO: This may be still needed for sampling profilers + if (!InlinedCallFrame::FrameHasActiveCall(this)) + { + LOG((LF_CORDB, LL_ERROR, "WARNING: InlinedCallFrame::UpdateRegDisplay called on inactive frame %p\n", this)); + return; + } + + DWORD stackArgSize = (DWORD) dac_cast(m_Datum); + + if (stackArgSize & ~0xFFFF) + { + NDirectMethodDesc * pMD = PTR_NDirectMethodDesc(m_Datum); + + /* if this is not an NDirect frame, something is really wrong */ + + _ASSERTE(pMD->SanityCheck() && pMD->IsNDirect()); + + stackArgSize = pMD->GetStackArgumentSize(); + } + + // reset pContext; it's only valid for active (top-most) frame + pRD->pContext = NULL; + + + pRD->pEbp = (DWORD*) &m_pCalleeSavedFP; + + /* The return address is just above the "ESP" */ + pRD->PCTAddr = PTR_HOST_MEMBER_TADDR(InlinedCallFrame, this, + m_pCallerReturnAddress); + pRD->ControlPC = *PTR_PCODE(pRD->PCTAddr); + + /* Now we need to pop off the outgoing arguments */ + pRD->Esp = (DWORD) dac_cast(m_pCallSiteSP) + stackArgSize; + RETURN; +} + +#ifdef FEATURE_HIJACK +//========================== +// Resumable Exception Frame +// +TADDR ResumableFrame::GetReturnAddressPtr() +{ + LIMITED_METHOD_DAC_CONTRACT; + return dac_cast(m_Regs) + offsetof(CONTEXT, Eip); +} + +void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD) +{ + CONTRACT_VOID + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + HOST_NOCALLS; + SUPPORTS_DAC; + } + CONTRACT_END; + + // reset pContext; it's only valid for active (top-most) frame + pRD->pContext = NULL; + + CONTEXT* pUnwoundContext = m_Regs; + +#if !defined(DACCESS_COMPILE) + // "pContextForUnwind" field is only used on X86 since not only is it initialized just for it, + // but its used only under the confines of STACKWALKER_MAY_POP_FRAMES preprocessor define, + // which is defined for x86 only (refer 
to its definition in stackwalk.cpp). + if (pRD->pContextForUnwind != NULL) + { + pUnwoundContext = pRD->pContextForUnwind; + + pUnwoundContext->Eax = m_Regs->Eax; + pUnwoundContext->Ecx = m_Regs->Ecx; + pUnwoundContext->Edx = m_Regs->Edx; + + pUnwoundContext->Edi = m_Regs->Edi; + pUnwoundContext->Esi = m_Regs->Esi; + pUnwoundContext->Ebx = m_Regs->Ebx; + pUnwoundContext->Ebp = m_Regs->Ebp; + pUnwoundContext->Eip = m_Regs->Eip; + } +#endif // !defined(DACCESS_COMPILE) + + pRD->pEax = &pUnwoundContext->Eax; + pRD->pEcx = &pUnwoundContext->Ecx; + pRD->pEdx = &pUnwoundContext->Edx; + + pRD->pEdi = &pUnwoundContext->Edi; + pRD->pEsi = &pUnwoundContext->Esi; + pRD->pEbx = &pUnwoundContext->Ebx; + pRD->pEbp = &pUnwoundContext->Ebp; + + pRD->ControlPC = pUnwoundContext->Eip; + pRD->PCTAddr = dac_cast(m_Regs) + offsetof(CONTEXT, Eip); + + pRD->Esp = m_Regs->Esp; + + RETURN; +} + +// The HijackFrame has to know the registers that are pushed by OnHijackTripThread +void HijackFrame::UpdateRegDisplay(const PREGDISPLAY pRD) +{ + CONTRACTL { + NOTHROW; + GC_NOTRIGGER; + HOST_NOCALLS; + SUPPORTS_DAC; + } + CONTRACTL_END; + + // This only describes the top-most frame + pRD->pContext = NULL; + + pRD->pEdi = &m_Args->Edi; + pRD->pEsi = &m_Args->Esi; + pRD->pEbx = &m_Args->Ebx; + pRD->pEdx = &m_Args->Edx; + pRD->pEcx = &m_Args->Ecx; + pRD->pEax = &m_Args->Eax; + + pRD->pEbp = &m_Args->Ebp; + pRD->PCTAddr = dac_cast(m_Args) + offsetof(HijackArgs, Eip); + pRD->ControlPC = *PTR_PCODE(pRD->PCTAddr); + pRD->Esp = (DWORD)(pRD->PCTAddr + sizeof(TADDR)); +} + +#endif // FEATURE_HIJACK + +void PInvokeCalliFrame::UpdateRegDisplay(const PREGDISPLAY pRD) +{ + CONTRACT_VOID + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + HOST_NOCALLS; + SUPPORTS_DAC; + } + CONTRACT_END; + + VASigCookie *pVASigCookie = GetVASigCookie(); + UpdateRegDisplayHelper(pRD, pVASigCookie->sizeOfArgs+sizeof(int)); + + RETURN; +} + +void TailCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD) +{ + CONTRACT_VOID + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + HOST_NOCALLS; + SUPPORTS_DAC; + } + CONTRACT_END; + + // reset pContext; it's only valid for active (top-most) frame + pRD->pContext = NULL; + + pRD->pEdi = (DWORD*)&m_regs.edi; + pRD->pEsi = (DWORD*)&m_regs.esi; + pRD->pEbx = (DWORD*)&m_regs.ebx; + pRD->pEbp = (DWORD*)&m_regs.ebp; + + pRD->PCTAddr = GetReturnAddressPtr(); + pRD->ControlPC = *PTR_PCODE(pRD->PCTAddr); + pRD->Esp = (DWORD)(pRD->PCTAddr + sizeof(TADDR)); + + RETURN; +} + +//------------------------------------------------------------------------ +// This is declared as returning WORD instead of PRD_TYPE because of +// header issues with cgencpu.h including dbginterface.h. +WORD GetUnpatchedCodeData(LPCBYTE pAddr) +{ +#ifndef _TARGET_X86_ +#error Make sure this works before porting to platforms other than x86. +#endif + CONTRACT(WORD) { + NOTHROW; + GC_NOTRIGGER; + PRECONDITION(CORDebuggerAttached()); + PRECONDITION(CheckPointer(pAddr)); + SO_TOLERANT; + } CONTRACT_END; + + // Ordering is because x86 is little-endien. + BYTE bLow = pAddr[0]; + BYTE bHigh = pAddr[1]; + +#ifndef DACCESS_COMPILE + // Need to make sure that the code we're reading is free of breakpoint patches. + PRD_TYPE unpatchedOpcode; + if (g_pDebugInterface->CheckGetPatchedOpcode((CORDB_ADDRESS_TYPE *)pAddr, + &unpatchedOpcode)) + { + // PRD_TYPE is supposed to be an opaque debugger structure representing data to remove a patch. + // Although PRD_TYPE is currently typedef'ed to be a DWORD_PTR, it's actually semantically just a BYTE. 
+ // (since a patch on x86 is just an 0xCC instruction).
+ // Ideally, the debugger subsystem would expose a patch-code stripper that returns BYTE/WORD/etc, and
+ // not force us to crack it ourselves here.
+ bLow = (BYTE) unpatchedOpcode;
+ }
+ //
+#endif
+
+ WORD w = bLow + (bHigh << 8);
+ RETURN w;
+}
+
+
+#ifndef DACCESS_COMPILE
+
+//-------------------------------------------------------------------------
+// One-time creation of special prestub to initialize UMEntryThunks.
+//-------------------------------------------------------------------------
+Stub *GenerateUMThunkPrestub()
+{
+ CONTRACT(Stub*)
+ {
+ STANDARD_VM_CHECK;
+ POSTCONDITION(CheckPointer(RETVAL));
+ }
+ CONTRACT_END;
+
+ CPUSTUBLINKER sl;
+ CPUSTUBLINKER *psl = &sl;
+
+ CodeLabel* rgRareLabels[] = { psl->NewCodeLabel(),
+ psl->NewCodeLabel(),
+ psl->NewCodeLabel()
+ };
+
+
+ CodeLabel* rgRejoinLabels[] = { psl->NewCodeLabel(),
+ psl->NewCodeLabel(),
+ psl->NewCodeLabel()
+ };
+
+ // emit the initial prolog
+ psl->EmitComMethodStubProlog(UMThkCallFrame::GetMethodFrameVPtr(), rgRareLabels, rgRejoinLabels, FALSE /*Don't profile*/);
+
+ // mov ecx, [esi+UMThkCallFrame.pUMEntryThunk]
+ psl->X86EmitIndexRegLoad(kECX, kESI, UMThkCallFrame::GetOffsetOfUMEntryThunk());
+
+ // The call conv is __stdcall
+ psl->X86EmitPushReg(kECX);
+
+ // call UMEntryThunk::DoRunTimeInit
+ psl->X86EmitCall(psl->NewExternalCodeLabel((LPVOID)UMEntryThunk::DoRunTimeInit), 4);
+
+ // mov eax, [esi+UMThkCallFrame.pUMEntryThunk]
+ psl->X86EmitIndexRegLoad(kEAX, kESI, UMThkCallFrame::GetOffsetOfUMEntryThunk());
+
+ // lea eax, [eax + UMEntryThunk.m_code] // point to fixed-up UMEntryThunk
+ psl->X86EmitOp(0x8d, kEAX, kEAX,
+ UMEntryThunk::GetCodeOffset() + UMEntryThunkCode::GetEntryPointOffset());
+
+ psl->EmitComMethodStubEpilog(UMThkCallFrame::GetMethodFrameVPtr(), rgRareLabels, rgRejoinLabels, FALSE /*Don't profile*/);
+
+ RETURN psl->Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap());
+}
+
+Stub *GenerateInitPInvokeFrameHelper()
+{
+ CONTRACT(Stub*)
+ {
+ STANDARD_VM_CHECK;
+ POSTCONDITION(CheckPointer(RETVAL));
+ }
+ CONTRACT_END;
+
+ CPUSTUBLINKER sl;
+ CPUSTUBLINKER *psl = &sl;
+
+ CORINFO_EE_INFO::InlinedCallFrameInfo FrameInfo;
+ InlinedCallFrame::GetEEInfo(&FrameInfo);
+
+ // EDI contains address of the frame on stack (the frame ptr, not its negspace)
+ unsigned negSpace = FrameInfo.offsetOfFrameVptr;
+
+ // mov esi, GetThread()
+ psl->X86EmitCurrentThreadFetch(kESI, (1 << kEDI) | (1 << kEBX));
+
+ // mov [edi + FrameInfo.offsetOfGSCookie], GetProcessGSCookie()
+ psl->X86EmitOffsetModRM(0xc7, (X86Reg)0x0, kEDI, FrameInfo.offsetOfGSCookie - negSpace);
+ psl->Emit32(GetProcessGSCookie());
+
+ // mov [edi + FrameInfo.offsetOfFrameVptr], InlinedCallFrame::GetFrameVtable()
+ psl->X86EmitOffsetModRM(0xc7, (X86Reg)0x0, kEDI, FrameInfo.offsetOfFrameVptr - negSpace);
+ psl->Emit32(InlinedCallFrame::GetMethodFrameVPtr());
+
+ // mov eax, [esi + offsetof(Thread, m_pFrame)]
+ // mov [edi + FrameInfo.offsetOfFrameLink], eax
+ psl->X86EmitIndexRegLoad(kEAX, kESI, offsetof(Thread, m_pFrame));
+ psl->X86EmitIndexRegStore(kEDI, FrameInfo.offsetOfFrameLink - negSpace, kEAX);
+
+ // mov [edi + FrameInfo.offsetOfCalleeSavedEbp], ebp
+ psl->X86EmitIndexRegStore(kEDI, FrameInfo.offsetOfCalleeSavedFP - negSpace, kEBP);
+
+ // mov [edi + FrameInfo.offsetOfReturnAddress], 0
+ psl->X86EmitOffsetModRM(0xc7, (X86Reg)0x0, kEDI, FrameInfo.offsetOfReturnAddress - negSpace);
+ psl->Emit32(0);
+
+ // mov [esi + offsetof(Thread, m_pFrame)], edi
+ psl->X86EmitIndexRegStore(kESI, offsetof(Thread, m_pFrame), kEDI);
+
+ // leave current Thread in ESI
+ 
psl->X86EmitReturn(0); + + // A single process-wide stub that will never unload + RETURN psl->Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()); +} + +#ifdef FEATURE_INCLUDE_ALL_INTERFACES + +static void STDCALL LeaveRuntimeHelperWithFrame (Thread *pThread, size_t target, Frame *pFrame) +{ + CONTRACTL + { + THROWS; + GC_TRIGGERS; + MODE_PREEMPTIVE; + ENTRY_POINT; + } + CONTRACTL_END; + + Thread::LeaveRuntimeThrowComplus(target); + GCX_COOP_THREAD_EXISTS(pThread); + pFrame->Push(pThread); + +} + +static void STDCALL EnterRuntimeHelperWithFrame (Thread *pThread, Frame *pFrame) +{ + // make sure we restore the original Win32 last error before leaving this function - we are + // called right after returning from the P/Invoke target and the error has not been saved yet + BEGIN_PRESERVE_LAST_ERROR; + + CONTRACTL + { + THROWS; + GC_TRIGGERS; + MODE_PREEMPTIVE; + ENTRY_POINT; + } + CONTRACTL_END; + + { + HRESULT hr = Thread::EnterRuntimeNoThrow(); + GCX_COOP_THREAD_EXISTS(pThread); + if (FAILED(hr)) + { + INSTALL_UNWIND_AND_CONTINUE_HANDLER; + ThrowHR (hr); + UNINSTALL_UNWIND_AND_CONTINUE_HANDLER; + } + + pFrame->Pop(pThread); + } + + END_PRESERVE_LAST_ERROR; +} + +// "ip" is the return address +// This function disassembles the code at the return address to determine +// how many arguments to pop off. +// Returns the number of DWORDs that should be popped off on return. + +static int STDCALL GetStackSizeForVarArgCall(BYTE* ip) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + SO_TOLERANT; + } + CONTRACTL_END; + + int retValue = 0; + //BEGIN_ENTRYPOINT_VOIDRET; + + // The instruction immediately following the call may be a move into esp used for + // P/Invoke stack resilience. For caller-pop calls it's always mov esp, [ebp-n]. + if (ip[0] == 0x8b) + { + if (ip[1] == 0x65) + { + // mov esp, [ebp+disp8] + ip += 3; + } + else if (ip[1] == 0xa5) + { + // mov esp, [ebp+disp32] + ip += 6; + } + } + + if (ip[0] == 0x81 && ip[1] == 0xc4) + { + // add esp, imm32 + retValue = (*(int*)&ip[2])/4; + } + else if (ip[0] == 0x83 && ip[1] == 0xc4) + { + // add esp, imm8 + retValue = ip[2]/4; + } + else if (ip[0] == 0x59) + { + // pop ecx + retValue = 1; + } + else + { + retValue = 0; + } + //END_ENTRYPOINT_VOIDRET; + return retValue; +} + +void LeaveRuntimeStackProbeOnly() +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + ENTRY_POINT; + } + CONTRACTL_END; + +#ifdef FEATURE_STACK_PROBE + RetailStackProbe(ADJUST_PROBE(DEFAULT_ENTRY_PROBE_AMOUNT)); +#endif +} + +//----------------------------------------------------------------------------- +// Hosting stub for calls from CLR code to unmanaged code +// +// We push a LeaveRuntimeFrame, and then re-push all the arguments. +// Note that we have to support all the different native calling conventions +// viz. 
stdcall, thiscall, cdecl, varargs + +#if 0 + +This is a diagramatic description of what the stub does: + + (lower addresses) + + | | + +----------------+ <--- ESP + | | + | copied | + | arguments | + | | + | | + +----------------+ + | EDX | + | ECX | + +----------------+ +| | | GSCookie | +| | +----------------+ <--- ESI +| | | vptr | +| | +----------------+ +| | | m_Next | +| | +----------------+ +| | | EDI | Scratch register +| | | ESI | For LeaveRuntimeFrame* +| | | EBX | For Thread* +| | +----------------+ <--- EBP +| | | EBP | ++----------------+ <---ESP +----------------+ +| ret addr | | ret addr | ++----------------+ +----------------+ +| | | | +| arguments | | arguments | +| | | | +| | | | ++----------------+ +----------------+ +| | | | +| caller's frame | | caller's frame | +| | | | + + (higher addresses) + + Stack on entry Stack before the call + to this stub. to unmanaged code. + +#endif + +//----------------------------------------------------------------------------- +// This the layout of the frame of the stub + +struct StubForHostStackFrame +{ + LPVOID m_outgingArgs[1]; + ArgumentRegisters m_argumentRegisters; + GSCookie m_gsCookie; + LeaveRuntimeFrame m_LeaveRuntimeFrame; + CalleeSavedRegisters m_calleeSavedRegisters; + LPVOID m_retAddr; + LPVOID m_incomingArgs[1]; + +public: + + // Where does the FP/EBP point to? + static INT32 GetFPpositionOffset() + { + LIMITED_METHOD_CONTRACT; + return offsetof(StubForHostStackFrame, m_calleeSavedRegisters) + + offsetof(CalleeSavedRegisters, ebp); + } + + static INT32 GetFPrelOffsOfArgumentRegisters() + { + LIMITED_METHOD_CONTRACT; + return offsetof(StubForHostStackFrame, m_argumentRegisters) - GetFPpositionOffset(); + } + + static INT32 GetFPrelOffsOfCalleeSavedRegisters() + { + LIMITED_METHOD_CONTRACT; + return offsetof(StubForHostStackFrame, m_calleeSavedRegisters) - GetFPpositionOffset(); + } + + static INT32 GetFPrelOffsOfRetAddr() + { + LIMITED_METHOD_CONTRACT; + return offsetof(StubForHostStackFrame, m_retAddr) - GetFPpositionOffset(); + } + + static INT32 GetFPrelOffsOfIncomingArgs() + { + LIMITED_METHOD_CONTRACT; + return offsetof(StubForHostStackFrame, m_incomingArgs) - GetFPpositionOffset(); + } +}; + +static Stub *GenerateStubForHostWorker(LoaderHeap *pHeap, + LPVOID pNativeTarget, // NULL to fetch from the last pushed argument (COM) + Stub *pInnerStub, // stub to call instead of pNativeTarget, or NULL + LONG dwComSlot, // only valid if pNativeTarget is NULL + WORD wStackArgumentSize, // -1 for varargs + WORD wStackPopSize) // 0 for cdecl +{ + STANDARD_VM_CONTRACT; + + // We need to call LeaveRuntime before the target, and EnterRuntime after the target + CPUSTUBLINKER sl; + + sl.X86EmitPushEBPframe(); + + // save EBX, ESI, EDI + sl.X86EmitPushReg(kEBX); + sl.X86EmitPushReg(kESI); + sl.X86EmitPushReg(kEDI); + + // Frame + sl.X86EmitPushReg(kDummyPushReg); // m_Next + sl.X86EmitPushImm32((UINT)(size_t)LeaveRuntimeFrame::GetMethodFrameVPtr()); + + // mov esi, esp; esi is Frame + sl.X86EmitMovRegSP(kESI); + + sl.X86EmitPushImmPtr((LPVOID)GetProcessGSCookie()); + + // Save outgoing arguments on the stack + sl.X86EmitPushReg(kECX); + sl.X86EmitPushReg(kEDX); + + INT32 offs = 0; + if (wStackArgumentSize == (WORD)-1) + { + // Re-push the return address as an argument to GetStackSizeForVarArgCall() + // This will return the number of stack arguments (in DWORDs) + sl.X86EmitIndexPush(kEBP, StubForHostStackFrame::GetFPrelOffsOfRetAddr()); + sl.X86EmitCall(sl.NewExternalCodeLabel((LPVOID)GetStackSizeForVarArgCall), 4); + + // We 
generate the following code sequence to re-push all the arguments + // + // Note that we cannot use "sub ESP, EAX" as ESP might jump past the + // stack guard-page. + // + // cmp EAX, 0 + // LoopTop: + // jz LoopDone + // push dword ptr[EBP + EAX*4 + 4] + // sub EAX, 1 + // jmp LoopTop + // LoopDone: + // ... + + sl.X86EmitCmpRegImm32(kEAX, 0); + CodeLabel * pLoopTop = sl.EmitNewCodeLabel(); + CodeLabel * pLoopDone = sl.NewCodeLabel(); + sl.X86EmitCondJump(pLoopDone, X86CondCode::kJZ); + sl.X86EmitBaseIndexPush(kEBP, kEAX, 4, StubForHostStackFrame::GetFPrelOffsOfIncomingArgs() - sizeof(LPVOID)); + sl.X86EmitSubReg(kEAX, 1); + sl.X86EmitNearJump(pLoopTop); + sl.EmitLabel(pLoopDone); + } + else + { + offs = StubForHostStackFrame::GetFPrelOffsOfIncomingArgs() + wStackArgumentSize; + + int numStackSlots = wStackArgumentSize / sizeof(LPVOID); + for (int i = 0; i < numStackSlots; i++) { + offs -= sizeof(LPVOID); + sl.X86EmitIndexPush(kEBP, offs); + } + } + + //------------------------------------------------------------------------- + + // EBX has Thread* + // X86TLSFetch_TRASHABLE_REGS will get trashed + sl.X86EmitCurrentThreadFetch(kEBX, 0); + + if (pNativeTarget != NULL) + { + // push Frame + sl.X86EmitPushReg(kESI); + + // push target + if (pNativeTarget == (LPVOID)-1) + { + // target comes right above arguments + sl.X86EmitIndexPush(kEBP, StubForHostStackFrame::GetFPrelOffsOfIncomingArgs() + wStackArgumentSize); + } + else + { + // target is fixed + sl.X86EmitPushImm32((UINT)(size_t)pNativeTarget); + } + } + else + { + // mov eax, [first_arg] + // mov eax, [eax] + // push [eax + slot_offset] + sl.X86EmitIndexRegLoad(kEAX, kEBP, offs); + sl.X86EmitIndexRegLoad(kEAX, kEAX, 0); + sl.X86EmitIndexPush(kEAX, sizeof(LPVOID) * dwComSlot); + + // push Frame + sl.X86EmitPushReg(kESI); + // push [esp + 4] + sl.X86EmitEspOffset(0xff, (X86Reg)6, 4); + } + + // push Thread + sl.X86EmitPushReg(kEBX); + sl.X86EmitCall(sl.NewExternalCodeLabel((LPVOID)LeaveRuntimeHelperWithFrame), 0xc); + + //------------------------------------------------------------------------- + // call NDirect + // See diagram above to see what the stack looks like at this point + + // Restore outgoing arguments + unsigned offsToArgRegs = StubForHostStackFrame::GetFPrelOffsOfArgumentRegisters(); + sl.X86EmitIndexRegLoad(kECX, kEBP, offsToArgRegs + offsetof(ArgumentRegisters, ECX)); + sl.X86EmitIndexRegLoad(kEDX, kEBP, offsToArgRegs + offsetof(ArgumentRegisters, EDX)); + + if (pNativeTarget != NULL || pInnerStub != NULL) + { + if (pNativeTarget == (LPVOID)-1) + { + // mov eax, target + sl.X86EmitIndexRegLoad(kEAX, kEBP, StubForHostStackFrame::GetFPrelOffsOfIncomingArgs() + wStackArgumentSize); + // call eax + sl.Emit16(X86_INSTR_CALL_EAX); + } + else + { + if (pNativeTarget == NULL) + { + // pop target and discard it (we go to the inner stub) + _ASSERTE(pInnerStub != NULL); + sl.X86EmitPopReg(kEAX); + } + + LPVOID pTarget = (pInnerStub != NULL ? 
(LPVOID)pInnerStub->GetEntryPoint() : pNativeTarget); + sl.X86EmitCall(sl.NewExternalCodeLabel(pTarget), wStackPopSize / 4); + } + } + else + { + // pop target + sl.X86EmitPopReg(kEAX); + // call eax + sl.Emit16(X86_INSTR_CALL_EAX); + } + + //------------------------------------------------------------------------- + // Save return value registers and call EnterRuntimeHelperWithFrame + // + + sl.X86EmitPushReg(kEAX); + sl.X86EmitPushReg(kEDX); + + // push Frame + sl.X86EmitPushReg(kESI); + // push Thread + sl.X86EmitPushReg(kEBX); + // call EnterRuntime + sl.X86EmitCall(sl.NewExternalCodeLabel((LPVOID)EnterRuntimeHelperWithFrame), 8); + + sl.X86EmitPopReg(kEDX); + sl.X86EmitPopReg(kEAX); + + //------------------------------------------------------------------------- + // Tear down the frame + // + + sl.EmitCheckGSCookie(kESI, LeaveRuntimeFrame::GetOffsetOfGSCookie()); + + // lea esp, [ebp - offsToCalleeSavedRegs] + unsigned offsToCalleeSavedRegs = StubForHostStackFrame::GetFPrelOffsOfCalleeSavedRegisters(); + sl.X86EmitIndexLea((X86Reg)kESP_Unsafe, kEBP, offsToCalleeSavedRegs); + + sl.X86EmitPopReg(kEDI); + sl.X86EmitPopReg(kESI); + sl.X86EmitPopReg(kEBX); + + sl.X86EmitPopReg(kEBP); + + // ret [wStackPopSize] + sl.X86EmitReturn(wStackPopSize); + + if (pInnerStub != NULL) + { + // this stub calls another stub + return sl.LinkInterceptor(pHeap, pInnerStub, pNativeTarget); + } + else + { + return sl.Link(pHeap); + } +} + + +//----------------------------------------------------------------------------- +Stub *NDirectMethodDesc::GenerateStubForHost(LPVOID pNativeTarget, Stub *pInnerStub) +{ + STANDARD_VM_CONTRACT; + + // We need to call LeaveRuntime before the target, and EnterRuntime after the target + + if (IsQCall()) + { + // We need just the stack probe for QCalls + CPUSTUBLINKER sl; + sl.X86EmitCall(sl.NewExternalCodeLabel((LPVOID)LeaveRuntimeStackProbeOnly), 0); + + sl.X86EmitNearJump(sl.NewExternalCodeLabel((LPVOID)pNativeTarget)); + + return sl.Link(GetLoaderAllocator()->GetStubHeap()); + } + + WORD wArgSize = (IsVarArgs() ? (WORD)-1 : GetStackArgumentSize()); + WORD wPopSize = ((IsStdCall() || IsThisCall()) ? GetStackArgumentSize() : 0); + + return GenerateStubForHostWorker(GetDomain()->GetLoaderAllocator()->GetStubHeap(), + pNativeTarget, + pInnerStub, + 0, + wArgSize, + wPopSize); +} + + +#ifdef FEATURE_COMINTEROP + +//----------------------------------------------------------------------------- +Stub *ComPlusCallInfo::GenerateStubForHost(LoaderHeap *pHeap, Stub *pInnerStub) +{ + STANDARD_VM_CONTRACT; + + WORD wArgSize = GetStackArgumentSize(); + + return GenerateStubForHostWorker(pHeap, + NULL, + pInnerStub, + m_cachedComSlot, + wArgSize, + wArgSize); // always stdcall +} + +#endif // FEATURE_COMINTEROP + +//----------------------------------------------------------------------------- +// static +Stub *COMDelegate::GenerateStubForHost(MethodDesc *pInvokeMD, MethodDesc *pStubMD, LPVOID pNativeTarget, Stub *pInnerStub) +{ + STANDARD_VM_CONTRACT; + + // get unmanaged calling convention from pInvokeMD's metadata + PInvokeStaticSigInfo sigInfo(pInvokeMD); + CorPinvokeMap callConv = sigInfo.GetCallConv(); + + WORD wArgSize = pStubMD->AsDynamicMethodDesc()->GetNativeStackArgSize(); + WORD wPopSize = (callConv == pmCallConvCdecl ? 
0 : wArgSize); + + return GenerateStubForHostWorker(NULL, // we want to free this stub when the delegate dies + pNativeTarget, + pInnerStub, + 0, + wArgSize, + wPopSize); +} + +//----------------------------------------------------------------------------- +// static +Stub *NDirect::GenerateStubForHost(Module *pModule, CorUnmanagedCallingConvention callConv, WORD wArgSize) +{ + STANDARD_VM_CONTRACT; + + // This one is for unmanaged CALLI where the target is passed as last argument + // (first pushed to stack) + + WORD wPopSize = (callConv == IMAGE_CEE_CS_CALLCONV_C ? 0 : (wArgSize + STACK_ELEM_SIZE)); + + return GenerateStubForHostWorker(pModule->GetDomain()->GetLoaderAllocator()->GetStubHeap(), + (LPVOID)-1, + NULL, + 0, + wArgSize, + wPopSize); +} + +#endif // FEATURE_INCLUDE_ALL_INTERFACES + + +#ifdef MDA_SUPPORTED + +//----------------------------------------------------------------------------- +Stub *NDirectMethodDesc::GenerateStubForMDA(LPVOID pNativeTarget, Stub *pInnerStub, BOOL fCalledByStub) +{ + STANDARD_VM_CONTRACT; + + CPUSTUBLINKER sl; + sl.X86EmitPushEBPframe(); + + DWORD callConv = (DWORD)(IsThisCall() ? pmCallConvThiscall : (IsStdCall() ? pmCallConvStdcall : pmCallConvCdecl)); + _ASSERTE((callConv & StackImbalanceCookie::HAS_FP_RETURN_VALUE) == 0); + + MetaSig msig(this); + if (msig.HasFPReturn()) + { + // check for the HRESULT swapping impl flag + DWORD dwImplFlags; + IfFailThrow(GetMDImport()->GetMethodImplProps(GetMemberDef(), NULL, &dwImplFlags)); + + if (dwImplFlags & miPreserveSig) + { + // pass a flag to PInvokeStackImbalanceHelper that it should save & restore FPU return value + callConv |= StackImbalanceCookie::HAS_FP_RETURN_VALUE; + } + } + + // init StackImbalanceCookie + sl.X86EmitPushReg(kEAX); // m_dwSavedEsp (just making space) + sl.X86EmitPushImm32(callConv); // m_callConv + + if (IsVarArgs()) + { + // Re-push the return address as an argument to GetStackSizeForVarArgCall() + if (fCalledByStub) + { + // We will be called by another stub that doesn't know the stack size, + // so we need to skip a frame to get to the managed caller. + sl.X86EmitIndexRegLoad(kEAX, kEBP, 0); + sl.X86EmitIndexPush(kEAX, 4); + } + else + { + sl.X86EmitIndexPush(kEBP, 4); + } + + // This will return the number of stack arguments (in DWORDs) + sl.X86EmitCall(sl.NewExternalCodeLabel((LPVOID)GetStackSizeForVarArgCall), 4); + + // shl eax,2 + sl.Emit16(0xe0c1); + sl.Emit8(0x02); + + sl.X86EmitPushReg(kEAX); // m_dwStackArgSize + } + else + { + sl.X86EmitPushImm32(GetStackArgumentSize()); // m_dwStackArgSize + } + + LPVOID pTarget = (pInnerStub != NULL ? (LPVOID)pInnerStub->GetEntryPoint() : pNativeTarget); + sl.X86EmitPushImmPtr(pTarget); // m_pTarget + sl.X86EmitPushImmPtr(this); // m_pMD + + // stack layout at this point + + // | ... | + // | stack arguments | EBP + 8 + // +-----------------------+ + // | return address | EBP + 4 + // +-----------------------+ + // | saved EBP | EBP + 0 + // +-----------------------+ + // | SIC::m_dwSavedEsp | + // | SIC::m_callConv | + // | SIC::m_dwStackArgSize | + // | SIC::m_pTarget | + // | SIC::m_pMD | EBP - 20 + // ------------------------ + + // call the helper + sl.X86EmitCall(sl.NewExternalCodeLabel(PInvokeStackImbalanceHelper), sizeof(StackImbalanceCookie)); + + // pop StackImbalanceCookie + sl.X86EmitMovSPReg(kEBP); + + sl.X86EmitPopReg(kEBP); + sl.X86EmitReturn((IsStdCall() || IsThisCall()) ? 
GetStackArgumentSize() : 0); + + if (pInnerStub) + { + return sl.LinkInterceptor(GetLoaderAllocator()->GetStubHeap(), pInnerStub, pNativeTarget); + } + else + { + return sl.Link(GetLoaderAllocator()->GetStubHeap()); + } +} + +//----------------------------------------------------------------------------- +// static +Stub *COMDelegate::GenerateStubForMDA(MethodDesc *pInvokeMD, MethodDesc *pStubMD, LPVOID pNativeTarget, Stub *pInnerStub) +{ + STANDARD_VM_CONTRACT; + + WORD wStackArgSize = pStubMD->AsDynamicMethodDesc()->GetNativeStackArgSize(); + + // get unmanaged calling convention from pInvokeMD's metadata + PInvokeStaticSigInfo sigInfo(pInvokeMD); + DWORD callConv = (DWORD)sigInfo.GetCallConv(); + _ASSERTE((callConv & StackImbalanceCookie::HAS_FP_RETURN_VALUE) == 0); + + MetaSig msig(pInvokeMD); + if (msig.HasFPReturn()) + { + // pass a flag to PInvokeStackImbalanceHelper that it should save & restore FPU return value + callConv |= StackImbalanceCookie::HAS_FP_RETURN_VALUE; + } + + CPUSTUBLINKER sl; + sl.X86EmitPushEBPframe(); + + LPVOID pTarget = (pInnerStub != NULL ? (LPVOID)pInnerStub->GetEntryPoint() : pNativeTarget); + + // init StackImbalanceCookie + sl.X86EmitPushReg(kEAX); // m_dwSavedEsp (just making space) + sl.X86EmitPushImm32(callConv); // m_callConv + sl.X86EmitPushImm32(wStackArgSize); // m_dwStackArgSize + sl.X86EmitPushImmPtr(pTarget); // m_pTarget + sl.X86EmitPushImmPtr(pInvokeMD); // m_pMD + + // stack layout at this point + + // | ... | + // | stack arguments | EBP + 8 + // +-----------------------+ + // | return address | EBP + 4 + // +-----------------------+ + // | saved EBP | EBP + 0 + // +-----------------------+ + // | SIC::m_dwSavedEsp | + // | SIC::m_callConv | + // | SIC::m_dwStackArgSize | + // | SIC::m_pTarget | + // | SIC::m_pMD | EBP - 20 + // ------------------------ + + // call the helper + sl.X86EmitCall(sl.NewExternalCodeLabel(PInvokeStackImbalanceHelper), sizeof(StackImbalanceCookie)); + + // pop StackImbalanceCookie + sl.X86EmitMovSPReg(kEBP); + + sl.X86EmitPopReg(kEBP); + sl.X86EmitReturn(callConv == pmCallConvCdecl ? 0 : wStackArgSize); + + if (pInnerStub != NULL) + { + return sl.LinkInterceptor(pInnerStub, pNativeTarget); + } + else + { + return sl.Link(); // don't use loader heap as we want to be able to free the stub + } +} + +#endif // MDA_SUPPORTED + +extern "C" VOID STDCALL StubRareEnableWorker(Thread *pThread) +{ + WRAPPER_NO_CONTRACT; + + //printf("RareEnable\n"); + pThread->RareEnablePreemptiveGC(); +} + + + + +// Disable when calling into managed code from a place that fails via Exceptions +extern "C" VOID STDCALL StubRareDisableTHROWWorker(Thread *pThread) +{ + STATIC_CONTRACT_THROWS; + STATIC_CONTRACT_GC_TRIGGERS; + + // Do not add a CONTRACT here. We haven't set up SEH. We rely + // on HandleThreadAbort and COMPlusThrowBoot dealing with this situation properly. + + // WARNING!!!! + // when we start executing here, we are actually in cooperative mode. But we + // haven't synchronized with the barrier to reentry yet. So we are in a highly + // dangerous mode. If we call managed code, we will potentially be active in + // the GC heap, even as GC's are occuring! + + // Check for ShutDown scenario. This happens only when we have initiated shutdown + // and someone is trying to call in after the CLR is suspended. In that case, we + // must either raise an unmanaged exception or return an HRESULT, depending on the + // expectations of our caller. + if (!CanRunManagedCode()) + { + // DO NOT IMPROVE THIS EXCEPTION! 
It cannot be a managed exception. It + // cannot be a real exception object because we cannot execute any managed + // code here. + pThread->m_fPreemptiveGCDisabled = 0; + COMPlusThrowBoot(E_PROCESS_SHUTDOWN_REENTRY); + } + + // We must do the following in this order, because otherwise we would be constructing + // the exception for the abort without synchronizing with the GC. Also, we have no + // CLR SEH set up, despite the fact that we may throw a ThreadAbortException. + pThread->RareDisablePreemptiveGC(); + pThread->HandleThreadAbort(); +} + +// Note that this logic is copied below, in PopSEHRecords +__declspec(naked) +VOID __cdecl PopSEHRecords(LPVOID pTargetSP) +{ + // No CONTRACT possible on naked functions + STATIC_CONTRACT_NOTHROW; + STATIC_CONTRACT_GC_NOTRIGGER; + + __asm{ + mov ecx, [esp+4] ;; ecx <- pTargetSP + mov eax, fs:[0] ;; get current SEH record + poploop: + cmp eax, ecx + jge done + mov eax, [eax] ;; get next SEH record + jmp poploop + done: + mov fs:[0], eax + retn + } +} + +////////////////////////////////////////////////////////////////////////////// +// +// JITInterface +// +////////////////////////////////////////////////////////////////////////////// + +/*********************************************************************/ +#ifdef EnC_SUPPORTED +#pragma warning (disable : 4731) +void ResumeAtJit(PCONTEXT pContext, LPVOID oldESP) +{ + // No CONTRACT here, because we can't run the risk of it pushing any SEH into the + // current method. + + STATIC_CONTRACT_NOTHROW; + STATIC_CONTRACT_GC_NOTRIGGER; + +#ifdef _DEBUG + DWORD curESP; + __asm mov curESP, esp +#endif + + if (oldESP) + { + _ASSERTE(curESP < (DWORD)(size_t)oldESP); + // should have popped the SEH records by now as stack has been overwritten + _ASSERTE(GetCurrentSEHRecord() > oldESP); + } + + // For the "push Eip, ..., ret" + _ASSERTE(curESP < pContext->Esp - sizeof(DWORD)); + pContext->Esp -= sizeof(DWORD); + + __asm { + mov ebp, pContext + + // Push Eip onto the targetESP, so that the final "ret" will consume it + mov ecx, [ebp]CONTEXT.Esp + mov edx, [ebp]CONTEXT.Eip + mov [ecx], edx + + // Restore all registers except Esp, Ebp, Eip + mov eax, [ebp]CONTEXT.Eax + mov ebx, [ebp]CONTEXT.Ebx + mov ecx, [ebp]CONTEXT.Ecx + mov edx, [ebp]CONTEXT.Edx + mov esi, [ebp]CONTEXT.Esi + mov edi, [ebp]CONTEXT.Edi + + push [ebp]CONTEXT.Esp // pContext->Esp is (targetESP-sizeof(DWORD)) + push [ebp]CONTEXT.Ebp + pop ebp + pop esp + + // esp is (targetESP-sizeof(DWORD)), and [esp] is the targetEIP. + // The ret will set eip to targetEIP and esp will be automatically + // incremented to targetESP + + ret + } +} +#pragma warning (default : 4731) +#endif // !EnC_SUPPORTED + + +#pragma warning(push) +#pragma warning(disable: 4035) +DWORD getcpuid(DWORD arg, unsigned char result[16]) +{ + LIMITED_METHOD_CONTRACT + + __asm + { + push ebx + push esi + mov eax, arg + cpuid + mov esi, result + mov [esi+ 0], eax + mov [esi+ 4], ebx + mov [esi+ 8], ecx + mov [esi+12], edx + pop esi + pop ebx + } +} + +// The following function uses Deterministic Cache Parameter leafs to determine the cache hierarchy information on Prescott & Above platforms. +// This function takes 3 arguments: +// Arg1 is an input to ECX. Used as index to specify which cache level to return infoformation on by CPUID. +// Arg2 is an input to EAX. For deterministic code enumeration, we pass in 4H in arg2. +// Arg3 is a pointer to the return buffer +// No need to check whether or not CPUID is supported because we have already called CPUID with success to come here. 
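+// A rough usage sketch of the two helpers above (illustrative only, not part of
+// the imported source; the concrete ECX index depends on the processor's cache
+// topology): to query sub-leaf 1 of the deterministic cache parameter leaf 04H,
+// one would call
+//
+//   unsigned char buffer[16];
+//   getextcpuid(1, 4, buffer);        // ECX = 1, EAX = 4H
+//   DWORD* regs = (DWORD*)buffer;     // regs[0..3] = EAX, EBX, ECX, EDX
+//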
+ +DWORD getextcpuid(DWORD arg1, DWORD arg2, unsigned char result[16]) +{ + LIMITED_METHOD_CONTRACT + + __asm + { + push ebx + push esi + mov ecx, arg1 + mov eax, arg2 + cpuid + mov esi, result + mov [esi+ 0], eax + mov [esi+ 4], ebx + mov [esi+ 8], ecx + mov [esi+12], edx + pop esi + pop ebx + } +} + +#pragma warning(pop) + + +// This function returns the number of logical processors on a given physical chip. If it cannot +// determine the number of logical cpus, or the machine is not populated uniformly with the same +// type of processors, this function returns 1. +DWORD GetLogicalCpuCount() +{ + // No CONTRACT possible because GetLogicalCpuCount uses SEH + + STATIC_CONTRACT_THROWS; + STATIC_CONTRACT_GC_NOTRIGGER; + + static DWORD val = 0; + + // cache value for later re-use + if (val) + { + return val; + } + + struct Param : DefaultCatchFilterParam + { + DWORD retVal; + } param; + param.pv = COMPLUS_EXCEPTION_EXECUTE_HANDLER; + param.retVal = 1; + + PAL_TRY(Param *, pParam, ¶m) + { + unsigned char buffer[16]; + + DWORD maxCpuId = getcpuid(0, buffer); + + if (maxCpuId < 1) + goto lDone; + + DWORD* dwBuffer = (DWORD*)buffer; + + if (dwBuffer[1] == 'uneG') { + if (dwBuffer[3] == 'Ieni') { + if (dwBuffer[2] == 'letn') { // get SMT/multicore enumeration for Intel EM64T + + // TODO: Currently GetLogicalCpuCountFromOS() and GetLogicalCpuCountFallback() are broken on + // multi-core processor, but we never call into those two functions since we don't halve the + // gen0size when it's prescott and above processor. We keep the old version here for earlier + // generation system(Northwood based), perf data suggests on those systems, halve gen0 size + // still boost the performance(ex:Biztalk boosts about 17%). So on earlier systems(Northwood) + // based, we still go ahead and halve gen0 size. The logic in GetLogicalCpuCountFromOS() + // and GetLogicalCpuCountFallback() works fine for those earlier generation systems. + // If it's a Prescott and above processor or Multi-core, perf data suggests not to halve gen0 + // size at all gives us overall better performance. + // This is going to be fixed with a new version in orcas time frame. 
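+ // Note on the vendor test above: getcpuid(0, buffer) stores EAX/EBX/ECX/EDX at
+ // offsets 0/4/8/12, so dwBuffer[1], dwBuffer[3] and dwBuffer[2] hold the ASCII
+ // chunks "Genu", "ineI" and "ntel" of "GenuineIntel". Read back as little-endian
+ // DWORDs, those chunks equal the multi-character constants 'uneG', 'Ieni' and
+ // 'letn' compared above.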
+ + if( (maxCpuId > 3) && (maxCpuId < 0x80000000) ) + goto lDone; + + val = GetLogicalCpuCountFromOS(); //try to obtain HT enumeration from OS API + if (val ) + { + pParam->retVal = val; // OS API HT enumeration successful, we are Done + goto lDone; + } + + val = GetLogicalCpuCountFallback(); // OS API failed, Fallback to HT enumeration using CPUID + if( val ) + pParam->retVal = val; + } + } + } +lDone: ; + } + PAL_EXCEPT_FILTER(DefaultCatchFilter) + { + } + PAL_ENDTRY + + if (val == 0) + { + val = param.retVal; + } + + return param.retVal; +} + +void UMEntryThunkCode::Encode(BYTE* pTargetCode, void* pvSecretParam) +{ + LIMITED_METHOD_CONTRACT; + +#ifdef _DEBUG + m_alignpad[0] = X86_INSTR_INT3; + m_alignpad[1] = X86_INSTR_INT3; +#endif // _DEBUG + m_movEAX = X86_INSTR_MOV_EAX_IMM32; + m_uet = pvSecretParam; + m_jmp = X86_INSTR_JMP_REL32; + m_execstub = (BYTE*) ((pTargetCode) - (4+((BYTE*)&m_execstub))); + + FlushInstructionCache(GetCurrentProcess(),GetEntryPoint(),sizeof(UMEntryThunkCode)); +} + +UMEntryThunk* UMEntryThunk::Decode(LPVOID pCallback) +{ + LIMITED_METHOD_CONTRACT; + + if (*((BYTE*)pCallback) != X86_INSTR_MOV_EAX_IMM32 || + ( ((size_t)pCallback) & 3) != 2) { + return NULL; + } + return *(UMEntryThunk**)( 1 + (BYTE*)pCallback ); +} + +BOOL DoesSlotCallPrestub(PCODE pCode) +{ + CONTRACTL { + NOTHROW; + GC_NOTRIGGER; + SO_TOLERANT; + PRECONDITION(pCode != NULL); + PRECONDITION(pCode != GetPreStubEntryPoint()); + } CONTRACTL_END; + + // x86 has the following possible sequences for prestub logic: + // 1. slot -> temporary entrypoint -> prestub + // 2. slot -> precode -> prestub + // 3. slot -> precode -> jumprel32 (NGEN case) -> prestub + +#ifdef HAS_COMPACT_ENTRYPOINTS + if (MethodDescChunk::GetMethodDescFromCompactEntryPoint(pCode, TRUE) != NULL) + { + return TRUE; + } +#endif // HAS_COMPACT_ENTRYPOINTS + + if (!IS_ALIGNED(pCode, PRECODE_ALIGNMENT)) + { + return FALSE; + } + +#ifdef HAS_FIXUP_PRECODE + if (*PTR_BYTE(pCode) == X86_INSTR_CALL_REL32) + { + // Note that call could have been patched to jmp in the meantime + pCode = rel32Decode(pCode+1); + + // NGEN case + if (*PTR_BYTE(pCode) == X86_INSTR_JMP_REL32) { + pCode = rel32Decode(pCode+1); + } + + return pCode == (TADDR)PrecodeFixupThunk; + } +#endif + + if (*PTR_BYTE(pCode) != X86_INSTR_MOV_EAX_IMM32 || + *PTR_BYTE(pCode+5) != X86_INSTR_MOV_RM_R || + *PTR_BYTE(pCode+7) != X86_INSTR_JMP_REL32) + { + return FALSE; + } + pCode = rel32Decode(pCode+8); + + // NGEN case + if (*PTR_BYTE(pCode) == X86_INSTR_JMP_REL32) { + pCode = rel32Decode(pCode+1); + } + + return pCode == GetPreStubEntryPoint(); +} + +//========================================================================================== +// In NGen image, virtual slots inherited from cross-module dependencies point to jump thunks. +// These jump thunk initially point to VirtualMethodFixupStub which transfers control here. +// This method 'VirtualMethodFixupWorker' will patch the jump thunk to point to the actual +// inherited method body after we have execute the precode and a stable entry point. 
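+// In byte terms, the fixup below rewrites the thunk's 5-byte "call rel32"
+// (opcode E8, targeting VirtualMethodFixupStub) into a "jmp rel32" (opcode E9)
+// aimed at the resolved method body. The rewrite is done with an aligned 8-byte
+// FastInterlockCompareExchangeLong, so racing callers observe either the old
+// call or the complete new jump, never a torn instruction.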
+// +EXTERN_C PVOID STDCALL VirtualMethodFixupWorker(Object * pThisPtr, CORCOMPILE_VIRTUAL_IMPORT_THUNK *pThunk) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_COOPERATIVE; + ENTRY_POINT; + } + CONTRACTL_END; + + _ASSERTE(pThisPtr != NULL); + VALIDATEOBJECT(pThisPtr); + + MethodTable * pMT = pThisPtr->GetTrueMethodTable(); + + WORD slotNumber = pThunk->slotNum; + _ASSERTE(slotNumber != (WORD)-1); + + PCODE pCode = pMT->GetRestoredSlot(slotNumber); + + if (!DoesSlotCallPrestub(pCode)) + { + // Skip fixup precode jump for better perf + PCODE pDirectTarget = Precode::TryToSkipFixupPrecode(pCode); + if (pDirectTarget != NULL) + pCode = pDirectTarget; + + INT64 oldValue = *(INT64*)pThunk; + BYTE* pOldValue = (BYTE*)&oldValue; + + if (pOldValue[0] == X86_INSTR_CALL_REL32) + { + INT64 newValue = oldValue; + BYTE* pNewValue = (BYTE*)&newValue; + pNewValue[0] = X86_INSTR_JMP_REL32; + + INT_PTR pcRelOffset = (BYTE*)pCode - &pThunk->callJmp[5]; + *(INT32 *)(&pNewValue[1]) = (INT32) pcRelOffset; + + _ASSERTE(IS_ALIGNED(pThunk, sizeof(INT64))); + if (EnsureWritableExecutablePagesNoThrow(pThunk, sizeof(INT64))) + FastInterlockCompareExchangeLong((INT64*)pThunk, newValue, oldValue); + + FlushInstructionCache(GetCurrentProcess(), pThunk, 8); + } + } + + return PVOID(pCode); +} + + +#ifdef FEATURE_READYTORUN + +// +// Allocation of dynamic helpers +// + +#define DYNAMIC_HELPER_ALIGNMENT sizeof(TADDR) + +#define BEGIN_DYNAMIC_HELPER_EMIT(size) \ + SIZE_T cb = size; \ + SIZE_T cbAligned = ALIGN_UP(cb, DYNAMIC_HELPER_ALIGNMENT); \ + BYTE * pStart = (BYTE *)(void *)pAllocator->GetDynamicHelpersHeap()->AllocAlignedMem(cbAligned, DYNAMIC_HELPER_ALIGNMENT); \ + BYTE * p = pStart; + +#define END_DYNAMIC_HELPER_EMIT() \ + _ASSERTE(pStart + cb == p); \ + while (p < pStart + cbAligned) *p++ = X86_INSTR_INT3; \ + ClrFlushInstructionCache(pStart, cbAligned); \ + return (PCODE)pStart + +PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, PCODE target) +{ + STANDARD_VM_CONTRACT; + + BEGIN_DYNAMIC_HELPER_EMIT(10); + + *p++ = 0xB9; // mov ecx, XXXXXX + *(INT32 *)p = (INT32)arg; + p += 4; + + *p++ = X86_INSTR_JMP_REL32; // jmp rel32 + *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target); + p += 4; + + END_DYNAMIC_HELPER_EMIT(); +} + +void DynamicHelpers::EmitHelperWithArg(BYTE*& p, LoaderAllocator * pAllocator, TADDR arg, PCODE target) +{ + CONTRACTL + { + GC_NOTRIGGER; + PRECONDITION(p != NULL && target != NULL); + } + CONTRACTL_END; + + // Move an an argument into the second argument register and jump to a target function. 
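+ // The ten bytes emitted below decode as:
+ //   BA imm32   mov edx, arg     (EDX being the second argument register here)
+ //   E9 rel32   jmp target       (rel32 comes from rel32UsingJumpStub, so the
+ //                                jump may be routed through a jump stub)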
+ + *p++ = 0xBA; // mov edx, XXXXXX + *(INT32 *)p = (INT32)arg; + p += 4; + + *p++ = X86_INSTR_JMP_REL32; // jmp rel32 + *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target); + p += 4; +} + +PCODE DynamicHelpers::CreateHelperWithArg(LoaderAllocator * pAllocator, TADDR arg, PCODE target) +{ + BEGIN_DYNAMIC_HELPER_EMIT(10); + + EmitHelperWithArg(p, pAllocator, arg, target); + + END_DYNAMIC_HELPER_EMIT(); +} + +PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, TADDR arg2, PCODE target) +{ + BEGIN_DYNAMIC_HELPER_EMIT(15); + + *p++ = 0xB9; // mov ecx, XXXXXX + *(INT32 *)p = (INT32)arg; + p += 4; + + *p++ = 0xBA; // mov edx, XXXXXX + *(INT32 *)p = (INT32)arg2; + p += 4; + + *p++ = X86_INSTR_JMP_REL32; // jmp rel32 + *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target); + p += 4; + + END_DYNAMIC_HELPER_EMIT(); +} + +PCODE DynamicHelpers::CreateHelperArgMove(LoaderAllocator * pAllocator, TADDR arg, PCODE target) +{ + BEGIN_DYNAMIC_HELPER_EMIT(12); + + *(UINT16 *)p = 0xD18B; // mov edx, ecx + p += 2; + + *p++ = 0xB9; // mov ecx, XXXXXX + *(INT32 *)p = (INT32)arg; + p += 4; + + *p++ = X86_INSTR_JMP_REL32; // jmp rel32 + *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target); + p += 4; + + END_DYNAMIC_HELPER_EMIT(); +} + +PCODE DynamicHelpers::CreateReturn(LoaderAllocator * pAllocator) +{ + BEGIN_DYNAMIC_HELPER_EMIT(1); + + *p++ = 0xC3; // ret + + END_DYNAMIC_HELPER_EMIT(); +} + +PCODE DynamicHelpers::CreateReturnConst(LoaderAllocator * pAllocator, TADDR arg) +{ + BEGIN_DYNAMIC_HELPER_EMIT(6); + + *p++ = 0xB8; // mov eax, XXXXXX + *(INT32 *)p = (INT32)arg; + p += 4; + + *p++ = 0xC3; // ret + + END_DYNAMIC_HELPER_EMIT(); +} + +PCODE DynamicHelpers::CreateReturnIndirConst(LoaderAllocator * pAllocator, TADDR arg, INT8 offset) +{ + BEGIN_DYNAMIC_HELPER_EMIT((offset != 0) ? 9 : 6); + + *p++ = 0xA1; // mov eax, [XXXXXX] + *(INT32 *)p = (INT32)arg; + p += 4; + + if (offset != 0) + { + // add eax, + *p++ = 0x83; + *p++ = 0xC0; + *p++ = offset; + } + + *p++ = 0xC3; // ret + + END_DYNAMIC_HELPER_EMIT(); +} + +PCODE DynamicHelpers::CreateHelperWithTwoArgs(LoaderAllocator * pAllocator, TADDR arg, PCODE target) +{ + BEGIN_DYNAMIC_HELPER_EMIT(12); + + // pop eax + *p++ = 0x58; + + // push arg + *p++ = 0x68; + *(INT32 *)p = arg; + p += 4; + + // push eax + *p++ = 0x50; + + *p++ = X86_INSTR_JMP_REL32; // jmp rel32 + *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target); + p += 4; + + END_DYNAMIC_HELPER_EMIT(); +} + +PCODE DynamicHelpers::CreateHelperWithTwoArgs(LoaderAllocator * pAllocator, TADDR arg, TADDR arg2, PCODE target) +{ + BEGIN_DYNAMIC_HELPER_EMIT(17); + + // pop eax + *p++ = 0x58; + + // push arg + *p++ = 0x68; + *(INT32 *)p = arg; + p += 4; + + // push arg2 + *p++ = 0x68; + *(INT32 *)p = arg2; + p += 4; + + // push eax + *p++ = 0x50; + + *p++ = X86_INSTR_JMP_REL32; // jmp rel32 + *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target); + p += 4; + + END_DYNAMIC_HELPER_EMIT(); +} + +PCODE DynamicHelpers::CreateDictionaryLookupHelper(LoaderAllocator * pAllocator, CORINFO_RUNTIME_LOOKUP * pLookup, DWORD dictionaryIndexAndSlot, Module * pModule) +{ + STANDARD_VM_CONTRACT; + + PCODE helperAddress = (pLookup->helper == CORINFO_HELP_RUNTIMEHANDLE_METHOD ? 
+ GetEEFuncEntryPoint(JIT_GenericHandleMethodWithSlotAndModule) : + GetEEFuncEntryPoint(JIT_GenericHandleClassWithSlotAndModule)); + + GenericHandleArgs * pArgs = (GenericHandleArgs *)(void *)pAllocator->GetDynamicHelpersHeap()->AllocAlignedMem(sizeof(GenericHandleArgs), DYNAMIC_HELPER_ALIGNMENT); + pArgs->dictionaryIndexAndSlot = dictionaryIndexAndSlot; + pArgs->signature = pLookup->signature; + pArgs->module = (CORINFO_MODULE_HANDLE)pModule; + + // It's available only via the run-time helper function + if (pLookup->indirections == CORINFO_USEHELPER) + { + BEGIN_DYNAMIC_HELPER_EMIT(10); + + // ecx contains the generic context parameter + // mov edx,pArgs + // jmp helperAddress + EmitHelperWithArg(p, pAllocator, (TADDR)pArgs, helperAddress); + + END_DYNAMIC_HELPER_EMIT(); + } + else + { + int indirectionsSize = 0; + for (WORD i = 0; i < pLookup->indirections; i++) + indirectionsSize += (pLookup->offsets[i] >= 0x80 ? 6 : 3); + + int codeSize = indirectionsSize + (pLookup->testForNull ? 21 : 3); + + BEGIN_DYNAMIC_HELPER_EMIT(codeSize); + + if (pLookup->testForNull) + { + // ecx contains the generic context parameter. Save a copy of it in the eax register + // mov eax,ecx + *(UINT16*)p = 0xc889; p += 2; + } + + for (WORD i = 0; i < pLookup->indirections; i++) + { + // mov ecx,qword ptr [ecx+offset] + if (pLookup->offsets[i] >= 0x80) + { + *(UINT16*)p = 0x898b; p += 2; + *(UINT32*)p = (UINT32)pLookup->offsets[i]; p += 4; + } + else + { + *(UINT16*)p = 0x498b; p += 2; + *p++ = (BYTE)pLookup->offsets[i]; + } + } + + // No null test required + if (!pLookup->testForNull) + { + // No fixups needed for R2R + + // mov eax,ecx + *(UINT16*)p = 0xc889; p += 2; + *p++ = 0xC3; // ret + } + else + { + // ecx contains the value of the dictionary slot entry + + _ASSERTE(pLookup->indirections != 0); + + // test ecx,ecx + *(UINT16*)p = 0xc985; p += 2; + + // je 'HELPER_CALL' (a jump of 3 bytes) + *(UINT16*)p = 0x0374; p += 2; + + // mov eax,ecx + *(UINT16*)p = 0xc889; p += 2; + *p++ = 0xC3; // ret + + // 'HELPER_CALL' + { + // Put the generic context back into rcx (was previously saved in eax) + // mov ecx,eax + *(UINT16*)p = 0xc189; p += 2; + + // mov edx,pArgs + // jmp helperAddress + EmitHelperWithArg(p, pAllocator, (TADDR)pArgs, helperAddress); + } + } + + END_DYNAMIC_HELPER_EMIT(); + } +} + +#endif // FEATURE_READYTORUN + + +#endif // DACCESS_COMPILE diff --git a/src/vm/i386/excepcpu.h b/src/vm/i386/excepcpu.h new file mode 100644 index 0000000000..3f2f0810a7 --- /dev/null +++ b/src/vm/i386/excepcpu.h @@ -0,0 +1,87 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+// + +// +// EXCEPX86.H - +// +// This header file is optionally included from Excep.h if the target platform is x86 +// + + +#ifndef __excepx86_h__ +#define __excepx86_h__ + +#include "corerror.h" // HResults for the COM+ Runtime + +#include "../dlls/mscorrc/resource.h" + +#define THROW_CONTROL_FOR_THREAD_FUNCTION ThrowControlForThread + +#define STATUS_CLR_GCCOVER_CODE STATUS_PRIVILEGED_INSTRUCTION + +class Thread; + +#if defined(_MSC_VER) +#pragma warning(disable:4733) // Inline asm assigning to `FS:0` : handler not registered as safe handler + // Actually, the handler getting set is properly registered +#endif + +#define INSTALL_EXCEPTION_HANDLING_RECORD(record) \ + { \ + PEXCEPTION_REGISTRATION_RECORD __record = (record); \ + _ASSERTE(__record < GetCurrentSEHRecord()); \ + __record->Next = (PEXCEPTION_REGISTRATION_RECORD)__readfsdword(0); \ + __writefsdword(0, (DWORD)__record); \ + } + +// +// Note: this only pops a handler from the top of the stack. It will not remove a record from the middle of the +// chain, and I can assure you that you don't want to do that anyway. +// +#define UNINSTALL_EXCEPTION_HANDLING_RECORD(record) \ + { \ + PEXCEPTION_REGISTRATION_RECORD __record = (record); \ + _ASSERTE(__record == GetCurrentSEHRecord()); \ + __writefsdword(0, (DWORD)__record->Next); \ + } + +// stackOverwriteBarrier is used to detect overwriting of stack which will mess up handler registration +#if defined(_DEBUG) +#define DECLARE_CPFH_EH_RECORD(pCurThread) \ + FrameHandlerExRecordWithBarrier *___pExRecordWithBarrier = (FrameHandlerExRecordWithBarrier *)_alloca(sizeof(FrameHandlerExRecordWithBarrier)); \ + for (int ___i =0; ___i < STACK_OVERWRITE_BARRIER_SIZE; ___i++) \ + ___pExRecordWithBarrier->m_StackOverwriteBarrier[___i] = STACK_OVERWRITE_BARRIER_VALUE; \ + FrameHandlerExRecord *___pExRecord = &(___pExRecordWithBarrier->m_ExRecord); \ + ___pExRecord->m_ExReg.Handler = (PEXCEPTION_ROUTINE)COMPlusFrameHandler; \ + ___pExRecord->m_pEntryFrame = (pCurThread)->GetFrame(); + +#else +#define DECLARE_CPFH_EH_RECORD(pCurThread) \ + FrameHandlerExRecord *___pExRecord = (FrameHandlerExRecord *)_alloca(sizeof(FrameHandlerExRecord)); \ + ___pExRecord->m_ExReg.Handler = (PEXCEPTION_ROUTINE)COMPlusFrameHandler; \ + ___pExRecord->m_pEntryFrame = (pCurThread)->GetFrame(); + +#endif + +// +// Retrieves the redirected CONTEXT* from the stack frame of one of the +// RedirectedHandledJITCaseForXXX_Stub's. +// +PTR_CONTEXT GetCONTEXTFromRedirectedStubStackFrame(CONTEXT * pContext); + +PEXCEPTION_REGISTRATION_RECORD GetCurrentSEHRecord(); +PEXCEPTION_REGISTRATION_RECORD GetFirstCOMPlusSEHRecord(Thread*); + +// Determine the address of the instruction that made the current call. +inline +PCODE GetAdjustedCallAddress(PCODE returnAddress) +{ + LIMITED_METHOD_CONTRACT; + return returnAddress - 5; +} + +BOOL AdjustContextForVirtualStub(EXCEPTION_RECORD *pExceptionRecord, CONTEXT *pContext); + +#endif // __excepx86_h__ diff --git a/src/vm/i386/excepx86.cpp b/src/vm/i386/excepx86.cpp new file mode 100644 index 0000000000..27c923b749 --- /dev/null +++ b/src/vm/i386/excepx86.cpp @@ -0,0 +1,3734 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+// + +// + +/* EXCEP.CPP: + * + */ +#include "common.h" + +#include "frames.h" +#include "excep.h" +#include "object.h" +#include "field.h" +#include "dbginterface.h" +#include "cgensys.h" +#include "comutilnative.h" +#include "sigformat.h" +#include "siginfo.hpp" +#include "gc.h" +#include "eedbginterfaceimpl.h" //so we can clearexception in COMPlusThrow +#include "perfcounters.h" +#include "eventtrace.h" +#include "eetoprofinterfacewrapper.inl" +#include "eedbginterfaceimpl.inl" +#include "dllimportcallback.h" +#include "threads.h" +#ifdef FEATURE_REMOTING +#include "appdomainhelper.h" +#endif +#include "eeconfig.h" +#include "vars.hpp" +#include "generics.h" +#include "securityprincipal.h" + +#include "asmconstants.h" +#include "virtualcallstub.h" + +MethodDesc * GetUserMethodForILStub(Thread * pThread, UINT_PTR uStubSP, MethodDesc * pILStubMD, Frame ** ppFrameOut); + +#if !defined(DACCESS_COMPILE) + +#define FORMAT_MESSAGE_BUFFER_LENGTH 1024 + +BOOL ComPlusFrameSEH(EXCEPTION_REGISTRATION_RECORD*); +PEXCEPTION_REGISTRATION_RECORD GetPrevSEHRecord(EXCEPTION_REGISTRATION_RECORD*); + +extern "C" { +// in asmhelpers.asm: +VOID STDCALL ResumeAtJitEHHelper(EHContext *pContext); +int STDCALL CallJitEHFilterHelper(size_t *pShadowSP, EHContext *pContext); +VOID STDCALL CallJitEHFinallyHelper(size_t *pShadowSP, EHContext *pContext); + +BOOL CallRtlUnwind(EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame, + void *callback, + EXCEPTION_RECORD *pExceptionRecord, + void *retval); + +BOOL CallRtlUnwindSafe(EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame, + void *callback, + EXCEPTION_RECORD *pExceptionRecord, + void *retval); +} + +static inline BOOL +CPFH_ShouldUnwindStack(const EXCEPTION_RECORD * pCER) { + + LIMITED_METHOD_CONTRACT; + + _ASSERTE(pCER != NULL); + + // We can only unwind those exceptions whose context/record we don't need for a + // rethrow. This is complus, and stack overflow. For all the others, we + // need to keep the context around for a rethrow, which means they can't + // be unwound. + if (IsComPlusException(pCER) || pCER->ExceptionCode == STATUS_STACK_OVERFLOW) + return TRUE; + else + return FALSE; +} + +static inline BOOL IsComPlusNestedExceptionRecord(EXCEPTION_REGISTRATION_RECORD* pEHR) +{ + LIMITED_METHOD_CONTRACT; + if (pEHR->Handler == (PEXCEPTION_ROUTINE)COMPlusNestedExceptionHandler) + return TRUE; + return FALSE; +} + +EXCEPTION_REGISTRATION_RECORD *TryFindNestedEstablisherFrame(EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame) +{ + LIMITED_METHOD_CONTRACT; + while (pEstablisherFrame->Handler != (PEXCEPTION_ROUTINE)COMPlusNestedExceptionHandler) { + pEstablisherFrame = pEstablisherFrame->Next; + if (pEstablisherFrame == EXCEPTION_CHAIN_END) return 0; + } + return pEstablisherFrame; +} + +#ifdef _DEBUG +// stores last handler we went to in case we didn't get an endcatch and stack is +// corrupted we can figure out who did it. 
+static MethodDesc *gLastResumedExceptionFunc = NULL; +static DWORD gLastResumedExceptionHandler = 0; +#endif + +//--------------------------------------------------------------------- +// void RtlUnwindCallback() +// call back function after global unwind, rtlunwind calls this function +//--------------------------------------------------------------------- +static void RtlUnwindCallback() +{ + LIMITED_METHOD_CONTRACT; + _ASSERTE(!"Should never get here"); +} + +BOOL NExportSEH(EXCEPTION_REGISTRATION_RECORD* pEHR) +{ + LIMITED_METHOD_CONTRACT; + + if ((LPVOID)pEHR->Handler == (LPVOID)UMThunkPrestubHandler) + { + return TRUE; + } + return FALSE; +} + +BOOL FastNExportSEH(EXCEPTION_REGISTRATION_RECORD* pEHR) +{ + LIMITED_METHOD_CONTRACT; + + if ((LPVOID)pEHR->Handler == (LPVOID)FastNExportExceptHandler) + return TRUE; + return FALSE; +} + +BOOL ReverseCOMSEH(EXCEPTION_REGISTRATION_RECORD* pEHR) +{ + LIMITED_METHOD_CONTRACT; + +#ifdef FEATURE_COMINTEROP + if ((LPVOID)pEHR->Handler == (LPVOID)COMPlusFrameHandlerRevCom) + return TRUE; +#endif // FEATURE_COMINTEROP + return FALSE; +} + + +// +// Returns true if the given SEH handler is one of our SEH handlers that is responsible for managing exceptions in +// regions of managed code. +// +BOOL IsUnmanagedToManagedSEHHandler(EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame) +{ + WRAPPER_NO_CONTRACT; + + // + // ComPlusFrameSEH() is for COMPlusFrameHandler & COMPlusNestedExceptionHandler. + // FastNExportSEH() is for FastNExportExceptHandler. + // NExportSEH() is for UMThunkPrestubHandler. + // + return (ComPlusFrameSEH(pEstablisherFrame) || FastNExportSEH(pEstablisherFrame) || NExportSEH(pEstablisherFrame) || ReverseCOMSEH(pEstablisherFrame)); +} + +Frame *GetCurrFrame(EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame) +{ + Frame *pFrame; + WRAPPER_NO_CONTRACT; + _ASSERTE(IsUnmanagedToManagedSEHHandler(pEstablisherFrame)); + if (NExportSEH(pEstablisherFrame)) + pFrame = ((ComToManagedExRecord *)pEstablisherFrame)->GetCurrFrame(); + else + pFrame = ((FrameHandlerExRecord *)pEstablisherFrame)->GetCurrFrame(); + + _ASSERTE(GetThread() == NULL || GetThread()->GetFrame() <= pFrame); + + return pFrame; +} + +EXCEPTION_REGISTRATION_RECORD* GetNextCOMPlusSEHRecord(EXCEPTION_REGISTRATION_RECORD* pRec) { + WRAPPER_NO_CONTRACT; + if (pRec == EXCEPTION_CHAIN_END) + return EXCEPTION_CHAIN_END; + + do { + _ASSERTE(pRec != 0); + pRec = pRec->Next; + } while (pRec != EXCEPTION_CHAIN_END && !IsUnmanagedToManagedSEHHandler(pRec)); + + _ASSERTE(pRec == EXCEPTION_CHAIN_END || IsUnmanagedToManagedSEHHandler(pRec)); + return pRec; +} + + +/* + * GetClrSEHRecordServicingStackPointer + * + * This function searchs all the Frame SEH records, and finds the one that is + * currently signed up to do all exception handling for the given stack pointer + * on the given thread. + * + * Parameters: + * pThread - The thread to search on. + * pStackPointer - The stack location that we are finding the Frame SEH Record for. + * + * Returns + * A pointer to the SEH record, or EXCEPTION_CHAIN_END if none was found. + * + */ + +PEXCEPTION_REGISTRATION_RECORD +GetClrSEHRecordServicingStackPointer(Thread *pThread, + void *pStackPointer) +{ + ThreadExceptionState* pExState = pThread->GetExceptionState(); + + // + // We can only do this if there is a context in the pExInfo. There are cases (most notably the + // EEPolicy::HandleFatalError case) where we don't have that. 
In these cases we will return + // no enclosing handler since we cannot accurately determine the FS:0 entry which services + // this stack address. + // + // The side effect of this is that for these cases, the debugger cannot intercept + // the exception + // + CONTEXT* pContextRecord = pExState->GetContextRecord(); + if (pContextRecord == NULL) + { + return EXCEPTION_CHAIN_END; + } + + void *exceptionSP = dac_cast(GetSP(pContextRecord)); + + + // + // Now set the establishing frame. What this means in English is that we need to find + // the fs:0 entry that handles exceptions for the place on the stack given in stackPointer. + // + PEXCEPTION_REGISTRATION_RECORD pSEHRecord = GetFirstCOMPlusSEHRecord(pThread); + + while (pSEHRecord != EXCEPTION_CHAIN_END) + { + + // + // Skip any SEHRecord which is not a CLR record or was pushed after the exception + // on this thread occurred. + // + if (IsUnmanagedToManagedSEHHandler(pSEHRecord) && (exceptionSP <= (void *)pSEHRecord)) + { + Frame *pFrame = GetCurrFrame(pSEHRecord); + // + // Arcane knowledge here. All Frame records are stored on the stack by the runtime + // in ever decreasing address space. So, we merely have to search back until + // we find the first frame record with a higher stack value to find the + // establishing frame for the given stack address. + // + if (((void *)pFrame) >= pStackPointer) + { + break; + } + + } + + pSEHRecord = GetNextCOMPlusSEHRecord(pSEHRecord); + } + + return pSEHRecord; +} + +#ifdef _DEBUG +// We've deteremined during a stack walk that managed code is transitioning to unamanaged (EE) code. Check that the +// state of the EH chain is correct. +// +// For x86, check that we do INSTALL_COMPLUS_EXCEPTION_HANDLER before calling managed code. This check should be +// done for all managed code sites, not just transistions. But this will catch most problem cases. +void VerifyValidTransitionFromManagedCode(Thread *pThread, CrawlFrame *pCF) +{ + WRAPPER_NO_CONTRACT; + + _ASSERTE(ExecutionManager::IsManagedCode(GetControlPC(pCF->GetRegisterSet()))); + + // Cannot get to the TEB of other threads. So ignore them. + if (pThread != GetThread()) + { + return; + } + + // Find the EH record guarding the current region of managed code, based on the CrawlFrame passed in. + PEXCEPTION_REGISTRATION_RECORD pEHR = GetCurrentSEHRecord(); + + while ((pEHR != EXCEPTION_CHAIN_END) && ((ULONG_PTR)pEHR < GetRegdisplaySP(pCF->GetRegisterSet()))) + { + pEHR = pEHR->Next; + } + + // VerifyValidTransitionFromManagedCode can be called before the CrawlFrame's MethodDesc is initialized. + // Fix that if necessary for the consistency check. + MethodDesc * pFunction = pCF->GetFunction(); + if ((!IsUnmanagedToManagedSEHHandler(pEHR)) && // Will the assert fire? If not, don't waste our time. + (pFunction == NULL)) + { + _ASSERTE(pCF->GetRegisterSet()); + PCODE ip = GetControlPC(pCF->GetRegisterSet()); + pFunction = ExecutionManager::GetCodeMethodDesc(ip); + _ASSERTE(pFunction); + } + + // Great, we've got the EH record that's next up the stack from the current SP (which is in managed code). That + // had better be a record for one of our handlers responsible for handling exceptions in managed code. If its + // not, then someone made it into managed code without setting up one of our EH handlers, and that's really + // bad. + CONSISTENCY_CHECK_MSGF(IsUnmanagedToManagedSEHHandler(pEHR), + ("Invalid transition into managed code!\n\n" + "We're walking this thread's stack and we've reached a managed frame at Esp=0x%p. 
" + "(The method is %s::%s) " + "The very next FS:0 record (0x%p) up from this point on the stack should be one of " + "our 'unmanaged to managed SEH handlers', but its not... its something else, and " + "that's very bad. It indicates that someone managed to call into managed code without " + "setting up the proper exception handling.\n\n" + "Get a good unmanaged stack trace for this thread. All FS:0 records are on the stack, " + "so you can see who installed the last handler. Somewhere between that function and " + "where the thread is now is where the bad transition occurred.\n\n" + "A little extra info: FS:0 = 0x%p, pEHR->Handler = 0x%p\n", + GetRegdisplaySP(pCF->GetRegisterSet()), + pFunction ->m_pszDebugClassName, + pFunction ->m_pszDebugMethodName, + pEHR, + GetCurrentSEHRecord(), + pEHR->Handler)); +} + +#endif + +//================================================================================ + +// There are some things that should never be true when handling an +// exception. This function checks for them. Will assert or trap +// if it finds an error. +static inline void +CPFH_VerifyThreadIsInValidState(Thread* pThread, DWORD exceptionCode, EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame) { + WRAPPER_NO_CONTRACT; + + if ( exceptionCode == STATUS_BREAKPOINT + || exceptionCode == STATUS_SINGLE_STEP) { + return; + } + +#ifdef _DEBUG + // check for overwriting of stack + CheckStackBarrier(pEstablisherFrame); + // trigger check for bad fs:0 chain + GetCurrentSEHRecord(); +#endif + + if (!g_fEEShutDown) { + // An exception on the GC thread, or while holding the thread store lock, will likely lock out the entire process. + if (::IsGCThread() || ThreadStore::HoldingThreadStore()) + { + _ASSERTE(!"Exception during garbage collection or while holding thread store"); + EEPOLICY_HANDLE_FATAL_ERROR(COR_E_EXECUTIONENGINE); + } + } +} + + +#ifdef FEATURE_HIJACK +void +CPFH_AdjustContextForThreadSuspensionRace(CONTEXT *pContext, Thread *pThread) +{ + WRAPPER_NO_CONTRACT; + + PCODE f_IP = GetIP(pContext); + if (Thread::IsAddrOfRedirectFunc((PVOID)f_IP)) { + + // This is a very rare case where we tried to redirect a thread that was + // just about to dispatch an exception, and our update of EIP took, but + // the thread continued dispatching the exception. + // + // If this should happen (very rare) then we fix it up here. + // + _ASSERTE(pThread->GetSavedRedirectContext()); + SetIP(pContext, GetIP(pThread->GetSavedRedirectContext())); + STRESS_LOG1(LF_EH, LL_INFO100, "CPFH_AdjustContextForThreadSuspensionRace: Case 1 setting IP = %x\n", pContext->Eip); + } + + if (f_IP == GetEEFuncEntryPoint(THROW_CONTROL_FOR_THREAD_FUNCTION)) { + + // This is a very rare case where we tried to redirect a thread that was + // just about to dispatch an exception, and our update of EIP took, but + // the thread continued dispatching the exception. + // + // If this should happen (very rare) then we fix it up here. + // + SetIP(pContext, GetIP(pThread->m_OSContext)); + STRESS_LOG1(LF_EH, LL_INFO100, "CPFH_AdjustContextForThreadSuspensionRace: Case 2 setting IP = %x\n", pContext->Eip); + } + +// We have another even rarer race condition: +// - A) On thread A, Debugger puts an int 3 in the code stream at address X +// - A) We hit it and the begin an exception. The eip will be X + 1 (int3 is special) +// - B) Meanwhile, thread B redirects A's eip to Y. 
(Although A is really somewhere +// in the kernel, it looks like it's still in user code, so it can fall under the +// HandledJitCase and can be redirected) +// - A) The OS, trying to be nice, expects we have a breakpoint exception at X+1, +// but does -1 on the address since it knows int3 will leave the eip +1. +// So the context structure it will pass to the Handler is ideally (X+1)-1 = X +// +// ** Here's the race: Since thread B redirected A, the eip is actually Y (not X+1), +// but the kernel still touches it up to Y-1. So there's a window between when we hit a +// bp and when the handler gets called that this can happen. +// This causes an unhandled BP (since the debugger doesn't recognize the bp at Y-1) +// +// So what to do: If we land at Y-1 (ie, if f_IP+1 is the addr of a Redirected Func), +// then restore the EIP back to X. This will skip the redirection. +// Fortunately, this only occurs in cases where it's ok +// to skip. The debugger will recognize the patch and handle it. + + if (Thread::IsAddrOfRedirectFunc((PVOID)(f_IP + 1))) { + _ASSERTE(pThread->GetSavedRedirectContext()); + SetIP(pContext, GetIP(pThread->GetSavedRedirectContext()) - 1); + STRESS_LOG1(LF_EH, LL_INFO100, "CPFH_AdjustContextForThreadSuspensionRace: Case 3 setting IP = %x\n", pContext->Eip); + } + + if (f_IP + 1 == GetEEFuncEntryPoint(THROW_CONTROL_FOR_THREAD_FUNCTION)) { + SetIP(pContext, GetIP(pThread->m_OSContext) - 1); + STRESS_LOG1(LF_EH, LL_INFO100, "CPFH_AdjustContextForThreadSuspensionRace: Case 4 setting IP = %x\n", pContext->Eip); + } +} +#endif // FEATURE_HIJACK + + +// We want to leave true null reference exceptions alone. But if we are +// trashing memory, we don't want the application to swallow it. The 0x100 +// below will give us false positives for debugging, if the app is accessing +// a field more than 256 bytes down an object, where the reference is null. +// +// Removed use of the IgnoreUnmanagedExceptions reg key...simply return false now. +// +static inline BOOL +CPFH_ShouldIgnoreException(EXCEPTION_RECORD *pExceptionRecord) { + LIMITED_METHOD_CONTRACT; + return FALSE; +} + +static inline void +CPFH_UpdatePerformanceCounters() { + WRAPPER_NO_CONTRACT; + COUNTER_ONLY(GetPerfCounters().m_Excep.cThrown++); +} + + +//****************************************************************************** +EXCEPTION_DISPOSITION COMPlusAfterUnwind( + EXCEPTION_RECORD *pExceptionRecord, + EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame, + ThrowCallbackType& tct) +{ + WRAPPER_NO_CONTRACT; + + // Note: we've completed the unwind pass up to the establisher frame, and we're headed off to finish our + // cleanup and end up back in jitted code. Any more FS0 handlers pushed from this point on out will _not_ be + // unwound. We go ahead and assert right here that indeed there are no handlers below the establisher frame + // before we go any further. + _ASSERTE(pEstablisherFrame == GetCurrentSEHRecord()); + + Thread* pThread = GetThread(); + + _ASSERTE(tct.pCurrentExceptionRecord == pEstablisherFrame); + + NestedHandlerExRecord nestedHandlerExRecord; + nestedHandlerExRecord.Init((PEXCEPTION_ROUTINE)COMPlusNestedExceptionHandler, GetCurrFrame(pEstablisherFrame)); + + // ... and now, put the nested record back on. 
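+    // For illustration only: on x86, installing an SEH record amounts to linking a
+    // stack-allocated EXCEPTION_REGISTRATION_RECORD at the head of the FS:[0] chain.
+    // A minimal sketch of that idea (not the actual expansion of the macro below):
+    //
+    //   EXCEPTION_REGISTRATION_RECORD *pNewHead = &nestedHandlerExRecord.m_ExReg;
+    //   pNewHead->Next    = (EXCEPTION_REGISTRATION_RECORD *)__readfsdword(0); // current chain head
+    //   pNewHead->Handler = (PEXCEPTION_ROUTINE)COMPlusNestedExceptionHandler;
+    //   __writefsdword(0, (DWORD)pNewHead);                                    // this record is the new head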
+ INSTALL_EXCEPTION_HANDLING_RECORD(&(nestedHandlerExRecord.m_ExReg)); + + // We entered COMPlusAfterUnwind in PREEMP, but we need to be in COOP from here on out + GCX_COOP_NO_DTOR(); + + tct.bIsUnwind = TRUE; + tct.pProfilerNotify = NULL; + + LOG((LF_EH, LL_INFO100, "COMPlusFrameHandler: unwinding\n")); + + tct.bUnwindStack = CPFH_ShouldUnwindStack(pExceptionRecord); + + LOG((LF_EH, LL_INFO1000, "COMPlusAfterUnwind: going to: pFunc:%#X, pStack:%#X\n", + tct.pFunc, tct.pStack)); + + // TODO: UnwindFrames ends up calling into StackWalkFrames which is SO_INTOLERANT + // as is UnwindFrames, etc... Should we make COMPlusAfterUnwind SO_INTOLERANT??? + ANNOTATION_VIOLATION(SOToleranceViolation); + + UnwindFrames(pThread, &tct); + +#ifdef DEBUGGING_SUPPORTED + ExInfo* pExInfo = pThread->GetExceptionState()->GetCurrentExceptionTracker(); + if (pExInfo->m_ValidInterceptionContext) + { + // By now we should have all unknown FS:[0] handlers unwinded along with the managed Frames until + // the interception point. We can now pop nested exception handlers and resume at interception context. + EHContext context = pExInfo->m_InterceptionContext; + pExInfo->m_InterceptionContext.Init(); + pExInfo->m_ValidInterceptionContext = FALSE; + + UnwindExceptionTrackerAndResumeInInterceptionFrame(pExInfo, &context); + } +#endif // DEBUGGING_SUPPORTED + + _ASSERTE(!"Should not get here"); + return ExceptionContinueSearch; +} // EXCEPTION_DISPOSITION COMPlusAfterUnwind() + +#ifdef DEBUGGING_SUPPORTED + +//--------------------------------------------------------------------------------------- +// +// This function is called to intercept an exception and start an unwind. +// +// Arguments: +// pCurrentEstablisherFrame - the exception registration record covering the stack range +// containing the interception point +// pExceptionRecord - EXCEPTION_RECORD of the exception being intercepted +// +// Return Value: +// ExceptionContinueSearch if the exception cannot be intercepted +// +// Notes: +// If the exception is intercepted, this function never returns. +// + +EXCEPTION_DISPOSITION ClrDebuggerDoUnwindAndIntercept(EXCEPTION_REGISTRATION_RECORD *pCurrentEstablisherFrame, + EXCEPTION_RECORD *pExceptionRecord) +{ + WRAPPER_NO_CONTRACT; + + if (!CheckThreadExceptionStateForInterception()) + { + return ExceptionContinueSearch; + } + + Thread* pThread = GetThread(); + ThreadExceptionState* pExState = pThread->GetExceptionState(); + + EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame; + ThrowCallbackType tct; + tct.Init(); + + pExState->GetDebuggerState()->GetDebuggerInterceptInfo(&pEstablisherFrame, + &(tct.pFunc), + &(tct.dHandler), + &(tct.pStack), + NULL, + &(tct.pBottomFrame) + ); + + // + // If the handler that we've selected as the handler for the target frame of the unwind is in fact above the + // handler that we're currently executing in, then use the current handler instead. Why? Our handlers for + // nested exceptions actually process managed frames that live above them, up to the COMPlusFrameHanlder that + // pushed the nested handler. If the user selectes a frame above the nested handler, then we will have selected + // the COMPlusFrameHandler above the current nested handler. But we don't want to ask RtlUnwind to unwind past + // the nested handler that we're currently executing in. + // + if (pEstablisherFrame > pCurrentEstablisherFrame) + { + // This should only happen if we're in a COMPlusNestedExceptionHandler. 
+ _ASSERTE(IsComPlusNestedExceptionRecord(pCurrentEstablisherFrame)); + + pEstablisherFrame = pCurrentEstablisherFrame; + } + +#ifdef _DEBUG + tct.pCurrentExceptionRecord = pEstablisherFrame; +#endif + + LOG((LF_EH|LF_CORDB, LL_INFO100, "ClrDebuggerDoUnwindAndIntercept: Intercepting at %s\n", tct.pFunc->m_pszDebugMethodName)); + LOG((LF_EH|LF_CORDB, LL_INFO100, "\t\t: pFunc is 0x%X\n", tct.pFunc)); + LOG((LF_EH|LF_CORDB, LL_INFO100, "\t\t: pStack is 0x%X\n", tct.pStack)); + + CallRtlUnwindSafe(pEstablisherFrame, RtlUnwindCallback, pExceptionRecord, 0); + + ExInfo* pExInfo = pThread->GetExceptionState()->GetCurrentExceptionTracker(); + if (pExInfo->m_ValidInterceptionContext) + { + // By now we should have all unknown FS:[0] handlers unwinded along with the managed Frames until + // the interception point. We can now pop nested exception handlers and resume at interception context. + GCX_COOP(); + EHContext context = pExInfo->m_InterceptionContext; + pExInfo->m_InterceptionContext.Init(); + pExInfo->m_ValidInterceptionContext = FALSE; + + UnwindExceptionTrackerAndResumeInInterceptionFrame(pExInfo, &context); + } + + // on x86 at least, RtlUnwind always returns + + // Note: we've completed the unwind pass up to the establisher frame, and we're headed off to finish our + // cleanup and end up back in jitted code. Any more FS0 handlers pushed from this point on out will _not_ be + // unwound. + return COMPlusAfterUnwind(pExState->GetExceptionRecord(), pEstablisherFrame, tct); +} // EXCEPTION_DISPOSITION ClrDebuggerDoUnwindAndIntercept() + +#endif // DEBUGGING_SUPPORTED + +// This is a wrapper around the assembly routine that invokes RtlUnwind in the OS. +// When we invoke RtlUnwind, the OS will modify the ExceptionFlags field in the +// exception record to reflect unwind. Since we call RtlUnwind in the first pass +// with a valid exception record when we find an exception handler AND because RtlUnwind +// returns on x86, the OS would have flagged the exception record for unwind. +// +// Incase the exception is rethrown from the catch/filter-handler AND it's a non-COMPLUS +// exception, the runtime will use the reference to the saved exception record to reraise +// the exception, as part of rethrow fixup. Since the OS would have modified the exception record +// to reflect unwind, this wrapper will "reset" the ExceptionFlags field when RtlUnwind returns. +// Otherwise, the rethrow will result in second pass, as opposed to first, since the ExceptionFlags +// would indicate an unwind. +// +// This rethrow issue does not affect COMPLUS exceptions since we always create a brand new exception +// record for them in RaiseTheExceptionInternalOnly. +BOOL CallRtlUnwindSafe(EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame, + void *callback, + EXCEPTION_RECORD *pExceptionRecord, + void *retval) +{ + LIMITED_METHOD_CONTRACT; + + // Save the ExceptionFlags value before invoking RtlUnwind. + DWORD dwExceptionFlags = pExceptionRecord->ExceptionFlags; + + BOOL fRetVal = CallRtlUnwind(pEstablisherFrame, callback, pExceptionRecord, retval); + + // Reset ExceptionFlags field, if applicable + if (pExceptionRecord->ExceptionFlags != dwExceptionFlags) + { + // We would expect the 32bit OS to have set the unwind flag at this point. 
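+        // (EXCEPTION_UNWINDING is the flag that RtlUnwind ORs into ExceptionFlags while it
+        // drives the second, unwind, pass over the FS:0 chain.)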
+ _ASSERTE(pExceptionRecord->ExceptionFlags & EXCEPTION_UNWINDING); + LOG((LF_EH, LL_INFO100, "CallRtlUnwindSafe: Resetting ExceptionFlags from %lu to %lu\n", pExceptionRecord->ExceptionFlags, dwExceptionFlags)); + pExceptionRecord->ExceptionFlags = dwExceptionFlags; + } + + return fRetVal; +} + +//****************************************************************************** +// The essence of the first pass handler (after we've decided to actually do +// the first pass handling). +//****************************************************************************** +inline EXCEPTION_DISPOSITION __cdecl +CPFH_RealFirstPassHandler( // ExceptionContinueSearch, etc. + EXCEPTION_RECORD *pExceptionRecord, // The exception record, with exception type. + EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame, // Exception frame on whose behalf this is called. + CONTEXT *pContext, // Context from the exception. + void *pDispatcherContext, // @todo + BOOL bAsynchronousThreadStop, // @todo + BOOL fPGCDisabledOnEntry) // @todo +{ + // We don't want to use a runtime contract here since this codepath is used during + // the processing of a hard SO. Contracts use a significant amount of stack + // which we can't afford for those cases. + STATIC_CONTRACT_THROWS; + STATIC_CONTRACT_GC_TRIGGERS; + STATIC_CONTRACT_MODE_COOPERATIVE; + STATIC_CONTRACT_SO_TOLERANT; + +#ifdef _DEBUG + static int breakOnFirstPass = -1; + + if (breakOnFirstPass == -1) + breakOnFirstPass = CLRConfig::GetConfigValue(CLRConfig::INTERNAL_BreakOnFirstPass); + + if (breakOnFirstPass != 0) + { + _ASSERTE(!"First pass exception handler"); + } +#endif + + EXCEPTION_DISPOSITION retval; + DWORD exceptionCode = pExceptionRecord->ExceptionCode; + Thread *pThread = GetThread(); + +#ifdef _DEBUG + static int breakOnSO = -1; + + if (breakOnSO == -1) + breakOnSO = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_BreakOnSO); + + if (breakOnSO != 0 && exceptionCode == STATUS_STACK_OVERFLOW) + { + DebugBreak(); // ASSERTing will overwrite the guard region + } +#endif + + // We always want to be in co-operative mode when we run this function and whenever we return + // from it, want to go to pre-emptive mode because are returning to OS. + _ASSERTE(pThread->PreemptiveGCDisabled()); + + BOOL bPopNestedHandlerExRecord = FALSE; + LFH found = LFH_NOT_FOUND; // Result of calling LookForHandler. + BOOL bRethrownException = FALSE; + BOOL bNestedException = FALSE; + +#if defined(USE_FEF) + BOOL bPopFaultingExceptionFrame = FALSE; + FrameWithCookie faultingExceptionFrame; +#endif // USE_FEF + ExInfo* pExInfo = &(pThread->GetExceptionState()->m_currentExInfo); + + ThrowCallbackType tct; + tct.Init(); + + tct.pTopFrame = GetCurrFrame(pEstablisherFrame); // highest frame to search to + +#ifdef _DEBUG + tct.pCurrentExceptionRecord = pEstablisherFrame; + tct.pPrevExceptionRecord = GetPrevSEHRecord(pEstablisherFrame); +#endif // _DEBUG + + BOOL fIsManagedCode = pContext ? ExecutionManager::IsManagedCode(GetIP(pContext)) : FALSE; + + + // this establishes a marker so can determine if are processing a nested exception + // don't want to use the current frame to limit search as it could have been unwound by + // the time get to nested handler (ie if find an exception, unwind to the call point and + // then resume in the catch and then get another exception) so make the nested handler + // have the same boundary as this one. 
If nested handler can't find a handler, we won't + // end up searching this frame list twice because the nested handler will set the search + // boundary in the thread and so if get back to this handler it will have a range that starts + // and ends at the same place. + + NestedHandlerExRecord nestedHandlerExRecord; + nestedHandlerExRecord.Init((PEXCEPTION_ROUTINE)COMPlusNestedExceptionHandler, GetCurrFrame(pEstablisherFrame)); + + INSTALL_EXCEPTION_HANDLING_RECORD(&(nestedHandlerExRecord.m_ExReg)); + bPopNestedHandlerExRecord = TRUE; + +#if defined(USE_FEF) + // Note: don't attempt to push a FEF for an exception in managed code if we weren't in cooperative mode when + // the exception was received. If preemptive GC was enabled when we received the exception, then it means the + // exception was rethrown from unmangaed code (including EE impl), and we shouldn't push a FEF. + if (fIsManagedCode && + fPGCDisabledOnEntry && + (pThread->m_pFrame == FRAME_TOP || + pThread->m_pFrame->GetVTablePtr() != FaultingExceptionFrame::GetMethodFrameVPtr() || + (size_t)pThread->m_pFrame > (size_t)pEstablisherFrame)) + { + // setup interrupted frame so that GC during calls to init won't collect the frames + // only need it for non COM+ exceptions in managed code when haven't already + // got one on the stack (will have one already if we have called rtlunwind because + // the instantiation that called unwind would have installed one) + faultingExceptionFrame.InitAndLink(pContext); + bPopFaultingExceptionFrame = TRUE; + } +#endif // USE_FEF + + OBJECTREF e; + e = pThread->LastThrownObject(); + + STRESS_LOG7(LF_EH, LL_INFO10, "CPFH_RealFirstPassHandler: code:%X, LastThrownObject:%p, MT:%pT" + ", IP:%p, SP:%p, pContext:%p, pEstablisherFrame:%p\n", + exceptionCode, OBJECTREFToObject(e), (e!=0)?e->GetMethodTable():0, + pContext ? GetIP(pContext) : 0, pContext ? GetSP(pContext) : 0, + pContext, pEstablisherFrame); + +#ifdef LOGGING + // If it is a complus exception, and there is a thrown object, get its name, for better logging. + if (IsComPlusException(pExceptionRecord)) + { + const char * eClsName = "!EXCEPTION_COMPLUS"; + if (e != 0) + { + eClsName = e->GetTrueMethodTable()->GetDebugClassName(); + } + LOG((LF_EH, LL_INFO100, "CPFH_RealFirstPassHandler: exception: 0x%08X, class: '%s', IP: 0x%p\n", + exceptionCode, eClsName, pContext ? GetIP(pContext) : NULL)); + } +#endif + + EXCEPTION_POINTERS exceptionPointers = {pExceptionRecord, pContext}; + + STRESS_LOG4(LF_EH, LL_INFO10000, "CPFH_RealFirstPassHandler: setting boundaries: Exinfo: 0x%p, BottomMostHandler:0x%p, SearchBoundary:0x%p, TopFrame:0x%p\n", + pExInfo, pExInfo->m_pBottomMostHandler, pExInfo->m_pSearchBoundary, tct.pTopFrame); + + // Here we are trying to decide if we are coming in as: + // 1) first handler in a brand new exception + // 2) a subsequent handler in an exception + // 3) a nested exception + // m_pBottomMostHandler is the registration structure (establisher frame) for the most recent (ie lowest in + // memory) non-nested handler that was installed and pEstablisher frame is what the current handler + // was registered with. + // The OS calls each registered handler in the chain, passing its establisher frame to it. + if (pExInfo->m_pBottomMostHandler != NULL && pEstablisherFrame > pExInfo->m_pBottomMostHandler) + { + STRESS_LOG3(LF_EH, LL_INFO10000, "CPFH_RealFirstPassHandler: detected subsequent handler. 
ExInfo:0x%p, BottomMost:0x%p SearchBoundary:0x%p\n", + pExInfo, pExInfo->m_pBottomMostHandler, pExInfo->m_pSearchBoundary); + + // If the establisher frame of this handler is greater than the bottommost then it must have been + // installed earlier and therefore we are case 2 + if (pThread->GetThrowable() == NULL) + { + // Bottommost didn't setup a throwable, so not exception not for us + retval = ExceptionContinueSearch; + goto exit; + } + + // setup search start point + tct.pBottomFrame = pExInfo->m_pSearchBoundary; + + if (tct.pTopFrame == tct.pBottomFrame) + { + // this will happen if our nested handler already searched for us so we don't want + // to search again + retval = ExceptionContinueSearch; + goto exit; + } + } + else + { // we are either case 1 or case 3 +#if defined(_DEBUG_IMPL) + //@todo: merge frames, context, handlers + if (pThread->GetFrame() != FRAME_TOP) + pThread->GetFrame()->LogFrameChain(LF_EH, LL_INFO1000); +#endif // _DEBUG_IMPL + + // If the exception was rethrown, we'll create a new ExInfo, which will represent the rethrown exception. + // The original exception is not the rethrown one. + if (pExInfo->m_ExceptionFlags.IsRethrown() && pThread->LastThrownObject() != NULL) + { + pExInfo->m_ExceptionFlags.ResetIsRethrown(); + bRethrownException = TRUE; + +#if defined(USE_FEF) + if (bPopFaultingExceptionFrame) + { + // if we added a FEF, it will refer to the frame at the point of the original exception which is + // already unwound so don't want it. + // If we rethrew the exception we have already added a helper frame for the rethrow, so don't + // need this one. If we didn't rethrow it, (ie rethrow from native) then there the topmost frame will + // be a transition to native frame in which case we don't need it either + faultingExceptionFrame.Pop(); + bPopFaultingExceptionFrame = FALSE; + } +#endif + } + + // If the establisher frame is less than the bottommost handler, then this is nested because the + // establisher frame was installed after the bottommost. + if (pEstablisherFrame < pExInfo->m_pBottomMostHandler + /* || IsComPlusNestedExceptionRecord(pEstablisherFrame) */ ) + { + bNestedException = TRUE; + + // case 3: this is a nested exception. Need to save and restore the thread info + STRESS_LOG3(LF_EH, LL_INFO10000, "CPFH_RealFirstPassHandler: ExInfo:0x%p detected nested exception 0x%p < 0x%p\n", + pExInfo, pEstablisherFrame, pExInfo->m_pBottomMostHandler); + + EXCEPTION_REGISTRATION_RECORD* pNestedER = TryFindNestedEstablisherFrame(pEstablisherFrame); + ExInfo *pNestedExInfo; + + if (!pNestedER || pNestedER >= pExInfo->m_pBottomMostHandler ) + { + // RARE CASE. We've re-entered the EE from an unmanaged filter. + // + // OR + // + // We can be here if we dont find a nested exception handler. This is exemplified using + // call chain of scenario 2 explained further below. + // + // Assuming __try of NativeB throws an exception E1 and it gets caught in ManagedA2, then + // bottom-most handler (BMH) is going to be CPFH_A. The catch will trigger an unwind + // and invoke __finally in NativeB. Let the __finally throw a new exception E2. + // + // Assuming ManagedB2 has a catch block to catch E2, when we enter CPFH_B looking for a + // handler for E2, our establisher frame will be that of CPFH_B, which will be lower + // in stack than current BMH (which is CPFH_A). Thus, we will come here, determining + // E2 to be nested exception correctly but not find a nested exception handler. 
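+                // For illustration only, the NativeB __try/__finally described just above
+                // might look roughly like this (the exception codes are hypothetical placeholders):
+                //
+                //   void NativeB()
+                //   {
+                //       __try
+                //       {
+                //           RaiseException(E1_CODE, 0, 0, NULL);  // E1: caught far up the stack, in ManagedA2
+                //       }
+                //       __finally
+                //       {
+                //           RaiseException(E2_CODE, 0, 0, NULL);  // E2: raised during E1's unwind, caught in ManagedB2
+                //       }
+                //   }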
+ void *limit = (void *) GetPrevSEHRecord(pExInfo->m_pBottomMostHandler); + + pNestedExInfo = new (nothrow) ExInfo(); // Very rare failure here; need robust allocator. + if (pNestedExInfo == NULL) + { // if we can't allocate memory, we can't correctly continue. + #if defined(_DEBUG) + if (CLRConfig::GetConfigValue(CLRConfig::INTERNAL_NestedEhOom)) + _ASSERTE(!"OOM in callback from unmanaged filter."); + #endif // _DEBUG + + EEPOLICY_HANDLE_FATAL_ERROR(COR_E_OUTOFMEMORY); + } + + + pNestedExInfo->m_StackAddress = limit; // Note: this is also the flag that tells us this + // ExInfo was stack allocated. + } + else + { + pNestedExInfo = &((NestedHandlerExRecord*)pNestedER)->m_handlerInfo; + } + + LOG((LF_EH, LL_INFO100, "CPFH_RealFirstPassHandler: PushExInfo() current: 0x%p previous: 0x%p\n", + pExInfo->m_StackAddress, pNestedExInfo->m_StackAddress)); + + _ASSERTE(pNestedExInfo); + pNestedExInfo->m_hThrowable = NULL; // pNestedExInfo may be stack allocated, and as such full of + // garbage. m_hThrowable must be sane, so set it to NULL. (We could + // zero the entire record, but this is cheaper.) + + pNestedExInfo->CopyAndClearSource(pExInfo); + + pExInfo->m_pPrevNestedInfo = pNestedExInfo; // Save at head of nested info chain + +#if 0 +/* the following code was introduced in Whidbey as part of the Faulting Exception Frame removal (12/03). + However it isn't correct. If any nested exceptions occur while processing a rethrow, we would + incorrectly consider the nested exception to be a rethrow. See VSWhidbey 349379 for an example. + + Therefore I am disabling this code until we see a failure that explains why it was added in the first + place. cwb 9/04. +*/ + // If we're here as a result of a rethrown exception, set the rethrown flag on the new ExInfo. + if (bRethrownException) + { + pExInfo->m_ExceptionFlags.SetIsRethrown(); + } +#endif + } + else + { + // At this point, either: + // + // 1) the bottom-most handler is NULL, implying this is a new exception for which we are getting ready, OR + // 2) the bottom-most handler is not-NULL, implying that a there is already an existing exception in progress. + // + // Scenario 1 is that of a new throw and is easy to understand. Scenario 2 is the interesting one. + // + // ManagedA1 -> ManagedA2 -> ManagedA3 -> NativeCodeA -> ManagedB1 -> ManagedB2 -> ManagedB3 -> NativeCodeB + // + // On x86, each block of managed code is protected by one COMPlusFrameHandler [CPFH] (CLR's exception handler + // for managed code), unlike 64bit where each frame has a personality routine attached to it. Thus, + // for the example above, assume CPFH_A protects ManagedA* blocks and is setup just before the call to + // ManagedA1. Likewise, CPFH_B protects ManagedB* blocks and is setup just before the call to ManagedB1. + // + // When ManagedB3 throws an exception, CPFH_B is invoked to look for a handler in all of the ManagedB* blocks. + // At this point, it is setup as the "bottom-most-handler" (BMH). If no handler is found and exception reaches + // ManagedA* blocks, CPFH_A is invoked to look for a handler and thus, becomes BMH. + // + // Thus, in the first pass on x86 for a given exception, a particular CPFH will be invoked only once when looking + // for a handler and thus, registered as BMH only once. Either the exception goes unhandled and the process will + // terminate or a handler will be found and second pass will commence. + // + // However, assume NativeCodeB had a __try/__finally and raised an exception [E1] within the __try. 
Let's assume + // it gets caught in ManagedB1 and thus, unwind is triggered. At this point, the active exception tracker + // has context about the exception thrown out of __try and CPFH_B is registered as BMH. + // + // If the __finally throws a new exception [E2], CPFH_B will be invoked again for first pass while looking for + // a handler for the thrown exception. Since BMH is already non-NULL, we will come here since EstablisherFrame will be + // the same as BMH (because EstablisherFrame will be that of CPFH_B). We will proceed to overwrite the "required" parts + // of the existing exception tracker with the details of E2 (see setting of exception record and context below), erasing + // any artifact of E1. + // + // This is unlike Scenario 1 when exception tracker is completely initialized to default values. This is also + // unlike 64bit which will detect that E1 and E2 are different exceptions and hence, will setup a new tracker + // to track E2, effectively behaving like Scenario 1 above. X86 cannot do this since there is no nested exception + // tracker setup that gets to see the new exception. + // + // Thus, if E1 was a CSE and E2 isn't, we will come here and treat E2 as a CSE as well since corruption severity + // is initialized as part of exception tracker initialization. Thus, E2 will start to be treated as CSE, which is + // incorrect. Similar argument applies to delivery of First chance exception notification delivery. + // + // Another example why we should unify EH systems :) + // + // To address this issue, we will need to reset exception tracker here, just like the overwriting of "required" + // parts of exception tracker. + + // If the current establisher frame is the same as the bottom-most-handler and we are here + // in the first pass, assert that current exception and the one tracked by active exception tracker + // are indeed different exceptions. In such a case, we must reset the exception tracker so that it can be + // setup correctly further down when CEHelper::SetupCorruptionSeverityForActiveException is invoked. + + if ((pExInfo->m_pBottomMostHandler != NULL) && + (pEstablisherFrame == pExInfo->m_pBottomMostHandler)) + { + // Current exception should be different from the one exception tracker is already tracking. + _ASSERTE(pExceptionRecord != pExInfo->m_pExceptionRecord); + + // This cannot be nested exceptions - they are handled earlier (see above). + _ASSERTE(!bNestedException); + + LOG((LF_EH, LL_INFO100, "CPFH_RealFirstPassHandler: Bottom-most handler (0x%p) is the same as EstablisherFrame.\n", + pExInfo->m_pBottomMostHandler)); + LOG((LF_EH, LL_INFO100, "CPFH_RealFirstPassHandler: Exception record in exception tracker is 0x%p, while that of new exception is 0x%p.\n", + pExInfo->m_pExceptionRecord, pExceptionRecord)); + LOG((LF_EH, LL_INFO100, "CPFH_RealFirstPassHandler: Resetting exception tracker (0x%p).\n", pExInfo)); + + // This will reset the exception tracker state, including the corruption severity. + pExInfo->Init(); + } + } + + // If we are handling a fault from managed code, we need to set the Thread->ExInfo->pContext to + // the current fault context, which is used in the stack walk to get back into the managed + // stack with the correct registers. (Previously, this was done by linking in a FaultingExceptionFrame + // record.) + // We are about to create the managed exception object, which may trigger a GC, so set this up now. 
+ + pExInfo->m_pExceptionRecord = pExceptionRecord; + pExInfo->m_pContext = pContext; + if (pContext && ShouldHandleManagedFault(pExceptionRecord, pContext, pEstablisherFrame, pThread)) + { // If this was a fault in managed code, rather than create a Frame for stackwalking, + // we can use this exinfo (after all, it has all the register info.) + pExInfo->m_ExceptionFlags.SetUseExInfoForStackwalk(); + } + + // It should now be safe for a GC to happen. + + // case 1 & 3: this is the first time through of a new, nested, or rethrown exception, so see if we can + // find a handler. Only setup throwable if are bottommost handler + if (IsComPlusException(pExceptionRecord) && (!bAsynchronousThreadStop)) + { + + // Update the throwable from the last thrown object. Note: this may cause OOM, in which case we replace + // both throwables with the preallocated OOM exception. + pThread->SafeSetThrowables(pThread->LastThrownObject()); + + // now we've got a COM+ exception, fall through to so see if we handle it + + STRESS_LOG3(LF_EH, LL_INFO10000, "CPFH_RealFirstPassHandler: fall through ExInfo:0x%p setting m_pBottomMostHandler to 0x%p from 0x%p\n", + pExInfo, pEstablisherFrame, pExInfo->m_pBottomMostHandler); + pExInfo->m_pBottomMostHandler = pEstablisherFrame; + } + else if (bRethrownException) + { + // If it was rethrown and not COM+, will still be the last one thrown. Either we threw it last and + // stashed it here or someone else caught it and rethrew it, in which case it will still have been + // originally stashed here. + + // Update the throwable from the last thrown object. Note: this may cause OOM, in which case we replace + // both throwables with the preallocated OOM exception. + pThread->SafeSetThrowables(pThread->LastThrownObject()); + STRESS_LOG3(LF_EH, LL_INFO10000, "CPFH_RealFirstPassHandler: rethrow non-COM+ ExInfo:0x%p setting m_pBottomMostHandler to 0x%p from 0x%p\n", + pExInfo, pEstablisherFrame, pExInfo->m_pBottomMostHandler); + pExInfo->m_pBottomMostHandler = pEstablisherFrame; + } + else + { + if (!fIsManagedCode) + { + tct.bDontCatch = false; + } + + if (exceptionCode == STATUS_BREAKPOINT) + { + // don't catch int 3 + retval = ExceptionContinueSearch; + goto exit; + } + + // We need to set m_pBottomMostHandler here, Thread::IsExceptionInProgress returns 1. + // This is a necessary part of suppressing thread abort exceptions in the constructor + // of any exception object we might create. + STRESS_LOG3(LF_EH, LL_INFO10000, "CPFH_RealFirstPassHandler: setting ExInfo:0x%p m_pBottomMostHandler for IsExceptionInProgress to 0x%p from 0x%p\n", + pExInfo, pEstablisherFrame, pExInfo->m_pBottomMostHandler); + pExInfo->m_pBottomMostHandler = pEstablisherFrame; + + // Create the managed exception object. + OBJECTREF throwable = CreateCOMPlusExceptionObject(pThread, pExceptionRecord, bAsynchronousThreadStop); + + // Set the throwables on the thread to the newly created object. If this fails, it will return a + // preallocated exception object instead. This also updates the last thrown exception, for rethrows. + throwable = pThread->SafeSetThrowables(throwable); + + // Set the exception code and pointers. We set these after setting the throwables on the thread, + // because if the proper exception is replaced by an OOM exception, we still want the exception code + // and pointers set in the OOM exception. 
+ EXCEPTIONREF exceptionRef = (EXCEPTIONREF)throwable; + exceptionRef->SetXCode(pExceptionRecord->ExceptionCode); + exceptionRef->SetXPtrs(&exceptionPointers); + } + + tct.pBottomFrame = NULL; + + EEToProfilerExceptionInterfaceWrapper::ExceptionThrown(pThread); + + CPFH_UpdatePerformanceCounters(); + } // End of case-1-or-3 + + { + // Allocate storage for the stack trace. + OBJECTREF throwable = NULL; + GCPROTECT_BEGIN(throwable); + throwable = pThread->GetThrowable(); + +#ifdef FEATURE_CORRUPTING_EXCEPTIONS + { + BEGIN_SO_INTOLERANT_CODE(GetThread()); + // Setup the state in current exception tracker indicating the corruption severity + // of the active exception. + CEHelper::SetupCorruptionSeverityForActiveException(bRethrownException, bNestedException, + CEHelper::ShouldTreatActiveExceptionAsNonCorrupting()); + END_SO_INTOLERANT_CODE; + } +#endif // FEATURE_CORRUPTING_EXCEPTIONS + +#ifdef FEATURE_CORECLR + // Check if we are dealing with AV or not and if we are, + // ensure that this is a real AV and not managed AV exception + BOOL fIsThrownExceptionAV = FALSE; + if ((pExceptionRecord->ExceptionCode == STATUS_ACCESS_VIOLATION) && + (MscorlibBinder::GetException(kAccessViolationException) == throwable->GetMethodTable())) + { + // Its an AV - set the flag + fIsThrownExceptionAV = TRUE; + } + + // Did we get an AV? + if (fIsThrownExceptionAV == TRUE) + { + // Get the escalation policy action for handling AV + EPolicyAction actionAV = GetEEPolicy()->GetActionOnFailure(FAIL_AccessViolation); + + // Valid actions are: eNoAction (default behviour) or eRudeExitProcess + _ASSERTE(((actionAV == eNoAction) || (actionAV == eRudeExitProcess))); + if (actionAV == eRudeExitProcess) + { + LOG((LF_EH, LL_INFO100, "CPFH_RealFirstPassHandler: AccessViolation handler found and doing RudeExitProcess due to escalation policy (eRudeExitProcess)\n")); + + // EEPolicy::HandleFatalError will help us RudeExit the process. + // RudeExitProcess due to AV is to prevent a security risk - we are ripping + // at the boundary, without looking for the handlers. + EEPOLICY_HANDLE_FATAL_ERROR(COR_E_SECURITY); + } + } +#endif // FEATURE_CORECLR + + // If we're out of memory, then we figure there's probably not memory to maintain a stack trace, so we skip it. + // If we've got a stack overflow, then we figure the stack will be so huge as to make tracking the stack trace + // impracticle, so we skip it. + if ((throwable == CLRException::GetPreallocatedOutOfMemoryException()) || + (throwable == CLRException::GetPreallocatedStackOverflowException())) + { + tct.bAllowAllocMem = FALSE; + } + else + { + pExInfo->m_StackTraceInfo.AllocateStackTrace(); + } + + GCPROTECT_END(); + } + + // Set up information for GetExceptionPointers()/GetExceptionCode() callback. + pExInfo->SetExceptionCode(pExceptionRecord); + + pExInfo->m_pExceptionPointers = &exceptionPointers; + + if (bRethrownException || bNestedException) + { + _ASSERTE(pExInfo->m_pPrevNestedInfo != NULL); + + BEGIN_SO_INTOLERANT_CODE(GetThread()); + SetStateForWatsonBucketing(bRethrownException, pExInfo->GetPreviousExceptionTracker()->GetThrowableAsHandle()); + END_SO_INTOLERANT_CODE; + } + +#ifdef DEBUGGING_SUPPORTED + // + // At this point the exception is still fresh to us, so assert that + // there should be nothing from the debugger on it. 
+ // + _ASSERTE(!pExInfo->m_ExceptionFlags.DebuggerInterceptInfo()); +#endif + + if (pThread->IsRudeAbort()) + { + OBJECTREF rudeAbortThrowable = CLRException::GetPreallocatedRudeThreadAbortException(); + + if (pThread->GetThrowable() != rudeAbortThrowable) + { + // Neither of these sets will throw because the throwable that we're setting is a preallocated + // exception. This also updates the last thrown exception, for rethrows. + pThread->SafeSetThrowables(rudeAbortThrowable); + } + + if (!pThread->IsRudeAbortInitiated()) + { + pThread->PreWorkForThreadAbort(); + } + } + + LOG((LF_EH, LL_INFO100, "CPFH_RealFirstPassHandler: looking for handler bottom %x, top %x\n", + tct.pBottomFrame, tct.pTopFrame)); + tct.bReplaceStack = pExInfo->m_pBottomMostHandler == pEstablisherFrame && !bRethrownException; + tct.bSkipLastElement = bRethrownException && bNestedException; + found = LookForHandler(&exceptionPointers, + pThread, + &tct); + + // We have searched this far. + pExInfo->m_pSearchBoundary = tct.pTopFrame; + LOG((LF_EH, LL_INFO1000, "CPFH_RealFirstPassHandler: set pSearchBoundary to 0x%p\n", pExInfo->m_pSearchBoundary)); + + if ((found == LFH_NOT_FOUND) +#ifdef DEBUGGING_SUPPORTED + && !pExInfo->m_ExceptionFlags.DebuggerInterceptInfo() +#endif + ) + { + LOG((LF_EH, LL_INFO100, "CPFH_RealFirstPassHandler: NOT_FOUND\n")); + + if (tct.pTopFrame == FRAME_TOP) + { + LOG((LF_EH, LL_INFO100, "CPFH_RealFirstPassHandler: NOT_FOUND at FRAME_TOP\n")); + } + + retval = ExceptionContinueSearch; + goto exit; + } + else + { + // so we are going to handle the exception + + // Remove the nested exception record -- before calling RtlUnwind. + // The second-pass callback for a NestedExceptionRecord assumes that if it's + // being unwound, it should pop one exception from the pExInfo chain. This is + // true for any older NestedRecords that might be unwound -- but not for the + // new one we're about to add. To avoid this, we remove the new record + // before calling Unwind. + // + // @NICE: This can probably be a little cleaner -- the nested record currently + // is also used to guard the running of the filter code. When we clean up the + // behaviour of exceptions within filters, we should be able to get rid of this + // PUSH/POP/PUSH behaviour. + _ASSERTE(bPopNestedHandlerExRecord); + + UNINSTALL_EXCEPTION_HANDLING_RECORD(&(nestedHandlerExRecord.m_ExReg)); + + // Since we are going to handle the exception we switch into preemptive mode + GCX_PREEMP_NO_DTOR(); + +#ifdef DEBUGGING_SUPPORTED + // + // Check if the debugger wants to intercept this frame at a different point than where we are. + // + if (pExInfo->m_ExceptionFlags.DebuggerInterceptInfo()) + { + ClrDebuggerDoUnwindAndIntercept(pEstablisherFrame, pExceptionRecord); + + // + // If this returns, then the debugger couldn't do it's stuff and we default to the found handler. + // + if (found == LFH_NOT_FOUND) + { + retval = ExceptionContinueSearch; + // we need to be sure to switch back into Cooperative mode since we are going to + // jump to the exit: label and follow the normal return path (it is expected that + // CPFH_RealFirstPassHandler returns in COOP. 
+ GCX_PREEMP_NO_DTOR_END(); + goto exit; + } + } +#endif + + LOG((LF_EH, LL_INFO100, "CPFH_RealFirstPassHandler: handler found: %s\n", tct.pFunc->m_pszDebugMethodName)); + + CallRtlUnwindSafe(pEstablisherFrame, RtlUnwindCallback, pExceptionRecord, 0); + // on x86 at least, RtlUnwind always returns + + // Note: we've completed the unwind pass up to the establisher frame, and we're headed off to finish our + // cleanup and end up back in jitted code. Any more FS0 handlers pushed from this point on out will _not_ be + // unwound. + // Note: we are still in Preemptive mode here and that is correct, COMPlusAfterUnwind will switch us back + // into Cooperative mode. + return COMPlusAfterUnwind(pExceptionRecord, pEstablisherFrame, tct); + } + +exit: + { + // We need to be in COOP if we get here + GCX_ASSERT_COOP(); + } + + // If we got as far as saving pExInfo, save the context pointer so it's available for the unwind. + if (pExInfo) + { + pExInfo->m_pContext = pContext; + // pExInfo->m_pExceptionPointers points to a local structure, which is now going out of scope. + pExInfo->m_pExceptionPointers = NULL; + } + +#if defined(USE_FEF) + if (bPopFaultingExceptionFrame) + { + faultingExceptionFrame.Pop(); + } +#endif // USE_FEF + + if (bPopNestedHandlerExRecord) + { + UNINSTALL_EXCEPTION_HANDLING_RECORD(&(nestedHandlerExRecord.m_ExReg)); + } + return retval; +} // CPFH_RealFirstPassHandler() + + +//****************************************************************************** +// +void InitializeExceptionHandling() +{ + WRAPPER_NO_CONTRACT; + + InitSavedExceptionInfo(); + + CLRAddVectoredHandlers(); + + // Initialize the lock used for synchronizing access to the stacktrace in the exception object + g_StackTraceArrayLock.Init(LOCK_TYPE_DEFAULT, TRUE); +} + +//****************************************************************************** +static inline EXCEPTION_DISPOSITION __cdecl +CPFH_FirstPassHandler(EXCEPTION_RECORD *pExceptionRecord, + EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame, + CONTEXT *pContext, + DISPATCHER_CONTEXT *pDispatcherContext) +{ + WRAPPER_NO_CONTRACT; + EXCEPTION_DISPOSITION retval; + + _ASSERTE (!(pExceptionRecord->ExceptionFlags & (EXCEPTION_UNWINDING | EXCEPTION_EXIT_UNWIND))); + + DWORD exceptionCode = pExceptionRecord->ExceptionCode; + + Thread *pThread = GetThread(); + + STRESS_LOG4(LF_EH, LL_INFO100, + "CPFH_FirstPassHandler: pEstablisherFrame = %x EH code = %x EIP = %x with ESP = %x\n", + pEstablisherFrame, exceptionCode, pContext ? GetIP(pContext) : 0, pContext ? GetSP(pContext) : 0); + + EXCEPTION_POINTERS ptrs = { pExceptionRecord, pContext }; + + // Call to the vectored handler to give other parts of the Runtime a chance to jump in and take over an + // exception before we do too much with it. The most important point in the vectored handler is not to toggle + // the GC mode. + DWORD filter = CLRVectoredExceptionHandler(&ptrs); + + if (filter == (DWORD) EXCEPTION_CONTINUE_EXECUTION) + { + return ExceptionContinueExecution; + } + else if (filter == EXCEPTION_CONTINUE_SEARCH) + { + return ExceptionContinueSearch; + } + +#if defined(STRESS_HEAP) + // + // Check to see if this exception is due to GCStress. Since the GCStress mechanism only injects these faults + // into managed code, we only need to check for them in CPFH_FirstPassHandler. 
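+    // If it is, returning ExceptionContinueExecution below resumes the interrupted thread
+    // rather than treating the stress-induced fault as a real managed exception.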
+ // + if (IsGcMarker(exceptionCode, pContext)) + { + return ExceptionContinueExecution; + } +#endif // STRESS_HEAP + + // We always want to be in co-operative mode when we run this function and whenever we return + // from it, want to go to pre-emptive mode because are returning to OS. + BOOL disabled = pThread->PreemptiveGCDisabled(); + GCX_COOP_NO_DTOR(); + + BOOL bAsynchronousThreadStop = IsThreadHijackedForThreadStop(pThread, pExceptionRecord); + + if (bAsynchronousThreadStop) + { + // If we ever get here in preemptive mode, we're in trouble. We've + // changed the thread's IP to point at a little function that throws ... if + // the thread were to be in preemptive mode and a GC occurred, the stack + // crawl would have been all messed up (becuase we have no frame that points + // us back to the right place in managed code). + _ASSERTE(disabled); + + AdjustContextForThreadStop(pThread, pContext); + LOG((LF_EH, LL_INFO100, "CPFH_FirstPassHandler is Asynchronous Thread Stop or Abort\n")); + } + + pThread->ResetThrowControlForThread(); + + CPFH_VerifyThreadIsInValidState(pThread, exceptionCode, pEstablisherFrame); + + // If we were in cooperative mode when we came in here, then its okay to see if we should do HandleManagedFault + // and push a FaultingExceptionFrame. If we weren't in coop mode coming in here, then it means that there's no + // way the exception could really be from managed code. I might look like it was from managed code, but in + // reality its a rethrow from unmanaged code, either unmanaged user code, or unmanaged EE implementation. + if (disabled && ShouldHandleManagedFault(pExceptionRecord, pContext, pEstablisherFrame, pThread)) + { +#if defined(USE_FEF) + HandleManagedFault(pExceptionRecord, pContext, pEstablisherFrame, pThread); + retval = ExceptionContinueExecution; + goto exit; +#else // USE_FEF + // Save the context pointer in the Thread's EXInfo, so that a stack crawl can recover the + // register values from the fault. + + //@todo: I haven't yet found any case where we need to do anything here. If there are none, eliminate + // this entire if () {} block. +#endif // USE_FEF + } + + // OK. We're finally ready to start the real work. Nobody else grabbed the exception in front of us. Now we can + // get started. + retval = CPFH_RealFirstPassHandler(pExceptionRecord, + pEstablisherFrame, + pContext, + pDispatcherContext, + bAsynchronousThreadStop, + disabled); + +#if defined(USE_FEF) // This label is only used in the HandleManagedFault() case above. +exit: +#endif + if (retval != ExceptionContinueExecution || !disabled) + { + GCX_PREEMP_NO_DTOR(); + } + + STRESS_LOG1(LF_EH, LL_INFO100, "CPFH_FirstPassHandler: exiting with retval %d\n", retval); + return retval; +} // CPFH_FirstPassHandler() + +//****************************************************************************** +inline void +CPFH_UnwindFrames1(Thread* pThread, EXCEPTION_REGISTRATION_RECORD* pEstablisherFrame, DWORD exceptionCode) +{ + WRAPPER_NO_CONTRACT; + + ExInfo* pExInfo = &(pThread->GetExceptionState()->m_currentExInfo); + + // Ready to unwind the stack... + ThrowCallbackType tct; + tct.Init(); + tct.bIsUnwind = TRUE; + tct.pTopFrame = GetCurrFrame(pEstablisherFrame); // highest frame to search to + tct.pBottomFrame = NULL; + + // Set the flag indicating if the current exception represents a longjmp. + // See comment in COMPlusUnwindCallback for details. 
+ CORRUPTING_EXCEPTIONS_ONLY(tct.m_fIsLongJump = (exceptionCode == STATUS_LONGJUMP);) + + #ifdef _DEBUG + tct.pCurrentExceptionRecord = pEstablisherFrame; + tct.pPrevExceptionRecord = GetPrevSEHRecord(pEstablisherFrame); + #endif + + #ifdef DEBUGGING_SUPPORTED + EXCEPTION_REGISTRATION_RECORD *pInterceptEstablisherFrame = NULL; + + // If the exception is intercepted, use information stored in the DebuggerExState to unwind the stack. + if (pExInfo->m_ExceptionFlags.DebuggerInterceptInfo()) + { + pExInfo->m_DebuggerExState.GetDebuggerInterceptInfo(&pInterceptEstablisherFrame, + NULL, // MethodDesc **ppFunc, + NULL, // int *pdHandler, + NULL, // BYTE **ppStack + NULL, // ULONG_PTR *pNativeOffset, + NULL // Frame **ppFrame) + ); + LOG((LF_EH, LL_INFO1000, "CPFH_UnwindFrames1: frames are Est 0x%X, Intercept 0x%X\n", + pEstablisherFrame, pInterceptEstablisherFrame)); + + // + // When we set up for the interception we store off the CPFH or CPNEH that we + // *know* will handle unwinding the destination of the intercept. + // + // However, a CPNEH with the same limiting Capital-F-rame could do the work + // and unwind us, so... + // + // If this is the exact frame handler we are supposed to search for, or + // if this frame handler services the same Capital-F-rame as the frame handler + // we are looking for (i.e. this frame handler may do the work that we would + // expect our frame handler to do), + // then + // we need to pass the interception destination during this unwind. + // + _ASSERTE(IsUnmanagedToManagedSEHHandler(pEstablisherFrame)); + + if ((pEstablisherFrame == pInterceptEstablisherFrame) || + (GetCurrFrame(pEstablisherFrame) == GetCurrFrame(pInterceptEstablisherFrame))) + { + pExInfo->m_DebuggerExState.GetDebuggerInterceptInfo(NULL, + &(tct.pFunc), + &(tct.dHandler), + &(tct.pStack), + NULL, + &(tct.pBottomFrame) + ); + + LOG((LF_EH, LL_INFO1000, "CPFH_UnwindFrames1: going to: pFunc:%#X, pStack:%#X\n", + tct.pFunc, tct.pStack)); + + } + + } + #endif + + UnwindFrames(pThread, &tct); + + LOG((LF_EH, LL_INFO1000, "CPFH_UnwindFrames1: after unwind ec:%#x, tct.pTopFrame:0x%p, pSearchBndry:0x%p\n" + " pEstFrame:0x%p, IsC+NestExRec:%d, !Nest||Active:%d\n", + exceptionCode, tct.pTopFrame, pExInfo->m_pSearchBoundary, pEstablisherFrame, + IsComPlusNestedExceptionRecord(pEstablisherFrame), + (!IsComPlusNestedExceptionRecord(pEstablisherFrame) || reinterpret_cast(pEstablisherFrame)->m_ActiveForUnwind))); + + if (tct.pTopFrame >= pExInfo->m_pSearchBoundary && + (!IsComPlusNestedExceptionRecord(pEstablisherFrame) || + reinterpret_cast(pEstablisherFrame)->m_ActiveForUnwind) ) + { + // If this is the search boundary, and we're not a nested handler, then + // this is the last time we'll see this exception. Time to unwind our + // exinfo. + STRESS_LOG0(LF_EH, LL_INFO100, "CPFH_UnwindFrames1: Exception unwind -- unmanaged catcher detected\n"); + pExInfo->UnwindExInfo((VOID*)pEstablisherFrame); + } +} // CPFH_UnwindFrames1() + +//****************************************************************************** +inline EXCEPTION_DISPOSITION __cdecl +CPFH_UnwindHandler(EXCEPTION_RECORD *pExceptionRecord, + EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame, + CONTEXT *pContext, + void *pDispatcherContext) +{ + WRAPPER_NO_CONTRACT; + _ASSERTE (pExceptionRecord->ExceptionFlags & (EXCEPTION_UNWINDING | EXCEPTION_EXIT_UNWIND)); + + #ifdef _DEBUG + // Note: you might be inclined to write "static int breakOnSecondPass = CLRConfig::GetConfigValue(...);", but + // you can't do that here. 
That causes C++ EH to be generated under the covers for this function, and this + // function isn't allowed to have any C++ EH in it because its never going to return. + static int breakOnSecondPass; // = 0 + static BOOL breakOnSecondPassSetup; // = FALSE + if (!breakOnSecondPassSetup) + { + breakOnSecondPass = CLRConfig::GetConfigValue(CLRConfig::INTERNAL_BreakOnSecondPass); + breakOnSecondPassSetup = TRUE; + } + if (breakOnSecondPass != 0) + { + _ASSERTE(!"Unwind handler"); + } + #endif + + DWORD exceptionCode = pExceptionRecord->ExceptionCode; + Thread *pThread = GetThread(); + + ExInfo* pExInfo = &(pThread->GetExceptionState()->m_currentExInfo); + + STRESS_LOG4(LF_EH, LL_INFO100, "In CPFH_UnwindHandler EHCode = %x EIP = %x with ESP = %x, pEstablisherFrame = 0x%p\n", exceptionCode, + pContext ? GetIP(pContext) : 0, pContext ? GetSP(pContext) : 0, pEstablisherFrame); + + // We always want to be in co-operative mode when we run this function. Whenever we return + // from it, want to go to pre-emptive mode because are returning to OS. + + { + // needs to be in its own scope to avoid polluting the namespace, since + // we don't do a _END then we don't revert the state + GCX_COOP_NO_DTOR(); + } + + CPFH_VerifyThreadIsInValidState(pThread, exceptionCode, pEstablisherFrame); + + if (IsComPlusNestedExceptionRecord(pEstablisherFrame)) + { + NestedHandlerExRecord *pHandler = reinterpret_cast(pEstablisherFrame); + if (pHandler->m_pCurrentExInfo != NULL) + { + // See the comment at the end of COMPlusNestedExceptionHandler about nested exception. + // OS is going to skip the EstablisherFrame before our NestedHandler. + if (pHandler->m_pCurrentExInfo->m_pBottomMostHandler <= pHandler->m_pCurrentHandler) + { + // We're unwinding -- the bottom most handler is potentially off top-of-stack now. If + // it is, change it to the next COM+ frame. (This one is not good, as it's about to + // disappear.) + EXCEPTION_REGISTRATION_RECORD *pNextBottomMost = GetNextCOMPlusSEHRecord(pHandler->m_pCurrentHandler); + + STRESS_LOG3(LF_EH, LL_INFO10000, "COMPlusNestedExceptionHandler: setting ExInfo:0x%p m_pBottomMostHandler from 0x%p to 0x%p\n", + pHandler->m_pCurrentExInfo, pHandler->m_pCurrentExInfo->m_pBottomMostHandler, pNextBottomMost); + + pHandler->m_pCurrentExInfo->m_pBottomMostHandler = pNextBottomMost; + } + } + } + + // this establishes a marker so can determine if are processing a nested exception + // don't want to use the current frame to limit search as it could have been unwound by + // the time get to nested handler (ie if find an exception, unwind to the call point and + // then resume in the catch and then get another exception) so make the nested handler + // have the same boundary as this one. If nested handler can't find a handler, we won't + // end up searching this frame list twice because the nested handler will set the search + // boundary in the thread and so if get back to this handler it will have a range that starts + // and ends at the same place. + NestedHandlerExRecord nestedHandlerExRecord; + nestedHandlerExRecord.Init((PEXCEPTION_ROUTINE)COMPlusNestedExceptionHandler, GetCurrFrame(pEstablisherFrame)); + + nestedHandlerExRecord.m_ActiveForUnwind = TRUE; + nestedHandlerExRecord.m_pCurrentExInfo = pExInfo; + nestedHandlerExRecord.m_pCurrentHandler = pEstablisherFrame; + + INSTALL_EXCEPTION_HANDLING_RECORD(&(nestedHandlerExRecord.m_ExReg)); + + // Unwind the stack. The establisher frame sets the boundary. 
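+    // (CPFH_UnwindFrames1 limits the Frame chain unwind to GetCurrFrame(pEstablisherFrame),
+    // which it stores in tct.pTopFrame as the highest frame to search to.)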
+ CPFH_UnwindFrames1(pThread, pEstablisherFrame, exceptionCode); + + // We're unwinding -- the bottom most handler is potentially off top-of-stack now. If + // it is, change it to the next COM+ frame. (This one is not good, as it's about to + // disappear.) + if (pExInfo->m_pBottomMostHandler && + pExInfo->m_pBottomMostHandler <= pEstablisherFrame) + { + EXCEPTION_REGISTRATION_RECORD *pNextBottomMost = GetNextCOMPlusSEHRecord(pEstablisherFrame); + + // If there is no previous COM+ SEH handler, GetNextCOMPlusSEHRecord() will return -1. Much later, we will dereference that and AV. + _ASSERTE (pNextBottomMost != EXCEPTION_CHAIN_END); + + STRESS_LOG3(LF_EH, LL_INFO10000, "CPFH_UnwindHandler: setting ExInfo:0x%p m_pBottomMostHandler from 0x%p to 0x%p\n", + pExInfo, pExInfo->m_pBottomMostHandler, pNextBottomMost); + + pExInfo->m_pBottomMostHandler = pNextBottomMost; + } + + { + // needs to be in its own scope to avoid polluting the namespace, since + // we don't do a _END then we don't revert the state + GCX_PREEMP_NO_DTOR(); + } + UNINSTALL_EXCEPTION_HANDLING_RECORD(&(nestedHandlerExRecord.m_ExReg)); + + // If we are here, then exception was not caught in managed code protected by this + // ComplusFrameHandler. Hence, reset thread abort state if this is the last personality routine, + // for managed code, on the stack. + ResetThreadAbortState(pThread, pEstablisherFrame); + + STRESS_LOG0(LF_EH, LL_INFO100, "CPFH_UnwindHandler: Leaving with ExceptionContinueSearch\n"); + return ExceptionContinueSearch; +} // CPFH_UnwindHandler() + +//****************************************************************************** +// This is the first handler that is called in the context of managed code +// It is the first level of defense and tries to find a handler in the user +// code to handle the exception +//------------------------------------------------------------------------- +// EXCEPTION_DISPOSITION __cdecl COMPlusFrameHandler( +// EXCEPTION_RECORD *pExceptionRecord, +// _EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame, +// CONTEXT *pContext, +// DISPATCHER_CONTEXT *pDispatcherContext) +// +// See http://www.microsoft.com/msj/0197/exception/exception.aspx for a background piece on Windows +// unmanaged structured exception handling. +EXCEPTION_HANDLER_IMPL(COMPlusFrameHandler) +{ + WRAPPER_NO_CONTRACT; + _ASSERTE(!DebugIsEECxxException(pExceptionRecord) && "EE C++ Exception leaked into managed code!"); + + STRESS_LOG5(LF_EH, LL_INFO100, "In COMPlusFrameHander EH code = %x flag = %x EIP = %x with ESP = %x, pEstablisherFrame = 0x%p\n", + pExceptionRecord->ExceptionCode, pExceptionRecord->ExceptionFlags, + pContext ? GetIP(pContext) : 0, pContext ? GetSP(pContext) : 0, pEstablisherFrame); + + _ASSERTE((pContext == NULL) || ((pContext->ContextFlags & CONTEXT_CONTROL) == CONTEXT_CONTROL)); + + if (g_fNoExceptions) + return ExceptionContinueSearch; // No EH during EE shutdown. + + // Check if the exception represents a GCStress Marker. If it does, + // we shouldnt record its entry in the TLS as such exceptions are + // continuable and can confuse the VM to treat them as CSE, + // as they are implemented using illegal instruction exception. + + bool fIsGCMarker = false; + +#ifdef HAVE_GCCOVER // This is a debug only macro + if (GCStress::IsEnabled()) + { + // UnsafeTlsGetValue trashes last error. When Complus_GCStress=4, GC is invoked + // on every allowable JITed instruction by means of our exception handling machanism + // it is very easy to trash the last error. 
For example, a p/invoke called a native method
+        // which sets the last error. Before the IL stub gets a chance to read the last error, it could be
+        // trashed here, so preserve it across the GC marker check.
+        DWORD dwLastError = GetLastError();
+        fIsGCMarker = IsGcMarker(pExceptionRecord->ExceptionCode, pContext);
+        if (!fIsGCMarker)
+        {
+            SaveCurrentExceptionInfo(pExceptionRecord, pContext);
+        }
+        SetLastError(dwLastError);
+    }
+    else
+#endif
+    {
+        // GCStress does not exist on retail builds (see IsGcMarker implementation for details).
+        SaveCurrentExceptionInfo(pExceptionRecord, pContext);
+    }
+
+    if (fIsGCMarker)
+    {
+        // If this was a GCStress marker exception, then return
+        // ExceptionContinueExecution to the OS.
+        return ExceptionContinueExecution;
+    }
+
+    EXCEPTION_DISPOSITION retVal = ExceptionContinueSearch;
+
+    Thread *pThread = GetThread();
+    if ((pExceptionRecord->ExceptionFlags & (EXCEPTION_UNWINDING | EXCEPTION_EXIT_UNWIND)) == 0)
+    {
+        if (IsSOExceptionCode(pExceptionRecord->ExceptionCode))
+        {
+            EEPolicy::HandleStackOverflow(SOD_ManagedFrameHandler, (void*)pEstablisherFrame);
+
+            // VC's unhandled exception filter plays with the stack. It VirtualAlloc's a new stack and
+            // then launches Watson from that new stack. When Watson asks the CLR to save the required data,
+            // we are not able to walk the stack.
+            // Set the context in the ExInfo so that our Watson dump routine knows how to walk this stack.
+            ExInfo* pExInfo = &(pThread->GetExceptionState()->m_currentExInfo);
+            pExInfo->m_pContext = pContext;
+
+            // Save the reference to the topmost handler we see during the first pass when an SO goes past us.
+            // When an unwind gets triggered for the exception, we will reset the frame chain when we reach
+            // the topmost handler we saw during the first pass.
+            //
+            // This unifies, behaviour-wise, 32bit with 64bit.
+            if ((pExInfo->m_pTopMostHandlerDuringSO == NULL) ||
+                (pEstablisherFrame > pExInfo->m_pTopMostHandlerDuringSO))
+            {
+                pExInfo->m_pTopMostHandlerDuringSO = pEstablisherFrame;
+            }
+
+            // Switch to preemptive mode since we are returning back to the OS.
+            // We will do the quick switch since we are short of stack.
+            FastInterlockAnd (&pThread->m_fPreemptiveGCDisabled, 0);
+
+            return ExceptionContinueSearch;
+        }
+        else
+        {
+#ifdef FEATURE_STACK_PROBE
+            if (GetEEPolicy()->GetActionOnFailure(FAIL_StackOverflow) == eRudeUnloadAppDomain)
+            {
+                RetailStackProbe(static_cast<unsigned int>(ADJUST_PROBE(BACKOUT_CODE_STACK_LIMIT)), pThread);
+            }
+#endif
+        }
+    }
+    else
+    {
+        DWORD exceptionCode = pExceptionRecord->ExceptionCode;
+
+        if (exceptionCode == STATUS_UNWIND)
+        {
+            // If exceptionCode is STATUS_UNWIND, RtlUnwind was called with a NULL ExceptionRecord,
+            // so the OS uses a faked ExceptionRecord with the STATUS_UNWIND code. In that case we need to
+            // look at our saved exception code.
+            exceptionCode = GetCurrentExceptionCode();
+        }
+
+        if (IsSOExceptionCode(exceptionCode))
+        {
+            // We saved the context during the first pass in case the stack overflow exception was
+            // unhandled and the Watson dump code needed it. Now we are in the second pass, so
+            // either the exception has been handled by user code, or we have finished the unhandled-exception
+            // filter process and the OS is unwinding the stack. Either way, we don't need the
+            // context any more. It is very important to reset the context so that our code does not
+            // accidentally walk the frame using the dangling context in ExInfoWalker::WalkToPosition.
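+            // For illustration: "second pass" here means the unwind pass, which this handler
+            // recognizes from the exception flags tested near the top of this function. A hedged
+            // sketch of that test (hypothetical helper, shown only to make the flow explicit):
+            //
+            //     bool IsUnwindPass(const EXCEPTION_RECORD *pRec)
+            //     {
+            //         return (pRec->ExceptionFlags & (EXCEPTION_UNWINDING | EXCEPTION_EXIT_UNWIND)) != 0;
+            //     }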
+ ExInfo* pExInfo = &(pThread->GetExceptionState()->m_currentExInfo); + pExInfo->m_pContext = NULL; + + // We should have the reference to the topmost handler seen during the first pass of SO + _ASSERTE(pExInfo->m_pTopMostHandlerDuringSO != NULL); + + // Reset frame chain till we reach the topmost establisher frame we saw in the first pass. + // This will ensure that if any intermediary frame calls back into managed (e.g. native frame + // containing a __finally that reverse pinvokes into managed), then we have the correct + // explicit frame on the stack. Resetting the frame chain only when we reach the topmost + // personality routine seen in the first pass may not result in expected behaviour, + // specially during stack walks when crawl frame needs to be initialized from + // explicit frame. + if (pEstablisherFrame <= pExInfo->m_pTopMostHandlerDuringSO) + { + GCX_COOP_NO_DTOR(); + + if (pThread->GetFrame() < GetCurrFrame(pEstablisherFrame)) + { + // We are very short of stack. We avoid calling UnwindFrame which may + // run unknown code here. + pThread->SetFrame(GetCurrFrame(pEstablisherFrame)); + } + } + + // Switch to preemp mode since we are returning back to the OS. + // We will do the quick switch since we are short of stack + FastInterlockAnd(&pThread->m_fPreemptiveGCDisabled, 0); + + return ExceptionContinueSearch; + } + } + + // . We need to probe here, but can't introduce destructors etc. + BEGIN_CONTRACT_VIOLATION(SOToleranceViolation); + + if (pExceptionRecord->ExceptionFlags & (EXCEPTION_UNWINDING | EXCEPTION_EXIT_UNWIND)) + { + retVal = CPFH_UnwindHandler(pExceptionRecord, + pEstablisherFrame, + pContext, + pDispatcherContext); + } + else + { + + /* Make no assumptions about the current machine state. + @PERF: Only needs to be called by the very first handler invoked by SEH */ + ResetCurrentContext(); + + retVal = CPFH_FirstPassHandler(pExceptionRecord, + pEstablisherFrame, + pContext, + pDispatcherContext); + + } + + END_CONTRACT_VIOLATION; + + return retVal; +} // COMPlusFrameHandler() + + +//------------------------------------------------------------------------- +// This is called by the EE to restore the stack pointer if necessary. +//------------------------------------------------------------------------- + +// This can't be inlined into the caller to avoid introducing EH frame +NOINLINE LPVOID COMPlusEndCatchWorker(Thread * pThread) +{ + STATIC_CONTRACT_THROWS; + STATIC_CONTRACT_GC_TRIGGERS; + STATIC_CONTRACT_MODE_COOPERATIVE; + STATIC_CONTRACT_SO_INTOLERANT; + + LOG((LF_EH, LL_INFO1000, "COMPlusPEndCatch:called with " + "pThread:0x%x\n",pThread)); + + // indicate that we are out of the managed clause as early as possible + ExInfo* pExInfo = &(pThread->GetExceptionState()->m_currentExInfo); + pExInfo->m_EHClauseInfo.SetManagedCodeEntered(FALSE); + + void* esp = NULL; + + // @todo . We need to probe in the EH code, but can't introduce destructors etc. + BEGIN_CONTRACT_VIOLATION(SOToleranceViolation); + + // Notify the profiler that the catcher has finished running + // IL stubs don't contain catch blocks so inability to perform this check does not matter. 
+ // if (!pFunc->IsILStub()) + EEToProfilerExceptionInterfaceWrapper::ExceptionCatcherLeave(); + + // no need to set pExInfo->m_ClauseType = (DWORD)COR_PRF_CLAUSE_NONE now that the + // notification is done because because the ExInfo record is about to be popped off anyway + + LOG((LF_EH, LL_INFO1000, "COMPlusPEndCatch:pThread:0x%x\n",pThread)); + +#ifdef _DEBUG + gLastResumedExceptionFunc = NULL; + gLastResumedExceptionHandler = 0; +#endif + // Set the thrown object to NULL as no longer needed. This also sets the last thrown object to NULL. + pThread->SafeSetThrowables(NULL); + + // reset the stashed exception info + pExInfo->m_pExceptionRecord = NULL; + pExInfo->m_pContext = NULL; + pExInfo->m_pExceptionPointers = NULL; + + if (pExInfo->m_pShadowSP) + { + *pExInfo->m_pShadowSP = 0; // Reset the shadow SP + } + + // pExInfo->m_dEsp was set in ResumeAtJITEH(). It is the Esp of the + // handler nesting level which catches the exception. + esp = (void*)(size_t)pExInfo->m_dEsp; + + pExInfo->UnwindExInfo(esp); + + // Prepare to sync managed exception state + // + // In a case when we're nested inside another catch block, the domain in which we're executing may not be the + // same as the one the domain of the throwable that was just made the current throwable above. Therefore, we + // make a special effort to preserve the domain of the throwable as we update the the last thrown object. + // + // This function (COMPlusEndCatch) can also be called by the in-proc debugger helper thread on x86 when + // an attempt to SetIP takes place to set IP outside the catch clause. In such a case, managed thread object + // will not be available. Thus, we should reset the severity only if its not such a thread. + // + // This behaviour (of debugger doing SetIP) is not allowed on 64bit since the catch clauses are implemented + // as a seperate funclet and it's just not allowed to set the IP across EH scopes, such as from inside a catch + // clause to outside of the catch clause. + bool fIsDebuggerHelperThread = (g_pDebugInterface == NULL) ? false : g_pDebugInterface->ThisIsHelperThread(); + + // Sync managed exception state, for the managed thread, based upon any active exception tracker + pThread->SyncManagedExceptionState(fIsDebuggerHelperThread); + + LOG((LF_EH, LL_INFO1000, "COMPlusPEndCatch: esp=%p\n", esp)); + + END_CONTRACT_VIOLATION; + + return esp; +} + +// +// This function works in conjunction with JIT_EndCatch. On input, the parameters are set as follows: +// ebp, ebx, edi, esi: the values of these registers at the end of the catch block +// *pRetAddress: the next instruction after the call to JIT_EndCatch +// +// On output, *pRetAddress is the instruction at which to resume execution. This may be user code, +// or it may be ThrowControlForThread (which will re-raise a pending ThreadAbortException). +// +// Returns the esp to set before resuming at *pRetAddress. +// +LPVOID STDCALL COMPlusEndCatch(LPVOID ebp, DWORD ebx, DWORD edi, DWORD esi, LPVOID* pRetAddress) +{ + // + // PopNestedExceptionRecords directly manipulates fs:[0] chain. This method can't have any EH! + // + STATIC_CONTRACT_THROWS; + STATIC_CONTRACT_GC_TRIGGERS; + STATIC_CONTRACT_MODE_COOPERATIVE; + STATIC_CONTRACT_SO_INTOLERANT; + + ETW::ExceptionLog::ExceptionCatchEnd(); + ETW::ExceptionLog::ExceptionThrownEnd(); + + void* esp = COMPlusEndCatchWorker(GetThread()); + + // We are going to resume at a handler nesting level whose esp is dEsp. Pop off any SEH records below it. 
This
+    // would be the COMPlusNestedExceptionHandler we had inserted.
+    PopNestedExceptionRecords(esp);
+
+    //
+    // Set up m_OSContext for the call to COMPlusCheckForAbort
+    //
+    Thread* pThread = GetThread();
+    _ASSERTE(pThread != NULL);
+
+    SetIP(pThread->m_OSContext, (PCODE)*pRetAddress);
+    SetSP(pThread->m_OSContext, (TADDR)esp);
+    SetFP(pThread->m_OSContext, (TADDR)ebp);
+    pThread->m_OSContext->Ebx = ebx;
+    pThread->m_OSContext->Edi = edi;
+    pThread->m_OSContext->Esi = esi;
+
+    LPVOID throwControl = COMPlusCheckForAbort((UINT_PTR)*pRetAddress);
+    if (throwControl)
+        *pRetAddress = throwControl;
+
+    return esp;
+}
+
+#endif // !DACCESS_COMPILE
+
+PTR_CONTEXT GetCONTEXTFromRedirectedStubStackFrame(CONTEXT * pContext)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+    UINT_PTR stackSlot = pContext->Ebp + REDIRECTSTUB_EBP_OFFSET_CONTEXT;
+    PTR_PTR_CONTEXT ppContext = dac_cast<PTR_PTR_CONTEXT>((TADDR)stackSlot);
+    return *ppContext;
+}
+
+#if !defined(DACCESS_COMPILE)
+
+PEXCEPTION_REGISTRATION_RECORD GetCurrentSEHRecord()
+{
+    WRAPPER_NO_CONTRACT;
+
+    LPVOID fs0 = (LPVOID)__readfsdword(0);
+
+#if 0 // This walk is too expensive considering we hit it every time we enter a CONTRACT(NOTHROW)
+#ifdef _DEBUG
+    EXCEPTION_REGISTRATION_RECORD *pEHR = (EXCEPTION_REGISTRATION_RECORD *)fs0;
+    LPVOID spVal;
+    __asm {
+        mov spVal, esp
+    }
+
+    // check that all the EH frames are greater than the current stack value. If not, the
+    // stack has been updated somehow without unwinding the SEH chain.
+
+    // LOG((LF_EH, LL_INFO1000000, "ER Chain:\n"));
+    while (pEHR != NULL && pEHR != EXCEPTION_CHAIN_END) {
+        // LOG((LF_EH, LL_INFO1000000, "\tp: prev:p handler:%x\n", pEHR, pEHR->Next, pEHR->Handler));
+        if (pEHR < spVal) {
+            if (gLastResumedExceptionFunc != 0)
+                _ASSERTE(!"Stack is greater than start of SEH chain - possible missing leave in handler. See gLastResumedExceptionHandler & gLastResumedExceptionFunc for info");
+            else
+                _ASSERTE(!"Stack is greater than start of SEH chain (FS:0)");
+        }
+        if (pEHR->Handler == (void *)-1)
+            _ASSERTE(!"Handler value has been corrupted");
+
+        _ASSERTE(pEHR < pEHR->Next);
+
+        pEHR = pEHR->Next;
+    }
+#endif
+#endif
+
+    return (EXCEPTION_REGISTRATION_RECORD*) fs0;
+}
+
+PEXCEPTION_REGISTRATION_RECORD GetFirstCOMPlusSEHRecord(Thread *pThread) {
+    WRAPPER_NO_CONTRACT;
+    EXCEPTION_REGISTRATION_RECORD *pEHR = *(pThread->GetExceptionListPtr());
+    if (pEHR == EXCEPTION_CHAIN_END || IsUnmanagedToManagedSEHHandler(pEHR)) {
+        return pEHR;
+    } else {
+        return GetNextCOMPlusSEHRecord(pEHR);
+    }
+}
+
+
+PEXCEPTION_REGISTRATION_RECORD GetPrevSEHRecord(EXCEPTION_REGISTRATION_RECORD *next)
+{
+    WRAPPER_NO_CONTRACT;
+    _ASSERTE(IsUnmanagedToManagedSEHHandler(next));
+
+    EXCEPTION_REGISTRATION_RECORD *pEHR = GetCurrentSEHRecord();
+    _ASSERTE(pEHR != 0 && pEHR != EXCEPTION_CHAIN_END);
+
+    EXCEPTION_REGISTRATION_RECORD *pBest = 0;
+    while (pEHR != next) {
+        if (IsUnmanagedToManagedSEHHandler(pEHR))
+            pBest = pEHR;
+        pEHR = pEHR->Next;
+        _ASSERTE(pEHR != 0 && pEHR != EXCEPTION_CHAIN_END);
+    }
+
+    return pBest;
+}
+
+VOID SetCurrentSEHRecord(EXCEPTION_REGISTRATION_RECORD *pSEH)
+{
+    WRAPPER_NO_CONTRACT;
+    *GetThread()->GetExceptionListPtr() = pSEH;
+}
+
+
+//
+// Unwinds pExInfo, pops FS:[0] handlers until the interception context SP, and
+// resumes at the interception context.
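+// In sketch form (for illustration only; the real work is done by PopNestedExceptionRecords
+// below): every registration record whose address is below the interception ESP is dropped
+// from the head of the chain before control is transferred, roughly
+//
+//     PEXCEPTION_REGISTRATION_RECORD pRec = GetCurrentSEHRecord();
+//     while ((LPVOID)pRec < (LPVOID)(size_t)context->Esp)
+//         pRec = pRec->Next;
+//     SetCurrentSEHRecord(pRec);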
+// +VOID UnwindExceptionTrackerAndResumeInInterceptionFrame(ExInfo* pExInfo, EHContext* context) +{ + STATIC_CONTRACT_NOTHROW; + STATIC_CONTRACT_GC_NOTRIGGER; + STATIC_CONTRACT_MODE_COOPERATIVE; + STATIC_CONTRACT_SO_TOLERANT; + + _ASSERTE(pExInfo && context); + + pExInfo->UnwindExInfo((LPVOID)(size_t)context->Esp); + PopNestedExceptionRecords((LPVOID)(size_t)context->Esp); + + STRESS_LOG3(LF_EH|LF_CORDB, LL_INFO100, "UnwindExceptionTrackerAndResumeInInterceptionFrame: completing intercept at EIP = %p ESP = %p EBP = %p\n", context->Eip, context->Esp, context->Ebp); + + ResumeAtJitEHHelper(context); + UNREACHABLE_MSG("Should never return from ResumeAtJitEHHelper!"); +} + +// +// Pop SEH records below the given target ESP. This is only used to pop nested exception records. +// If bCheckForUnknownHandlers is set, it only checks for unknown FS:[0] handlers. +// +BOOL PopNestedExceptionRecords(LPVOID pTargetSP, BOOL bCheckForUnknownHandlers) +{ + // No CONTRACT here, because we can't run the risk of it pushing any SEH into the current method. + STATIC_CONTRACT_NOTHROW; + STATIC_CONTRACT_GC_NOTRIGGER; + STATIC_CONTRACT_SO_TOLERANT; + + PEXCEPTION_REGISTRATION_RECORD pEHR = GetCurrentSEHRecord(); + + while ((LPVOID)pEHR < pTargetSP) + { + // + // The only handler type we're allowed to have below the limit on the FS:0 chain in these cases is a nested + // exception record, so we verify that here. + // + // There is a special case, of course: for an unhandled exception, when the default handler does the exit + // unwind, we may have an exception that escapes a finally clause, thus replacing the original unhandled + // exception. If we find a catcher for that new exception, then we'll go ahead and do our own unwind, then + // jump to the catch. When we are called here, just before jumpping to the catch, we'll pop off our nested + // handlers, then we'll pop off one more handler: the handler that ntdll!ExecuteHandler2 pushed before + // calling our nested handler. We go ahead and pop off that handler, too. Its okay, its only there to catch + // exceptions from handlers and turn them into collided unwind status codes... there's no cleanup in the + // handler that we're removing, and that's the important point. The handler that ExecuteHandler2 pushes + // isn't a public export from ntdll, but its named "UnwindHandler" and is physically shortly after + // ExecuteHandler2 in ntdll. + // + static HINSTANCE ExecuteHandler2Module = 0; + static BOOL ExecuteHandler2ModuleInited = FALSE; + + // Cache the handle to the dll with the handler pushed by ExecuteHandler2. + if (!ExecuteHandler2ModuleInited) + { + ExecuteHandler2Module = WszGetModuleHandle(W("ntdll.dll")); + ExecuteHandler2ModuleInited = TRUE; + } + + if (bCheckForUnknownHandlers) + { + if (!IsComPlusNestedExceptionRecord(pEHR) || + !((ExecuteHandler2Module != NULL) && IsIPInModule(ExecuteHandler2Module, (PCODE)pEHR->Handler))) + { + return TRUE; + } + } +#ifdef _DEBUG + else + { + // Note: if we can't find the module containing ExecuteHandler2, we'll just be really strict and require + // that we're only popping nested handlers. 
+ _ASSERTE(IsComPlusNestedExceptionRecord(pEHR) || + ((ExecuteHandler2Module != NULL) && IsIPInModule(ExecuteHandler2Module, (PCODE)pEHR->Handler))); + } +#endif // _DEBUG + + pEHR = pEHR->Next; + } + + if (!bCheckForUnknownHandlers) + { + SetCurrentSEHRecord(pEHR); + } + return FALSE; +} + +// +// This is implemented differently from the PopNestedExceptionRecords above because it's called in the context of +// the DebuggerRCThread to operate on the stack of another thread. +// +VOID PopNestedExceptionRecords(LPVOID pTargetSP, CONTEXT *pCtx, void *pSEH) +{ + // No CONTRACT here, because we can't run the risk of it pushing any SEH into the current method. + STATIC_CONTRACT_NOTHROW; + STATIC_CONTRACT_GC_NOTRIGGER; + +#ifdef _DEBUG + LOG((LF_CORDB,LL_INFO1000, "\nPrintSEHRecords:\n")); + + EXCEPTION_REGISTRATION_RECORD *pEHR = (EXCEPTION_REGISTRATION_RECORD *)(size_t)*(DWORD *)pSEH; + + // check that all the eh frames are all greater than the current stack value. If not, the + // stack has been updated somehow w/o unwinding the SEH chain. + while (pEHR != NULL && pEHR != EXCEPTION_CHAIN_END) + { + LOG((LF_EH, LL_INFO1000000, "\t%08x: next:%08x handler:%x\n", pEHR, pEHR->Next, pEHR->Handler)); + pEHR = pEHR->Next; + } +#endif + + DWORD dwCur = *(DWORD*)pSEH; // 'EAX' in the original routine + DWORD dwPrev = (DWORD)(size_t)pSEH; + + while (dwCur < (DWORD)(size_t)pTargetSP) + { + // Watch for the OS handler + // for nested exceptions, or any C++ handlers for destructors in our call + // stack, or anything else. + if (dwCur < (DWORD)GetSP(pCtx)) + dwPrev = dwCur; + + dwCur = *(DWORD *)(size_t)dwCur; + + LOG((LF_CORDB,LL_INFO10000, "dwCur: 0x%x dwPrev:0x%x pTargetSP:0x%x\n", + dwCur, dwPrev, pTargetSP)); + } + + *(DWORD *)(size_t)dwPrev = dwCur; + +#ifdef _DEBUG + pEHR = (EXCEPTION_REGISTRATION_RECORD *)(size_t)*(DWORD *)pSEH; + // check that all the eh frames are all greater than the current stack value. If not, the + // stack has been updated somehow w/o unwinding the SEH chain. + + LOG((LF_CORDB,LL_INFO1000, "\nPopSEHRecords:\n")); + while (pEHR != NULL && pEHR != (void *)-1) + { + LOG((LF_EH, LL_INFO1000000, "\t%08x: next:%08x handler:%x\n", pEHR, pEHR->Next, pEHR->Handler)); + pEHR = pEHR->Next; + } +#endif +} + +//========================================================================== +// COMPlusThrowCallback +// +//========================================================================== + +/* + * + * COMPlusThrowCallbackHelper + * + * This function is a simple helper function for COMPlusThrowCallback. It is needed + * because of the EX_TRY macro. This macro does an alloca(), which allocates space + * off the stack, not free'ing it. Thus, doing a EX_TRY in a loop can easily result + * in a stack overflow error. By factoring out the EX_TRY into a separate function, + * we recover that stack space. + * + * Parameters: + * pJitManager - The JIT manager that will filter the EH. + * pCf - The frame to crawl. + * EHClausePtr + * nestingLevel + * pThread - Used to determine if the thread is throwable or not. + * + * Return: + * Exception status. 
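+ *
+ *    For illustration, a hedged sketch of how the caller (COMPlusThrowCallback, below) consumes
+ *    this value:
+ *
+ *        int iFilt = COMPlusThrowCallbackHelper(pJitManager, pCf, pData, &EHClause,
+ *                                               nestingLevel, throwable, pThread);
+ *        if (iFilt != EXCEPTION_EXECUTE_HANDLER)
+ *            continue;      // filter declined; keep searching this method's clauses
+ *        // otherwise the catch location is recorded and the stack walk stops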
+ * + */ +int COMPlusThrowCallbackHelper(IJitManager *pJitManager, + CrawlFrame *pCf, + ThrowCallbackType* pData, + EE_ILEXCEPTION_CLAUSE *EHClausePtr, + DWORD nestingLevel, + OBJECTREF throwable, + Thread *pThread + ) +{ + CONTRACTL + { + NOTHROW; + GC_TRIGGERS; + MODE_COOPERATIVE; + } + CONTRACTL_END; + + int iFilt = 0; + BOOL impersonating = FALSE; + + EX_TRY + { + GCPROTECT_BEGIN (throwable); + if (pData->hCallerToken != NULL) + { + STRESS_LOG1(LF_EH, LL_INFO100, "In COMPlusThrowCallbackHelper hCallerToken = %d\n",pData->hCallerToken); + // CLR_ImpersonateLoggedOnUser fails fast on error + COMPrincipal::CLR_ImpersonateLoggedOnUser(pData->hCallerToken); + impersonating = TRUE; + } + + // We want to call filters even if the thread is aborting, so suppress abort + // checks while the filter runs. + ThreadPreventAsyncHolder preventAbort; + + BYTE* startAddress = (BYTE*)pCf->GetCodeInfo()->GetStartAddress(); + iFilt = ::CallJitEHFilter(pCf, startAddress, EHClausePtr, nestingLevel, throwable); + + if (impersonating) + { + STRESS_LOG1(LF_EH, LL_INFO100, "In COMPlusThrowCallbackHelper hImpersonationToken = %d\n",pData->hImpersonationToken); + // CLR_ImpersonateLoggedOnUser fails fast on error + COMPrincipal::CLR_ImpersonateLoggedOnUser(pData->hImpersonationToken); + impersonating = FALSE; + } + GCPROTECT_END(); + } + EX_CATCH + { + if (impersonating) + { + STRESS_LOG1(LF_EH, LL_INFO100, "In COMPlusThrowCallbackHelper EX_CATCH hImpersonationToken = %d\n",pData->hImpersonationToken); + // CLR_ImpersonateLoggedOnUser fails fast on error + COMPrincipal::CLR_ImpersonateLoggedOnUser(pData->hImpersonationToken); + impersonating = FALSE; + } + + // We had an exception in filter invocation that remained unhandled. + // Sync managed exception state, for the managed thread, based upon the active exception tracker. + pThread->SyncManagedExceptionState(false); + + // + // Swallow exception. Treat as exception continue search. + // + iFilt = EXCEPTION_CONTINUE_SEARCH; + + } + EX_END_CATCH(SwallowAllExceptions) + + return iFilt; +} + +//****************************************************************************** +// The stack walk callback for exception handling on x86. +// Returns one of: +// SWA_CONTINUE = 0, // continue walking +// SWA_ABORT = 1, // stop walking, early out in "failure case" +// SWA_FAILED = 2 // couldn't walk stack +StackWalkAction COMPlusThrowCallback( // SWA value + CrawlFrame *pCf, // Data from StackWalkFramesEx + ThrowCallbackType *pData) // Context data passed through from CPFH +{ + // We don't want to use a runtime contract here since this codepath is used during + // the processing of a hard SO. Contracts use a significant amount of stack + // which we can't afford for those cases. 
+ STATIC_CONTRACT_THROWS; + STATIC_CONTRACT_GC_TRIGGERS; + STATIC_CONTRACT_MODE_COOPERATIVE; + + Frame *pFrame = pCf->GetFrame(); + MethodDesc *pFunc = pCf->GetFunction(); + + #if defined(_DEBUG) + #define METHODNAME(pFunc) (pFunc?pFunc->m_pszDebugMethodName:"") + #else + #define METHODNAME(pFunc) "" + #endif + STRESS_LOG4(LF_EH, LL_INFO100, "COMPlusThrowCallback: STACKCRAWL method:%pM ('%s'), Frame:%p, FrameVtable = %pV\n", + pFunc, METHODNAME(pFunc), pFrame, pCf->IsFrameless()?0:(*(void**)pFrame)); + #undef METHODNAME + + Thread *pThread = GetThread(); + + if (pFrame && pData->pTopFrame == pFrame) + /* Don't look past limiting frame if there is one */ + return SWA_ABORT; + + if (!pFunc) + return SWA_CONTINUE; + + if (pThread->IsRudeAbortInitiated() && !pThread->IsWithinCer(pCf)) + { + return SWA_CONTINUE; + } + + ExInfo* pExInfo = &(pThread->GetExceptionState()->m_currentExInfo); + + _ASSERTE(!pData->bIsUnwind); +#ifdef _DEBUG + // It SHOULD be the case that any frames we consider live between this exception + // record and the previous one. + if (!pExInfo->m_pPrevNestedInfo) { + if (pData->pCurrentExceptionRecord) { + if (pFrame) _ASSERTE(pData->pCurrentExceptionRecord > pFrame); + if (pCf->IsFrameless()) _ASSERTE((ULONG_PTR)pData->pCurrentExceptionRecord >= GetRegdisplaySP(pCf->GetRegisterSet())); + } + if (pData->pPrevExceptionRecord) { + // FCALLS have an extra SEH record in debug because of the desctructor + // associated with ForbidGC checking. This is benign, so just ignore it. + if (pFrame) _ASSERTE(pData->pPrevExceptionRecord < pFrame || pFrame->GetVTablePtr() == HelperMethodFrame::GetMethodFrameVPtr()); + if (pCf->IsFrameless()) _ASSERTE((ULONG_PTR)pData->pPrevExceptionRecord <= GetRegdisplaySP(pCf->GetRegisterSet())); + } + } +#endif + + UINT_PTR currentIP = 0; + UINT_PTR currentSP = 0; + + if (pCf->IsFrameless()) + { + currentIP = (UINT_PTR)GetControlPC(pCf->GetRegisterSet()); + currentSP = (UINT_PTR)GetRegdisplaySP(pCf->GetRegisterSet()); + } + else if (InlinedCallFrame::FrameHasActiveCall(pFrame)) + { + // don't have the IP, SP for native code + currentIP = 0; + currentSP = 0; + } + else + { + currentIP = (UINT_PTR)(pCf->GetFrame()->GetIP()); + currentSP = 0; //Don't have an SP to get. + } + + if (!pFunc->IsILStub()) + { + // Append the current frame to the stack trace and save the save trace to the managed Exception object. + pExInfo->m_StackTraceInfo.AppendElement(pData->bAllowAllocMem, currentIP, currentSP, pFunc, pCf); + + pExInfo->m_StackTraceInfo.SaveStackTrace(pData->bAllowAllocMem, + pThread->GetThrowableAsHandle(), + pData->bReplaceStack, + pData->bSkipLastElement); + } + else + { + LOG((LF_EH, LL_INFO1000, "COMPlusThrowCallback: Skipping AppendElement/SaveStackTrace for IL stub MD %p\n", pFunc)); + } + + // Fire an exception thrown ETW event when an exception occurs + ETW::ExceptionLog::ExceptionThrown(pCf, pData->bSkipLastElement, pData->bReplaceStack); + + // Reset the flags. These flags are set only once before each stack walk done by LookForHandler(), and + // they apply only to the first frame we append to the stack trace. Subsequent frames are always appended. 
+ if (pData->bReplaceStack) + { + pData->bReplaceStack = FALSE; + } + if (pData->bSkipLastElement) + { + pData->bSkipLastElement = FALSE; + } + + // Check for any impersonation on the frame and save that for use during EH filter callbacks + OBJECTREF* pRefSecDesc = pCf->GetAddrOfSecurityObject(); + if (pRefSecDesc != NULL && *pRefSecDesc != NULL) + { + FRAMESECDESCREF fsdRef = (FRAMESECDESCREF)*pRefSecDesc; + if (fsdRef->GetCallerToken() != NULL) + { + // Impersonation info present on the Frame + pData->hCallerToken = fsdRef->GetCallerToken(); + STRESS_LOG1(LF_EH, LL_INFO100, "In COMPlusThrowCallback. Found non-NULL callertoken on FSD:%d\n",pData->hCallerToken); + if (!pData->bImpersonationTokenSet) + { + pData->hImpersonationToken = fsdRef->GetImpersonationToken(); + STRESS_LOG1(LF_EH, LL_INFO100, "In COMPlusThrowCallback. Found non-NULL impersonationtoken on FSD:%d\n",pData->hImpersonationToken); + pData->bImpersonationTokenSet = TRUE; + } + } + } + + // now we've got the stack trace, if we aren't allowed to catch this and we're first pass, return + if (pData->bDontCatch) + return SWA_CONTINUE; + + if (!pCf->IsFrameless()) + { + // @todo - remove this once SIS is fully enabled. + extern bool g_EnableSIS; + if (g_EnableSIS) + { + // For debugger, we may want to notify 1st chance exceptions if they're coming out of a stub. + // We recognize stubs as Frames with a M2U transition type. The debugger's stackwalker also + // recognizes these frames and publishes ICorDebugInternalFrames in the stackwalk. It's + // important to use pFrame as the stack address so that the Exception callback matches up + // w/ the ICorDebugInternlFrame stack range. + if (CORDebuggerAttached()) + { + Frame * pFrameStub = pCf->GetFrame(); + Frame::ETransitionType t = pFrameStub->GetTransitionType(); + if (t == Frame::TT_M2U) + { + // Use address of the frame as the stack address. + currentSP = (SIZE_T) ((void*) pFrameStub); + currentIP = 0; // no IP. + EEToDebuggerExceptionInterfaceWrapper::FirstChanceManagedException(pThread, (SIZE_T)currentIP, (SIZE_T)currentSP); +#ifdef FEATURE_EXCEPTION_NOTIFICATIONS + // Deliver the FirstChanceNotification after the debugger, if not already delivered. + if (!pExInfo->DeliveredFirstChanceNotification()) + { + ExceptionNotifications::DeliverFirstChanceNotification(); + } +#endif // FEATURE_EXCEPTION_NOTIFICATIONS + } + } + } + return SWA_CONTINUE; + } + + bool fIsILStub = pFunc->IsILStub(); + bool fGiveDebuggerAndProfilerNotification = !fIsILStub; + BOOL fMethodCanHandleException = TRUE; + + MethodDesc * pUserMDForILStub = NULL; + Frame * pILStubFrame = NULL; + if (fIsILStub) + pUserMDForILStub = GetUserMethodForILStub(pThread, currentSP, pFunc, &pILStubFrame); + +#ifdef FEATURE_CORRUPTING_EXCEPTIONS + CorruptionSeverity currentSeverity = pThread->GetExceptionState()->GetCurrentExceptionTracker()->GetCorruptionSeverity(); + { + // We must defer to the MethodDesc of the user method instead of the IL stub + // itself because the user can specify the policy on a per-method basis and + // that won't be reflected via the IL stub's MethodDesc. + MethodDesc * pMDWithCEAttribute = fIsILStub ? pUserMDForILStub : pFunc; + + // Check if the exception can be delivered to the method? It will check if the exception + // is a CE or not. If it is, it will check if the method can process it or not. 
+ fMethodCanHandleException = CEHelper::CanMethodHandleException(currentSeverity, pMDWithCEAttribute); + } +#endif // FEATURE_CORRUPTING_EXCEPTIONS + + // Let the profiler know that we are searching for a handler within this function instance + if (fGiveDebuggerAndProfilerNotification) + EEToProfilerExceptionInterfaceWrapper::ExceptionSearchFunctionEnter(pFunc); + + // The following debugger notification and AppDomain::FirstChanceNotification should be scoped together + // since the AD notification *must* follow immediately after the debugger's notification. + { +#ifdef DEBUGGING_SUPPORTED + // + // Go ahead and notify any debugger of this exception. + // + EEToDebuggerExceptionInterfaceWrapper::FirstChanceManagedException(pThread, (SIZE_T)currentIP, (SIZE_T)currentSP); + + if (CORDebuggerAttached() && pExInfo->m_ExceptionFlags.DebuggerInterceptInfo()) + { + return SWA_ABORT; + } +#endif // DEBUGGING_SUPPORTED + +#ifdef FEATURE_EXCEPTION_NOTIFICATIONS + // Attempt to deliver the first chance notification to the AD only *AFTER* the debugger + // has done that, provided we have not already done that. + if (!pExInfo->DeliveredFirstChanceNotification()) + { + ExceptionNotifications::DeliverFirstChanceNotification(); + } +#endif // FEATURE_EXCEPTION_NOTIFICATIONS + } + IJitManager* pJitManager = pCf->GetJitManager(); + _ASSERTE(pJitManager); + EH_CLAUSE_ENUMERATOR pEnumState; + unsigned EHCount = 0; + +#ifdef FEATURE_CORRUPTING_EXCEPTIONS + // If exception cannot be handled, then just bail out. We shouldnt examine the EH clauses + // in such a method. + if (!fMethodCanHandleException) + { + LOG((LF_EH, LL_INFO100, "COMPlusThrowCallback - CEHelper decided not to look for exception handlers in the method(MD:%p).\n", pFunc)); + + // Set the flag to skip this frame since the CE cannot be delivered + _ASSERTE(currentSeverity == ProcessCorrupting); + + // Ensure EHClause count is zero + EHCount = 0; + } + else +#endif // FEATURE_CORRUPTING_EXCEPTIONS + { + EHCount = pJitManager->InitializeEHEnumeration(pCf->GetMethodToken(), &pEnumState); + } + + if (EHCount == 0) + { + // Inform the profiler that we're leaving, and what pass we're on + if (fGiveDebuggerAndProfilerNotification) + EEToProfilerExceptionInterfaceWrapper::ExceptionSearchFunctionLeave(pFunc); + return SWA_CONTINUE; + } + + TypeHandle thrownType = TypeHandle(); + // if we are being called on an unwind for an exception that we did not try to catch, eg. + // an internal EE exception, then pThread->GetThrowable will be null + { + OBJECTREF throwable = pThread->GetThrowable(); + if (throwable != NULL) + { + throwable = PossiblyUnwrapThrowable(throwable, pCf->GetAssembly()); + thrownType = TypeHandle(throwable->GetTrueMethodTable()); + } + } + + PREGDISPLAY regs = pCf->GetRegisterSet(); + BYTE *pStack = (BYTE *) GetRegdisplaySP(regs); +#ifdef DEBUGGING_SUPPORTED + BYTE *pHandlerEBP = (BYTE *) GetRegdisplayFP(regs); +#endif + + DWORD offs = (DWORD)pCf->GetRelOffset(); //= (BYTE*) (*regs->pPC) - (BYTE*) pCf->GetStartAddress(); + STRESS_LOG1(LF_EH, LL_INFO10000, "COMPlusThrowCallback: offset is %d\n", offs); + + EE_ILEXCEPTION_CLAUSE EHClause; + unsigned start_adjust, end_adjust; + + start_adjust = !(pCf->HasFaulted() || pCf->IsIPadjusted()); + end_adjust = pCf->IsActiveFunc(); + + for(ULONG i=0; i < EHCount; i++) + { + pJitManager->GetNextEHClause(&pEnumState, &EHClause); + _ASSERTE(IsValidClause(&EHClause)); + + STRESS_LOG4(LF_EH, LL_INFO100, "COMPlusThrowCallback: considering '%s' clause [%d,%d], ofs:%d\n", + (IsFault(&EHClause) ? 
"fault" : ( + IsFinally(&EHClause) ? "finally" : ( + IsFilterHandler(&EHClause) ? "filter" : ( + IsTypedHandler(&EHClause) ? "typed" : "unknown")))), + EHClause.TryStartPC, + EHClause.TryEndPC, + offs + ); + + // Checking the exception range is a bit tricky because + // on CPU faults (null pointer access, div 0, ..., the IP points + // to the faulting instruction, but on calls, the IP points + // to the next instruction. + // This means that we should not include the start point on calls + // as this would be a call just preceding the try block. + // Also, we should include the end point on calls, but not faults. + + // If we're in the FILTER part of a filter clause, then we + // want to stop crawling. It's going to be caught in a + // EX_CATCH just above us. If not, the exception + if ( IsFilterHandler(&EHClause) + && ( offs > EHClause.FilterOffset + || offs == EHClause.FilterOffset && !start_adjust) + && ( offs < EHClause.HandlerStartPC + || offs == EHClause.HandlerStartPC && !end_adjust)) { + + STRESS_LOG4(LF_EH, LL_INFO100, "COMPlusThrowCallback: Fault inside filter [%d,%d] startAdj %d endAdj %d\n", + EHClause.FilterOffset, EHClause.HandlerStartPC, start_adjust, end_adjust); + + if (fGiveDebuggerAndProfilerNotification) + EEToProfilerExceptionInterfaceWrapper::ExceptionSearchFunctionLeave(pFunc); + return SWA_ABORT; + } + + if ( (offs < EHClause.TryStartPC) || + (offs > EHClause.TryEndPC) || + (offs == EHClause.TryStartPC && start_adjust) || + (offs == EHClause.TryEndPC && end_adjust)) + continue; + + BOOL typeMatch = FALSE; + BOOL isTypedHandler = IsTypedHandler(&EHClause); + + if (isTypedHandler && !thrownType.IsNull()) + { + if (EHClause.TypeHandle == (void*)(size_t)mdTypeRefNil) + { + // this is a catch(...) + typeMatch = TRUE; + } + else + { + TypeHandle exnType = pJitManager->ResolveEHClause(&EHClause,pCf); + + // if doesn't have cached class then class wasn't loaded so couldn't have been thrown + typeMatch = !exnType.IsNull() && ExceptionIsOfRightType(exnType, thrownType); + } + } + + // @PERF: Is this too expensive? Consider storing the nesting level + // instead of the HandlerEndPC. + + // Determine the nesting level of EHClause. Just walk the table + // again, and find out how many handlers enclose it + DWORD nestingLevel = 0; + + if (IsFaultOrFinally(&EHClause)) + continue; + if (isTypedHandler) + { + LOG((LF_EH, LL_INFO100, "COMPlusThrowCallback: %s match for typed handler.\n", typeMatch?"Found":"Did not find")); + if (!typeMatch) + { + continue; + } + } + else + { + // Must be an exception filter (__except() part of __try{}__except(){}). + nestingLevel = ComputeEnclosingHandlerNestingLevel(pJitManager, + pCf->GetMethodToken(), + EHClause.HandlerStartPC); + + // We just need *any* address within the method. This will let the debugger + // resolve the EnC version of the method. 
+ PCODE pMethodAddr = GetControlPC(regs); + if (fGiveDebuggerAndProfilerNotification) + EEToDebuggerExceptionInterfaceWrapper::ExceptionFilter(pFunc, pMethodAddr, EHClause.FilterOffset, pHandlerEBP); + + UINT_PTR uStartAddress = (UINT_PTR)pCf->GetCodeInfo()->GetStartAddress(); + + // save clause information in the exinfo + pExInfo->m_EHClauseInfo.SetInfo(COR_PRF_CLAUSE_FILTER, + uStartAddress + EHClause.FilterOffset, + StackFrame((UINT_PTR)pHandlerEBP)); + + // Let the profiler know we are entering a filter + if (fGiveDebuggerAndProfilerNotification) + EEToProfilerExceptionInterfaceWrapper::ExceptionSearchFilterEnter(pFunc); + + COUNTER_ONLY(GetPerfCounters().m_Excep.cFiltersExecuted++); + + STRESS_LOG3(LF_EH, LL_INFO10, "COMPlusThrowCallback: calling filter code, EHClausePtr:%08x, Start:%08x, End:%08x\n", + &EHClause, EHClause.HandlerStartPC, EHClause.HandlerEndPC); + + OBJECTREF throwable = PossiblyUnwrapThrowable(pThread->GetThrowable(), pCf->GetAssembly()); + + pExInfo->m_EHClauseInfo.SetManagedCodeEntered(TRUE); + + int iFilt = COMPlusThrowCallbackHelper(pJitManager, + pCf, + pData, + &EHClause, + nestingLevel, + throwable, + pThread); + + pExInfo->m_EHClauseInfo.SetManagedCodeEntered(FALSE); + + // Let the profiler know we are leaving a filter + if (fGiveDebuggerAndProfilerNotification) + EEToProfilerExceptionInterfaceWrapper::ExceptionSearchFilterLeave(); + + pExInfo->m_EHClauseInfo.ResetInfo(); + + if (pThread->IsRudeAbortInitiated() && !pThread->IsWithinCer(pCf)) + { + if (fGiveDebuggerAndProfilerNotification) + EEToProfilerExceptionInterfaceWrapper::ExceptionSearchFunctionLeave(pFunc); + return SWA_CONTINUE; + } + + // If this filter didn't want the exception, keep looking. + if (EXCEPTION_EXECUTE_HANDLER != iFilt) + continue; + } + + // Record this location, to stop the unwind phase, later. + pData->pFunc = pFunc; + pData->dHandler = i; + pData->pStack = pStack; + + // Notify the profiler that a catcher has been found + if (fGiveDebuggerAndProfilerNotification) + { + EEToProfilerExceptionInterfaceWrapper::ExceptionSearchCatcherFound(pFunc); + EEToProfilerExceptionInterfaceWrapper::ExceptionSearchFunctionLeave(pFunc); + } + +#ifdef DEBUGGING_SUPPORTED + // + // Notify debugger that a catcher has been found. + // + if (fIsILStub) + { + EEToDebuggerExceptionInterfaceWrapper::NotifyOfCHFFilter(pExInfo->m_pExceptionPointers, pILStubFrame); + } + else + if (fGiveDebuggerAndProfilerNotification && + CORDebuggerAttached() && !pExInfo->m_ExceptionFlags.DebuggerInterceptInfo()) + { + _ASSERTE(pData); + // We just need *any* address within the method. This will let the debugger + // resolve the EnC version of the method. + PCODE pMethodAddr = GetControlPC(regs); + + EEToDebuggerExceptionInterfaceWrapper::FirstChanceManagedExceptionCatcherFound(pThread, + pData->pFunc, pMethodAddr, + (SIZE_T)pData->pStack, + &EHClause); + } +#endif // DEBUGGING_SUPPORTED + + return SWA_ABORT; + } + if (fGiveDebuggerAndProfilerNotification) + EEToProfilerExceptionInterfaceWrapper::ExceptionSearchFunctionLeave(pFunc); + return SWA_CONTINUE; +} // StackWalkAction COMPlusThrowCallback() + + +//========================================================================== +// COMPlusUnwindCallback +//========================================================================== + +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning (disable : 4740) // There is inline asm code in this function, which disables + // global optimizations. 
+#pragma warning (disable : 4731) +#endif +StackWalkAction COMPlusUnwindCallback (CrawlFrame *pCf, ThrowCallbackType *pData) +{ + STATIC_CONTRACT_THROWS; + STATIC_CONTRACT_GC_NOTRIGGER; + STATIC_CONTRACT_MODE_COOPERATIVE; + + _ASSERTE(pData->bIsUnwind); + + Frame *pFrame = pCf->GetFrame(); + MethodDesc *pFunc = pCf->GetFunction(); + + #if defined(_DEBUG) + #define METHODNAME(pFunc) (pFunc?pFunc->m_pszDebugMethodName:"") + #else + #define METHODNAME(pFunc) "" + #endif + STRESS_LOG4(LF_EH, LL_INFO100, "COMPlusUnwindCallback: STACKCRAWL method:%pM ('%s'), Frame:%p, FrameVtable = %pV\n", + pFunc, METHODNAME(pFunc), pFrame, pCf->IsFrameless()?0:(*(void**)pFrame)); + #undef METHODNAME + + if (pFrame && pData->pTopFrame == pFrame) + /* Don't look past limiting frame if there is one */ + return SWA_ABORT; + + if (!pFunc) + return SWA_CONTINUE; + + if (!pCf->IsFrameless()) + return SWA_CONTINUE; + + Thread *pThread = GetThread(); + + // If the thread is being RudeAbort, we will not run any finally + if (pThread->IsRudeAbortInitiated() && !pThread->IsWithinCer(pCf)) + { + return SWA_CONTINUE; + } + + IJitManager* pJitManager = pCf->GetJitManager(); + _ASSERTE(pJitManager); + + ExInfo *pExInfo = &(pThread->GetExceptionState()->m_currentExInfo); + + PREGDISPLAY regs = pCf->GetRegisterSet(); + BYTE *pStack = (BYTE *) GetRegdisplaySP(regs); + + TypeHandle thrownType = TypeHandle(); + + BOOL fCanMethodHandleException = TRUE; +#ifdef FEATURE_CORRUPTING_EXCEPTIONS + // MethodDesc's security information (i.e. whether it is critical or transparent) is calculated lazily. + // If this method's security information was not precalculated, then it would have been in the first pass + // already using Security::IsMethodCritical which could take have taken us down a path which is GC_TRIGGERS. + // + // + // However, this unwind callback (for X86) is GC_NOTRIGGER and at this point the security information would have been + // calculated already. Hence, we wouldnt endup in the GC_TRIGGERS path. Thus, to keep SCAN.EXE (static contract analyzer) happy, + // we will pass a FALSE to the CanMethodHandleException call, indicating we dont need to calculate security information (and thus, + // not go down the GC_TRIGGERS path. + // + // Check if the exception can be delivered to the method? It will check if the exception + // is a CE or not. If it is, it will check if the method can process it or not. + CorruptionSeverity currentSeverity = pThread->GetExceptionState()->GetCurrentExceptionTracker()->GetCorruptionSeverity(); + + // We have to do this check for x86 since, unlike 64bit which will setup a new exception tracker for longjmp, + // x86 only sets up new trackers in the first pass (and longjmp is 2nd pass only exception). Hence, we pass + // this information in the callback structure without affecting any existing exception tracker (incase longjmp was + // a nested exception). + if (pData->m_fIsLongJump) + { + // Longjump is not a CSE. With a CSE in progress, this can be invoked by either: + // + // 1) Managed code (e.g. finally/fault/catch), OR + // 2) By native code + // + // In scenario (1), managed code can invoke it only if it was attributed with HPCSE attribute. Thus, + // longjmp is no different than managed code doing a "throw new Exception();". + // + // In scenario (2), longjmp is no different than any other non-CSE native exception raised. + // + // In both these case, longjmp should be treated as non-CSE. 
Since x86 does not setup a tracker for + // it (see comment above), we pass this information (of whether the current exception is a longjmp or not) + // to this callback (from UnwindFrames) to setup the correct corruption severity. + // + // http://www.nynaeve.net/?p=105 has a brief description of how exception-safe setjmp/longjmp works. + currentSeverity = NotCorrupting; + } + { + MethodDesc * pFuncWithCEAttribute = pFunc; + Frame * pILStubFrame = NULL; + if (pFunc->IsILStub()) + { + // We must defer to the MethodDesc of the user method instead of the IL stub + // itself because the user can specify the policy on a per-method basis and + // that won't be reflected via the IL stub's MethodDesc. + pFuncWithCEAttribute = GetUserMethodForILStub(pThread, (UINT_PTR)pStack, pFunc, &pILStubFrame); + } + fCanMethodHandleException = CEHelper::CanMethodHandleException(currentSeverity, pFuncWithCEAttribute, FALSE); + } +#endif // FEATURE_CORRUPTING_EXCEPTIONS + +#ifdef DEBUGGING_SUPPORTED + LOG((LF_EH, LL_INFO1000, "COMPlusUnwindCallback: Intercept %d, pData->pFunc 0x%X, pFunc 0x%X, pData->pStack 0x%X, pStack 0x%X\n", + pExInfo->m_ExceptionFlags.DebuggerInterceptInfo(), + pData->pFunc, + pFunc, + pData->pStack, + pStack)); + + // + // If the debugger wants to intercept this exception here, go do that. + // + if (pExInfo->m_ExceptionFlags.DebuggerInterceptInfo() && (pData->pFunc == pFunc) && (pData->pStack == pStack)) + { + goto LDoDebuggerIntercept; + } +#endif + + bool fGiveDebuggerAndProfilerNotification; + fGiveDebuggerAndProfilerNotification = !pFunc->IsILStub(); + + // Notify the profiler of the function we're dealing with in the unwind phase + if (fGiveDebuggerAndProfilerNotification) + EEToProfilerExceptionInterfaceWrapper::ExceptionUnwindFunctionEnter(pFunc); + + EH_CLAUSE_ENUMERATOR pEnumState; + unsigned EHCount; + +#ifdef FEATURE_CORRUPTING_EXCEPTIONS + if (!fCanMethodHandleException) + { + LOG((LF_EH, LL_INFO100, "COMPlusUnwindCallback - CEHelper decided not to look for exception handlers in the method(MD:%p).\n", pFunc)); + + // Set the flag to skip this frame since the CE cannot be delivered + _ASSERTE(currentSeverity == ProcessCorrupting); + + // Force EHClause count to be zero + EHCount = 0; + } + else +#endif // FEATURE_CORRUPTING_EXCEPTIONS + { + EHCount = pJitManager->InitializeEHEnumeration(pCf->GetMethodToken(), &pEnumState); + } + + if (EHCount == 0) + { + // Inform the profiler that we're leaving, and what pass we're on + if (fGiveDebuggerAndProfilerNotification) + EEToProfilerExceptionInterfaceWrapper::ExceptionUnwindFunctionLeave(pFunc); + + return SWA_CONTINUE; + } + + // if we are being called on an unwind for an exception that we did not try to catch, eg. 
+ // an internal EE exception, then pThread->GetThrowable will be null + { + OBJECTREF throwable = pThread->GetThrowable(); + if (throwable != NULL) + { + throwable = PossiblyUnwrapThrowable(throwable, pCf->GetAssembly()); + thrownType = TypeHandle(throwable->GetTrueMethodTable()); + } + } +#ifdef DEBUGGING_SUPPORTED + BYTE *pHandlerEBP; + pHandlerEBP = (BYTE *) GetRegdisplayFP(regs); +#endif + + DWORD offs; + offs = (DWORD)pCf->GetRelOffset(); //= (BYTE*) (*regs->pPC) - (BYTE*) pCf->GetStartAddress(); + + LOG((LF_EH, LL_INFO100, "COMPlusUnwindCallback: current EIP offset in method 0x%x, \n", offs)); + + EE_ILEXCEPTION_CLAUSE EHClause; + unsigned start_adjust, end_adjust; + + start_adjust = !(pCf->HasFaulted() || pCf->IsIPadjusted()); + end_adjust = pCf->IsActiveFunc(); + + for(ULONG i=0; i < EHCount; i++) + { + pJitManager->GetNextEHClause(&pEnumState, &EHClause); + _ASSERTE(IsValidClause(&EHClause)); + + STRESS_LOG4(LF_EH, LL_INFO100, "COMPlusUnwindCallback: considering '%s' clause [%d,%d], offs:%d\n", + (IsFault(&EHClause) ? "fault" : ( + IsFinally(&EHClause) ? "finally" : ( + IsFilterHandler(&EHClause) ? "filter" : ( + IsTypedHandler(&EHClause) ? "typed" : "unknown")))), + EHClause.TryStartPC, + EHClause.TryEndPC, + offs + ); + + // Checking the exception range is a bit tricky because + // on CPU faults (null pointer access, div 0, ..., the IP points + // to the faulting instruction, but on calls, the IP points + // to the next instruction. + // This means that we should not include the start point on calls + // as this would be a call just preceding the try block. + // Also, we should include the end point on calls, but not faults. + + if ( IsFilterHandler(&EHClause) + && ( offs > EHClause.FilterOffset + || offs == EHClause.FilterOffset && !start_adjust) + && ( offs < EHClause.HandlerStartPC + || offs == EHClause.HandlerStartPC && !end_adjust) + ) { + STRESS_LOG4(LF_EH, LL_INFO100, "COMPlusUnwindCallback: Fault inside filter [%d,%d] startAdj %d endAdj %d\n", + EHClause.FilterOffset, EHClause.HandlerStartPC, start_adjust, end_adjust); + + // Make the filter as done. See comment in CallJitEHFilter + // on why we have to do it here. + Frame* pFilterFrame = pThread->GetFrame(); + _ASSERTE(pFilterFrame->GetVTablePtr() == ExceptionFilterFrame::GetMethodFrameVPtr()); + ((ExceptionFilterFrame*)pFilterFrame)->SetFilterDone(); + + // Inform the profiler that we're leaving, and what pass we're on + if (fGiveDebuggerAndProfilerNotification) + EEToProfilerExceptionInterfaceWrapper::ExceptionUnwindFunctionLeave(pFunc); + + return SWA_ABORT; + } + + if ( (offs < EHClause.TryStartPC) || + (offs > EHClause.TryEndPC) || + (offs == EHClause.TryStartPC && start_adjust) || + (offs == EHClause.TryEndPC && end_adjust)) + continue; + + // @PERF : Is this too expensive? Consider storing the nesting level + // instead of the HandlerEndPC. + + // Determine the nesting level of EHClause. Just walk the table + // again, and find out how many handlers enclose it + + DWORD nestingLevel = ComputeEnclosingHandlerNestingLevel(pJitManager, + pCf->GetMethodToken(), + EHClause.HandlerStartPC); + + // We just need *any* address within the method. This will let the debugger + // resolve the EnC version of the method. 
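+        // For illustration, a hedged sketch of what "walk the table again" means for the
+        // ComputeEnclosingHandlerNestingLevel call above: re-enumerate the method's EH clauses
+        // and count how many of them enclose the handler's start offset, roughly
+        //
+        //     EH_CLAUSE_ENUMERATOR enumState;
+        //     unsigned count = pJitManager->InitializeEHEnumeration(pCf->GetMethodToken(), &enumState);
+        //     DWORD level = 0;
+        //     for (unsigned j = 0; j < count; j++)
+        //     {
+        //         EE_ILEXCEPTION_CLAUSE c;
+        //         pJitManager->GetNextEHClause(&enumState, &c);
+        //         if (c.TryStartPC <= EHClause.HandlerStartPC && EHClause.HandlerStartPC < c.TryEndPC)
+        //             level++;
+        //     }
+        //
+        // (simplified; the real helper may apply further adjustments)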
+ PCODE pMethodAddr = GetControlPC(regs); + + UINT_PTR uStartAddress = (UINT_PTR)pCf->GetCodeInfo()->GetStartAddress(); + + if (IsFaultOrFinally(&EHClause)) + { + COUNTER_ONLY(GetPerfCounters().m_Excep.cFinallysExecuted++); + + if (fGiveDebuggerAndProfilerNotification) + EEToDebuggerExceptionInterfaceWrapper::ExceptionHandle(pFunc, pMethodAddr, EHClause.HandlerStartPC, pHandlerEBP); + + pExInfo->m_EHClauseInfo.SetInfo(COR_PRF_CLAUSE_FINALLY, + uStartAddress + EHClause.HandlerStartPC, + StackFrame((UINT_PTR)pHandlerEBP)); + + // Notify the profiler that we are about to execute the finally code + if (fGiveDebuggerAndProfilerNotification) + EEToProfilerExceptionInterfaceWrapper::ExceptionUnwindFinallyEnter(pFunc); + + LOG((LF_EH, LL_INFO100, "COMPlusUnwindCallback: finally clause [%d,%d] - call\n", EHClause.TryStartPC, EHClause.TryEndPC)); + + pExInfo->m_EHClauseInfo.SetManagedCodeEntered(TRUE); + + ::CallJitEHFinally(pCf, (BYTE *)uStartAddress, &EHClause, nestingLevel); + + pExInfo->m_EHClauseInfo.SetManagedCodeEntered(FALSE); + + LOG((LF_EH, LL_INFO100, "COMPlusUnwindCallback: finally - returned\n")); + + // Notify the profiler that we are done with the finally code + if (fGiveDebuggerAndProfilerNotification) + EEToProfilerExceptionInterfaceWrapper::ExceptionUnwindFinallyLeave(); + + pExInfo->m_EHClauseInfo.ResetInfo(); + + continue; + } + + // Current is not a finally, check if it's the catching handler (or filter). + if (pData->pFunc != pFunc || (ULONG)(pData->dHandler) != i || pData->pStack != pStack) + { + continue; + } + +#ifdef _DEBUG + gLastResumedExceptionFunc = pCf->GetFunction(); + gLastResumedExceptionHandler = i; +#endif + + // save clause information in the exinfo + pExInfo->m_EHClauseInfo.SetInfo(COR_PRF_CLAUSE_CATCH, + uStartAddress + EHClause.HandlerStartPC, + StackFrame((UINT_PTR)pHandlerEBP)); + + // Notify the profiler that we are about to resume at the catcher. + if (fGiveDebuggerAndProfilerNotification) + { + DACNotify::DoExceptionCatcherEnterNotification(pFunc, EHClause.HandlerStartPC); + + EEToProfilerExceptionInterfaceWrapper::ExceptionCatcherEnter(pThread, pFunc); + + EEToDebuggerExceptionInterfaceWrapper::ExceptionHandle(pFunc, pMethodAddr, EHClause.HandlerStartPC, pHandlerEBP); + } + + STRESS_LOG4(LF_EH, LL_INFO100, "COMPlusUnwindCallback: offset 0x%x matches clause [0x%x, 0x%x) matches in method %pM\n", + offs, EHClause.TryStartPC, EHClause.TryEndPC, pFunc); + + // ResumeAtJitEH will set pExInfo->m_EHClauseInfo.m_fManagedCodeEntered = TRUE; at the appropriate time + ::ResumeAtJitEH(pCf, (BYTE *)uStartAddress, &EHClause, nestingLevel, pThread, pData->bUnwindStack); + //UNREACHABLE_MSG("ResumeAtJitEH shouldn't have returned!"); + + // we do not set pExInfo->m_EHClauseInfo.m_fManagedCodeEntered = FALSE here, + // that happens when the catch clause calls back to COMPlusEndCatch + + } + + STRESS_LOG1(LF_EH, LL_INFO100, "COMPlusUnwindCallback: no handler found in method %pM\n", pFunc); + if (fGiveDebuggerAndProfilerNotification) + EEToProfilerExceptionInterfaceWrapper::ExceptionUnwindFunctionLeave(pFunc); + + return SWA_CONTINUE; + + +#ifdef DEBUGGING_SUPPORTED +LDoDebuggerIntercept: + + STRESS_LOG1(LF_EH|LF_CORDB, LL_INFO100, "COMPlusUnwindCallback: Intercepting in method %pM\n", pFunc); + + // + // Setup up the easy parts of the context to restart at. 
+ // + EHContext context; + + // + // Note: EAX ECX EDX are scratch + // + context.Esp = (DWORD)(size_t)(GetRegdisplaySP(regs)); + context.Ebx = *regs->pEbx; + context.Esi = *regs->pEsi; + context.Edi = *regs->pEdi; + context.Ebp = *regs->pEbp; + + // + // Set scratch registers to 0 to avoid reporting incorrect values to GC in case of debugger changing the IP + // in the middle of a scratch register lifetime (see Dev10 754922) + // + context.Eax = 0; + context.Ecx = 0; + context.Edx = 0; + + // + // Ok, now set the target Eip to the address the debugger requested. + // + ULONG_PTR nativeOffset; + pExInfo->m_DebuggerExState.GetDebuggerInterceptInfo(NULL, NULL, NULL, NULL, &nativeOffset, NULL); + context.Eip = GetControlPC(regs) - (pCf->GetRelOffset() - nativeOffset); + + // + // Finally we need to get the correct Esp for this nested level + // + + context.Esp = pCf->GetCodeManager()->GetAmbientSP(regs, + pCf->GetCodeInfo(), + nativeOffset, + pData->dHandler, + pCf->GetCodeManState() + ); + // + // In case we see unknown FS:[0] handlers we delay the interception point until we reach the handler that protects the interception point. + // This way we have both FS:[0] handlers being poped up by RtlUnwind and managed capital F Frames being unwinded by managed stackwalker. + // + BOOL fCheckForUnknownHandler = TRUE; + if (PopNestedExceptionRecords((LPVOID)(size_t)context.Esp, fCheckForUnknownHandler)) + { + // Let ClrDebuggerDoUnwindAndIntercept RtlUnwind continue to unwind frames until we reach the handler protected by COMPlusNestedExceptionHandler. + pExInfo->m_InterceptionContext = context; + pExInfo->m_ValidInterceptionContext = TRUE; + STRESS_LOG0(LF_EH|LF_CORDB, LL_INFO100, "COMPlusUnwindCallback: Skip interception until unwinding reaches the actual handler protected by COMPlusNestedExceptionHandler\n"); + } + else + { + // + // Pop off all the Exception information up to this point in the stack + // + UnwindExceptionTrackerAndResumeInInterceptionFrame(pExInfo, &context); + } + return SWA_ABORT; +#endif // DEBUGGING_SUPPORTED +} // StackWalkAction COMPlusUnwindCallback () +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning (disable : 4740) // There is inline asm code in this function, which disables + // global optimizations. 
+#pragma warning (disable : 4731) +#endif +void ResumeAtJitEH(CrawlFrame* pCf, + BYTE* startPC, + EE_ILEXCEPTION_CLAUSE *EHClausePtr, + DWORD nestingLevel, + Thread *pThread, + BOOL unwindStack) +{ + // No dynamic contract here because this function doesn't return and destructors wouldn't be executed + WRAPPER_NO_CONTRACT; + + EHContext context; + + context.Setup(PCODE(startPC + EHClausePtr->HandlerStartPC), pCf->GetRegisterSet()); + + size_t * pShadowSP = NULL; // Write Esp to *pShadowSP before jumping to handler + size_t * pHandlerEnd = NULL; + + OBJECTREF throwable = PossiblyUnwrapThrowable(pThread->GetThrowable(), pCf->GetAssembly()); + + pCf->GetCodeManager()->FixContext(ICodeManager::CATCH_CONTEXT, + &context, + pCf->GetCodeInfo(), + EHClausePtr->HandlerStartPC, + nestingLevel, + throwable, + pCf->GetCodeManState(), + &pShadowSP, + &pHandlerEnd); + + if (pHandlerEnd) + { + *pHandlerEnd = EHClausePtr->HandlerEndPC; + } + + // save esp so that endcatch can restore it (it always restores, so want correct value) + ExInfo* pExInfo = &(pThread->GetExceptionState()->m_currentExInfo); + pExInfo->m_dEsp = (LPVOID)context.GetSP(); + LOG((LF_EH, LL_INFO1000, "ResumeAtJitEH: current m_dEsp set to %p\n", context.GetSP())); + + PVOID dEsp = GetCurrentSP(); + + if (!unwindStack) + { + // If we don't want to unwind the stack, then the guard page had better not be gone! + _ASSERTE(pThread->DetermineIfGuardPagePresent()); + + // so down below won't really update esp + context.SetSP(dEsp); + pExInfo->m_pShadowSP = pShadowSP; // so that endcatch can zero it back + + if (pShadowSP) + { + *pShadowSP = (size_t)dEsp; + } + } + else + { + // so shadow SP has the real SP as we are going to unwind the stack + dEsp = (LPVOID)context.GetSP(); + + // BEGIN: pExInfo->UnwindExInfo(dEsp); + ExInfo *pPrevNestedInfo = pExInfo->m_pPrevNestedInfo; + + while (pPrevNestedInfo && pPrevNestedInfo->m_StackAddress < dEsp) + { + LOG((LF_EH, LL_INFO1000, "ResumeAtJitEH: popping nested ExInfo at 0x%p\n", pPrevNestedInfo->m_StackAddress)); + + pPrevNestedInfo->DestroyExceptionHandle(); + pPrevNestedInfo->m_StackTraceInfo.FreeStackTrace(); + +#ifdef DEBUGGING_SUPPORTED + if (g_pDebugInterface != NULL) + { + g_pDebugInterface->DeleteInterceptContext(pPrevNestedInfo->m_DebuggerExState.GetDebuggerInterceptContext()); + } +#endif // DEBUGGING_SUPPORTED + + pPrevNestedInfo = pPrevNestedInfo->m_pPrevNestedInfo; + } + + pExInfo->m_pPrevNestedInfo = pPrevNestedInfo; + + _ASSERTE(pExInfo->m_pPrevNestedInfo == 0 || pExInfo->m_pPrevNestedInfo->m_StackAddress >= dEsp); + + // Before we unwind the SEH records, get the Frame from the top-most nested exception record. + Frame* pNestedFrame = GetCurrFrame(FindNestedEstablisherFrame(GetCurrentSEHRecord())); + + PopNestedExceptionRecords((LPVOID)(size_t)dEsp); + + EXCEPTION_REGISTRATION_RECORD* pNewBottomMostHandler = GetCurrentSEHRecord(); + + pExInfo->m_pShadowSP = pShadowSP; + + // The context and exception record are no longer any good. + _ASSERTE(pExInfo->m_pContext < dEsp); // It must be off the top of the stack. + pExInfo->m_pContext = 0; // Whack it. + pExInfo->m_pExceptionRecord = 0; + pExInfo->m_pExceptionPointers = 0; + + // We're going to put one nested record back on the stack before we resume. This is + // where it goes. + NestedHandlerExRecord *pNestedHandlerExRecord = (NestedHandlerExRecord*)((BYTE*)dEsp - ALIGN_UP(sizeof(NestedHandlerExRecord), STACK_ALIGN_SIZE)); + + // The point of no return. The next statement starts scribbling on the stack. 
It's + // deep enough that we won't hit our own locals. (That's important, 'cuz we're still + // using them.) + // + _ASSERTE(dEsp > &pCf); + pNestedHandlerExRecord->m_handlerInfo.m_hThrowable=NULL; // This is random memory. Handle + // must be initialized to null before + // calling Init(), as Init() will try + // to free any old handle. + pNestedHandlerExRecord->Init((PEXCEPTION_ROUTINE)COMPlusNestedExceptionHandler, pNestedFrame); + + INSTALL_EXCEPTION_HANDLING_RECORD(&(pNestedHandlerExRecord->m_ExReg)); + + context.SetSP(pNestedHandlerExRecord); + + // We might have moved the bottommost handler. The nested record itself is never + // the bottom most handler -- it's pushed afte the fact. So we have to make the + // bottom-most handler the one BEFORE the nested record. + if (pExInfo->m_pBottomMostHandler < pNewBottomMostHandler) + { + STRESS_LOG3(LF_EH, LL_INFO10000, "ResumeAtJitEH: setting ExInfo:0x%p m_pBottomMostHandler from 0x%p to 0x%p\n", + pExInfo, pExInfo->m_pBottomMostHandler, pNewBottomMostHandler); + pExInfo->m_pBottomMostHandler = pNewBottomMostHandler; + } + + if (pShadowSP) + { + *pShadowSP = context.GetSP(); + } + } + + STRESS_LOG3(LF_EH, LL_INFO100, "ResumeAtJitEH: resuming at EIP = %p ESP = %p EBP = %p\n", + context.Eip, context.GetSP(), context.GetFP()); + +#ifdef STACK_GUARDS_DEBUG + // We are transitioning back to managed code, so ensure that we are in + // SO-tolerant mode before we do so. + RestoreSOToleranceState(); +#endif + + // we want this to happen as late as possible but certainly after the notification + // that the handle for the current ExInfo has been freed has been delivered + pExInfo->m_EHClauseInfo.SetManagedCodeEntered(TRUE); + + ETW::ExceptionLog::ExceptionCatchBegin(pCf->GetCodeInfo()->GetMethodDesc(), (PVOID)pCf->GetCodeInfo()->GetStartAddress()); + + ResumeAtJitEHHelper(&context); + UNREACHABLE_MSG("Should never return from ResumeAtJitEHHelper!"); + + // we do not set pExInfo->m_EHClauseInfo.m_fManagedCodeEntered = FALSE here, + // that happens when the catch clause calls back to COMPlusEndCatch + // we don't return to this point so it would be moot (see unreachable_msg above) + +} +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +// Must be in a separate function because INSTALL_COMPLUS_EXCEPTION_HANDLER has a filter +int CallJitEHFilterWorker(size_t *pShadowSP, EHContext *pContext) +{ + STATIC_CONTRACT_THROWS; + STATIC_CONTRACT_GC_TRIGGERS; + STATIC_CONTRACT_MODE_COOPERATIVE; + STATIC_CONTRACT_SO_INTOLERANT; + + int retVal = EXCEPTION_CONTINUE_SEARCH; + + BEGIN_CALL_TO_MANAGED(); + + retVal = CallJitEHFilterHelper(pShadowSP, pContext); + + END_CALL_TO_MANAGED(); + + return retVal; +} + +int CallJitEHFilter(CrawlFrame* pCf, BYTE* startPC, EE_ILEXCEPTION_CLAUSE *EHClausePtr, DWORD nestingLevel, OBJECTREF thrownObj) +{ + STATIC_CONTRACT_THROWS; + STATIC_CONTRACT_GC_TRIGGERS; + STATIC_CONTRACT_MODE_COOPERATIVE; + + int retVal = EXCEPTION_CONTINUE_SEARCH; + size_t * pShadowSP = NULL; + EHContext context; + + context.Setup(PCODE(startPC + EHClausePtr->FilterOffset), pCf->GetRegisterSet()); + + size_t * pEndFilter = NULL; // Write + pCf->GetCodeManager()->FixContext(ICodeManager::FILTER_CONTEXT, &context, pCf->GetCodeInfo(), + EHClausePtr->FilterOffset, nestingLevel, thrownObj, pCf->GetCodeManState(), + &pShadowSP, &pEndFilter); + + // End of the filter is the same as start of handler + if (pEndFilter) + { + *pEndFilter = EHClausePtr->HandlerStartPC; + } + + // ExceptionFilterFrame serves two purposes: + // + // 1. 
It serves as a frame that stops the managed search for handler + // if we fault in the filter. ThrowCallbackType.pTopFrame is going point + // to this frame during search for exception handler inside filter. + // The search for handler needs a frame to stop. If we had no frame here, + // the exceptions in filters would not be swallowed correctly since we would + // walk past the EX_TRY/EX_CATCH block in COMPlusThrowCallbackHelper. + // + // 2. It allows setting of SHADOW_SP_FILTER_DONE flag in UnwindFrames() + // if we fault in the filter. We have to set this flag together with unwinding + // of the filter frame. Using a regular C++ holder to clear this flag here would cause + // GC holes. The stack would be in inconsistent state when we trigger gc just before + // returning from UnwindFrames. + + FrameWithCookie exceptionFilterFrame(pShadowSP); + + ETW::ExceptionLog::ExceptionFilterBegin(pCf->GetCodeInfo()->GetMethodDesc(), (PVOID)pCf->GetCodeInfo()->GetStartAddress()); + + retVal = CallJitEHFilterWorker(pShadowSP, &context); + + ETW::ExceptionLog::ExceptionFilterEnd(); + + exceptionFilterFrame.Pop(); + + return retVal; +} + +void CallJitEHFinally(CrawlFrame* pCf, BYTE* startPC, EE_ILEXCEPTION_CLAUSE *EHClausePtr, DWORD nestingLevel) +{ + WRAPPER_NO_CONTRACT; + + EHContext context; + context.Setup(PCODE(startPC + EHClausePtr->HandlerStartPC), pCf->GetRegisterSet()); + + size_t * pShadowSP = NULL; // Write Esp to *pShadowSP before jumping to handler + + size_t * pFinallyEnd = NULL; + pCf->GetCodeManager()->FixContext( + ICodeManager::FINALLY_CONTEXT, &context, pCf->GetCodeInfo(), + EHClausePtr->HandlerStartPC, nestingLevel, ObjectToOBJECTREF((Object *) NULL), pCf->GetCodeManState(), + &pShadowSP, &pFinallyEnd); + + if (pFinallyEnd) + { + *pFinallyEnd = EHClausePtr->HandlerEndPC; + } + + ETW::ExceptionLog::ExceptionFinallyBegin(pCf->GetCodeInfo()->GetMethodDesc(), (PVOID)pCf->GetCodeInfo()->GetStartAddress()); + + CallJitEHFinallyHelper(pShadowSP, &context); + + ETW::ExceptionLog::ExceptionFinallyEnd(); + + // + // Update the registers using new context + // + // This is necessary to reflect GC pointer changes during the middle of a unwind inside a + // finally clause, because: + // 1. GC won't see the part of stack inside try (which has thrown an exception) that is already + // unwinded and thus GC won't update GC pointers for this portion of the stack, but rather the + // call stack in finally. + // 2. upon return of finally, the unwind process continues and unwinds stack based on the part + // of stack inside try and won't see the updated values in finally. + // As a result, we need to manually update the context using register values upon return of finally + // + // Note that we only update the registers for finally clause because + // 1. For filter handlers, stack walker is able to see the whole stack (including the try part) + // with the help of ExceptionFilterFrame as filter handlers are called in first pass + // 2. For catch handlers, the current unwinding is already finished + // + context.UpdateFrame(pCf->GetRegisterSet()); + + // This does not need to be guarded by a holder because the frame is dead if an exception gets thrown. Filters are different + // since they are run in the first pass, so we must update the shadowSP reset in CallJitEHFilter. 
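+ //
+ // Roughly, the shadow-SP slot lifecycle looks like this (an illustrative sketch,
+ // not literal code from the helpers):
+ //
+ //     *pShadowSP = context.Esp;   // done by the Call*Helper before jumping to the clause
+ //     ... the funclet runs ...
+ //     *pShadowSP = 0;             // done below, once the finally has returned
+ //
+ // Filters are handled differently (see CallJitEHFilter and the SHADOW_SP_FILTER_DONE
+ // handling described above) because they run during the first pass.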
+ if (pShadowSP) { + *pShadowSP = 0; // reset the shadowSP to 0 + } +} +#if defined(_MSC_VER) +#pragma warning (default : 4731) +#endif + +//===================================================================== +// ********************************************************************* +BOOL ComPlusFrameSEH(EXCEPTION_REGISTRATION_RECORD* pEHR) +{ + LIMITED_METHOD_CONTRACT; + + return ((LPVOID)pEHR->Handler == (LPVOID)COMPlusFrameHandler || (LPVOID)pEHR->Handler == (LPVOID)COMPlusNestedExceptionHandler); +} + + +// +//------------------------------------------------------------------------- +// This is installed when we call COMPlusFrameHandler to provide a bound to +// determine when are within a nested exception +//------------------------------------------------------------------------- +EXCEPTION_HANDLER_IMPL(COMPlusNestedExceptionHandler) +{ + WRAPPER_NO_CONTRACT; + + if (pExceptionRecord->ExceptionFlags & (EXCEPTION_UNWINDING | EXCEPTION_EXIT_UNWIND)) + { + LOG((LF_EH, LL_INFO100, " COMPlusNestedHandler(unwind) with %x at %x\n", pExceptionRecord->ExceptionCode, + pContext ? GetIP(pContext) : 0)); + + + // We're unwinding past a nested exception record, which means that we've thrown + // a new exception out of a region in which we're handling a previous one. The + // previous exception is overridden -- and needs to be unwound. + + // The preceding is ALMOST true. There is one more case, where we use setjmp/longjmp + // from withing a nested handler. We won't have a nested exception in that case -- just + // the unwind. + + Thread* pThread = GetThread(); + _ASSERTE(pThread); + ExInfo* pExInfo = &(pThread->GetExceptionState()->m_currentExInfo); + ExInfo* pPrevNestedInfo = pExInfo->m_pPrevNestedInfo; + + if (pPrevNestedInfo == &((NestedHandlerExRecord*)pEstablisherFrame)->m_handlerInfo) + { + _ASSERTE(pPrevNestedInfo); + + LOG((LF_EH, LL_INFO100, "COMPlusNestedExceptionHandler: PopExInfo(): popping nested ExInfo at 0x%p\n", pPrevNestedInfo)); + + pPrevNestedInfo->DestroyExceptionHandle(); + pPrevNestedInfo->m_StackTraceInfo.FreeStackTrace(); + +#ifdef DEBUGGING_SUPPORTED + if (g_pDebugInterface != NULL) + { + g_pDebugInterface->DeleteInterceptContext(pPrevNestedInfo->m_DebuggerExState.GetDebuggerInterceptContext()); + } +#endif // DEBUGGING_SUPPORTED + + pExInfo->m_pPrevNestedInfo = pPrevNestedInfo->m_pPrevNestedInfo; + + } else { + // The whacky setjmp/longjmp case. Nothing to do. + } + + } else { + LOG((LF_EH, LL_INFO100, " InCOMPlusNestedHandler with %x at %x\n", pExceptionRecord->ExceptionCode, + pContext ? GetIP(pContext) : 0)); + } + + + // There is a nasty "gotcha" in the way exception unwinding, finally's, and nested exceptions + // interact. Here's the scenario ... it involves two exceptions, one normal one, and one + // raised in a finally. + // + // The first exception occurs, and is caught by some handler way up the stack. That handler + // calls RtlUnwind -- and handlers that didn't catch this first exception are called again, with + // the UNWIND flag set. If, one of the handlers throws an exception during + // unwind (like, a throw from a finally) -- then that same handler is not called during + // the unwind pass of the second exception. [ASIDE: It is called on first-pass.] + // + // What that means is -- the COMPlusExceptionHandler, can't count on unwinding itself correctly + // if an exception is thrown from a finally. Instead, it relies on the NestedExceptionHandler + // that it pushes for this. 
+ // + + EXCEPTION_DISPOSITION retval = EXCEPTION_HANDLER_FWD(COMPlusFrameHandler); + LOG((LF_EH, LL_INFO100, "Leaving COMPlusNestedExceptionHandler with %d\n", retval)); + return retval; +} + +EXCEPTION_REGISTRATION_RECORD *FindNestedEstablisherFrame(EXCEPTION_REGISTRATION_RECORD *pEstablisherFrame) +{ + LIMITED_METHOD_CONTRACT; + + while (pEstablisherFrame->Handler != (PEXCEPTION_ROUTINE)COMPlusNestedExceptionHandler) { + pEstablisherFrame = pEstablisherFrame->Next; + _ASSERTE(pEstablisherFrame != EXCEPTION_CHAIN_END); // should always find one + } + return pEstablisherFrame; +} + +EXCEPTION_HANDLER_IMPL(FastNExportExceptHandler) +{ + WRAPPER_NO_CONTRACT; + + // Most of our logic is in commin with COMPlusFrameHandler. + EXCEPTION_DISPOSITION retval = EXCEPTION_HANDLER_FWD(COMPlusFrameHandler); + +#ifdef _DEBUG + // If the exception is escaping the last CLR personality routine on the stack, + // then state a flag on the thread to indicate so. + if (retval == ExceptionContinueSearch) + { + SetReversePInvokeEscapingUnhandledExceptionStatus(IS_UNWINDING(pExceptionRecord->ExceptionFlags), pEstablisherFrame); + } +#endif // _DEBUG + + return retval; +} + + +// Just like a regular NExport handler -- except it pops an extra frame on unwind. A handler +// like this is needed by the COMMethodStubProlog code. It first pushes a frame -- and then +// pushes a handler. When we unwind, we need to pop the extra frame to avoid corrupting the +// frame chain in the event of an unmanaged catcher. +// +EXCEPTION_HANDLER_IMPL(UMThunkPrestubHandler) +{ + // @todo: we'd like to have a dynamic contract here, but there's a problem. (Bug 129180) Enter on the CRST used + // in HandleManagedFault leaves the no-trigger count incremented. The destructor of this contract will restore + // it to zero, then when we leave the CRST in LinkFrameAndThrow, we assert because we're trying to decrement the + // gc-trigger count down past zero. The solution is to fix what we're doing with this CRST. + STATIC_CONTRACT_THROWS; // COMPlusFrameHandler throws + STATIC_CONTRACT_GC_TRIGGERS; + STATIC_CONTRACT_MODE_ANY; + + EXCEPTION_DISPOSITION retval = ExceptionContinueSearch; + + BEGIN_CONTRACT_VIOLATION(SOToleranceViolation); + + // We must forward to the COMPlusFrameHandler. This will unwind the Frame Chain up to here, and also leave the + // preemptive GC mode set correctly. + retval = EXCEPTION_HANDLER_FWD(COMPlusFrameHandler); + +#ifdef _DEBUG + // If the exception is escaping the last CLR personality routine on the stack, + // then state a flag on the thread to indicate so. + if (retval == ExceptionContinueSearch) + { + SetReversePInvokeEscapingUnhandledExceptionStatus(IS_UNWINDING(pExceptionRecord->ExceptionFlags), pEstablisherFrame); + } +#endif // _DEBUG + + if (IS_UNWINDING(pExceptionRecord->ExceptionFlags)) + { + // Pops an extra frame on unwind. + + GCX_COOP(); // Must be cooperative to modify frame chain. 
+ + Thread *pThread = GetThread(); + _ASSERTE(pThread); + Frame *pFrame = pThread->GetFrame(); + pFrame->ExceptionUnwind(); + pFrame->Pop(pThread); + } + + END_CONTRACT_VIOLATION; + + return retval; +} + +LONG CLRNoCatchHandler(EXCEPTION_POINTERS* pExceptionInfo, PVOID pv) +{ + WRAPPER_NO_CONTRACT; + STATIC_CONTRACT_ENTRY_POINT; + + LONG result = EXCEPTION_CONTINUE_SEARCH; + + // This function can be called during the handling of a SO + //BEGIN_ENTRYPOINT_VOIDRET; + + result = CLRVectoredExceptionHandler(pExceptionInfo); + + if (EXCEPTION_EXECUTE_HANDLER == result) + { + result = EXCEPTION_CONTINUE_SEARCH; + } + + //END_ENTRYPOINT_VOIDRET; + + return result; +} + +#ifdef FEATURE_COMINTEROP +// The reverse COM interop path needs to be sure to pop the ComMethodFrame that is pushed, but we do not want +// to have an additional FS:0 handler between the COM callsite and the call into managed. So we push this +// FS:0 handler, which will defer to the usual COMPlusFrameHandler and then perform the cleanup of the +// ComMethodFrame, if needed. +EXCEPTION_HANDLER_IMPL(COMPlusFrameHandlerRevCom) +{ + STATIC_CONTRACT_THROWS; + STATIC_CONTRACT_GC_TRIGGERS; + STATIC_CONTRACT_MODE_ANY; + + // Defer to COMPlusFrameHandler + EXCEPTION_DISPOSITION result = EXCEPTION_HANDLER_FWD(COMPlusFrameHandler); + + if (pExceptionRecord->ExceptionFlags & (EXCEPTION_UNWINDING | EXCEPTION_EXIT_UNWIND)) + { + // Do cleanup as needed + ComMethodFrame::DoSecondPassHandlerCleanup(GetCurrFrame(pEstablisherFrame)); + } + + return result; +} +#endif // FEATURE_COMINTEROP + + +// Returns TRUE if caller should resume execution. +BOOL +AdjustContextForVirtualStub( + EXCEPTION_RECORD *pExceptionRecord, + CONTEXT *pContext) +{ + LIMITED_METHOD_CONTRACT; + + Thread * pThread = GetThread(); + + // We may not have a managed thread object. Example is an AV on the helper thread. + // (perhaps during StubManager::IsStub) + if (pThread == NULL) + { + return FALSE; + } + + PCODE f_IP = GetIP(pContext); + + VirtualCallStubManager::StubKind sk; + /* VirtualCallStubManager *pMgr = */ VirtualCallStubManager::FindStubManager(f_IP, &sk); + + if (sk == VirtualCallStubManager::SK_DISPATCH) + { + if (*PTR_WORD(f_IP) != X86_INSTR_CMP_IND_ECX_IMM32) + { + _ASSERTE(!"AV in DispatchStub at unknown instruction"); + return FALSE; + } + } + else + if (sk == VirtualCallStubManager::SK_RESOLVE) + { + if (*PTR_WORD(f_IP) != X86_INSTR_MOV_EAX_ECX_IND) + { + _ASSERTE(!"AV in ResolveStub at unknown instruction"); + return FALSE; + } + + SetSP(pContext, dac_cast(dac_cast(GetSP(pContext)) + sizeof(void*))); // rollback push eax + } + else + { + return FALSE; + } + + PCODE callsite = GetAdjustedCallAddress(*dac_cast(GetSP(pContext))); + pExceptionRecord->ExceptionAddress = (PVOID)callsite; + SetIP(pContext, callsite); + + // put ESP back to what it was before the call. + SetSP(pContext, dac_cast(dac_cast(GetSP(pContext)) + sizeof(void*))); + + return TRUE; +} + +#endif // !DACCESS_COMPILE diff --git a/src/vm/i386/fptext.asm b/src/vm/i386/fptext.asm new file mode 100644 index 0000000000..2190d18519 --- /dev/null +++ b/src/vm/i386/fptext.asm @@ -0,0 +1,277 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. +; See the LICENSE file in the project root for more information. 
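+
+; For reference, the NUMBER offsets defined below (nPrecision/nScale/nSign/nDigits)
+; correspond to a C struct shaped roughly like the following; the field names are
+; illustrative, and only the offsets 0/4/8/12 and the 2-byte digit elements are
+; actually relied on by this code:
+;
+;     struct NUMBER {
+;         int     precision;   // +0
+;         int     scale;       // +4
+;         int     sign;        // +8
+;         wchar_t digits[];    // +12, NUL-terminated run of '0'..'9' characters
+;     };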
+ +; ==++== +; + +; +; ==--== + .386 + .model flat + + option casemap:none + public _DoubleToNumber,_NumberToDouble + +; NUMBER structure + +nPrecision equ (dword ptr 0) +nScale equ (dword ptr 4) +nSign equ (dword ptr 8) +nDigits equ (word ptr 12) + + .code + +; Powers of 10 from 1.0E1 to 1.0E15 increasing by 1 + +Pow10By1 label tbyte + + dt 1.0E1 + dt 1.0E2 + dt 1.0E3 + dt 1.0E4 + dt 1.0E5 + dt 1.0E6 + dt 1.0E7 + dt 1.0E8 + dt 1.0E9 + dt 1.0E10 + dt 1.0E11 + dt 1.0E12 + dt 1.0E13 + dt 1.0E14 + dt 1.0E15 + +; Powers of 10 from 1.0E16 to 1.0E336 increasing by 16 + +Pow10By16 label tbyte + + dt 1.0E16 + dt 1.0E32 + dt 1.0E48 + dt 1.0E64 + dt 1.0E80 + dt 1.0E96 + dt 1.0E112 + dt 1.0E128 + dt 1.0E144 + dt 1.0E160 + dt 1.0E176 + dt 1.0E192 + dt 1.0E208 + dt 1.0E224 + dt 1.0E240 + dt 1.0E256 + dt 1.0E272 + dt 1.0E288 + dt 1.0E304 + dt 1.0E320 + dt 1.0E336 + +; Single precision constants + +Single10 dd 10.0 +SingleINF dd 7F800000H + +g_CwStd dw 137fH ;Mask all errors, 64-bit, round near + +; void _cdecl DoubleToNumber(double value, int precision, NUMBER* number) + +_DoubleToNumber proc + +value equ (qword ptr [ebp+8]) +precision equ (dword ptr [ebp+16]) +number equ (dword ptr [ebp+20]) +paramSize = 16 + +cwsave equ (word ptr [ebp-24]) +digits equ (tbyte ptr [ebp-20]) +temp equ (tbyte ptr [ebp-10]) +localSize = 24 + + push ebp + mov ebp,esp + sub esp,localSize + push edi + push ebx + fnstcw cwsave + fldcw g_CwStd + fld value + fstp temp + mov edi,number + mov eax,precision + mov nPrecision[edi],eax + movzx eax,word ptr temp[8] + mov edx,eax + shr edx,15 + mov nSign[edi],edx + and eax,7FFFH + je DN1 + cmp eax,7FFFH + jne DN10 + mov eax,80000000H + cmp dword ptr temp[4],eax + jne DN1 + cmp dword ptr temp[0],0 + jne DN1 + dec eax +DN1: mov nScale[edi],eax + mov nDigits[edi],0 + jmp DN30 +DN10: fld value + sub eax,16382+58 ;Remove bias and 58 bits + imul eax,19728 ;log10(2) * 2^16 = .30103 * 65536 + add eax,0FFFFH ;Round up + sar eax,16 ;Only use high half + lea edx,[eax+18] + mov nScale[edi],edx + neg eax + call ScaleByPow10 + fbstp digits + xor eax,eax + xor ebx,ebx + mov ecx,precision + inc ecx + mov edx,8 + mov al,byte ptr digits[8] + test al,0F0H + jne DN11 + dec nScale[edi] + jmp DN12 +DN11: shr al,4 + dec ecx + je DN20 + add al,'0' + mov nDigits[edi+ebx*2],ax + inc ebx + mov al,byte ptr digits[edx] +DN12: and al,0FH + dec ecx + je DN20 + add al,'0' + mov nDigits[edi+ebx*2],ax + inc ebx + dec edx + jl DN22 ; We've run out of digits & don't have a rounding digit, so we'll skip the rounding step. 
+ mov al,byte ptr digits[edx] + jmp DN11 +DN20: cmp al,5 + jb DN22 +DN21: dec ebx + inc nDigits[edi+ebx*2] + cmp nDigits[edi+ebx*2],'9' + jbe DN23 + or ebx,ebx + jne DN21 + mov nDigits[edi+ebx*2],'1' + inc nScale[edi] + jmp DN23 +DN22: dec ebx + cmp nDigits[edi+ebx*2],'0' + je DN22 +DN23: mov nDigits[edi+ebx*2+2],0 +DN30: + fldcw cwsave ;;Restore original CW + pop ebx + pop edi + mov esp,ebp + pop ebp + ret ;made _cdecl for WinCE paramSize + +_DoubleToNumber endp + +; void _cdecl NumberToDouble(NUMBER* number, double* value) +_NumberToDouble proc + +number equ (dword ptr [ebp+8]) +value equ (dword ptr [ebp+12]) +paramSize = 8 + +cwsave equ (word ptr [ebp-8]) +temp equ (dword ptr [ebp-4]) +localSize = 8 + + push ebp + mov ebp,esp ; Save the stack ptr + sub esp,localSize ; + fnstcw cwsave + fldcw g_CwStd + fldz ; zero the register + mov ecx,number ; move precision into ecx + xor edx,edx ; clear edx + cmp dx,nDigits[ecx] ; if the first digit is 0 goto SignResult + je SignResult + mov eax,nScale[ecx] ; store the scale in eax + cmp eax,-330 ; if the scale is less than or equal to -330 goto Cleanup + jle Cleanup + cmp eax,310 ; if the scale is less than 310, goto ParseDigits + jl ParseDigits + fstp st(0) ; store value on the top of the floating point stack + fld SingleINF ; Load infinity + jmp SignResult ; Goto SignResult +ParseDigits: + movzx eax,nDigits[ecx+edx*2]; load the character at nDigits[edx]; + sub eax,'0' ; subtract '0' + jc ScaleResult ; jump to ScaleResult if this produces a negative value + mov temp,eax ; store the first digit in temp + fmul Single10 ; Multiply by 10 + fiadd temp ; Add the digit which we just found + inc edx ; increment the counter + cmp edx,18 ; if (eax<18) goto ParseDigits + jb ParseDigits +ScaleResult: + mov eax,nScale[ecx] ; eax = scale + sub eax,edx ; scale -= (number of digits) + call ScaleByPow10 ; multiply the result by 10^scale +SignResult: + cmp nSign[ecx],0 ; If the sign is 0 already go to Cleanup, otherwise change the sign. + je Cleanup + fchs +Cleanup: + mov edx,value ; store value in edx + fstp qword ptr [edx] ; copy from value to the fp stack + fldcw cwsave ; Restore original CW + mov esp,ebp ; restore the stack frame & exit. + pop ebp + ret ;Made _cdecl for WinCE paramSize + +_NumberToDouble endp + +; Scale st(0) by 10^eax + +ScaleByPow10 proc + test eax,eax + je SP2 + jl SP3 + mov edx,eax + and edx,0FH + je SP1 + lea edx,[edx+edx*4] + fld Pow10By1[edx*2-10] + fmul +SP1: mov edx,eax + shr edx,4 + test edx, edx ; remove partial flag stall caused by shr + je SP2 + lea edx,[edx+edx*4] + fld Pow10By16[edx*2-10] + fmul +SP2: ret +SP3: neg eax + mov edx,eax + and edx,0FH + je SP4 + lea edx,[edx+edx*4] + fld Pow10By1[edx*2-10] + fdiv +SP4: mov edx,eax + shr edx,4 + test edx, edx ; remove partial flag stall caused by shr + je SP5 + lea edx,[edx+edx*4] + fld Pow10By16[edx*2-10] + fdiv +SP5: ret +ScaleByPow10 endp + + end diff --git a/src/vm/i386/gmsasm.asm b/src/vm/i386/gmsasm.asm new file mode 100644 index 0000000000..6b6044b50d --- /dev/null +++ b/src/vm/i386/gmsasm.asm @@ -0,0 +1,37 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. +; See the LICENSE file in the project root for more information. 
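+
+; The C++ side (gmscpu.h) reaches this routine through the CAPTURE_STATE macro,
+; which expands to roughly:
+;
+;     if (LazyMachStateCaptureState(machState)) ret
+;
+; The routine always returns 0 (xor eax, eax below), so 'ret' is never executed; it
+; exists only to give the epilog walker in gmsx86.cpp a short path to the epilog.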
+ +; ==++== +; + +; +; ==--== +; +; *** NOTE: If you make changes to this file, propagate the changes to +; gmsasm.s in this directory + + .586 + .model flat + +include asmconstants.inc + + option casemap:none + .code + +; int __fastcall LazyMachStateCaptureState(struct LazyMachState *pState); +@LazyMachStateCaptureState@4 proc public + mov [ecx+MachState__pRetAddr], 0 ; marks that this is not yet valid + mov [ecx+MachState__edi], edi ; remember register values + mov [ecx+MachState__esi], esi + mov [ecx+MachState__ebx], ebx + mov [ecx+LazyMachState_captureEbp], ebp + mov [ecx+LazyMachState_captureEsp], esp + + mov eax, [esp] ; capture return address + mov [ecx+LazyMachState_captureEip], eax + xor eax, eax + retn +@LazyMachStateCaptureState@4 endp + +end diff --git a/src/vm/i386/gmscpu.h b/src/vm/i386/gmscpu.h new file mode 100644 index 0000000000..0aecefac21 --- /dev/null +++ b/src/vm/i386/gmscpu.h @@ -0,0 +1,140 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +/**************************************************************/ +/* gmscpu.h */ +/**************************************************************/ +/* HelperFrame is defines 'GET_STATE(machState)' macro, which + figures out what the state of the machine will be when the + current method returns. It then stores the state in the + JIT_machState structure. */ + +/**************************************************************/ + +#ifndef __gmsx86_h__ +#define __gmsx86_h__ + +#define __gmsx86_h__ + +#ifdef _DEBUG +class HelperMethodFrame; +struct MachState; +EXTERN_C MachState* STDCALL HelperMethodFrameConfirmState(HelperMethodFrame* frame, void* esiVal, void* ediVal, void* ebxVal, void* ebpVal); +#endif + + // A MachState indicates the register state of the processor at some point in time (usually + // just before or after a call is made). It can be made one of two ways. Either explicitly + // (when you for some reason know the values of all the registers), or implicitly using the + // GET_STATE macros. 
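+ // For example, the implicit path used by the helper-frame code looks roughly like
+ // this (an illustrative sketch only; the real callers are the HelperMethodFrame
+ // machinery together with LazyMachState::unwindLazyState in gmsx86.cpp):
+ //
+ //     LazyMachState ms;
+ //     CAPTURE_STATE(&ms, return 0);         // cheap capture at the call site
+ //     ...
+ //     MachState unwound;
+ //     LazyMachState::unwindLazyState(&ms, &unwound, GetCurrentThreadId());
+ //     TADDR retAddr = unwound.GetRetAddr(); // caller's state, as of its return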
+ +typedef DPTR(struct MachState) PTR_MachState; +struct MachState { + + MachState() + { + LIMITED_METHOD_DAC_CONTRACT; + INDEBUG(memset(this, 0xCC, sizeof(MachState));) + } + + bool isValid() { LIMITED_METHOD_DAC_CONTRACT; _ASSERTE(dac_cast(_pRetAddr) != INVALID_POINTER_CC); return(_pRetAddr != 0); } + TADDR* pEdi() { LIMITED_METHOD_DAC_CONTRACT; _ASSERTE(dac_cast(_pEdi) != INVALID_POINTER_CC); return(_pEdi); } + TADDR* pEsi() { LIMITED_METHOD_DAC_CONTRACT; _ASSERTE(dac_cast(_pEsi) != INVALID_POINTER_CC); return(_pEsi); } + TADDR* pEbx() { LIMITED_METHOD_DAC_CONTRACT; _ASSERTE(dac_cast(_pEbx) != INVALID_POINTER_CC); return(_pEbx); } + TADDR* pEbp() { LIMITED_METHOD_DAC_CONTRACT; _ASSERTE(dac_cast(_pEbp) != INVALID_POINTER_CC); return(_pEbp); } + TADDR esp() { LIMITED_METHOD_DAC_CONTRACT; _ASSERTE(isValid()); return(_esp); } + PTR_TADDR pRetAddr() { LIMITED_METHOD_DAC_CONTRACT; _ASSERTE(isValid()); return(_pRetAddr); } + TADDR GetRetAddr() { LIMITED_METHOD_DAC_CONTRACT; _ASSERTE(isValid()); return *_pRetAddr; } +#ifndef DACCESS_COMPILE + void SetRetAddr(TADDR* addr) { LIMITED_METHOD_CONTRACT; _ASSERTE(isValid()); _pRetAddr = addr; } +#endif + + friend class HelperMethodFrame; + friend class CheckAsmOffsets; + friend struct LazyMachState; +#ifdef _DEBUG + friend MachState* STDCALL HelperMethodFrameConfirmState(HelperMethodFrame* frame, void* esiVal, void* ediVal, void* ebxVal, void* ebpVal); +#endif + + +protected: + // Note the fields are layed out to make generating a + // MachState structure from assembly code very easy + + // The state of all the callee saved registers. + // If the register has been spill to the stack p + // points at this location, otherwise it points + // at the field field itself + PTR_TADDR _pEdi; + TADDR _edi; + PTR_TADDR _pEsi; + TADDR _esi; + PTR_TADDR _pEbx; + TADDR _ebx; + PTR_TADDR _pEbp; + TADDR _ebp; + + TADDR _esp; // stack pointer after the function returns + PTR_TADDR _pRetAddr; // The address of the stored IP address (points into the stack) +}; + +/********************************************************************/ +/* This allows you to defer the computation of the Machine state + until later. Note that we don't reuse slots, because we want + this to be threadsafe without locks */ + +struct LazyMachState; +typedef DPTR(LazyMachState) PTR_LazyMachState; +struct LazyMachState : public MachState { + // compute the machine state of the processor as it will exist just + // after the return after at most'funCallDepth' number of functions. + // if 'testFtn' is non-NULL, the return address is tested at each + // return instruction encountered. 
If this test returns non-NULL, + // then stack walking stops (thus you can walk up to the point that the + // return address matches some criteria + + // Normally this is called with funCallDepth=1 and testFtn = 0 so that + // it returns the state of the processor after the function that called 'captureState()' + void setLazyStateFromUnwind(MachState* copy); + static void unwindLazyState(LazyMachState* baseState, + MachState* lazyState, + DWORD threadId, + int funCallDepth = 1, + HostCallPreference hostCallPreference = AllowHostCalls); + + friend class HelperMethodFrame; + friend class CheckAsmOffsets; +private: + TADDR captureEbp; // Ebp at the time of capture + TADDR captureEsp; // Esp at the time of capture + TADDR captureEip; // Eip at the time of capture +}; + +inline void LazyMachState::setLazyStateFromUnwind(MachState* copy) +{ + // _pRetAddr has to be the last thing updated when we make the copy (because its + // is the the _pRetAddr becoming non-zero that flips this from invalid to valid. + // we assert that it is the last field in the struct. + static_assert_no_msg(offsetof(MachState, _pRetAddr) + sizeof(_pRetAddr) == sizeof(MachState)); + + memcpy(this, copy, offsetof(MachState, _pRetAddr)); + + // this has to be last + VolatileStore((TADDR*)&_pRetAddr, dac_cast(copy->_pRetAddr)); +} + +// Do the initial capture of the machine state. This is meant to be +// as light weight as possible, as we may never need the state that +// we capture. Thus to complete the process you need to call +// 'getMachState()', which finishes the process +EXTERN_C int __fastcall LazyMachStateCaptureState(struct LazyMachState *pState); + +// CAPTURE_STATE captures just enough register state so that the state of the +// processor can be deterined just after the the routine that has CAPTURE_STATE in +// it returns. + +// Note that the return is never taken, is is there for epilog walking +#define CAPTURE_STATE(machState, ret) \ + if (LazyMachStateCaptureState(machState)) ret + +#endif diff --git a/src/vm/i386/gmsx86.cpp b/src/vm/i386/gmsx86.cpp new file mode 100644 index 0000000000..e7e16b70ab --- /dev/null +++ b/src/vm/i386/gmsx86.cpp @@ -0,0 +1,1245 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +/**************************************************************/ +/* gmsx86.cpp */ +/**************************************************************/ + +#include "common.h" +#include "gmscpu.h" + +/***************************************************************/ +/* setMachState figures out what the state of the CPU will be + when the function that calls 'setMachState' returns. It stores + this information in 'frame' + + setMachState works by simulating the execution of the + instructions starting at the instruction following the + call to 'setMachState' and continuing until a return instruction + is simulated. To avoid having to process arbitrary code, the + call to 'setMachState' should be called as follows + + if (machState.setMachState != 0) return; + + setMachState is guarnenteed to return 0 (so the return + statement will never be executed), but the expression above + insures insures that there is a 'quick' path to epilog + of the function. This insures that setMachState will only + have to parse a limited number of X86 instructions. 
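+
+ (In terms of the fields captured by LazyMachStateCaptureState, the simulation starts
+ at captureEip with ESP just above captureEsp -- the captured return address is popped
+ first -- and runs until 'funCallDepth' returns have been simulated, or until the
+ return address lands in managed code.)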
*/ + + +/***************************************************************/ +#ifndef POISONC +#define POISONC ((sizeof(int *) == 4)?0xCCCCCCCCU:UI64(0xCCCCCCCCCCCCCCCC)) +#endif + +/***************************************************************/ +/* the 'zeroFtn and 'recursiveFtn' are only here to determine + if if mscorwks itself has been instrumented by a profiler + that intercepts calls or epilogs of functions. (the + callsInstrumented and epilogInstrumented functions). */ + +#if !defined(DACCESS_COMPILE) + +#pragma optimize("gsy", on ) // optimize to insure that code generation does not have junk in it +#pragma warning(disable:4717) + +static int __stdcall zeroFtn() { + return 0; +} + +static int __stdcall recursiveFtn() { + return recursiveFtn()+1; +} + +#pragma optimize("", on ) + + +/* Has mscorwks been instrumented so that calls are morphed into push XXXX call */ +static bool callsInstrumented() { + // Does the recusive function begin with push XXXX call + PTR_BYTE ptr = PTR_BYTE(recursiveFtn); + + return (ptr[0] == 0x68 && ptr[5] == 0xe8); // PUSH XXXX, call +} + +/* Has mscorwks been instrumented so function prolog and epilogs are replaced with + jmp [XXXX] */ + +static bool epilogInstrumented() { + + PTR_BYTE ptr = PTR_BYTE(zeroFtn); + if (ptr[0] == 0xe8) // call (prolog instrumentation) + ptr += 5; + if (ptr[0] == 0x33 && ptr[1] == 0xc0) // xor eax eax + ptr += 2; + return (ptr[0] == 0xeb || ptr[0] == 0xe9); // jmp +} + +#else + + // Note that we have the callsInstrumeted and epilogInstrumented + // functions so that the looser heuristics used for instrumented code + // can't foul up an instrumented mscorwks. For simplicity sake we + // don't bother with this in the DAC, which means that the DAC could + // be misled more frequently than mscorwks itself, but I still think + // it will not be misled in any real scenario +static bool callsInstrumented() { LIMITED_METHOD_DAC_CONTRACT; return true; } +static bool epilogInstrumented() { LIMITED_METHOD_DAC_CONTRACT; return true; } + +#endif // !defined(DACCESS_COMPILE) + +/***************************************************************/ +/* returns true if a call to 'ip' should be entered by the + epilog walker. Bascically we are looking for things that look + like __SEH_epilog. In particular we look for things that + pops a register before doing a push. If we see something + that we don't recognise, we dont consider it a epilog helper + and return false. +*/ + +static bool shouldEnterCall(PTR_BYTE ip) { + SUPPORTS_DAC; + + int datasize; // helper variable for decoding of address modes + int mod; // helper variable for decoding of mod r/m + int rm; // helper variable for decoding of mod r/m + + int pushes = 0; + + // we should start unbalenced pops within 48 instrs. If not, it is not a special epilog function + // the only reason we need as many instructions as we have below is because coreclr + // gets instrumented for profiling, code coverage, BBT etc, and we want these things to + // just work. + for (int i = 0; i < 48; i++) { + switch(*ip) { + case 0xF2: // repne + case 0xF3: // repe + ip++; + break; + + case 0x68: // push 0xXXXXXXXX + ip += 5; + + // For office profiler. 
They morph tail calls into push TARGET; jmp helper + // so if you see + // + // push XXXX + // jmp xxxx + // + // and we notice that coreclr has been instrumented and + // xxxx starts with a JMP [] then do what you would do for jmp XXXX + if (*ip == 0xE9 && callsInstrumented()) { // jmp helper + PTR_BYTE tmpIp = ip + 5; + PTR_BYTE target = tmpIp + (__int32)*((PTR_TADDR)(PTR_TO_TADDR(tmpIp) - 4)); + if (target[0] == 0xFF && target[1] == 0x25) { // jmp [xxxx] (to external dll) + ip = PTR_BYTE(*((PTR_TADDR)(PTR_TO_TADDR(ip) - 4))); + } + } + else { + pushes++; + } + break; + + case 0x50: // push EAX + case 0x51: // push ECX + case 0x52: // push EDX + case 0x53: // push EBX + case 0x55: // push EBP + case 0x56: // push ESI + case 0x57: // push EDI + pushes++; + ip++; + break; + + case 0xE8: // call + ip += 5; + pushes = 0; // This assumes that all of the previous pushes are arguments to this call + break; + + case 0xFF: + if (ip[1] != 0x15) // call [XXXX] is OK (prolog of epilog helper is intrumented) + return false; // but everything else is not OK. + ip += 6; + pushes = 0; // This assumes that all of the previous pushes are arguments to this call + break; + + case 0x9C: // pushfd + case 0x9D: // popfd + // a pushfd can never be an argument, so we model a pair of + // these instruction as not changing the stack so that a call + // that occurs between them does not consume the value of pushfd + ip++; + break; + + case 0x5D: // pop EBP + case 0x5E: // pop ESI + case 0x5F: // pop EDI + case 0x5B: // pop EBX + case 0x58: // pop EAX + case 0x59: // pop ECX + case 0x5A: // pop EDX + if (pushes <= 0) { + // We now have more pops than pushes. This is our indication + // that we are in an EH_epilog function so we return true. + // This is the only way to exit this method with a retval of true. 
+ return true; + } + --pushes; + ip++; + break; + + case 0xA1: // MOV EAX, [XXXX] + ip += 5; + break; + + case 0xC6: // MOV r/m8, imm8 + datasize = 1; + goto decodeRM; + + case 0x89: // MOV r/m, reg + if (ip[1] == 0xE5) // MOV EBP, ESP + return false; + if (ip[1] == 0xEC) // MOV ESP, EBP + return false; + goto move; + + case 0x8B: // MOV reg, r/m + if (ip[1] == 0xE5) // MOV ESP, EBP + return false; + if (ip[1] == 0xEC) // MOV EBP, ESP + return false; + goto move; + + case 0x88: // MOV reg, r/m (BYTE) + case 0x8A: // MOV r/m, reg (BYTE) + + case 0x31: // XOR + case 0x32: // XOR + case 0x33: // XOR + + move: + datasize = 0; + + decodeRM: + // Note that we don't want to read from ip[] after + // we do ANY incrementing of ip + + mod = (ip[1] & 0xC0) >> 6; + if (mod != 3) { + rm = (ip[1] & 0x07); + if (mod == 0) { // (mod == 0) + if (rm == 5) + ip += 4; // disp32 + else if (rm == 4) + ip += 1; // [reg*K+reg] + // otherwise [reg] + + } + else if (mod == 1) { // (mod == 1) + ip += 1; // for disp8 + if (rm == 4) + ip += 1; // [reg*K+reg+disp8] + // otherwise [reg+disp8] + } + else { // (mod == 2) + ip += 4; // for disp32 + if (rm == 4) + ip += 1; // [reg*K+reg+disp32] + // otherwise [reg+disp32] + } + } + + ip += 2; + ip += datasize; + break; + + case 0x64: // FS: prefix + ip++; + break; + + case 0xEB: // jmp + ip += (signed __int8) ip[1] + 2; + break; + + case 0xE9: // jmp + ip += (__int32)*PTR_DWORD(PTR_TO_TADDR(ip) + 1) + 5; + break; + + case 0xF7: // test r/m32, imm32 + // Magellan code coverage build + if ( (ip[1] & 0x38) == 0x00) + { + datasize = 4; + goto decodeRM; + } + else + { + return false; + } + break; + + case 0x75: // jnz + // Magellan code coverage build + // We always follow forward jump to avoid possible looping. + { + PTR_BYTE tmpIp = ip + (TADDR)(signed __int8) ip[1] + 2; + if (tmpIp > ip) { + ip = tmpIp; // follow forwards jump + } + else { + return false; // backwards jump implies not EH_epilog function + } + } + break; + + case 0xC2: // ret + case 0xC3: // ret n + default: + return false; + } + } + + return false; +} + + +/***************************************************************/ +#ifdef _PREFAST_ +#pragma warning(push) +#pragma warning(disable:21000) // Suppress PREFast warning about overly large function +#endif + +/***************************************************************/ +// A fundamental requirement of managed code is that we need to be able to enumerate all GC references on the +// stack at GC time. To do this we need to be able to 'crawl' the stack. We know how to do this in JIT +// compiled code (it generates additional information like the frame size etc), but we don't know how to do +// this for unmanaged code. For PINVOKE calls, we leave a pointer to the transition boundary between managed +// and unmanaged code and we simply ignore the lower part of the stack. However setting up this transition is +// a bit expensive (1-2 dozen instructions), and while that is acceptable for PINVOKE, it is not acceptable +// for high volume calls, like NEW, CAST, WriterBarrier, Stack field fetch and others. +// +// To get around this, for transitions into the runtime (which we call FCALLS), we DEFER setting up the +// boundary variables (what we call the transition frame), until we actually need it (we will do an operation +// that might cause a GC). 
This allow us to handle the common case (where we might find the thing in a cache, +// or be service the 'new' from a allocation quantum), and only pay the cost of setting up the transition +// frame when it will actually be used. +// +// The problem is that in order to set up a transition frame we need to be able to find ALL REGISTERS AT THE +// TIME THE TRANSITION TO UNMANAGED CODE WAS MADE (because we might need to update them if they have GC +// references). Because we have executed ordinary C++ code (which might spill the registers to the stack at +// any time), we have a problem. LazyMachState is our 'solution' to this problem. We take advantage of the +// fact that the C++ code MUST RESTORE the register before returning. Thus we simulate the execution from the +// current location to the return and 'watch' where the registers got restored from. This is what +// unwindLazyState does (determine what the registers would be IF you had never executed and unmanaged C++ +// code). +// +// By design, this code does not handle all X86 instructions, but only those instructions needed in an +// epilog. If you get a failure because of a missing instruction, it MAY simply be because the compiler +// changed and now emits a new instruction in the epilog, but it MAY also be because the unwinder is +// 'confused' and is trying to follow a code path that is NOT AN EPILOG, and in this case adding +// instructions to 'fix' it is inappropriate. +// +void LazyMachState::unwindLazyState(LazyMachState* baseState, + MachState* lazyState, + DWORD threadId, + int funCallDepth /* = 1 */, + HostCallPreference hostCallPreference /* = (HostCallPreference)(-1) */) +{ + CONTRACTL { + NOTHROW; + GC_NOTRIGGER; + SO_TOLERANT; + SUPPORTS_DAC; + } CONTRACTL_END; + + lazyState->_edi = baseState->_edi; + lazyState->_esi = baseState->_esi; + lazyState->_ebx = baseState->_ebx; + lazyState->_ebp = baseState->captureEbp; +#ifndef DACCESS_COMPILE + lazyState->_pEdi = &baseState->_edi; + lazyState->_pEsi = &baseState->_esi; + lazyState->_pEbx = &baseState->_ebx; + lazyState->_pEbp = &baseState->_ebp; +#endif + + // We have captured the state of the registers as they exist in 'captureState' + // we need to simulate execution from the return address captured in 'captureState + // until we return from the caller of captureState. + + PTR_BYTE ip = PTR_BYTE(baseState->captureEip); + PTR_TADDR ESP = PTR_TADDR(baseState->captureEsp); + ESP++; // pop captureState's return address + + + // VC now has small helper calls that it uses in epilogs. We need to walk into these + // helpers if we are to decode the stack properly. After we walk the helper we need + // to return and continue walking the epiliog. This varaible remembers were to return to + PTR_BYTE epilogCallRet = PTR_BYTE((TADDR)0); + + // The very first conditional jump that we are going to encounter is + // the one testing for the return value of LazyMachStateCaptureState. + // The non-zero path is the one directly leading to a return statement. + // This variable keeps track of whether we are still looking for that + // first conditional jump. + BOOL bFirstCondJmp = TRUE; + + // The general strategy is that we always try to plough forward: + // we follow a conditional jump if and only if it is a forward jump. + // However, in fcall functions that set up a HELPER_METHOD_FRAME in + // more than one place, gcc will have both of them share the same + // epilog - and the second one may actually be a backward jump. + // This can lead us to loop in a destructor code loop. 
To protect + // against this, we remember the ip of the last conditional jump + // we followed, and if we encounter it again, we take the other branch. + PTR_BYTE lastCondJmpIp = PTR_BYTE((TADDR)0); + + int datasize; // helper variable for decoding of address modes + int mod; // helper variable for decoding of mod r/m + int rm; // helper variable for decoding of mod r/m + +#ifdef _DEBUG + int count = 0; + const DWORD cInstructions = 1000; + PTR_BYTE *instructionBytes = (PTR_BYTE*)alloca(cInstructions * sizeof(PTR_BYTE)); + memset(instructionBytes, 0, cInstructions * sizeof(PTR_BYTE)); +#endif + bool bset16bit=false; + bool b16bit=false; + for(;;) + { + _ASSERTE(count++ < 1000); // we should never walk more than 1000 instructions! + b16bit=bset16bit; + bset16bit=false; + +#ifndef DACCESS_COMPILE + again: +#endif +#ifdef _DEBUG + instructionBytes[count-1] = ip; +#endif + switch(*ip) + { + + case 0x64: // FS: prefix + bset16bit=b16bit; // In case we have just seen a 0x66 prefix + goto incIp1; + + case 0x66: + bset16bit=true; // Remember that we saw the 0x66 prefix [16-bit datasize override] + goto incIp1; + + case 0x50: // push EAX + case 0x51: // push ECX + case 0x52: // push EDX + case 0x53: // push EBX + case 0x55: // push EBP + case 0x56: // push ESI + case 0x57: // push EDI + case 0x9C: // pushfd + --ESP; + case 0x40: // inc EAX + case 0x41: // inc ECX + case 0x42: // inc EDX + case 0x43: // inc EBX + case 0x46: // inc ESI + case 0x47: // inc EDI + goto incIp1; + + case 0x58: // pop EAX + case 0x59: // pop ECX + case 0x5A: // pop EDX + case 0x9D: // popfd + ESP++; + // FALL THROUGH + + case 0x90: // nop + incIp1: + ip++; + break; + + case 0x5B: // pop EBX + lazyState->_pEbx = ESP; + lazyState->_ebx = *ESP++; + goto incIp1; + case 0x5D: // pop EBP + lazyState->_pEbp = ESP; + lazyState->_ebp = *ESP++; + goto incIp1; + case 0x5E: // pop ESI + lazyState->_pEsi = ESP; + lazyState->_esi = *ESP++; + goto incIp1; + case 0x5F: // pop EDI + lazyState->_pEdi = ESP; + lazyState->_edi = *ESP++; + goto incIp1; + + case 0xEB: // jmp + ip += (signed __int8) ip[1] + 2; + break; + + case 0x72: // jb for gcc. 
+ { + PTR_BYTE tmpIp = ip + (int)(signed __int8)ip[1] + 2; + if (tmpIp > ip) + ip = tmpIp; + else + ip += 2; + } + break; + + case 0xE8: // call + ip += 5; + if (epilogCallRet == 0) + { + PTR_BYTE target = ip + (__int32)*PTR_DWORD(PTR_TO_TADDR(ip) - 4); // calculate target + + if (shouldEnterCall(target)) + { + epilogCallRet = ip; // remember our return address + --ESP; // simulate pushing the return address + ip = target; + } + } + break; + + case 0xE9: // jmp + { + PTR_BYTE tmpIp = ip + + ((__int32)*dac_cast(ip + 1) + 5); + ip = tmpIp; + } + break; + + case 0x0f: // follow non-zero jumps: + if (ip[1] >= 0x90 && ip[1] <= 0x9f) { + if ((ip[2] & 0xC0) != 0xC0) // set reg + goto badOpcode; + ip += 3; + break; + } + else if ((ip[1] & 0xf0) == 0x40) { //cmov mod/rm + ++ip; + datasize = 0; + goto decodeRM; + } + else if (ip[1] >= 0x10 && ip[1] <= 0x17) { // movups, movlps, movhps, unpcklpd, unpckhpd + ++ip; + datasize = 0; + goto decodeRM; + } + else if (ip[1] == 0x1f) { // nop (multi-byte) + ++ip; + datasize = 0; + goto decodeRM; + } + else if (ip[1] == 0x57) { // xorps + ++ip; + datasize = 0; + goto decodeRM; + } + else if (ip[1] == 0xb6 || ip[1] == 0xb7) { //movzx reg, r/m8 + ++ip; + datasize = 0; + goto decodeRM; + } + else if (ip[1] == 0xbf) { //movsx reg, r/m16 + ++ip; + datasize = 0; + goto decodeRM; + } + else if (ip[1] == 0xd6 || ip[1] == 0x7e) { // movq + ++ip; + datasize = 0; + goto decodeRM; + } + else if (bFirstCondJmp) { + bFirstCondJmp = FALSE; + if (ip[1] == 0x85) // jne + ip += (__int32)*dac_cast(ip + 2) + 6; + else if (ip[1] >= 0x80 && ip[1] <= 0x8F) // jcc + ip += 6; + else + goto badOpcode; + } + else { + if ((ip[1] >= 0x80) && (ip[1] <= 0x8F)) { + PTR_BYTE tmpIp = ip + (__int32)*dac_cast(ip + 2) + 6; + + if ((tmpIp > ip) == (lastCondJmpIp != ip)) { + lastCondJmpIp = ip; + ip = tmpIp; + } + else { + lastCondJmpIp = ip; + ip += 6; + } + } + else + goto badOpcode; + } + break; + + // This is here because VC seems to not always optimize + // away a test for a literal constant + case 0x6A: // push 0xXX + ip += 2; + --ESP; + break; + + case 0x68: // push 0xXXXXXXXX + if ((ip[5] == 0xFF) && (ip[6] == 0x15)) { + ip += 11; // + } + else { + ip += 5; + + // For office profiler. They morph calls into push TARGET; call helper + // so if you see + // + // push XXXX + // call xxxx + // + // and we notice that mscorwks has been instrumented and + // xxxx starts with a JMP [] then do what you would do for call XXXX + if ((*ip & 0xFE) == 0xE8 && callsInstrumented()) { // It is a call or a jump (E8 or E9) + PTR_BYTE tmpIp = ip + 5; + PTR_BYTE target = tmpIp + (__int32)*PTR_DWORD(PTR_TO_TADDR(tmpIp) - 4); + if (target[0] == 0xFF && target[1] == 0x25) { // jmp [xxxx] (to external dll) + target = PTR_BYTE(*PTR_TADDR(PTR_TO_TADDR(ip) - 4)); + if (*ip == 0xE9) { // Do logic for jmp + ip = target; + } + else if (shouldEnterCall(target)) { // Do logic for calls + epilogCallRet = ip; // remember our return address + --ESP; // simulate pushing the return address + ip = target; + } + } + } + } + break; + + case 0x74: // jz + if (bFirstCondJmp) { + bFirstCondJmp = FALSE; + ip += 2; // follow the non-zero path + break; + } + goto condJumpDisp8; + + case 0x75: // jnz + // Except the first jump, we always follow forward jump to avoid possible looping. 
+ // + if (bFirstCondJmp) { + bFirstCondJmp = FALSE; + ip += (signed __int8) ip[1] + 2; // follow the non-zero path + break; + } + goto condJumpDisp8; + + case 0x77: // ja + case 0x78: // js + case 0x79: // jns + case 0x7d: // jge + case 0x7c: // jl + goto condJumpDisp8; + + condJumpDisp8: + { + PTR_BYTE tmpIp = ip + (TADDR)(signed __int8) ip[1] + 2; + if ((tmpIp > ip) == (lastCondJmpIp != ip)) { + lastCondJmpIp = ip; + ip = tmpIp; + } + else { + lastCondJmpIp = ip; + ip += 2; + } + } + break; + + case 0x84: + case 0x85: + mod = (ip[1] & 0xC0) >> 6; + if (mod != 3) // test reg1, reg2 + goto badOpcode; + ip += 2; + break; + + case 0x31: + case 0x32: + case 0x33: +#ifdef __GNUC__ + //there are lots of special workarounds for XOR for msvc. For GnuC + //just do the normal Mod/rm stuff. + datasize = 0; + goto decodeRM; +#else + mod = (ip[1] & 0xC0) >> 6; + if (mod == 3) + { + // XOR reg1, reg2 + + // VC generates this sequence in some code: + // xor reg, reg + // test reg reg + // je + // This is just an unconditional branch, so jump to it + if ((ip[1] & 7) == ((ip[1] >> 3) & 7)) { // reg1 == reg2? + if (ip[2] == 0x85 && ip[3] == ip[1]) { // TEST reg, reg + if (ip[4] == 0x74) { + ip += (signed __int8) ip[5] + 6; // follow the non-zero path + break; + } + _ASSERTE(ip[4] != 0x0f || ((ip[5] & 0xF0)!=0x80)); // If this goes off, we need the big jumps + } + else + { + if (ip[2]==0x74) + { + ip += (signed __int8) ip[3] + 4; + break; + } + _ASSERTE(ip[2] != 0x0f || ((ip[3] & 0xF0)!=0x80)); // If this goes off, we need the big jumps + } + } + ip += 2; + } + else if (mod == 1) + { + // XOR reg1, [reg+offs8] + // Used by the /GS flag for call to __security_check_cookie() + // Should only be XOR ECX,[EBP+4] + _ASSERTE((((ip[1] >> 3) & 0x7) == 0x1) && ((ip[1] & 0x7) == 0x5) && (ip[2] == 4)); + ip += 3; + } + else if (mod == 2) + { + // XOR reg1, [reg+offs32] + // Should not happen but may occur with __security_check_cookie() + _ASSERTE(!"Unexpected XOR reg1, [reg+offs32]"); + ip += 6; + } + else // (mod == 0) + { + // XOR reg1, [reg] + goto badOpcode; + } + break; +#endif + + case 0x05: + // added to handle gcc 3.3 generated code + // add %reg, constant + ip += 5; + break; + + case 0xFF: + if ( (ip[1] & 0x38) == 0x30) + { + // opcode generated by Vulcan/BBT instrumentation + // search for push dword ptr[esp]; push imm32; call disp32 and if found ignore it + if ((ip[1] == 0x34) && (ip[2] == 0x24) && // push dword ptr[esp] (length 3 bytes) + (ip[3] == 0x68) && // push imm32 (length 5 bytes) + (ip[8] == 0xe8)) // call disp32 (length 5 bytes) + { + // found the magic seq emitted by Vulcan instrumentation + ip += 13; // (3+5+5) + break; + } + + --ESP; // push r/m + datasize = 0; + goto decodeRM; + } + else if ( (ip[1] & 0x38) == 0x10) + { + // added to handle gcc 3.3 generated code + // This is a call *(%eax) generated by gcc for destructor calls. 
+ // We can safely skip over the call + datasize = 0; + goto decodeRM; + } + else if (ip[1] == 0xe0) + { + goto badOpcode; +#if 0 + // Handles jmp *%eax from gcc + datasize = 0; + goto decodeRM; +#endif + } + else if (ip[1] == 0x25 && epilogInstrumented()) // is it jmp [XXXX] + { + // this is a office profiler epilog (this jmp is acting as a return instruction) + PTR_BYTE epilogHelper = PTR_BYTE(*PTR_TADDR(*PTR_TADDR(PTR_TO_TADDR(ip) + 2))); + + ip = PTR_BYTE(*ESP); + lazyState->_pRetAddr = ESP++; + + if (epilogHelper[0] != 0x6A) // push + goto badOpcode; + unsigned disp = *PTR_BYTE(PTR_TO_TADDR(epilogHelper) + 1) * 4; + ESP = PTR_TADDR(PTR_TO_TADDR(ESP) + disp); // pop args + goto ret_with_epilogHelperCheck; + + } + else + { + goto badOpcode; + } + break; + + case 0x39: // comp r/m, reg + case 0x3B: // comp reg, r/m + datasize = 0; + goto decodeRM; + + case 0xA1: // MOV EAX, [XXXX] + ip += 5; + break; + + case 0x89: // MOV r/m, reg + if (ip[1] == 0xEC) // MOV ESP, EBP + goto mov_esp_ebp; + // FALL THROUGH + + case 0x18: // SBB r/m8, r8 + case 0x19: // SBB r/m[16|32], r[16|32] + case 0x1A: // SBB r8, r/m8 + case 0x1B: // SBB r[16|32], r/m[16|32] + + case 0x88: // MOV reg, r/m (BYTE) + case 0x8A: // MOV r/m, reg (BYTE) + + move: + datasize = 0; + + decodeRM: + // Note that we don't want to read from ip[] + // after we do ANY incrementing of ip + + mod = (ip[1] & 0xC0) >> 6; + if (mod != 3) { + rm = (ip[1] & 0x07); + if (mod == 0) { // (mod == 0) + if (rm == 5) // has disp32? + ip += 4; // [disp32] + else if (rm == 4) // has SIB byte? + ip += 1; // [reg*K+reg] + } + else if (mod == 1) { // (mod == 1) + if (rm == 4) // has SIB byte? + ip += 1; // [reg*K+reg+disp8] + ip += 1; // for disp8 + } + else { // (mod == 2) + if (rm == 4) // has SIB byte? + ip += 1; // [reg*K+reg+disp32] + ip += 4; // for disp32 + } + } + ip += 2; // opcode and Mod R/M byte + ip += datasize; + break; + + case 0x80: // OP r/m8, + datasize = 1; + goto decodeRM; + + case 0x81: // OP r/m32, + if (!b16bit && ip[1] == 0xC4) { // ADD ESP, + ESP = dac_cast(dac_cast(ESP) + + (__int32)*dac_cast(ip + 2)); + ip += 6; + break; + } else if (!b16bit && ip[1] == 0xC5) { // ADD EBP, + lazyState->_ebp += (__int32)*dac_cast(ip + 2); + ip += 6; + break; + } + + datasize = b16bit?2:4; + goto decodeRM; + + case 0x01: // ADD mod/rm + case 0x03: + case 0x29: // SUB mod/rm + case 0x2B: + datasize = 0; + goto decodeRM; + case 0x83: // OP r/m32, + if (ip[1] == 0xC4) { // ADD ESP, + ESP = dac_cast(dac_cast(ESP) + (signed __int8)ip[2]); + ip += 3; + break; + } + if (ip[1] == 0xec) { // SUB ESP, + ESP = PTR_TADDR(PTR_TO_TADDR(ESP) - (signed __int8)ip[2]); + ip += 3; + break; + } + if (ip[1] == 0xe4) { // AND ESP, + ESP = PTR_TADDR(PTR_TO_TADDR(ESP) & (signed __int8)ip[2]); + ip += 3; + break; + } + if (ip[1] == 0xc5) { // ADD EBP, + lazyState->_ebp += (signed __int8)ip[2]; + ip += 3; + break; + } + + datasize = 1; + goto decodeRM; + + case 0x8B: // MOV reg, r/m + if (ip[1] == 0xE5) { // MOV ESP, EBP + mov_esp_ebp: + ESP = PTR_TADDR(lazyState->_ebp); + ip += 2; + break; + } + + if ((ip[1] & 0xc7) == 0x4 && ip[2] == 0x24) // move reg, [esp] + { + if ( ip[1] == 0x1C ) { // MOV EBX, [ESP] + lazyState->_pEbx = ESP; + lazyState->_ebx = *lazyState->_pEbx; + } + else if ( ip[1] == 0x34 ) { // MOV ESI, [ESP] + lazyState->_pEsi = ESP; + lazyState->_esi = *lazyState->_pEsi; + } + else if ( ip[1] == 0x3C ) { // MOV EDI, [ESP] + lazyState->_pEdi = ESP; + lazyState->_edi = *lazyState->_pEdi; + } + else if ( ip[1] == 0x24 /*ESP*/ || ip[1] == 0x2C /*EBP*/) + goto 
badOpcode; + + ip += 3; + break; + } + + if ((ip[1] & 0xc7) == 0x44 && ip[2] == 0x24) // move reg, [esp+imm8] + { + if ( ip[1] == 0x5C ) { // MOV EBX, [ESP+XX] + lazyState->_pEbx = PTR_TADDR(PTR_TO_TADDR(ESP) + (signed __int8)ip[3]); + lazyState->_ebx = *lazyState->_pEbx ; + } + else if ( ip[1] == 0x74 ) { // MOV ESI, [ESP+XX] + lazyState->_pEsi = PTR_TADDR(PTR_TO_TADDR(ESP) + (signed __int8)ip[3]); + lazyState->_esi = *lazyState->_pEsi; + } + else if ( ip[1] == 0x7C ) { // MOV EDI, [ESP+XX] + lazyState->_pEdi = PTR_TADDR(PTR_TO_TADDR(ESP) + (signed __int8)ip[3]); + lazyState->_edi = *lazyState->_pEdi; + } + else if ( ip[1] == 0x64 /*ESP*/ || ip[1] == 0x6C /*EBP*/) + goto badOpcode; + + ip += 4; + break; + } + + if ((ip[1] & 0xC7) == 0x45) { // MOV reg, [EBP + imm8] + // gcc sometimes restores callee-preserved registers + // via 'mov reg, [ebp-xx]' instead of 'pop reg' + if ( ip[1] == 0x5D ) { // MOV EBX, [EBP+XX] + lazyState->_pEbx = PTR_TADDR(lazyState->_ebp + (signed __int8)ip[2]); + lazyState->_ebx = *lazyState->_pEbx ; + } + else if ( ip[1] == 0x75 ) { // MOV ESI, [EBP+XX] + lazyState->_pEsi = PTR_TADDR(lazyState->_ebp + (signed __int8)ip[2]); + lazyState->_esi = *lazyState->_pEsi; + } + else if ( ip[1] == 0x7D ) { // MOV EDI, [EBP+XX] + lazyState->_pEdi = PTR_TADDR(lazyState->_ebp + (signed __int8)ip[2]); + lazyState->_edi = *lazyState->_pEdi; + } + else if ( ip[1] == 0x65 /*ESP*/ || ip[1] == 0x6D /*EBP*/) + goto badOpcode; + + // We don't track the values of EAX,ECX,EDX + + ip += 3; // MOV reg, [reg + imm8] + break; + } + + if ((ip[1] & 0xC7) == 0x85) { // MOV reg, [EBP+imm32] + // gcc sometimes restores callee-preserved registers + // via 'mov reg, [ebp-xx]' instead of 'pop reg' + if ( ip[1] == 0xDD ) { // MOV EBX, [EBP+XXXXXXXX] + lazyState->_pEbx = PTR_TADDR(lazyState->_ebp + (__int32)*dac_cast(ip + 2)); + lazyState->_ebx = *lazyState->_pEbx ; + } + else if ( ip[1] == 0xF5 ) { // MOV ESI, [EBP+XXXXXXXX] + lazyState->_pEsi = PTR_TADDR(lazyState->_ebp + (__int32)*dac_cast(ip + 2)); + lazyState->_esi = *lazyState->_pEsi; + } + else if ( ip[1] == 0xFD ) { // MOV EDI, [EBP+XXXXXXXX] + lazyState->_pEdi = PTR_TADDR(lazyState->_ebp + (__int32)*dac_cast(ip + 2)); + lazyState->_edi = *lazyState->_pEdi; + } + else if ( ip[1] == 0xE5 /*ESP*/ || ip[1] == 0xED /*EBP*/) + goto badOpcode; // Add more registers + + // We don't track the values of EAX,ECX,EDX + + ip += 6; // MOV reg, [reg + imm32] + break; + } + goto move; + + case 0x8D: // LEA + if ((ip[1] & 0x38) == 0x20) { // Don't allow ESP to be updated + if (ip[1] == 0xA5) // LEA ESP, [EBP+XXXX] + ESP = PTR_TADDR(lazyState->_ebp + (__int32)*dac_cast(ip + 2)); + else if (ip[1] == 0x65) // LEA ESP, [EBP+XX] + ESP = PTR_TADDR(lazyState->_ebp + (signed __int8) ip[2]); + else if (ip[1] == 0x24 && ip[2] == 0x24) // LEA ESP, [ESP] + ; + else if (ip[1] == 0xa4 && ip[2] == 0x24 && *((DWORD *)(&ip[3])) == 0) // Another form of: LEA ESP, [ESP] + ; + else if (ip[1] == 0x64 && ip[2] == 0x24 && ip[3] == 0) // Yet another form of: LEA ESP, [ESP] (8 bit offset) + ; + else + { + goto badOpcode; + } + } + + datasize = 0; + goto decodeRM; + + case 0xB0: // MOV AL, imm8 + ip += 2; + break; + case 0xB8: // MOV EAX, imm32 + case 0xB9: // MOV ECX, imm32 + case 0xBA: // MOV EDX, imm32 + case 0xBB: // MOV EBX, imm32 + case 0xBE: // MOV ESI, imm32 + case 0xBF: // MOV EDI, imm32 + if(b16bit) + ip += 3; + else + ip += 5; + break; + + case 0xC2: // ret N + { + unsigned __int16 disp = *dac_cast(ip + 1); + ip = PTR_BYTE(*ESP); + lazyState->_pRetAddr = ESP++; + 
_ASSERTE(disp < 64); // sanity check (although strictly speaking not impossible) + ESP = dac_cast(dac_cast(ESP) + disp); // pop args + goto ret; + } + case 0xC3: // ret + ip = PTR_BYTE(*ESP); + lazyState->_pRetAddr = ESP++; + + ret_with_epilogHelperCheck: + if (epilogCallRet != 0) { // we are returning from a special epilog helper + ip = epilogCallRet; + epilogCallRet = 0; + break; // this does not count toward funCallDepth + } + ret: + if (funCallDepth > 0) + { + --funCallDepth; + if (funCallDepth == 0) + goto done; + } + else + { + // Determine whether given IP resides in JITted code. (It returns nonzero in that case.) + // Use it now to see if we've unwound to managed code yet. + BOOL fFailedReaderLock = FALSE; + BOOL fIsManagedCode = ExecutionManager::IsManagedCode(*lazyState->pRetAddr(), hostCallPreference, &fFailedReaderLock); + if (fFailedReaderLock) + { + // We don't know if we would have been able to find a JIT + // manager, because we couldn't enter the reader lock without + // yielding (and our caller doesn't want us to yield). So abort + // now. + + // Invalidate the lazyState we're returning, so the caller knows + // we aborted before we could fully unwind + lazyState->_pRetAddr = NULL; + return; + } + + if (fIsManagedCode) + goto done; + } + + bFirstCondJmp = TRUE; + break; + + case 0xC6: // MOV r/m8, imm8 + datasize = 1; + goto decodeRM; + + case 0xC7: // MOV r/m32, imm32 + datasize = b16bit?2:4; + goto decodeRM; + + case 0xC9: // leave + ESP = PTR_TADDR(lazyState->_ebp); + lazyState->_pEbp = ESP; + lazyState->_ebp = *ESP++; + ip++; + break; + +#ifndef DACCESS_COMPILE + case 0xCC: + if (IsDebuggerPresent()) + { + OutputDebugStringA("CLR: Invalid breakpoint in a helpermethod frame epilog\n"); + DebugBreak(); + goto again; + } +#ifndef _PREFIX_ + *((int*) 0) = 1; // If you get at this error, it is because yout + // set a breakpoint in a helpermethod frame epilog + // you can't do that unfortunately. Just move it + // into the interior of the method to fix it +#endif // !_PREFIX_ + goto done; +#endif //!DACCESS_COMPILE + + case 0xD0: // shl REG16, 1 + case 0xD1: // shl REG32, 1 + if (0xE4 == ip[1] || 0xE5 == ip[1]) // shl, ESP, 1 or shl EBP, 1 + goto badOpcode; // Doesn't look like valid code + ip += 2; + break; + + case 0xC1: // shl REG32, imm8 + if (0xE4 == ip[1] || 0xE5 == ip[1]) // shl, ESP, imm8 or shl EBP, imm8 + goto badOpcode; // Doesn't look like valid code + ip += 3; + break; + + case 0xD9: // single prefix + if (0xEE == ip[1]) + { + ip += 2; // FLDZ + break; + } + // + // INTENTIONAL FALL THRU + // + case 0xDD: // double prefix + if ((ip[1] & 0xC0) != 0xC0) + { + datasize = 0; // floatop r/m + goto decodeRM; + } + else + { + goto badOpcode; + } + break; + + case 0xf2: // repne prefix + case 0xF3: // rep prefix + ip += 1; + break; + + case 0xA4: // MOVS byte + case 0xA5: // MOVS word/dword + ip += 1; + break; + + case 0xA8: //test AL, imm8 + ip += 2; + break; + case 0xA9: //test EAX, imm32 + ip += 5; + break; + case 0xF6: + if ( (ip[1] & 0x38) == 0x00) // TEST r/m8, imm8 + { + datasize = 1; + goto decodeRM; + } + else + { + goto badOpcode; + } + break; + + case 0xF7: + if ( (ip[1] & 0x38) == 0x00) // TEST r/m32, imm32 + { + datasize = b16bit?2:4; + goto decodeRM; + } + else if ((ip[1] & 0xC8) == 0xC8) //neg reg + { + ip += 2; + break; + } + else if ((ip[1] & 0x30) == 0x30) //div eax by mod/rm + { + datasize = 0; + goto decodeRM; + } + else + { + goto badOpcode; + } + break; + +#ifdef __GNUC__ + case 0x2e: + // Group 2 instruction prefix. 
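            // Editor's note (hedged): 0F 1F /0 is the documented multi-byte NOP encoding;
            // GCC sometimes pads with a CS-segment-override form such as
            //   2E 0F 1F 84 00 00 00 00 00   (a 9-byte NOP)
            // The handling below simply steps over the 2E/0F prefix bytes and lets the
            // ordinary Mod R/M logic consume the rest of the instruction.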
+ if (ip[1] == 0x0f && ip[2] == 0x1f) + { + // Although not the recommended multi-byte sequence for 9-byte + // nops (the suggestion is to use 0x66 as the prefix), this shows + // up in GCC-optimized code. + ip += 2; + datasize = 0; + goto decodeRM; + } + else + { + goto badOpcode; + } + break; +#endif // __GNUC__ + + default: + badOpcode: + _ASSERTE(!"Bad opcode"); + // FIX what to do here? +#ifndef DACCESS_COMPILE +#ifndef _PREFIX_ + *((unsigned __int8**) 0) = ip; // cause an access violation (Free Build assert) +#endif // !_PREFIX_ +#else + DacNotImpl(); +#endif + goto done; + } + } +done: + _ASSERTE(epilogCallRet == 0); + + // At this point the fields in 'frame' coorespond exactly to the register + // state when the the helper returns to its caller. + lazyState->_esp = dac_cast(ESP); +} +#ifdef _PREFAST_ +#pragma warning(pop) +#endif diff --git a/src/vm/i386/jithelp.asm b/src/vm/i386/jithelp.asm new file mode 100644 index 0000000000..ac767287ee --- /dev/null +++ b/src/vm/i386/jithelp.asm @@ -0,0 +1,2574 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. +; See the LICENSE file in the project root for more information. + +; ==++== +; + +; +; ==--== +; *********************************************************************** +; File: JIThelp.asm +; +; *********************************************************************** +; +; *** NOTE: If you make changes to this file, propagate the changes to +; jithelp.s in this directory + +; This contains JITinterface routines that are 100% x86 assembly + + .586 + .model flat + + include asmconstants.inc + + option casemap:none + .code +; +; @TODO Switch to g_ephemeral_low and g_ephemeral_high +; @TODO instead of g_lowest_address, g_highest address +; + +ARGUMENT_REG1 equ ecx +ARGUMENT_REG2 equ edx +g_ephemeral_low TEXTEQU <_g_ephemeral_low> +g_ephemeral_high TEXTEQU <_g_ephemeral_high> +g_lowest_address TEXTEQU <_g_lowest_address> +g_highest_address TEXTEQU <_g_highest_address> +g_card_table TEXTEQU <_g_card_table> +WriteBarrierAssert TEXTEQU <_WriteBarrierAssert@8> +JIT_LLsh TEXTEQU <_JIT_LLsh@0> +JIT_LRsh TEXTEQU <_JIT_LRsh@0> +JIT_LRsz TEXTEQU <_JIT_LRsz@0> +JIT_LMul TEXTEQU <@JIT_LMul@16> +JIT_Dbl2LngOvf TEXTEQU <@JIT_Dbl2LngOvf@8> +JIT_Dbl2Lng TEXTEQU <@JIT_Dbl2Lng@8> +JIT_Dbl2IntSSE2 TEXTEQU <@JIT_Dbl2IntSSE2@8> +JIT_Dbl2LngP4x87 TEXTEQU <@JIT_Dbl2LngP4x87@8> +JIT_Dbl2LngSSE3 TEXTEQU <@JIT_Dbl2LngSSE3@8> +JIT_InternalThrowFromHelper TEXTEQU <@JIT_InternalThrowFromHelper@4> +JIT_WriteBarrierReg_PreGrow TEXTEQU <_JIT_WriteBarrierReg_PreGrow@0> +JIT_WriteBarrierReg_PostGrow TEXTEQU <_JIT_WriteBarrierReg_PostGrow@0> +JIT_TailCall TEXTEQU <_JIT_TailCall@0> +JIT_TailCallLeave TEXTEQU <_JIT_TailCallLeave@0> +JIT_TailCallVSDLeave TEXTEQU <_JIT_TailCallVSDLeave@0> +JIT_TailCallHelper TEXTEQU <_JIT_TailCallHelper@4> +JIT_TailCallReturnFromVSD TEXTEQU <_JIT_TailCallReturnFromVSD@0> + +EXTERN g_ephemeral_low:DWORD +EXTERN g_ephemeral_high:DWORD +EXTERN g_lowest_address:DWORD +EXTERN g_highest_address:DWORD +EXTERN g_card_table:DWORD +ifdef _DEBUG +EXTERN WriteBarrierAssert:PROC +endif ; _DEBUG +EXTERN JIT_InternalThrowFromHelper:PROC +ifdef FEATURE_HIJACK +EXTERN JIT_TailCallHelper:PROC +endif +EXTERN _g_TailCallFrameVptr:DWORD +EXTERN @JIT_FailFast@0:PROC +EXTERN _s_gsCookie:DWORD +EXTERN @JITutil_IsInstanceOfInterface@8:PROC +EXTERN @JITutil_ChkCastInterface@8:PROC +EXTERN @JITutil_IsInstanceOfAny@8:PROC +EXTERN @JITutil_ChkCastAny@8:PROC +ifdef FEATURE_IMPLICIT_TLS 
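; Editor's note (hedged sketch, not original source): the decorated names used in
; this file follow the Win32 x86 convention - __stdcall symbols get a leading
; underscore and an @N suffix giving the argument-byte count, __fastcall symbols a
; leading '@' with the first two arguments in ecx/edx. The extern below therefore
; corresponds roughly to the C++ declaration
;   Thread* __stdcall GetThread();        // links as _GetThread@0
; while, for example, @JIT_LMul@16 above takes 16 bytes of arguments.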
+EXTERN _GetThread@0:PROC +endif + +ifdef WRITE_BARRIER_CHECK +; Those global variables are always defined, but should be 0 for Server GC +g_GCShadow TEXTEQU +g_GCShadowEnd TEXTEQU +EXTERN g_GCShadow:DWORD +EXTERN g_GCShadowEnd:DWORD +INVALIDGCVALUE equ 0CCCCCCCDh +endif + +ifdef FEATURE_REMOTING +EXTERN _TransparentProxyStub_CrossContext@0:PROC +EXTERN _InContextTPQuickDispatchAsmStub@0:PROC +endif + +.686P +.XMM +; The following macro is needed because of a MASM issue with the +; movsd mnemonic +; +$movsd MACRO op1, op2 + LOCAL begin_movsd, end_movsd +begin_movsd: + movupd op1, op2 +end_movsd: + org begin_movsd + db 0F2h + org end_movsd +ENDM +.586 + +; The following macro is used to match the JITs +; multi-byte NOP sequence +$nop3 MACRO + db 090h + db 090h + db 090h +ENDM + + + +;*** +;JIT_WriteBarrier* - GC write barrier helper +; +;Purpose: +; Helper calls in order to assign an object to a field +; Enables book-keeping of the GC. +; +;Entry: +; EDX - address of ref-field (assigned to) +; the resp. other reg - RHS of assignment +; +;Exit: +; +;Uses: +; EDX is destroyed. +; +;Exceptions: +; +;******************************************************************************* + +; The code here is tightly coupled with AdjustContextForWriteBarrier, if you change +; anything here, you might need to change AdjustContextForWriteBarrier as well +WriteBarrierHelper MACRO rg + ALIGN 4 + + ;; The entry point is the fully 'safe' one in which we check if EDX (the REF + ;; begin updated) is actually in the GC heap + +PUBLIC _JIT_CheckedWriteBarrier&rg&@0 +_JIT_CheckedWriteBarrier&rg&@0 PROC + ;; check in the REF being updated is in the GC heap + cmp edx, g_lowest_address + jb WriteBarrier_NotInHeap_&rg + cmp edx, g_highest_address + jae WriteBarrier_NotInHeap_&rg + + ;; fall through to unchecked routine + ;; note that its entry point also happens to be aligned + +ifdef WRITE_BARRIER_CHECK + ;; This entry point is used when you know the REF pointer being updated + ;; is in the GC heap +PUBLIC _JIT_DebugWriteBarrier&rg&@0 +_JIT_DebugWriteBarrier&rg&@0: +endif + +ifdef _DEBUG + push edx + push ecx + push eax + + push rg + push edx + call WriteBarrierAssert + + pop eax + pop ecx + pop edx +endif ;_DEBUG + + ; in the !WRITE_BARRIER_CHECK case this will be the move for all + ; addresses in the GCHeap, addresses outside the GCHeap will get + ; taken care of below at WriteBarrier_NotInHeap_&rg + +ifndef WRITE_BARRIER_CHECK + mov DWORD PTR [edx], rg +endif + +ifdef WRITE_BARRIER_CHECK + ; Test dest here so if it is bad AV would happen before we change register/stack + ; status. This makes job of AdjustContextForWriteBarrier easier. + cmp [edx], 0 + ;; ALSO update the shadow GC heap if that is enabled + ; Make ebp into the temporary src register. 
We need to do this so that we can use ecx + ; in the calculation of the shadow GC address, but still have access to the src register + push ecx + push ebp + mov ebp, rg + + ; if g_GCShadow is 0, don't perform the check + cmp g_GCShadow, 0 + je WriteBarrier_NoShadow_&rg + + mov ecx, edx + sub ecx, g_lowest_address ; U/V + jb WriteBarrier_NoShadow_&rg + add ecx, [g_GCShadow] + cmp ecx, [g_GCShadowEnd] + ja WriteBarrier_NoShadow_&rg + + ; TODO: In Orcas timeframe if we move to P4+ only on X86 we should enable + ; mfence barriers on either side of these two writes to make sure that + ; they stay as close together as possible + + ; edx contains address in GC + ; ecx contains address in ShadowGC + ; ebp temporarially becomes the src register + + ;; When we're writing to the shadow GC heap we want to be careful to minimize + ;; the risk of a race that can occur here where the GC and ShadowGC don't match + mov DWORD PTR [edx], ebp + mov DWORD PTR [ecx], ebp + + ;; We need a scratch register to verify the shadow heap. We also need to + ;; construct a memory barrier so that the write to the shadow heap happens + ;; before the read from the GC heap. We can do both by using SUB/XCHG + ;; rather than PUSH. + ;; + ;; TODO: Should be changed to a push if the mfence described above is added. + ;; + sub esp, 4 + xchg [esp], eax + + ;; As part of our race avoidance (see above) we will now check whether the values + ;; in the GC and ShadowGC match. There is a possibility that we're wrong here but + ;; being overaggressive means we might mask a case where someone updates GC refs + ;; without going to a write barrier, but by its nature it will be indeterminant + ;; and we will find real bugs whereas the current implementation is indeterminant + ;; but only leads to investigations that find that this code is fundamentally flawed + mov eax, [edx] + cmp [ecx], eax + je WriteBarrier_CleanupShadowCheck_&rg + mov [ecx], INVALIDGCVALUE + +WriteBarrier_CleanupShadowCheck_&rg: + pop eax + + jmp WriteBarrier_ShadowCheckEnd_&rg + +WriteBarrier_NoShadow_&rg: + ; If we come here then we haven't written the value to the GC and need to. + ; ebp contains rg + ; We restore ebp/ecx immediately after this, and if either of them is the src + ; register it will regain its value as the src register. + mov DWORD PTR [edx], ebp +WriteBarrier_ShadowCheckEnd_&rg: + pop ebp + pop ecx +endif + cmp rg, g_ephemeral_low + jb WriteBarrier_NotInEphemeral_&rg + cmp rg, g_ephemeral_high + jae WriteBarrier_NotInEphemeral_&rg + + shr edx, 10 + add edx, [g_card_table] + cmp BYTE PTR [edx], 0FFh + jne WriteBarrier_UpdateCardTable_&rg + ret + +WriteBarrier_UpdateCardTable_&rg: + mov BYTE PTR [edx], 0FFh + ret + +WriteBarrier_NotInHeap_&rg: + ; If it wasn't in the heap then we haven't updated the dst in memory yet + mov DWORD PTR [edx], rg +WriteBarrier_NotInEphemeral_&rg: + ; If it is in the GC Heap but isn't in the ephemeral range we've already + ; updated the Heap with the Object*. + ret +_JIT_CheckedWriteBarrier&rg&@0 ENDP + +ENDM + + +;*** +;JIT_ByRefWriteBarrier* - GC write barrier helper +; +;Purpose: +; Helper calls in order to assign an object to a byref field +; Enables book-keeping of the GC. 
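; Editor's note (illustrative sketch, not original source): for this barrier and
; the checked barrier above, the book-keeping consists of marking the card that
; covers the updated slot whenever the stored reference points into the ephemeral
; generation. With the 1KB cards implied by the 'shr ..., 10' the update is roughly
;   if (g_ephemeral_low <= ref && ref < g_ephemeral_high)
;       g_card_table[(size_t)dst >> 10] = 0xFF;
; and the compare against 0FFh before the store skips the write when the card is
; already dirty.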
+; +;Entry: +; EDI - address of ref-field (assigned to) +; ESI - address of the data (source) +; ECX can be trashed +; +;Exit: +; +;Uses: +; EDI and ESI are incremented by a DWORD +; +;Exceptions: +; +;******************************************************************************* + +; The code here is tightly coupled with AdjustContextForWriteBarrier, if you change +; anything here, you might need to change AdjustContextForWriteBarrier as well + +ByRefWriteBarrierHelper MACRO + ALIGN 4 +PUBLIC _JIT_ByRefWriteBarrier@0 +_JIT_ByRefWriteBarrier@0 PROC + ;;test for dest in range + mov ecx, [esi] + cmp edi, g_lowest_address + jb ByRefWriteBarrier_NotInHeap + cmp edi, g_highest_address + jae ByRefWriteBarrier_NotInHeap + +ifndef WRITE_BARRIER_CHECK + ;;write barrier + mov [edi],ecx +endif + +ifdef WRITE_BARRIER_CHECK + ; Test dest here so if it is bad AV would happen before we change register/stack + ; status. This makes job of AdjustContextForWriteBarrier easier. + cmp [edi], 0 + + ;; ALSO update the shadow GC heap if that is enabled + + ; use edx for address in GC Shadow, + push edx + + ;if g_GCShadow is 0, don't do the update + cmp g_GCShadow, 0 + je ByRefWriteBarrier_NoShadow + + mov edx, edi + sub edx, g_lowest_address ; U/V + jb ByRefWriteBarrier_NoShadow + add edx, [g_GCShadow] + cmp edx, [g_GCShadowEnd] + ja ByRefWriteBarrier_NoShadow + + ; TODO: In Orcas timeframe if we move to P4+ only on X86 we should enable + ; mfence barriers on either side of these two writes to make sure that + ; they stay as close together as possible + + ; edi contains address in GC + ; edx contains address in ShadowGC + ; ecx is the value to assign + + ;; When we're writing to the shadow GC heap we want to be careful to minimize + ;; the risk of a race that can occur here where the GC and ShadowGC don't match + mov DWORD PTR [edi], ecx + mov DWORD PTR [edx], ecx + + ;; We need a scratch register to verify the shadow heap. We also need to + ;; construct a memory barrier so that the write to the shadow heap happens + ;; before the read from the GC heap. We can do both by using SUB/XCHG + ;; rather than PUSH. + ;; + ;; TODO: Should be changed to a push if the mfence described above is added. + ;; + sub esp, 4 + xchg [esp], eax + + ;; As part of our race avoidance (see above) we will now check whether the values + ;; in the GC and ShadowGC match. There is a possibility that we're wrong here but + ;; being overaggressive means we might mask a case where someone updates GC refs + ;; without going to a write barrier, but by its nature it will be indeterminant + ;; and we will find real bugs whereas the current implementation is indeterminant + ;; but only leads to investigations that find that this code is fundamentally flawed + + mov eax, [edi] + cmp [edx], eax + je ByRefWriteBarrier_CleanupShadowCheck + mov [edx], INVALIDGCVALUE +ByRefWriteBarrier_CleanupShadowCheck: + pop eax + jmp ByRefWriteBarrier_ShadowCheckEnd + +ByRefWriteBarrier_NoShadow: + ; If we come here then we haven't written the value to the GC and need to. 
+ mov DWORD PTR [edi], ecx + +ByRefWriteBarrier_ShadowCheckEnd: + pop edx +endif + ;;test for *src in ephemeral segement + cmp ecx, g_ephemeral_low + jb ByRefWriteBarrier_NotInEphemeral + cmp ecx, g_ephemeral_high + jae ByRefWriteBarrier_NotInEphemeral + + mov ecx, edi + add esi,4 + add edi,4 + + shr ecx, 10 + add ecx, [g_card_table] + cmp byte ptr [ecx], 0FFh + jne ByRefWriteBarrier_UpdateCardTable + ret +ByRefWriteBarrier_UpdateCardTable: + mov byte ptr [ecx], 0FFh + ret + +ByRefWriteBarrier_NotInHeap: + ; If it wasn't in the heap then we haven't updated the dst in memory yet + mov [edi],ecx +ByRefWriteBarrier_NotInEphemeral: + ; If it is in the GC Heap but isn't in the ephemeral range we've already + ; updated the Heap with the Object*. + add esi,4 + add edi,4 + ret +_JIT_ByRefWriteBarrier@0 ENDP +ENDM + +;******************************************************************************* +; Write barrier wrappers with fcall calling convention +; +UniversalWriteBarrierHelper MACRO name + ALIGN 4 +PUBLIC @JIT_&name&@8 +@JIT_&name&@8 PROC + mov eax,edx + mov edx,ecx + jmp _JIT_&name&EAX@0 +@JIT_&name&@8 ENDP +ENDM + +; WriteBarrierStart and WriteBarrierEnd are used to determine bounds of +; WriteBarrier functions so can determine if got AV in them. +; +PUBLIC _JIT_WriteBarrierStart@0 +_JIT_WriteBarrierStart@0 PROC +ret +_JIT_WriteBarrierStart@0 ENDP + +ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS +; Only define these if we're using the ASM GC write barriers; if this flag is not defined, +; we'll use C++ versions of these write barriers. +UniversalWriteBarrierHelper +UniversalWriteBarrierHelper +endif + +WriteBarrierHelper +WriteBarrierHelper +WriteBarrierHelper +WriteBarrierHelper +WriteBarrierHelper +WriteBarrierHelper + +ByRefWriteBarrierHelper + +PUBLIC _JIT_WriteBarrierLast@0 +_JIT_WriteBarrierLast@0 PROC +ret +_JIT_WriteBarrierLast@0 ENDP + +; This is the first function outside the "keep together range". Used by BBT scripts. +PUBLIC _JIT_WriteBarrierEnd@0 +_JIT_WriteBarrierEnd@0 PROC +ret +_JIT_WriteBarrierEnd@0 ENDP + +;*********************************************************************/ +; In cases where we support it we have an optimized GC Poll callback. Normall (when we're not trying to +; suspend for GC, the CORINFO_HELP_POLL_GC helper points to this nop routine. When we're ready to suspend +; for GC, we whack the Jit Helper table entry to point to the real helper. When we're done with GC we +; whack it back. +PUBLIC @JIT_PollGC_Nop@0 +@JIT_PollGC_Nop@0 PROC +ret +@JIT_PollGC_Nop@0 ENDP + +;*********************************************************************/ +;llshl - long shift left +; +;Purpose: +; Does a Long Shift Left (signed and unsigned are identical) +; Shifts a long left any number of bits. +; +; NOTE: This routine has been adapted from the Microsoft CRTs. +; +;Entry: +; EDX:EAX - long value to be shifted +; ECX - number of bits to shift by +; +;Exit: +; EDX:EAX - shifted value +; + ALIGN 16 +PUBLIC JIT_LLsh +JIT_LLsh PROC +; Handle shifts of between bits 0 and 31 + cmp ecx, 32 + jae short LLshMORE32 + shld edx,eax,cl + shl eax,cl + ret +; Handle shifts of between bits 32 and 63 +LLshMORE32: + ; The x86 shift instructions only use the lower 5 bits. + mov edx,eax + xor eax,eax + shl edx,cl + ret +JIT_LLsh ENDP + + +;*********************************************************************/ +;LRsh - long shift right +; +;Purpose: +; Does a signed Long Shift Right +; Shifts a long right any number of bits. +; +; NOTE: This routine has been adapted from the Microsoft CRTs. 
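; Editor's note (hedged sketch): in C terms the routine below computes, with the
; 64-bit value in edx:eax and the count in ecx,
;   if (count < 32) { lo = (lo >> count) | (hi << (32 - count)); hi >>= count; }
;   else            { lo = hi >> (count - 32); hi = hi >> 31; /* sign fill */ }
; where the shifts of 'hi' are arithmetic. The 'count - 32' is implicit because the
; x86 shift instructions only look at the low 5 bits of CL.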
+; +;Entry: +; EDX:EAX - long value to be shifted +; ECX - number of bits to shift by +; +;Exit: +; EDX:EAX - shifted value +; + ALIGN 16 +PUBLIC JIT_LRsh +JIT_LRsh PROC +; Handle shifts of between bits 0 and 31 + cmp ecx, 32 + jae short LRshMORE32 + shrd eax,edx,cl + sar edx,cl + ret +; Handle shifts of between bits 32 and 63 +LRshMORE32: + ; The x86 shift instructions only use the lower 5 bits. + mov eax,edx + sar edx, 31 + sar eax,cl + ret +JIT_LRsh ENDP + + +;*********************************************************************/ +; LRsz: +;Purpose: +; Does a unsigned Long Shift Right +; Shifts a long right any number of bits. +; +; NOTE: This routine has been adapted from the Microsoft CRTs. +; +;Entry: +; EDX:EAX - long value to be shifted +; ECX - number of bits to shift by +; +;Exit: +; EDX:EAX - shifted value +; + ALIGN 16 +PUBLIC JIT_LRsz +JIT_LRsz PROC +; Handle shifts of between bits 0 and 31 + cmp ecx, 32 + jae short LRszMORE32 + shrd eax,edx,cl + shr edx,cl + ret +; Handle shifts of between bits 32 and 63 +LRszMORE32: + ; The x86 shift instructions only use the lower 5 bits. + mov eax,edx + xor edx,edx + shr eax,cl + ret +JIT_LRsz ENDP + +;*********************************************************************/ +; LMul: +;Purpose: +; Does a long multiply (same for signed/unsigned) +; +; NOTE: This routine has been adapted from the Microsoft CRTs. +; +;Entry: +; Parameters are passed on the stack: +; 1st pushed: multiplier (QWORD) +; 2nd pushed: multiplicand (QWORD) +; +;Exit: +; EDX:EAX - product of multiplier and multiplicand +; + ALIGN 16 +PUBLIC JIT_LMul +JIT_LMul PROC + +; AHI, BHI : upper 32 bits of A and B +; ALO, BLO : lower 32 bits of A and B +; +; ALO * BLO +; ALO * BHI +; + BLO * AHI +; --------------------- + + mov eax,[esp + 8] ; AHI + mov ecx,[esp + 16] ; BHI + or ecx,eax ;test for both hiwords zero. + mov ecx,[esp + 12] ; BLO + jnz LMul_hard ;both are zero, just mult ALO and BLO + + mov eax,[esp + 4] + mul ecx + + ret 16 ; callee restores the stack + +LMul_hard: + push ebx + + mul ecx ;eax has AHI, ecx has BLO, so AHI * BLO + mov ebx,eax ;save result + + mov eax,[esp + 8] ; ALO + mul dword ptr [esp + 20] ;ALO * BHI + add ebx,eax ;ebx = ((ALO * BHI) + (AHI * BLO)) + + mov eax,[esp + 8] ; ALO ;ecx = BLO + mul ecx ;so edx:eax = ALO*BLO + add edx,ebx ;now edx has all the LO*HI stuff + + pop ebx + + ret 16 ; callee restores the stack + +JIT_LMul ENDP + +;*********************************************************************/ +; JIT_Dbl2LngOvf + +;Purpose: +; converts a double to a long truncating toward zero (C semantics) +; with check for overflow +; +; uses stdcall calling conventions +; +PUBLIC JIT_Dbl2LngOvf +JIT_Dbl2LngOvf PROC + fnclex + fld qword ptr [esp+4] + push ecx + push ecx + fstp qword ptr [esp] + call JIT_Dbl2Lng + mov ecx,eax + fnstsw ax + test ax,01h + jnz Dbl2LngOvf_throw + mov eax,ecx + ret 8 + +Dbl2LngOvf_throw: + mov ECX, CORINFO_OverflowException_ASM + call JIT_InternalThrowFromHelper + ret 8 +JIT_Dbl2LngOvf ENDP + +;*********************************************************************/ +; JIT_Dbl2Lng + +;Purpose: +; converts a double to a long truncating toward zero (C semantics) +; +; uses stdcall calling conventions +; +; note that changing the rounding mode is very expensive. This +; routine basiclly does the truncation sematics without changing +; the rounding mode, resulting in a win. 
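; Editor's note (hedged sketch, not original source): instead of switching the FPCW
; to round-toward-zero, the routine below converts with the default round-to-nearest
; mode and then corrects the result by one, roughly:
;   __int64 r   = round_to_nearest(d);         (the fistp/fild pair)
;   float   dif = (float)(d - (double)r);      (difference kept in single precision)
;   if (d >= 0 && dif < 0) r -= 1;             (rounded away from zero: pull back)
;   if (d <  0 && dif > 0) r += 1;             (rounded away from zero: pull back)
; which gives C truncation-toward-zero semantics without the costly FPCW round-trip.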
+; +PUBLIC JIT_Dbl2Lng +JIT_Dbl2Lng PROC + fld qword ptr[ESP+4] ; fetch arg + lea ecx,[esp-8] + sub esp,16 ; allocate frame + and ecx,-8 ; align pointer on boundary of 8 + fld st(0) ; duplciate top of stack + fistp qword ptr[ecx] ; leave arg on stack, also save in temp + fild qword ptr[ecx] ; arg, round(arg) now on stack + mov edx,[ecx+4] ; high dword of integer + mov eax,[ecx] ; low dword of integer + test eax,eax + je integer_QNaN_or_zero + +arg_is_not_integer_QNaN: + fsubp st(1),st ; TOS=d-round(d), + ; { st(1)=st(1)-st & pop ST } + test edx,edx ; what's sign of integer + jns positive + ; number is negative + ; dead cycle + ; dead cycle + fstp dword ptr[ecx] ; result of subtraction + mov ecx,[ecx] ; dword of difference(single precision) + add esp,16 + xor ecx,80000000h + add ecx,7fffffffh ; if difference>0 then increment integer + adc eax,0 ; inc eax (add CARRY flag) + adc edx,0 ; propagate carry flag to upper bits + ret 8 + +positive: + fstp dword ptr[ecx] ;17-18 ; result of subtraction + mov ecx,[ecx] ; dword of difference (single precision) + add esp,16 + add ecx,7fffffffh ; if difference<0 then decrement integer + sbb eax,0 ; dec eax (subtract CARRY flag) + sbb edx,0 ; propagate carry flag to upper bits + ret 8 + +integer_QNaN_or_zero: + test edx,7fffffffh + jnz arg_is_not_integer_QNaN + fstp st(0) ;; pop round(arg) + fstp st(0) ;; arg + add esp,16 + ret 8 +JIT_Dbl2Lng ENDP + +;*********************************************************************/ +; JIT_Dbl2LngP4x87 + +;Purpose: +; converts a double to a long truncating toward zero (C semantics) +; +; uses stdcall calling conventions +; +; This code is faster on a P4 than the Dbl2Lng code above, but is +; slower on a PIII. Hence we choose this code when on a P4 or above. +; +PUBLIC JIT_Dbl2LngP4x87 +JIT_Dbl2LngP4x87 PROC +arg1 equ <[esp+0Ch]> + + sub esp, 8 ; get some local space + + fld qword ptr arg1 ; fetch arg + fnstcw word ptr arg1 ; store FPCW + movzx eax, word ptr arg1 ; zero extend - wide + or ah, 0Ch ; turn on OE and DE flags + mov dword ptr [esp], eax ; store new FPCW bits + fldcw word ptr [esp] ; reload FPCW with new bits + fistp qword ptr [esp] ; convert + mov eax, dword ptr [esp] ; reload FP result + mov edx, dword ptr [esp+4] ; + fldcw word ptr arg1 ; reload original FPCW value + + add esp, 8 ; restore stack + + ret 8 +JIT_Dbl2LngP4x87 ENDP + +;*********************************************************************/ +; JIT_Dbl2LngSSE3 + +;Purpose: +; converts a double to a long truncating toward zero (C semantics) +; +; uses stdcall calling conventions +; +; This code is faster than the above P4 x87 code for Intel processors +; equal or later than Core2 and Atom that have SSE3 support +; +.686P +.XMM +PUBLIC JIT_Dbl2LngSSE3 +JIT_Dbl2LngSSE3 PROC +arg1 equ <[esp+0Ch]> + + sub esp, 8 ; get some local space + + fld qword ptr arg1 ; fetch arg + fisttp qword ptr [esp] ; convert + mov eax, dword ptr [esp] ; reload FP result + mov edx, dword ptr [esp+4] + + add esp, 8 ; restore stack + + ret 8 +JIT_Dbl2LngSSE3 ENDP +.586 + +;*********************************************************************/ +; JIT_Dbl2IntSSE2 + +;Purpose: +; converts a double to a long truncating toward zero (C semantics) +; +; uses stdcall calling conventions +; +; This code is even faster than the P4 x87 code for Dbl2LongP4x87, +; but only returns a 32 bit value (only good for int). 
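; Editor's note (hedged): the SSE2 variant below needs no correction step at all,
; because cvttsd2si truncates toward zero directly; in C it is simply
;   int JIT_Dbl2IntSSE2(double d) { return (int)d; }
; The $movsd macro defined near the top of this file is used instead of the movsd
; mnemonic because MASM would otherwise treat 'movsd' as the string-move
; instruction; the macro assembles movupd (66 0F 10) and then patches the 66 prefix
; byte to F2, producing the scalar-double load encoding.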
+; +.686P +.XMM +PUBLIC JIT_Dbl2IntSSE2 +JIT_Dbl2IntSSE2 PROC + $movsd xmm0, [esp+4] + cvttsd2si eax, xmm0 + ret 8 +JIT_Dbl2IntSSE2 ENDP +.586 + + +;*********************************************************************/ +; This is the small write barrier thunk we use when we know the +; ephemeral generation is higher in memory than older generations. +; The 0x0F0F0F0F values are bashed by the two functions above. +; This the generic version - wherever the code says ECX, +; the specific register is patched later into a copy +; Note: do not replace ECX by EAX - there is a smaller encoding for +; the compares just for EAX, which won't work for other registers. +; +; READ THIS!!!!!! +; it is imperative that the addresses of of the values that we overwrite +; (card table, ephemeral region ranges, etc) are naturally aligned since +; there are codepaths that will overwrite these values while the EE is running. +; +PUBLIC JIT_WriteBarrierReg_PreGrow +JIT_WriteBarrierReg_PreGrow PROC + mov DWORD PTR [edx], ecx + cmp ecx, 0F0F0F0F0h + jb NoWriteBarrierPre + + shr edx, 10 + nop ; padding for alignment of constant + cmp byte ptr [edx+0F0F0F0F0h], 0FFh + jne WriteBarrierPre +NoWriteBarrierPre: + ret + nop ; padding for alignment of constant + nop ; padding for alignment of constant +WriteBarrierPre: + mov byte ptr [edx+0F0F0F0F0h], 0FFh + ret +JIT_WriteBarrierReg_PreGrow ENDP + +;*********************************************************************/ +; This is the larger write barrier thunk we use when we know that older +; generations may be higher in memory than the ephemeral generation +; The 0x0F0F0F0F values are bashed by the two functions above. +; This the generic version - wherever the code says ECX, +; the specific register is patched later into a copy +; Note: do not replace ECX by EAX - there is a smaller encoding for +; the compares just for EAX, which won't work for other registers. +; NOTE: we need this aligned for our validation to work properly + ALIGN 4 +PUBLIC JIT_WriteBarrierReg_PostGrow +JIT_WriteBarrierReg_PostGrow PROC + mov DWORD PTR [edx], ecx + cmp ecx, 0F0F0F0F0h + jb NoWriteBarrierPost + cmp ecx, 0F0F0F0F0h + jae NoWriteBarrierPost + + shr edx, 10 + nop ; padding for alignment of constant + cmp byte ptr [edx+0F0F0F0F0h], 0FFh + jne WriteBarrierPost +NoWriteBarrierPost: + ret + nop ; padding for alignment of constant + nop ; padding for alignment of constant +WriteBarrierPost: + mov byte ptr [edx+0F0F0F0F0h], 0FFh + ret +JIT_WriteBarrierReg_PostGrow ENDP + +;*********************************************************************/ +; + + ; a fake virtual stub dispatch register indirect callsite + $nop3 + call dword ptr [eax] + + +PUBLIC JIT_TailCallReturnFromVSD +JIT_TailCallReturnFromVSD: +ifdef _DEBUG + nop ; blessed callsite +endif + call VSDHelperLabel ; keep call-ret count balanced. +VSDHelperLabel: + +; Stack at this point : +; ... 
+; m_ReturnAddress +; m_regs +; m_CallerAddress +; m_pThread +; vtbl +; GSCookie +; &VSDHelperLabel +OffsetOfTailCallFrame = 8 + +; ebx = pThread + +ifdef _DEBUG + mov esi, _s_gsCookie ; GetProcessGSCookie() + cmp dword ptr [esp+OffsetOfTailCallFrame-SIZEOF_GSCookie], esi + je TailCallFrameGSCookieIsValid + call @JIT_FailFast@0 + TailCallFrameGSCookieIsValid: +endif + ; remove the padding frame from the chain + mov esi, dword ptr [esp+OffsetOfTailCallFrame+4] ; esi = TailCallFrame::m_Next + mov dword ptr [ebx + Thread_m_pFrame], esi + + ; skip the frame + add esp, 20 ; &VSDHelperLabel, GSCookie, vtbl, m_Next, m_CallerAddress + + pop edi ; restore callee saved registers + pop esi + pop ebx + pop ebp + + ret ; return to m_ReturnAddress + +;------------------------------------------------------------------------------ +; + +PUBLIC JIT_TailCall +JIT_TailCall PROC + +; the stack layout at this point is: +; +; ebp+8+4*nOldStackArgs <- end of argument destination +; ... ... +; ebp+8+ old args (size is nOldStackArgs) +; ... ... +; ebp+8 <- start of argument destination +; ebp+4 ret addr +; ebp+0 saved ebp +; ebp-c saved ebx, esi, edi (if have callee saved regs = 1) +; +; other stuff (local vars) in the jitted callers' frame +; +; esp+20+4*nNewStackArgs <- end of argument source +; ... ... +; esp+20+ new args (size is nNewStackArgs) to be passed to the target of the tail-call +; ... ... +; esp+20 <- start of argument source +; esp+16 nOldStackArgs +; esp+12 nNewStackArgs +; esp+8 flags (1 = have callee saved regs, 2 = virtual stub dispatch) +; esp+4 target addr +; esp+0 retaddr +; +; If you change this function, make sure you update code:TailCallStubManager as well. + +RetAddr equ 0 +TargetAddr equ 4 +nNewStackArgs equ 12 +nOldStackArgs equ 16 +NewArgs equ 20 + +; extra space is incremented as we push things on the stack along the way +ExtraSpace = 0 + + call _GetThread@0; eax = Thread* + push eax ; Thread* + + ; save ArgumentRegisters + push ecx + push edx + +ExtraSpace = 12 ; pThread, ecx, edx + +ifdef FEATURE_HIJACK + ; Make sure that the EE does have the return address patched. So we can move it around. 
+ test dword ptr [eax+Thread_m_State], TS_Hijacked_ASM + jz NoHijack + + ; JIT_TailCallHelper(Thread *) + push eax + call JIT_TailCallHelper ; this is __stdcall + +NoHijack: +endif + + mov edx, dword ptr [esp+ExtraSpace+JIT_TailCall_StackOffsetToFlags] ; edx = flags + + mov eax, dword ptr [esp+ExtraSpace+nOldStackArgs] ; eax = nOldStackArgs + mov ecx, dword ptr [esp+ExtraSpace+nNewStackArgs] ; ecx = nNewStackArgs + + ; restore callee saved registers + ; @TODO : esp based - doesnt work with localloc + test edx, 1 + jz NoCalleeSaveRegisters + + mov edi, dword ptr [ebp-4] ; restore edi + mov esi, dword ptr [ebp-8] ; restore esi + mov ebx, dword ptr [ebp-12] ; restore ebx + +NoCalleeSaveRegisters: + + push dword ptr [ebp+4] ; save the original return address for later + push edi + push esi + +ExtraSpace = 24 ; pThread, ecx, edx, orig retaddr, edi, esi +CallersEsi = 0 +CallersEdi = 4 +OrigRetAddr = 8 +pThread = 20 + + lea edi, [ebp+8+4*eax] ; edi = the end of argument destination + lea esi, [esp+ExtraSpace+NewArgs+4*ecx] ; esi = the end of argument source + + mov ebp, dword ptr [ebp] ; restore ebp (do not use ebp as scratch register to get a good stack trace in debugger) + + test edx, 2 + jnz VSDTailCall + + ; copy the arguments to the final destination + test ecx, ecx + jz ArgumentsCopied +ArgumentCopyLoop: + ; At this point, this is the value of the registers : + ; edi = end of argument dest + ; esi = end of argument source + ; ecx = nNewStackArgs + mov eax, dword ptr [esi-4] + sub edi, 4 + sub esi, 4 + mov dword ptr [edi], eax + dec ecx + jnz ArgumentCopyLoop +ArgumentsCopied: + + ; edi = the start of argument destination + + mov eax, dword ptr [esp+4+4] ; return address + mov ecx, dword ptr [esp+ExtraSpace+TargetAddr] ; target address + + mov dword ptr [edi-4], eax ; return address + mov dword ptr [edi-8], ecx ; target address + + lea eax, [edi-8] ; new value for esp + + pop esi + pop edi + pop ecx ; skip original return address + pop edx + pop ecx + + mov esp, eax + +PUBLIC JIT_TailCallLeave ; add a label here so that TailCallStubManager can access it +JIT_TailCallLeave: + retn ; Will branch to targetAddr. This matches the + ; "call" done by JITted code, keeping the + ; call-ret count balanced. + + ;---------------------------------------------------------------------- +VSDTailCall: + ;---------------------------------------------------------------------- + + ; For the Virtual Stub Dispatch, we create a fake callsite to fool + ; the callsite probes. In order to create the call site, we need to insert TailCallFrame + ; if we do not have one already. + ; + ; ecx = nNewStackArgs + ; esi = the end of argument source + ; edi = the end of argument destination + ; + ; The stub has pushed the following onto the stack at this point : + ; pThread, ecx, edx, orig retaddr, edi, esi + + + cmp dword ptr [esp+OrigRetAddr], JIT_TailCallReturnFromVSD + jz VSDTailCallFrameInserted_DoSlideUpArgs ; There is an exiting TailCallFrame that can be reused + + ; try to allocate space for the frame / check whether there is enough space + ; If there is sufficient space, we will setup the frame and then slide + ; the arguments up the stack. Else, we first need to slide the arguments + ; down the stack to make space for the TailCallFrame + sub edi, (SIZEOF_GSCookie + SIZEOF_TailCallFrame) + cmp edi, esi + jae VSDSpaceForFrameChecked + + ; There is not sufficient space to wedge in the TailCallFrame without + ; overwriting the new arguments. 
+ ; We need to allocate the extra space on the stack, + ; and slide down the new arguments + + mov eax, esi + sub eax, edi + sub esp, eax + + mov eax, ecx ; to subtract the size of arguments + mov edx, ecx ; for counter + + neg eax + + ; copy down the arguments to the final destination, need to copy all temporary storage as well + add edx, (ExtraSpace+NewArgs)/4 + + lea esi, [esi+4*eax-(ExtraSpace+NewArgs)] + lea edi, [edi+4*eax-(ExtraSpace+NewArgs)] + +VSDAllocFrameCopyLoop: + mov eax, dword ptr [esi] + mov dword ptr [edi], eax + add esi, 4 + add edi, 4 + dec edx + jnz VSDAllocFrameCopyLoop + + ; the argument source and destination are same now + mov esi, edi + +VSDSpaceForFrameChecked: + + ; At this point, we have enough space on the stack for the TailCallFrame, + ; and we may already have slided down the arguments + + mov eax, _s_gsCookie ; GetProcessGSCookie() + mov dword ptr [edi], eax ; set GSCookie + mov eax, _g_TailCallFrameVptr ; vptr + mov edx, dword ptr [esp+OrigRetAddr] ; orig return address + mov dword ptr [edi+SIZEOF_GSCookie], eax ; TailCallFrame::vptr + mov dword ptr [edi+SIZEOF_GSCookie+28], edx ; TailCallFrame::m_ReturnAddress + + mov eax, dword ptr [esp+CallersEdi] ; restored edi + mov edx, dword ptr [esp+CallersEsi] ; restored esi + mov dword ptr [edi+SIZEOF_GSCookie+12], eax ; TailCallFrame::m_regs::edi + mov dword ptr [edi+SIZEOF_GSCookie+16], edx ; TailCallFrame::m_regs::esi + mov dword ptr [edi+SIZEOF_GSCookie+20], ebx ; TailCallFrame::m_regs::ebx + mov dword ptr [edi+SIZEOF_GSCookie+24], ebp ; TailCallFrame::m_regs::ebp + + mov ebx, dword ptr [esp+pThread] ; ebx = pThread + + mov eax, dword ptr [ebx+Thread_m_pFrame] + lea edx, [edi+SIZEOF_GSCookie] + mov dword ptr [edi+SIZEOF_GSCookie+4], eax ; TailCallFrame::m_pNext + mov dword ptr [ebx+Thread_m_pFrame], edx ; hook the new frame into the chain + + ; setup ebp chain + lea ebp, [edi+SIZEOF_GSCookie+24] ; TailCallFrame::m_regs::ebp + + ; Do not copy arguments again if they are in place already + ; Otherwise, we will need to slide the new arguments up the stack + cmp esi, edi + jne VSDTailCallFrameInserted_DoSlideUpArgs + + ; At this point, we must have already previously slided down the new arguments, + ; or the TailCallFrame is a perfect fit + ; set the caller address + mov edx, dword ptr [esp+ExtraSpace+RetAddr] ; caller address + mov dword ptr [edi+SIZEOF_GSCookie+8], edx ; TailCallFrame::m_CallerAddress + + ; adjust edi as it would by copying + neg ecx + lea edi, [edi+4*ecx] + + jmp VSDArgumentsCopied + +VSDTailCallFrameInserted_DoSlideUpArgs: + ; set the caller address + mov edx, dword ptr [esp+ExtraSpace+RetAddr] ; caller address + mov dword ptr [edi+SIZEOF_GSCookie+8], edx ; TailCallFrame::m_CallerAddress + + ; copy the arguments to the final destination + test ecx, ecx + jz VSDArgumentsCopied +VSDArgumentCopyLoop: + mov eax, dword ptr [esi-4] + sub edi, 4 + sub esi, 4 + mov dword ptr [edi], eax + dec ecx + jnz VSDArgumentCopyLoop +VSDArgumentsCopied: + + ; edi = the start of argument destination + + mov ecx, dword ptr [esp+ExtraSpace+TargetAddr] ; target address + + mov dword ptr [edi-4], JIT_TailCallReturnFromVSD ; return address + mov dword ptr [edi-12], ecx ; address of indirection cell + mov ecx, [ecx] + mov dword ptr [edi-8], ecx ; target address + + ; skip original return address and saved esi, edi + add esp, 12 + + pop edx + pop ecx + + lea esp, [edi-12] ; new value for esp + pop eax + +PUBLIC JIT_TailCallVSDLeave ; add a label here so that TailCallStubManager can access it +JIT_TailCallVSDLeave: + 
retn ; Will branch to targetAddr. This matches the + ; "call" done by JITted code, keeping the + ; call-ret count balanced. + +JIT_TailCall ENDP + + +;------------------------------------------------------------------------------ + +; HCIMPL2_VV(float, JIT_FltRem, float dividend, float divisor) +@JIT_FltRem@8 proc public + fld dword ptr [esp+4] ; divisor + fld dword ptr [esp+8] ; dividend +fremloop: + fprem + fstsw ax + fwait + sahf + jp fremloop ; Continue while the FPU status bit C2 is set + fxch ; swap, so divisor is on top and result is in st(1) + fstp ST(0) ; Pop the divisor from the FP stack + retn 8 ; Return value is in st(0) +@JIT_FltRem@8 endp + +; HCIMPL2_VV(float, JIT_DblRem, float dividend, float divisor) +@JIT_DblRem@16 proc public + fld qword ptr [esp+4] ; divisor + fld qword ptr [esp+12] ; dividend +fremloopd: + fprem + fstsw ax + fwait + sahf + jp fremloopd ; Continue while the FPU status bit C2 is set + fxch ; swap, so divisor is on top and result is in st(1) + fstp ST(0) ; Pop the divisor from the FP stack + retn 16 ; Return value is in st(0) +@JIT_DblRem@16 endp + +;------------------------------------------------------------------------------ + +g_SystemInfo TEXTEQU +g_SpinConstants TEXTEQU +g_pSyncTable TEXTEQU +JITutil_MonEnterWorker TEXTEQU <@JITutil_MonEnterWorker@4> +JITutil_MonReliableEnter TEXTEQU <@JITutil_MonReliableEnter@8> +JITutil_MonTryEnter TEXTEQU <@JITutil_MonTryEnter@12> +JITutil_MonExitWorker TEXTEQU <@JITutil_MonExitWorker@4> +JITutil_MonContention TEXTEQU <@JITutil_MonContention@4> +JITutil_MonReliableContention TEXTEQU <@JITutil_MonReliableContention@8> +JITutil_MonSignal TEXTEQU <@JITutil_MonSignal@4> +JIT_InternalThrow TEXTEQU <@JIT_InternalThrow@4> +EXTRN g_SystemInfo:BYTE +EXTRN g_SpinConstants:BYTE +EXTRN g_pSyncTable:DWORD +EXTRN JITutil_MonEnterWorker:PROC +EXTRN JITutil_MonReliableEnter:PROC +EXTRN JITutil_MonTryEnter:PROC +EXTRN JITutil_MonExitWorker:PROC +EXTRN JITutil_MonContention:PROC +EXTRN JITutil_MonReliableContention:PROC +EXTRN JITutil_MonSignal:PROC +EXTRN JIT_InternalThrow:PROC + +ifdef MON_DEBUG +ifdef TRACK_SYNC +EnterSyncHelper TEXTEQU <_EnterSyncHelper@8> +LeaveSyncHelper TEXTEQU <_LeaveSyncHelper@8> +EXTRN EnterSyncHelper:PROC +EXTRN LeaveSyncHelper:PROC +endif ;TRACK_SYNC +endif ;MON_DEBUG + +; The following macro is needed because MASM returns +; "instruction prefix not allowed" error message for +; rep nop mnemonic +$repnop MACRO + db 0F3h + db 090h +ENDM + +; Safe ThreadAbort does not abort a thread if it is running finally or has lock counts. +; At the time we call Monitor.Enter, we initiate the abort if we can. +; We do not need to do the same for Monitor.Leave, since most of time, Monitor.Leave is called +; during finally. + +;********************************************************************** +; This is a frameless helper for entering a monitor on a object. +; The object is in ARGUMENT_REG1. This tries the normal case (no +; blocking or object allocation) in line and calls a framed helper +; for the other cases. +; ***** NOTE: if you make any changes to this routine, build with MON_DEBUG undefined +; to make sure you don't break the non-debug build. This is very fragile code. +; Also, propagate the changes to jithelp.s which contains the same helper and assembly code +; (in AT&T syntax) for gnu assembler. 
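; Editor's note (hedged sketch, not part of the original source): the fast path
; below operates on the object header dword at [obj - SyncBlockIndexOffset].
; In rough C terms:
;   DWORD old = header;
;   if ((old & SBLK_COMBINED_MASK) == 0)                     (thin lock, free)
;       try cmpxchg(header, old | currentThreadId);
;   else if ((old & SBLK_MASK_LOCK_THREADID) == currentThreadId)
;       try cmpxchg(header, old + SBLK_LOCK_RECLEVEL_INC);   (recursive acquire)
;   else
;       spin with exponential backoff, then fall back to the sync-block path.
; The exact bit assignments come from asmconstants.inc, included at the top of this
; file.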
+@JIT_MonEnterWorker@4 proc public + ; Initialize delay value for retry with exponential backoff + push ebx + mov ebx, dword ptr g_SpinConstants+SpinConstants_dwInitialDuration + + ; We need yet another register to avoid refetching the thread object + push esi + + ; Check if the instance is NULL. + test ARGUMENT_REG1, ARGUMENT_REG1 + jz MonEnterFramedLockHelper + + call _GetThread@0 + mov esi,eax + + ; Check if we can abort here + mov eax, [esi+Thread_m_State] + and eax, TS_CatchAtSafePoint_ASM + jz MonEnterRetryThinLock + ; go through the slow code path to initiate ThreadAbort. + jmp MonEnterFramedLockHelper + +MonEnterRetryThinLock: + ; Fetch the object header dword + mov eax, [ARGUMENT_REG1-SyncBlockIndexOffset_ASM] + + ; Check whether we have the "thin lock" layout, the lock is free and the spin lock bit not set + ; SBLK_COMBINED_MASK_ASM = BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX + BIT_SBLK_SPIN_LOCK + SBLK_MASK_LOCK_THREADID + SBLK_MASK_LOCK_RECLEVEL + test eax, SBLK_COMBINED_MASK_ASM + jnz MonEnterNeedMoreTests + + ; Everything is fine - get the thread id to store in the lock + mov edx, [esi+Thread_m_ThreadId] + + ; If the thread id is too large, we need a syncblock for sure + cmp edx, SBLK_MASK_LOCK_THREADID_ASM + ja MonEnterFramedLockHelper + + ; We want to store a new value with the current thread id set in the low 10 bits + or edx,eax + lock cmpxchg dword ptr [ARGUMENT_REG1-SyncBlockIndexOffset_ASM], edx + jnz MonEnterPrepareToWaitThinLock + + ; Everything went fine and we're done + add [esi+Thread_m_dwLockCount],1 + pop esi + pop ebx + ret + +MonEnterNeedMoreTests: + ; Ok, it's not the simple case - find out which case it is + test eax, BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX_ASM + jnz MonEnterHaveHashOrSyncBlockIndex + + ; The header is transitioning or the lock - treat this as if the lock was taken + test eax, BIT_SBLK_SPIN_LOCK_ASM + jnz MonEnterPrepareToWaitThinLock + + ; Here we know we have the "thin lock" layout, but the lock is not free. + ; It could still be the recursion case - compare the thread id to check + mov edx,eax + and edx, SBLK_MASK_LOCK_THREADID_ASM + cmp edx, [esi+Thread_m_ThreadId] + jne MonEnterPrepareToWaitThinLock + + ; Ok, the thread id matches, it's the recursion case. + ; Bump up the recursion level and check for overflow + lea edx, [eax+SBLK_LOCK_RECLEVEL_INC_ASM] + test edx, SBLK_MASK_LOCK_RECLEVEL_ASM + jz MonEnterFramedLockHelper + + ; Try to put the new recursion level back. If the header was changed in the meantime, + ; we need a full retry, because the layout could have changed. 
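    ; Editor's note (sketch, hedged): the lock cmpxchg below compares EAX (the
    ; header value read at MonEnterRetryThinLock) against the live header dword;
    ; if another thread changed it in the meantime the exchange fails and control
    ; goes back to re-read and re-classify the header, so the overall shape is
    ;   for (;;) { DWORD old = header; DWORD newv = build_from(old);
    ;              if (InterlockedCompareExchange(&header, newv, old) == old) break; }
    ; where build_from is a stand-in for the classification above, rather than a
    ; blind retry of the same write.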
+ lock cmpxchg [ARGUMENT_REG1-SyncBlockIndexOffset_ASM], edx + jnz MonEnterRetryHelperThinLock + + ; Everything went fine and we're done + pop esi + pop ebx + ret + +MonEnterPrepareToWaitThinLock: + ; If we are on an MP system, we try spinning for a certain number of iterations + cmp dword ptr g_SystemInfo+SYSTEM_INFO_dwNumberOfProcessors,1 + jle MonEnterFramedLockHelper + + ; exponential backoff: delay by approximately 2*ebx clock cycles (on a PIII) + mov eax, ebx +MonEnterdelayLoopThinLock: + $repnop ; indicate to the CPU that we are spin waiting (useful for some Intel P4 multiprocs) + dec eax + jnz MonEnterdelayLoopThinLock + + ; next time, wait a factor longer + imul ebx, dword ptr g_SpinConstants+SpinConstants_dwBackoffFactor + + cmp ebx, dword ptr g_SpinConstants+SpinConstants_dwMaximumDuration + jle MonEnterRetryHelperThinLock + + jmp MonEnterFramedLockHelper + +MonEnterRetryHelperThinLock: + jmp MonEnterRetryThinLock + +MonEnterHaveHashOrSyncBlockIndex: + ; If we have a hash code already, we need to create a sync block + test eax, BIT_SBLK_IS_HASHCODE_ASM + jnz MonEnterFramedLockHelper + + ; Ok, we have a sync block index - just and out the top bits and grab the syncblock index + and eax, MASK_SYNCBLOCKINDEX_ASM + + ; Get the sync block pointer. + mov ARGUMENT_REG2, dword ptr g_pSyncTable + mov ARGUMENT_REG2, [ARGUMENT_REG2+eax*SizeOfSyncTableEntry_ASM+SyncTableEntry_m_SyncBlock] + + ; Check if the sync block has been allocated. + test ARGUMENT_REG2, ARGUMENT_REG2 + jz MonEnterFramedLockHelper + + ; Get a pointer to the lock object. + lea ARGUMENT_REG2, [ARGUMENT_REG2+SyncBlock_m_Monitor] + + ; Attempt to acquire the lock. +MonEnterRetrySyncBlock: + mov eax, [ARGUMENT_REG2+AwareLock_m_MonitorHeld] + test eax,eax + jne MonEnterHaveWaiters + + ; Common case, lock isn't held and there are no waiters. Attempt to + ; gain ownership ourselves. + mov ARGUMENT_REG1,1 + lock cmpxchg [ARGUMENT_REG2+AwareLock_m_MonitorHeld], ARGUMENT_REG1 + jnz MonEnterRetryHelperSyncBlock + + ; Success. Save the thread object in the lock and increment the use count. + mov dword ptr [ARGUMENT_REG2+AwareLock_m_HoldingThread],esi + inc dword ptr [esi+Thread_m_dwLockCount] + inc dword ptr [ARGUMENT_REG2+AwareLock_m_Recursion] + +ifdef MON_DEBUG +ifdef TRACK_SYNC + push ARGUMENT_REG2 ; AwareLock + push [esp+4] ; return address + call EnterSyncHelper +endif ;TRACK_SYNC +endif ;MON_DEBUG + pop esi + pop ebx + ret + + ; It's possible to get here with waiters but no lock held, but in this + ; case a signal is about to be fired which will wake up a waiter. So + ; for fairness sake we should wait too. + ; Check first for recursive lock attempts on the same thread. +MonEnterHaveWaiters: + ; Is mutex already owned by current thread? + cmp [ARGUMENT_REG2+AwareLock_m_HoldingThread],esi + jne MonEnterPrepareToWait + + ; Yes, bump our use count. 
+ inc dword ptr [ARGUMENT_REG2+AwareLock_m_Recursion] +ifdef MON_DEBUG +ifdef TRACK_SYNC + push ARGUMENT_REG2 ; AwareLock + push [esp+4] ; return address + call EnterSyncHelper +endif ;TRACK_SYNC +endif ;MON_DEBUG + pop esi + pop ebx + ret + +MonEnterPrepareToWait: + ; If we are on an MP system, we try spinning for a certain number of iterations + cmp dword ptr g_SystemInfo+SYSTEM_INFO_dwNumberOfProcessors,1 + jle MonEnterHaveWaiters1 + + ; exponential backoff: delay by approximately 2*ebx clock cycles (on a PIII) + mov eax,ebx +MonEnterdelayLoop: + $repnop ; indicate to the CPU that we are spin waiting (useful for some Intel P4 multiprocs) + dec eax + jnz MonEnterdelayLoop + + ; next time, wait a factor longer + imul ebx, dword ptr g_SpinConstants+SpinConstants_dwBackoffFactor + + cmp ebx, dword ptr g_SpinConstants+SpinConstants_dwMaximumDuration + jle MonEnterRetrySyncBlock + +MonEnterHaveWaiters1: + + pop esi + pop ebx + + ; Place AwareLock in arg1 then call contention helper. + mov ARGUMENT_REG1, ARGUMENT_REG2 + jmp JITutil_MonContention + +MonEnterRetryHelperSyncBlock: + jmp MonEnterRetrySyncBlock + + ; ECX has the object to synchronize on +MonEnterFramedLockHelper: + pop esi + pop ebx + jmp JITutil_MonEnterWorker + +@JIT_MonEnterWorker@4 endp + +;********************************************************************** +; This is a frameless helper for entering a monitor on a object, and +; setting a flag to indicate that the lock was taken. +; The object is in ARGUMENT_REG1. The flag is in ARGUMENT_REG2. +; This tries the normal case (no blocking or object allocation) in line +; and calls a framed helper for the other cases. +; ***** NOTE: if you make any changes to this routine, build with MON_DEBUG undefined +; to make sure you don't break the non-debug build. This is very fragile code. +; Also, propagate the changes to jithelp.s which contains the same helper and assembly code +; (in AT&T syntax) for gnu assembler. +@JIT_MonReliableEnter@8 proc public + ; Initialize delay value for retry with exponential backoff + push ebx + mov ebx, dword ptr g_SpinConstants+SpinConstants_dwInitialDuration + + ; Put pbLockTaken in edi + push edi + mov edi, ARGUMENT_REG2 + + ; We need yet another register to avoid refetching the thread object + push esi + + ; Check if the instance is NULL. + test ARGUMENT_REG1, ARGUMENT_REG1 + jz MonReliableEnterFramedLockHelper + + call _GetThread@0 + mov esi,eax + + ; Check if we can abort here + mov eax, [esi+Thread_m_State] + and eax, TS_CatchAtSafePoint_ASM + jz MonReliableEnterRetryThinLock + ; go through the slow code path to initiate ThreadAbort. 
+ jmp MonReliableEnterFramedLockHelper + +MonReliableEnterRetryThinLock: + ; Fetch the object header dword + mov eax, [ARGUMENT_REG1-SyncBlockIndexOffset_ASM] + + ; Check whether we have the "thin lock" layout, the lock is free and the spin lock bit not set + ; SBLK_COMBINED_MASK_ASM = BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX + BIT_SBLK_SPIN_LOCK + SBLK_MASK_LOCK_THREADID + SBLK_MASK_LOCK_RECLEVEL + test eax, SBLK_COMBINED_MASK_ASM + jnz MonReliableEnterNeedMoreTests + + ; Everything is fine - get the thread id to store in the lock + mov edx, [esi+Thread_m_ThreadId] + + ; If the thread id is too large, we need a syncblock for sure + cmp edx, SBLK_MASK_LOCK_THREADID_ASM + ja MonReliableEnterFramedLockHelper + + ; We want to store a new value with the current thread id set in the low 10 bits + or edx,eax + lock cmpxchg dword ptr [ARGUMENT_REG1-SyncBlockIndexOffset_ASM], edx + jnz MonReliableEnterPrepareToWaitThinLock + + ; Everything went fine and we're done + add [esi+Thread_m_dwLockCount],1 + ; Set *pbLockTaken=true + mov byte ptr [edi],1 + pop esi + pop edi + pop ebx + ret + +MonReliableEnterNeedMoreTests: + ; Ok, it's not the simple case - find out which case it is + test eax, BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX_ASM + jnz MonReliableEnterHaveHashOrSyncBlockIndex + + ; The header is transitioning or the lock - treat this as if the lock was taken + test eax, BIT_SBLK_SPIN_LOCK_ASM + jnz MonReliableEnterPrepareToWaitThinLock + + ; Here we know we have the "thin lock" layout, but the lock is not free. + ; It could still be the recursion case - compare the thread id to check + mov edx,eax + and edx, SBLK_MASK_LOCK_THREADID_ASM + cmp edx, [esi+Thread_m_ThreadId] + jne MonReliableEnterPrepareToWaitThinLock + + ; Ok, the thread id matches, it's the recursion case. + ; Bump up the recursion level and check for overflow + lea edx, [eax+SBLK_LOCK_RECLEVEL_INC_ASM] + test edx, SBLK_MASK_LOCK_RECLEVEL_ASM + jz MonReliableEnterFramedLockHelper + + ; Try to put the new recursion level back. If the header was changed in the meantime, + ; we need a full retry, because the layout could have changed. + lock cmpxchg [ARGUMENT_REG1-SyncBlockIndexOffset_ASM], edx + jnz MonReliableEnterRetryHelperThinLock + + ; Everything went fine and we're done + ; Set *pbLockTaken=true + mov byte ptr [edi],1 + pop esi + pop edi + pop ebx + ret + +MonReliableEnterPrepareToWaitThinLock: + ; If we are on an MP system, we try spinning for a certain number of iterations + cmp dword ptr g_SystemInfo+SYSTEM_INFO_dwNumberOfProcessors,1 + jle MonReliableEnterFramedLockHelper + + ; exponential backoff: delay by approximately 2*ebx clock cycles (on a PIII) + mov eax, ebx +MonReliableEnterdelayLoopThinLock: + $repnop ; indicate to the CPU that we are spin waiting (useful for some Intel P4 multiprocs) + dec eax + jnz MonReliableEnterdelayLoopThinLock + + ; next time, wait a factor longer + imul ebx, dword ptr g_SpinConstants+SpinConstants_dwBackoffFactor + + cmp ebx, dword ptr g_SpinConstants+SpinConstants_dwMaximumDuration + jle MonReliableEnterRetryHelperThinLock + + jmp MonReliableEnterFramedLockHelper + +MonReliableEnterRetryHelperThinLock: + jmp MonReliableEnterRetryThinLock + +MonReliableEnterHaveHashOrSyncBlockIndex: + ; If we have a hash code already, we need to create a sync block + test eax, BIT_SBLK_IS_HASHCODE_ASM + jnz MonReliableEnterFramedLockHelper + + ; Ok, we have a sync block index - just and out the top bits and grab the syncblock index + and eax, MASK_SYNCBLOCKINDEX_ASM + + ; Get the sync block pointer. 
+ mov ARGUMENT_REG2, dword ptr g_pSyncTable + mov ARGUMENT_REG2, [ARGUMENT_REG2+eax*SizeOfSyncTableEntry_ASM+SyncTableEntry_m_SyncBlock] + + ; Check if the sync block has been allocated. + test ARGUMENT_REG2, ARGUMENT_REG2 + jz MonReliableEnterFramedLockHelper + + ; Get a pointer to the lock object. + lea ARGUMENT_REG2, [ARGUMENT_REG2+SyncBlock_m_Monitor] + + ; Attempt to acquire the lock. +MonReliableEnterRetrySyncBlock: + mov eax, [ARGUMENT_REG2+AwareLock_m_MonitorHeld] + test eax,eax + jne MonReliableEnterHaveWaiters + + ; Common case, lock isn't held and there are no waiters. Attempt to + ; gain ownership ourselves. + mov ARGUMENT_REG1,1 + lock cmpxchg [ARGUMENT_REG2+AwareLock_m_MonitorHeld], ARGUMENT_REG1 + jnz MonReliableEnterRetryHelperSyncBlock + + ; Success. Save the thread object in the lock and increment the use count. + mov dword ptr [ARGUMENT_REG2+AwareLock_m_HoldingThread],esi + inc dword ptr [esi+Thread_m_dwLockCount] + inc dword ptr [ARGUMENT_REG2+AwareLock_m_Recursion] + ; Set *pbLockTaken=true + mov byte ptr [edi],1 + +ifdef MON_DEBUG +ifdef TRACK_SYNC + push ARGUMENT_REG2 ; AwareLock + push [esp+4] ; return address + call EnterSyncHelper +endif ;TRACK_SYNC +endif ;MON_DEBUG + pop esi + pop edi + pop ebx + ret + + ; It's possible to get here with waiters but no lock held, but in this + ; case a signal is about to be fired which will wake up a waiter. So + ; for fairness sake we should wait too. + ; Check first for recursive lock attempts on the same thread. +MonReliableEnterHaveWaiters: + ; Is mutex already owned by current thread? + cmp [ARGUMENT_REG2+AwareLock_m_HoldingThread],esi + jne MonReliableEnterPrepareToWait + + ; Yes, bump our use count. + inc dword ptr [ARGUMENT_REG2+AwareLock_m_Recursion] + ; Set *pbLockTaken=true + mov byte ptr [edi],1 +ifdef MON_DEBUG +ifdef TRACK_SYNC + push ARGUMENT_REG2 ; AwareLock + push [esp+4] ; return address + call EnterSyncHelper +endif ;TRACK_SYNC +endif ;MON_DEBUG + pop esi + pop edi + pop ebx + ret + +MonReliableEnterPrepareToWait: + ; If we are on an MP system, we try spinning for a certain number of iterations + cmp dword ptr g_SystemInfo+SYSTEM_INFO_dwNumberOfProcessors,1 + jle MonReliableEnterHaveWaiters1 + + ; exponential backoff: delay by approximately 2*ebx clock cycles (on a PIII) + mov eax,ebx +MonReliableEnterdelayLoop: + $repnop ; indicate to the CPU that we are spin waiting (useful for some Intel P4 multiprocs) + dec eax + jnz MonReliableEnterdelayLoop + + ; next time, wait a factor longer + imul ebx, dword ptr g_SpinConstants+SpinConstants_dwBackoffFactor + + cmp ebx, dword ptr g_SpinConstants+SpinConstants_dwMaximumDuration + jle MonReliableEnterRetrySyncBlock + +MonReliableEnterHaveWaiters1: + + ; Place AwareLock in arg1, pbLockTaken in arg2, then call contention helper. + mov ARGUMENT_REG1, ARGUMENT_REG2 + mov ARGUMENT_REG2, edi + + pop esi + pop edi + pop ebx + + jmp JITutil_MonReliableContention + +MonReliableEnterRetryHelperSyncBlock: + jmp MonReliableEnterRetrySyncBlock + + ; ECX has the object to synchronize on +MonReliableEnterFramedLockHelper: + mov ARGUMENT_REG2, edi + pop esi + pop edi + pop ebx + jmp JITutil_MonReliableEnter + +@JIT_MonReliableEnter@8 endp + +;************************************************************************ +; This is a frameless helper for trying to enter a monitor on a object. +; The object is in ARGUMENT_REG1 and a timeout in ARGUMENT_REG2. This tries the +; normal case (no object allocation) in line and calls a framed helper for the +; other cases. 
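; Editor's note (hedged): with the fastcall decoration @JIT_MonTryEnter@12 the
; helper receives roughly
;   (Object* obj /* ecx */, INT32 timeout /* edx */, byte-sized flag* pbLockTaken /* [esp+4] */)
; On the fast acquisition paths shown here the code stores 1 through pbLockTaken
; ('mov byte ptr [eax], 1') and returns with 'ret 4' to pop that single stack
; argument; the timeout is validated up front but only consumed on the slower,
; framed paths.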
+; ***** NOTE: if you make any changes to this routine, build with MON_DEBUG undefined +; to make sure you don't break the non-debug build. This is very fragile code. +; Also, propagate the changes to jithelp.s which contains the same helper and assembly code +; (in AT&T syntax) for gnu assembler. +@JIT_MonTryEnter@12 proc public + ; Save the timeout parameter. + push ARGUMENT_REG2 + + ; Initialize delay value for retry with exponential backoff + push ebx + mov ebx, dword ptr g_SpinConstants+SpinConstants_dwInitialDuration + + ; The thin lock logic needs another register to store the thread + push esi + + ; Check if the instance is NULL. + test ARGUMENT_REG1, ARGUMENT_REG1 + jz MonTryEnterFramedLockHelper + + ; Check if the timeout looks valid + cmp ARGUMENT_REG2,-1 + jl MonTryEnterFramedLockHelper + + ; Get the thread right away, we'll need it in any case + call _GetThread@0 + mov esi,eax + + ; Check if we can abort here + mov eax, [esi+Thread_m_State] + and eax, TS_CatchAtSafePoint_ASM + jz MonTryEnterRetryThinLock + ; go through the slow code path to initiate ThreadAbort. + jmp MonTryEnterFramedLockHelper + +MonTryEnterRetryThinLock: + ; Get the header dword and check its layout + mov eax, [ARGUMENT_REG1-SyncBlockIndexOffset_ASM] + + ; Check whether we have the "thin lock" layout, the lock is free and the spin lock bit not set + ; SBLK_COMBINED_MASK_ASM = BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX + BIT_SBLK_SPIN_LOCK + SBLK_MASK_LOCK_THREADID + SBLK_MASK_LOCK_RECLEVEL + test eax, SBLK_COMBINED_MASK_ASM + jnz MonTryEnterNeedMoreTests + + ; Ok, everything is fine. Fetch the thread id and make sure it's small enough for thin locks + mov edx, [esi+Thread_m_ThreadId] + cmp edx, SBLK_MASK_LOCK_THREADID_ASM + ja MonTryEnterFramedLockHelper + + ; Try to put our thread id in there + or edx,eax + lock cmpxchg [ARGUMENT_REG1-SyncBlockIndexOffset_ASM],edx + jnz MonTryEnterRetryHelperThinLock + + ; Got the lock - everything is fine" + add [esi+Thread_m_dwLockCount],1 + pop esi + + ; Delay value no longer needed + pop ebx + + ; Timeout parameter not needed, ditch it from the stack. + add esp,4 + + mov eax, [esp+4] + mov byte ptr [eax], 1 + ret 4 + +MonTryEnterNeedMoreTests: + ; Ok, it's not the simple case - find out which case it is + test eax, BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX_ASM + jnz MonTryEnterHaveSyncBlockIndexOrHash + + ; The header is transitioning or the lock is taken + test eax, BIT_SBLK_SPIN_LOCK_ASM + jnz MonTryEnterRetryHelperThinLock + + mov edx, eax + and edx, SBLK_MASK_LOCK_THREADID_ASM + cmp edx, [esi+Thread_m_ThreadId] + jne MonTryEnterPrepareToWaitThinLock + + ; Ok, the thread id matches, it's the recursion case. + ; Bump up the recursion level and check for overflow + lea edx, [eax+SBLK_LOCK_RECLEVEL_INC_ASM] + test edx, SBLK_MASK_LOCK_RECLEVEL_ASM + jz MonTryEnterFramedLockHelper + + ; Try to put the new recursion level back. If the header was changed in the meantime, + ; we need a full retry, because the layout could have changed. + lock cmpxchg [ARGUMENT_REG1-SyncBlockIndexOffset_ASM],edx + jnz MonTryEnterRetryHelperThinLock + + ; Everything went fine and we're done + pop esi + pop ebx + + ; Timeout parameter not needed, ditch it from the stack. 
+ add esp, 4 + mov eax, [esp+4] + mov byte ptr [eax], 1 + ret 4 + +MonTryEnterPrepareToWaitThinLock: + ; If we are on an MP system, we try spinning for a certain number of iterations + cmp dword ptr g_SystemInfo+SYSTEM_INFO_dwNumberOfProcessors,1 + jle MonTryEnterFramedLockHelper + + ; exponential backoff: delay by approximately 2*ebx clock cycles (on a PIII) + mov eax, ebx +MonTryEnterdelayLoopThinLock: + $repnop ; indicate to the CPU that we are spin waiting (useful for some Intel P4 multiprocs) + dec eax + jnz MonTryEnterdelayLoopThinLock + + ; next time, wait a factor longer + imul ebx, dword ptr g_SpinConstants+SpinConstants_dwBackoffFactor + + cmp ebx, dword ptr g_SpinConstants+SpinConstants_dwMaximumDuration + jle MonTryEnterRetryHelperThinLock + + jmp MonTryEnterWouldBlock + +MonTryEnterRetryHelperThinLock: + jmp MonTryEnterRetryThinLock + + +MonTryEnterHaveSyncBlockIndexOrHash: + ; If we have a hash code already, we need to create a sync block + test eax, BIT_SBLK_IS_HASHCODE_ASM + jnz MonTryEnterFramedLockHelper + + ; Just and out the top bits and grab the syncblock index + and eax, MASK_SYNCBLOCKINDEX_ASM + + ; Get the sync block pointer. + mov ARGUMENT_REG2, dword ptr g_pSyncTable + mov ARGUMENT_REG2, [ARGUMENT_REG2+eax*SizeOfSyncTableEntry_ASM+SyncTableEntry_m_SyncBlock] + + ; Check if the sync block has been allocated. + test ARGUMENT_REG2, ARGUMENT_REG2 + jz MonTryEnterFramedLockHelper + + ; Get a pointer to the lock object. + lea ARGUMENT_REG2, [ARGUMENT_REG2+SyncBlock_m_Monitor] + +MonTryEnterRetrySyncBlock: + ; Attempt to acquire the lock. + mov eax, [ARGUMENT_REG2+AwareLock_m_MonitorHeld] + test eax,eax + jne MonTryEnterHaveWaiters + + ; We need another scratch register for what follows, so save EBX now so" + ; we can use it for that purpose." + push ebx + + ; Common case, lock isn't held and there are no waiters. Attempt to + ; gain ownership ourselves. + mov ebx,1 + lock cmpxchg [ARGUMENT_REG2+AwareLock_m_MonitorHeld],ebx + + pop ebx + + jnz MonTryEnterRetryHelperSyncBlock + + ; Success. Save the thread object in the lock and increment the use count. + mov dword ptr [ARGUMENT_REG2+AwareLock_m_HoldingThread],esi + inc dword ptr [ARGUMENT_REG2+AwareLock_m_Recursion] + inc dword ptr [esi+Thread_m_dwLockCount] + +ifdef MON_DEBUG +ifdef TRACK_SYNC + push ARGUMENT_REG2 ; AwareLock + push [esp+4] ; return address + call EnterSyncHelper +endif ;TRACK_SYNC +endif ;MON_DEBUG + + pop esi + pop ebx + + ; Timeout parameter not needed, ditch it from the stack." + add esp,4 + + mov eax, [esp+4] + mov byte ptr [eax], 1 + ret 4 + + ; It's possible to get here with waiters but no lock held, but in this + ; case a signal is about to be fired which will wake up a waiter. So + ; for fairness sake we should wait too. + ; Check first for recursive lock attempts on the same thread. +MonTryEnterHaveWaiters: + ; Is mutex already owned by current thread? + cmp [ARGUMENT_REG2+AwareLock_m_HoldingThread],esi + jne MonTryEnterPrepareToWait + + ; Yes, bump our use count. + inc dword ptr [ARGUMENT_REG2+AwareLock_m_Recursion] +ifdef MON_DEBUG +ifdef TRACK_SYNC + push ARGUMENT_REG2 ; AwareLock + push [esp+4] ; return address + call EnterSyncHelper +endif ;TRACK_SYNC +endif ;MON_DEBUG + pop esi + pop ebx + + ; Timeout parameter not needed, ditch it from the stack. 
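The spin-wait blocks above all follow the same exponential backoff pattern: pause for roughly the current delay (the $repnop is a pause hint), retry the lock, multiply the delay by a backoff factor, and give up once it passes a maximum. A hedged C++ equivalent, with placeholder numbers standing in for the g_SpinConstants fields, is sketched below.

#include <immintrin.h>   // _mm_pause

// Hypothetical stand-ins for g_SpinConstants (values are illustrative).
struct SpinConstants { unsigned initial, backoffFactor, maximum; };
static const SpinConstants kSpin = { 50, 3, 20000 };

// Spin with exponential backoff while 'tryAcquire' keeps failing.
// Returns true if the lock was acquired while spinning, false if the
// caller should fall back to the blocking (framed) helper.
template <typename TryAcquire>
bool SpinWithBackoff(TryAcquire tryAcquire, unsigned processorCount)
{
    if (processorCount <= 1)
        return false;                       // never spin on a uniprocessor

    for (unsigned duration = kSpin.initial;
         duration <= kSpin.maximum;
         duration *= kSpin.backoffFactor)   // next time, wait a factor longer
    {
        for (unsigned i = duration; i != 0; --i)
            _mm_pause();                    // same hint as the $repnop above

        if (tryAcquire())
            return true;
    }
    return false;
}

In the helpers above, the retry jumps back to the thin-lock or sync-block attempt; here that attempt is abstracted as tryAcquire.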
+ add esp,4
+
+ mov eax, [esp+4]
+ mov byte ptr [eax], 1
+ ret 4
+
+MonTryEnterPrepareToWait:
+ ; If we are on an MP system, we try spinning for a certain number of iterations
+ cmp dword ptr g_SystemInfo+SYSTEM_INFO_dwNumberOfProcessors,1
+ jle MonTryEnterWouldBlock
+
+ ; exponential backoff: delay by approximately 2*ebx clock cycles (on a PIII)
+ mov eax, ebx
+MonTryEnterdelayLoop:
+ $repnop ; indicate to the CPU that we are spin waiting (useful for some Intel P4 multiprocs)
+ dec eax
+ jnz MonTryEnterdelayLoop
+
+ ; next time, wait a factor longer
+ imul ebx, dword ptr g_SpinConstants+SpinConstants_dwBackoffFactor
+
+ cmp ebx, dword ptr g_SpinConstants+SpinConstants_dwMaximumDuration
+ jle MonTryEnterRetrySyncBlock
+
+ ; We would need to block to enter the section. Return failure if
+ ; timeout is zero, else call the framed helper to do the blocking
+ ; form of TryEnter.
+MonTryEnterWouldBlock:
+ pop esi
+ pop ebx
+ pop ARGUMENT_REG2
+ test ARGUMENT_REG2, ARGUMENT_REG2
+ jnz MonTryEnterBlock
+ mov eax, [esp+4]
+ mov byte ptr [eax], 0
+ ret 4
+
+MonTryEnterRetryHelperSyncBlock:
+ jmp MonTryEnterRetrySyncBlock
+
+MonTryEnterFramedLockHelper:
+ ; ARGUMENT_REG1 has the object to synchronize on, must retrieve the
+ ; timeout parameter from the stack.
+ pop esi
+ pop ebx
+ pop ARGUMENT_REG2
+MonTryEnterBlock:
+ jmp JITutil_MonTryEnter
+
+@JIT_MonTryEnter@12 endp
+
+;**********************************************************************
+; This is a frameless helper for exiting a monitor on an object.
+; The object is in ARGUMENT_REG1. This tries the normal case (no
+; blocking or object allocation) in line and calls a framed helper
+; for the other cases.
+; ***** NOTE: if you make any changes to this routine, build with MON_DEBUG undefined
+; to make sure you don't break the non-debug build. This is very fragile code.
+; Also, propagate the changes to jithelp.s which contains the same helper and assembly code
+; (in AT&T syntax) for gnu assembler.
+@JIT_MonExitWorker@4 proc public
+ ; The thin lock logic needs an additional register to hold the thread, unfortunately
+ push esi
+
+ ; Check if the instance is NULL.
+ test ARGUMENT_REG1, ARGUMENT_REG1
+ jz MonExitFramedLockHelper
+
+ call _GetThread@0
+ mov esi,eax
+
+MonExitRetryThinLock:
+ ; Fetch the header dword and check its layout and the spin lock bit
+ mov eax, [ARGUMENT_REG1-SyncBlockIndexOffset_ASM]
+ ;BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX_SPIN_LOCK_ASM = BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX + BIT_SBLK_SPIN_LOCK
+ test eax, BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX_SPIN_LOCK_ASM
+ jnz MonExitNeedMoreTests
+
+ ; Ok, we have a "thin lock" layout - check whether the thread id matches
+ mov edx,eax
+ and edx, SBLK_MASK_LOCK_THREADID_ASM
+ cmp edx, [esi+Thread_m_ThreadId]
+ jne MonExitFramedLockHelper
+
+ ; Check the recursion level
+ test eax, SBLK_MASK_LOCK_RECLEVEL_ASM
+ jne MonExitDecRecursionLevel
+
+ ; It's zero - we're leaving the lock.
+ ; So try to put back a zero thread id.
+ ; edx and eax match in the thread id bits, and edx is zero elsewhere, so the xor is sufficient + xor edx,eax + lock cmpxchg [ARGUMENT_REG1-SyncBlockIndexOffset_ASM],edx + jnz MonExitRetryHelperThinLock + + ; We're done + sub [esi+Thread_m_dwLockCount],1 + pop esi + ret + +MonExitDecRecursionLevel: + lea edx, [eax-SBLK_LOCK_RECLEVEL_INC_ASM] + lock cmpxchg [ARGUMENT_REG1-SyncBlockIndexOffset_ASM],edx + jnz MonExitRetryHelperThinLock + + ; We're done + pop esi + ret + +MonExitNeedMoreTests: + ;Forward all special cases to the slow helper + ;BIT_SBLK_IS_HASHCODE_OR_SPIN_LOCK_ASM = BIT_SBLK_IS_HASHCODE + BIT_SBLK_SPIN_LOCK + test eax, BIT_SBLK_IS_HASHCODE_OR_SPIN_LOCK_ASM + jnz MonExitFramedLockHelper + + ; Get the sync block index and use it to compute the sync block pointer + mov ARGUMENT_REG2, dword ptr g_pSyncTable + and eax, MASK_SYNCBLOCKINDEX_ASM + mov ARGUMENT_REG2, [ARGUMENT_REG2+eax*SizeOfSyncTableEntry_ASM+SyncTableEntry_m_SyncBlock] + + ; was there a sync block? + test ARGUMENT_REG2, ARGUMENT_REG2 + jz MonExitFramedLockHelper + + ; Get a pointer to the lock object. + lea ARGUMENT_REG2, [ARGUMENT_REG2+SyncBlock_m_Monitor] + + ; Check if lock is held. + cmp [ARGUMENT_REG2+AwareLock_m_HoldingThread],esi + jne MonExitFramedLockHelper + +ifdef MON_DEBUG +ifdef TRACK_SYNC + push ARGUMENT_REG1 ; preserve regs + push ARGUMENT_REG2 + + push ARGUMENT_REG2 ; AwareLock + push [esp+8] ; return address + call LeaveSyncHelper + + pop ARGUMENT_REG2 ; restore regs + pop ARGUMENT_REG1 +endif ;TRACK_SYNC +endif ;MON_DEBUG + ; Reduce our recursion count. + dec dword ptr [ARGUMENT_REG2+AwareLock_m_Recursion] + jz MonExitLastRecursion + + pop esi + ret + +MonExitRetryHelperThinLock: + jmp MonExitRetryThinLock + +MonExitFramedLockHelper: + pop esi + jmp JITutil_MonExitWorker + + ; This is the last count we held on this lock, so release the lock. +MonExitLastRecursion: + dec dword ptr [esi+Thread_m_dwLockCount] + mov dword ptr [ARGUMENT_REG2+AwareLock_m_HoldingThread],0 + +MonExitRetry: + mov eax, [ARGUMENT_REG2+AwareLock_m_MonitorHeld] + lea esi, [eax-1] + lock cmpxchg [ARGUMENT_REG2+AwareLock_m_MonitorHeld], esi + jne MonExitRetryHelper + pop esi + test eax,0FFFFFFFEh + jne MonExitMustSignal + + ret + +MonExitMustSignal: + mov ARGUMENT_REG1, ARGUMENT_REG2 + jmp JITutil_MonSignal + +MonExitRetryHelper: + jmp MonExitRetry + +@JIT_MonExitWorker@4 endp + +;********************************************************************** +; This is a frameless helper for entering a static monitor on a class. +; The methoddesc is in ARGUMENT_REG1. This tries the normal case (no +; blocking or object allocation) in line and calls a framed helper +; for the other cases. +; Note we are changing the methoddesc parameter to a pointer to the +; AwareLock. +; ***** NOTE: if you make any changes to this routine, build with MON_DEBUG undefined +; to make sure you don't break the non-debug build. This is very fragile code. +; Also, propagate the changes to jithelp.s which contains the same helper and assembly code +; (in AT&T syntax) for gnu assembler. +@JIT_MonEnterStatic@4 proc public + ; We need another scratch register for what follows, so save EBX now so + ; we can use it for that purpose. + push ebx + + ; Attempt to acquire the lock +MonEnterStaticRetry: + mov eax, [ARGUMENT_REG1+AwareLock_m_MonitorHeld] + test eax,eax + jne MonEnterStaticHaveWaiters + + ; Common case, lock isn't held and there are no waiters. Attempt to + ; gain ownership ourselves. 
+ mov ebx,1 + lock cmpxchg [ARGUMENT_REG1+AwareLock_m_MonitorHeld],ebx + jnz MonEnterStaticRetryHelper + + pop ebx + + ; Success. Save the thread object in the lock and increment the use count. + call _GetThread@0 + mov [ARGUMENT_REG1+AwareLock_m_HoldingThread], eax + inc dword ptr [ARGUMENT_REG1+AwareLock_m_Recursion] + inc dword ptr [eax+Thread_m_dwLockCount] + +ifdef MON_DEBUG +ifdef TRACK_SYNC + push ARGUMENT_REG1 ; AwareLock + push [esp+4] ; return address + call EnterSyncHelper +endif ;TRACK_SYNC +endif ;MON_DEBUG + ret + + ; It's possible to get here with waiters but no lock held, but in this + ; case a signal is about to be fired which will wake up a waiter. So + ; for fairness sake we should wait too. + ; Check first for recursive lock attempts on the same thread. +MonEnterStaticHaveWaiters: + ; Get thread but preserve EAX (contains cached contents of m_MonitorHeld). + push eax + call _GetThread@0 + mov ebx,eax + pop eax + + ; Is mutex already owned by current thread? + cmp [ARGUMENT_REG1+AwareLock_m_HoldingThread],ebx + jne MonEnterStaticPrepareToWait + + ; Yes, bump our use count. + inc dword ptr [ARGUMENT_REG1+AwareLock_m_Recursion] +ifdef MON_DEBUG +ifdef TRACK_SYNC + push ARGUMENT_REG1 ; AwareLock + push [esp+4] ; return address + call EnterSyncHelper +endif ;TRACK_SYNC +endif ;MON_DEBUG + pop ebx + ret + +MonEnterStaticPrepareToWait: + pop ebx + + ; ARGUMENT_REG1 should have AwareLock. Call contention helper. + jmp JITutil_MonContention + +MonEnterStaticRetryHelper: + jmp MonEnterStaticRetry +@JIT_MonEnterStatic@4 endp + +;********************************************************************** +; A frameless helper for exiting a static monitor on a class. +; The methoddesc is in ARGUMENT_REG1. This tries the normal case (no +; blocking or object allocation) in line and calls a framed helper +; for the other cases. +; Note we are changing the methoddesc parameter to a pointer to the +; AwareLock. +; ***** NOTE: if you make any changes to this routine, build with MON_DEBUG undefined +; to make sure you don't break the non-debug build. This is very fragile code. +; Also, propagate the changes to jithelp.s which contains the same helper and assembly code +; (in AT&T syntax) for gnu assembler. +@JIT_MonExitStatic@4 proc public + +ifdef MON_DEBUG +ifdef TRACK_SYNC + push ARGUMENT_REG1 ; preserve regs + + push ARGUMENT_REG1 ; AwareLock + push [esp+8] ; return address + call LeaveSyncHelper + + pop [ARGUMENT_REG1] ; restore regs +endif ;TRACK_SYNC +endif ;MON_DEBUG + + ; Check if lock is held. + call _GetThread@0 + cmp [ARGUMENT_REG1+AwareLock_m_HoldingThread],eax + jne MonExitStaticLockError + + ; Reduce our recursion count. + dec dword ptr [ARGUMENT_REG1+AwareLock_m_Recursion] + jz MonExitStaticLastRecursion + + ret + + ; This is the last count we held on this lock, so release the lock. +MonExitStaticLastRecursion: + ; eax must have the thread object + dec dword ptr [eax+Thread_m_dwLockCount] + mov dword ptr [ARGUMENT_REG1+AwareLock_m_HoldingThread],0 + push ebx + +MonExitStaticRetry: + mov eax, [ARGUMENT_REG1+AwareLock_m_MonitorHeld] + lea ebx, [eax-1] + lock cmpxchg [ARGUMENT_REG1+AwareLock_m_MonitorHeld],ebx + jne MonExitStaticRetryHelper + pop ebx + test eax,0FFFFFFFEh + jne MonExitStaticMustSignal + + ret + +MonExitStaticMustSignal: + jmp JITutil_MonSignal + +MonExitStaticRetryHelper: + jmp MonExitStaticRetry + ; Throw a synchronization lock exception. 
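The static monitor helpers above operate directly on the AwareLock fields: m_MonitorHeld goes from 0 to 1 via an interlocked compare-exchange to take the lock, m_Recursion counts nested acquires by the owner, and the final release clears the owner, decrements m_MonitorHeld, and signals a waiter when any bit other than the low one was set. The following condensed C++ model of that protocol uses simplified types and a hypothetical SignalWaiter() in place of JITutil_MonSignal; it is an illustration, not the runtime's implementation.

#include <atomic>
#include <cstdint>

struct Thread;                       // opaque here
void SignalWaiter() {}               // hypothetical stand-in for the signalling helper

// Simplified model of the AwareLock fields used by the helpers above.
struct SimpleAwareLock
{
    std::atomic<int32_t> monitorHeld{0};   // bit 0: held, upper bits: waiter count
    uint32_t             recursion = 0;
    Thread*              holdingThread = nullptr;

    // Fast-path enter; returns false when the caller must take the
    // contention (blocking) path instead.
    bool TryEnter(Thread* self)
    {
        int32_t expected = 0;
        if (monitorHeld.compare_exchange_strong(expected, 1))
        {
            holdingThread = self;          // we own it now
            recursion = 1;
            return true;
        }
        if (holdingThread == self)         // recursive acquire by the owner
        {
            ++recursion;
            return true;
        }
        return false;                      // held elsewhere or waiters present
    }

    // Fast-path exit; assumes the caller verified ownership first.
    void Exit()
    {
        if (--recursion != 0)
            return;                        // still held recursively

        holdingThread = nullptr;
        int32_t old = monitorHeld.fetch_sub(1);
        if ((old & ~1) != 0)               // other bits set means waiters to wake
            SignalWaiter();
    }
};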
+MonExitStaticLockError: + mov ARGUMENT_REG1, CORINFO_SynchronizationLockException_ASM + jmp JIT_InternalThrow + +@JIT_MonExitStatic@4 endp + +; PatchedCodeStart and PatchedCodeEnd are used to determine bounds of patched code. +; + +_JIT_PatchedCodeStart@0 proc public +ret +_JIT_PatchedCodeStart@0 endp + +; +; Optimized TLS getters +; + + ALIGN 4 + +ifndef FEATURE_IMPLICIT_TLS +_GetThread@0 proc public + ; This will be overwritten at runtime with optimized GetThread implementation + jmp short _GetTLSDummy@0 + ; Just allocate space that will be filled in at runtime + db (TLS_GETTER_MAX_SIZE_ASM - 2) DUP (0CCh) +_GetThread@0 endp + + ALIGN 4 + +_GetAppDomain@0 proc public + ; This will be overwritten at runtime with optimized GetAppDomain implementation + jmp short _GetTLSDummy@0 + ; Just allocate space that will be filled in at runtime + db (TLS_GETTER_MAX_SIZE_ASM - 2) DUP (0CCh) +_GetAppDomain@0 endp + +_GetTLSDummy@0 proc public + xor eax,eax + ret +_GetTLSDummy@0 endp + + ALIGN 4 + +_ClrFlsGetBlock@0 proc public + ; This will be overwritten at runtime with optimized ClrFlsGetBlock implementation + jmp short _GetTLSDummy@0 + ; Just allocate space that will be filled in at runtime + db (TLS_GETTER_MAX_SIZE_ASM - 2) DUP (0CCh) +_ClrFlsGetBlock@0 endp +endif + +;********************************************************************** +; Write barriers generated at runtime + +PUBLIC _JIT_PatchedWriteBarrierStart@0 +_JIT_PatchedWriteBarrierStart@0 PROC +ret +_JIT_PatchedWriteBarrierStart@0 ENDP + +PatchedWriteBarrierHelper MACRO rg + ALIGN 8 +PUBLIC _JIT_WriteBarrier&rg&@0 +_JIT_WriteBarrier&rg&@0 PROC + ; Just allocate space that will be filled in at runtime + db (48) DUP (0CCh) +_JIT_WriteBarrier&rg&@0 ENDP + +ENDM + +PatchedWriteBarrierHelper +PatchedWriteBarrierHelper +PatchedWriteBarrierHelper +PatchedWriteBarrierHelper +PatchedWriteBarrierHelper +PatchedWriteBarrierHelper + +PUBLIC _JIT_PatchedWriteBarrierLast@0 +_JIT_PatchedWriteBarrierLast@0 PROC +ret +_JIT_PatchedWriteBarrierLast@0 ENDP + +;********************************************************************** +; PrecodeRemotingThunk is patched at runtime to activate it +ifdef FEATURE_REMOTING + ALIGN 16 +_PrecodeRemotingThunk@0 proc public + + ret ; This is going to be patched to "test ecx,ecx" + nop + + jz RemotingDone ; predicted not taken + + cmp dword ptr [ecx],11111111h ; This is going to be patched to address of the transparent proxy + je RemotingCheck ; predicted not taken + +RemotingDone: + ret + +RemotingCheck: + push eax ; save method desc + mov eax, dword ptr [ecx + TransparentProxyObject___stubData] + call [ecx + TransparentProxyObject___stub] + test eax, eax + jnz RemotingCtxMismatch + mov eax, [esp] + mov ax, [eax + MethodDesc_m_wFlags] + and ax, MethodDesc_mdcClassification + cmp ax, MethodDesc_mcComInterop + je ComPlusCall + pop eax ; throw away method desc + jmp RemotingDone + +RemotingCtxMismatch: + pop eax ; restore method desc + add esp, 4 ; pop return address into the precode + jmp _TransparentProxyStub_CrossContext@0 + +ComPlusCall: + pop eax ; restore method desc + mov [esp],eax ; replace return address into the precode with method desc (argument for TP stub) + jmp _InContextTPQuickDispatchAsmStub@0 + +_PrecodeRemotingThunk@0 endp +endif ; FEATURE_REMOTING + +_JIT_PatchedCodeLast@0 proc public +ret +_JIT_PatchedCodeLast@0 endp + +; This is the first function outside the "keep together range". Used by BBT scripts. 
+_JIT_PatchedCodeEnd@0 proc public +ret +_JIT_PatchedCodeEnd@0 endp + +; This is the ASM portion of JIT_IsInstanceOfInterface. For all the bizarre cases, it quickly +; fails and falls back on the JITutil_IsInstanceOfAny helper. So all failure cases take +; the slow path, too. +; +; ARGUMENT_REG1 = array or interface to check for. +; ARGUMENT_REG2 = instance to be cast. + + ALIGN 16 +PUBLIC @JIT_IsInstanceOfInterface@8 +@JIT_IsInstanceOfInterface@8 PROC + test ARGUMENT_REG2, ARGUMENT_REG2 + jz IsNullInst + + mov eax, [ARGUMENT_REG2] ; get MethodTable + + push ebx + push esi + movzx ebx, word ptr [eax+MethodTable_m_wNumInterfaces] + + ; check if this MT implements any interfaces + test ebx, ebx + jz IsInstanceOfInterfaceDoBizarre + + ; move Interface map ptr into eax + mov eax, [eax+MethodTable_m_pInterfaceMap] + +IsInstanceOfInterfaceTop: + ; eax -> current InterfaceInfo_t entry in interface map list +ifdef FEATURE_PREJIT + mov esi, [eax] + test esi, 1 + ; Move the deference out of line so that this jump is correctly predicted for the case + ; when there is no indirection + jnz IsInstanceOfInterfaceIndir + cmp ARGUMENT_REG1, esi +else + cmp ARGUMENT_REG1, [eax] +endif + je IsInstanceOfInterfaceFound + +IsInstanceOfInterfaceNext: + add eax, SIZEOF_InterfaceInfo_t + dec ebx + jnz IsInstanceOfInterfaceTop + + ; fall through to DoBizarre + +IsInstanceOfInterfaceDoBizarre: + pop esi + pop ebx + mov eax, [ARGUMENT_REG2] ; get MethodTable + test dword ptr [eax+MethodTable_m_dwFlags], NonTrivialInterfaceCastFlags + jnz IsInstanceOfInterfaceNonTrivialCast + +IsNullInst: + xor eax,eax + ret + +ifdef FEATURE_PREJIT +IsInstanceOfInterfaceIndir: + cmp ARGUMENT_REG1,[esi-1] + jne IsInstanceOfInterfaceNext +endif + +IsInstanceOfInterfaceFound: + pop esi + pop ebx + mov eax, ARGUMENT_REG2 ; the successful instance + ret + +IsInstanceOfInterfaceNonTrivialCast: + jmp @JITutil_IsInstanceOfInterface@8 + +@JIT_IsInstanceOfInterface@8 endp + +; This is the ASM portion of JIT_ChkCastInterface. For all the bizarre cases, it quickly +; fails and falls back on the JITutil_ChkCastAny helper. So all failure cases take +; the slow path, too. +; +; ARGUMENT_REG1 = array or interface to check for. +; ARGUMENT_REG2 = instance to be cast. 
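JIT_IsInstanceOfInterface above is essentially a bounded linear scan of the method table's interface map, with a tail call to the slower helper for the "bizarre" cases (no match plus a non-trivial-cast flag, FEATURE_PREJIT indirections, and so on). A schematic C++ version of the scan is sketched below; the types are simplified stand-ins, not the real MethodTable/InterfaceInfo_t layout.

#include <cstdint>

// Simplified stand-ins; the real layout lives in MethodTable / InterfaceInfo_t.
struct InterfaceEntry { const void* interfaceMT; };

struct SimpleMethodTable
{
    uint16_t              numInterfaces;
    const InterfaceEntry* interfaceMap;
    bool                  nonTrivialCast;   // models NonTrivialInterfaceCastFlags
};

struct ObjectStub { const SimpleMethodTable* methodTable; };

// Stand-in for the framed helper (the assembly tail-calls JITutil_IsInstanceOfInterface);
// the real one handles arrays, variance, COM objects and more. Here it just fails.
static const ObjectStub* IsInstanceOfInterfaceSlow(const void*, const ObjectStub*)
{
    return nullptr;
}

const ObjectStub* IsInstanceOfInterfaceFast(const void* interfaceMT, const ObjectStub* obj)
{
    if (obj == nullptr)
        return nullptr;                       // null never matches

    const SimpleMethodTable* mt = obj->methodTable;
    for (uint16_t i = 0; i < mt->numInterfaces; ++i)
    {
        if (mt->interfaceMap[i].interfaceMT == interfaceMT)
            return obj;                       // found: cast succeeds
    }

    // Not found in the flat map: only worth the slow-path call if the type
    // has one of the "non-trivial cast" properties; otherwise fail fast.
    return mt->nonTrivialCast ? IsInstanceOfInterfaceSlow(interfaceMT, obj) : nullptr;
}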
+ + ALIGN 16 +PUBLIC @JIT_ChkCastInterface@8 +@JIT_ChkCastInterface@8 PROC + test ARGUMENT_REG2, ARGUMENT_REG2 + jz ChkCastInterfaceIsNullInst + + mov eax, [ARGUMENT_REG2] ; get MethodTable + + push ebx + push esi + movzx ebx, word ptr [eax+MethodTable_m_wNumInterfaces] + + ; speculatively move Interface map ptr into eax + mov eax, [eax+MethodTable_m_pInterfaceMap] + + ; check if this MT implements any interfaces + test ebx, ebx + jz ChkCastInterfaceDoBizarre + +ChkCastInterfaceTop: + ; eax -> current InterfaceInfo_t entry in interface map list +ifdef FEATURE_PREJIT + mov esi, [eax] + test esi, 1 + ; Move the deference out of line so that this jump is correctly predicted for the case + ; when there is no indirection + jnz ChkCastInterfaceIndir + cmp ARGUMENT_REG1, esi +else + cmp ARGUMENT_REG1, [eax] +endif + je ChkCastInterfaceFound + +ChkCastInterfaceNext: + add eax, SIZEOF_InterfaceInfo_t + dec ebx + jnz ChkCastInterfaceTop + + ; fall through to DoBizarre + +ChkCastInterfaceDoBizarre: + pop esi + pop ebx + jmp @JITutil_ChkCastInterface@8 + +ifdef FEATURE_PREJIT +ChkCastInterfaceIndir: + cmp ARGUMENT_REG1,[esi-1] + jne ChkCastInterfaceNext +endif + +ChkCastInterfaceFound: + pop esi + pop ebx + +ChkCastInterfaceIsNullInst: + mov eax, ARGUMENT_REG2 ; either null, or the successful instance + ret + +@JIT_ChkCastInterface@8 endp + + end diff --git a/src/vm/i386/jitinterfacex86.cpp b/src/vm/i386/jitinterfacex86.cpp new file mode 100644 index 0000000000..949b115ce2 --- /dev/null +++ b/src/vm/i386/jitinterfacex86.cpp @@ -0,0 +1,1922 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// =========================================================================== +// File: JITinterfaceX86.CPP +// +// =========================================================================== + +// This contains JITinterface routines that are tailored for +// X86 platforms. Non-X86 versions of these can be found in +// JITinterfaceGen.cpp + + +#include "common.h" +#include "jitinterface.h" +#include "eeconfig.h" +#include "excep.h" +#include "comdelegate.h" +#ifdef FEATURE_REMOTING +#include "remoting.h" // create context bound and remote class instances +#endif +#include "field.h" +#include "ecall.h" +#include "asmconstants.h" +#include "virtualcallstub.h" +#include "eventtrace.h" +#include "threadsuspend.h" + +#if defined(_DEBUG) && !defined (WRITE_BARRIER_CHECK) +#define WRITE_BARRIER_CHECK 1 +#endif + +// To test with MON_DEBUG off, comment out the following line. DO NOT simply define +// to be 0 as the checks are for #ifdef not #if 0. 
+// +#ifdef _DEBUG +#define MON_DEBUG 1 +#endif + +class generation; +extern "C" generation generation_table[]; + +extern "C" void STDCALL JIT_WriteBarrierReg_PreGrow();// JIThelp.asm/JIThelp.s +extern "C" void STDCALL JIT_WriteBarrierReg_PostGrow();// JIThelp.asm/JIThelp.s + +#ifdef _DEBUG +extern "C" void STDCALL WriteBarrierAssert(BYTE* ptr, Object* obj) +{ + STATIC_CONTRACT_SO_TOLERANT; + WRAPPER_NO_CONTRACT; + + static BOOL fVerifyHeap = -1; + + if (fVerifyHeap == -1) + fVerifyHeap = g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_GC; + + if (fVerifyHeap) + { + obj->Validate(FALSE); + if(GCHeap::GetGCHeap()->IsHeapPointer(ptr)) + { + Object* pObj = *(Object**)ptr; + _ASSERTE (pObj == NULL || GCHeap::GetGCHeap()->IsHeapPointer(pObj)); + } + } + else + { + _ASSERTE((g_lowest_address <= ptr && ptr < g_highest_address) || + ((size_t)ptr < MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT)); + } +} + +#endif // _DEBUG + +/****************************************************************************/ +/* assigns 'val to 'array[idx], after doing all the proper checks */ + +/* note that we can do almost as well in portable code, but this + squezes the last little bit of perf out */ + +__declspec(naked) void F_CALL_CONV JIT_Stelem_Ref(PtrArray* array, unsigned idx, Object* val) +{ + STATIC_CONTRACT_SO_TOLERANT; + STATIC_CONTRACT_THROWS; + STATIC_CONTRACT_GC_TRIGGERS; + + enum { CanCast = TypeHandle::CanCast, +#if CHECK_APP_DOMAIN_LEAKS + EEClassFlags = EEClass::AUXFLAG_APP_DOMAIN_AGILE | + EEClass::AUXFLAG_CHECK_APP_DOMAIN_AGILE, +#endif // CHECK_APP_DOMAIN_LEAKS + }; + + __asm { + mov EAX, [ESP+4] // EAX = val + + test ECX, ECX + je ThrowNullReferenceException + + cmp EDX, [ECX+4]; // test if in bounds + jae ThrowIndexOutOfRangeException + + test EAX, EAX + jz Assigning0 + +#if CHECK_APP_DOMAIN_LEAKS + mov EAX,[g_pConfig] + movzx EAX, [EAX]EEConfig.fAppDomainLeaks; + test EAX, EAX + jz NoCheck + // Check if the instance is agile or check agile + mov EAX, [ECX] + mov EAX, [EAX]MethodTable.m_ElementTypeHnd + test EAX, 2 // Check for non-MT + jnz NoCheck + // Check VMflags of element type + mov EAX, [EAX]MethodTable.m_pEEClass + mov EAX, dword ptr [EAX]EEClass.m_wAuxFlags + test EAX, EEClassFlags + jnz NeedFrame // Jump to the generic case so we can do an app domain check + NoCheck: + mov EAX, [ESP+4] // EAX = val +#endif // CHECK_APP_DOMAIN_LEAKS + + push EDX + mov EDX, [ECX] + mov EDX, [EDX]MethodTable.m_ElementTypeHnd + + cmp EDX, [EAX] // do we have an exact match + jne NotExactMatch + +DoWrite2: + pop EDX + lea EDX, [ECX + 4*EDX + 8] + call JIT_WriteBarrierEAX + ret 4 + +Assigning0: + // write barrier is not necessary for assignment of NULL references + mov [ECX + 4*EDX + 8], EAX + ret 4 + +DoWrite: + mov EAX, [ESP+4] // EAX = val + lea EDX, [ECX + 4*EDX + 8] + call JIT_WriteBarrierEAX + ret 4 + +NotExactMatch: + cmp EDX, [g_pObjectClass] // are we assigning to Array of objects + je DoWrite2 + + // push EDX // caller-save ECX and EDX + push ECX + + push EDX // element type handle + push EAX // object + + call ObjIsInstanceOfNoGC + + pop ECX // caller-restore ECX and EDX + pop EDX + + cmp EAX, CanCast + je DoWrite + +#if CHECK_APP_DOMAIN_LEAKS +NeedFrame: +#endif + // Call the helper that knows how to erect a frame + push EDX + push ECX + + lea ECX, [ESP+8+4] // ECX = address of object being stored + lea EDX, [ESP] // EDX = address of array + + call ArrayStoreCheck + + pop ECX // these might have been updated! 
+ pop EDX + + cmp EAX, EAX // set zero flag + jnz Epilog // This jump never happens, it keeps the epilog walker happy + + jmp DoWrite + +ThrowNullReferenceException: + mov ECX, CORINFO_NullReferenceException + jmp Throw + +ThrowIndexOutOfRangeException: + mov ECX, CORINFO_IndexOutOfRangeException + +Throw: + call JIT_InternalThrowFromHelper +Epilog: + ret 4 + } +} + +extern "C" __declspec(naked) Object* F_CALL_CONV JIT_IsInstanceOfClass(MethodTable *pMT, Object *pObject) +{ + STATIC_CONTRACT_SO_TOLERANT; + STATIC_CONTRACT_THROWS; + STATIC_CONTRACT_GC_TRIGGERS; + +#if defined(FEATURE_TYPEEQUIVALENCE) || defined(FEATURE_REMOTING) + enum + { + MTEquivalenceFlags = MethodTable::public_enum_flag_HasTypeEquivalence, + }; +#endif + + __asm + { + // Check if the instance is NULL + test ARGUMENT_REG2, ARGUMENT_REG2 + je ReturnInst + + // Get the method table for the instance. + mov eax, dword ptr [ARGUMENT_REG2] + + // Check if they are the same. + cmp eax, ARGUMENT_REG1 + jne CheckParent + + ReturnInst: + // We matched the class. + mov eax, ARGUMENT_REG2 + ret + + // Check if the parent class matches. + CheckParent: + mov eax, dword ptr [eax]MethodTable.m_pParentMethodTable + cmp eax, ARGUMENT_REG1 + je ReturnInst + + // Check if we hit the top of the hierarchy. + test eax, eax + jne CheckParent + + // Check if the instance is a proxy. +#if defined(FEATURE_TYPEEQUIVALENCE) || defined(FEATURE_REMOTING) + mov eax, [ARGUMENT_REG2] + test dword ptr [eax]MethodTable.m_dwFlags, MTEquivalenceFlags + jne SlowPath +#endif + // It didn't match and it isn't a proxy and it doesn't have type equivalence + xor eax, eax + ret + + // Cast didn't match, so try the worker to check for the proxy/equivalence case. +#if defined(FEATURE_TYPEEQUIVALENCE) || defined(FEATURE_REMOTING) + SlowPath: + jmp JITutil_IsInstanceOfAny +#endif + } +} + +extern "C" __declspec(naked) Object* F_CALL_CONV JIT_ChkCastClass(MethodTable *pMT, Object *pObject) +{ + STATIC_CONTRACT_SO_TOLERANT; + STATIC_CONTRACT_THROWS; + STATIC_CONTRACT_GC_TRIGGERS; + + __asm + { + // Check if the instance is NULL + test ARGUMENT_REG2, ARGUMENT_REG2 + je ReturnInst + + // Get the method table for the instance. + mov eax, dword ptr [ARGUMENT_REG2] + + // Check if they are the same. + cmp eax, ARGUMENT_REG1 + jne CheckParent + + ReturnInst: + // We matched the class. + mov eax, ARGUMENT_REG2 + ret + + // Check if the parent class matches. + CheckParent: + mov eax, dword ptr [eax]MethodTable.m_pParentMethodTable + cmp eax, ARGUMENT_REG1 + je ReturnInst + + // Check if we hit the top of the hierarchy. + test eax, eax + jne CheckParent + + // Call out to JITutil_ChkCastAny to handle the proxy case and throw a rich + // InvalidCastException in case of failure. + jmp JITutil_ChkCastAny + } +} + +extern "C" __declspec(naked) Object* F_CALL_CONV JIT_ChkCastClassSpecial(MethodTable *pMT, Object *pObject) +{ + STATIC_CONTRACT_SO_TOLERANT; + STATIC_CONTRACT_THROWS; + STATIC_CONTRACT_GC_TRIGGERS; + + // Assumes that the check for the trivial cases has been inlined by the JIT. + + __asm + { + // Get the method table for the instance. + mov eax, dword ptr [ARGUMENT_REG2] + + // Check if the parent class matches. + CheckParent: + mov eax, dword ptr [eax]MethodTable.m_pParentMethodTable + cmp eax, ARGUMENT_REG1 + jne CheckNull + + // We matched the class. + mov eax, ARGUMENT_REG2 + ret + + CheckNull: + // Check if we hit the top of the hierarchy. 
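JIT_IsInstanceOfClass and JIT_ChkCastClass above share one core loop: compare the object's MethodTable with the target, then follow m_pParentMethodTable until a match or the root, deferring proxies and type equivalence to the slower JITutil_* helpers. A minimal C++ sketch of just that walk (simplified types, no proxy or equivalence handling):

struct SimpleMT { const SimpleMT* parent; };
struct Obj      { const SimpleMT* mt; };

// Returns obj if obj is an instance of target or of one of its subclasses,
// otherwise nullptr. The real helpers additionally branch to JITutil_* for
// proxies and type equivalence; that part is omitted here.
const Obj* IsInstanceOfClassFast(const SimpleMT* target, const Obj* obj)
{
    if (obj == nullptr)
        return nullptr;                    // null input just yields null

    for (const SimpleMT* mt = obj->mt; mt != nullptr; mt = mt->parent)
    {
        if (mt == target)
            return obj;                    // exact type or some base class matched
    }
    return nullptr;                        // walked off the top of the hierarchy
}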
+ test eax, eax + jne CheckParent + + // Call out to JITutil_ChkCastAny to handle the proxy case and throw a rich + // InvalidCastException in case of failure. + jmp JITutil_ChkCastAny + } +} + +HCIMPL1_V(INT32, JIT_Dbl2IntOvf, double val) +{ + FCALL_CONTRACT; + + INT64 ret = HCCALL1_V(JIT_Dbl2Lng, val); + + if (ret != (INT32) ret) + goto THROW; + + return (INT32) ret; + +THROW: + FCThrow(kOverflowException); +} +HCIMPLEND + + +FCDECL1(Object*, JIT_New, CORINFO_CLASS_HANDLE typeHnd_); + +#ifdef FEATURE_REMOTING +HCIMPL1(Object*, JIT_NewCrossContextHelper, CORINFO_CLASS_HANDLE typeHnd_) +{ + CONTRACTL + { + FCALL_CHECK; + } + CONTRACTL_END; + + TypeHandle typeHnd(typeHnd_); + + OBJECTREF newobj = NULL; + HELPER_METHOD_FRAME_BEGIN_RET_0(); // Set up a frame + + _ASSERTE(!typeHnd.IsTypeDesc()); // we never use this helper for arrays + MethodTable *pMT = typeHnd.AsMethodTable(); + pMT->CheckRestore(); + + // Remoting services determines if the current context is appropriate + // for activation. If the current context is OK then it creates an object + // else it creates a proxy. + // Note: 3/20/03 Added fIsNewObj flag to indicate that CreateProxyOrObject + // is being called from Jit_NewObj ... the fIsCom flag is FALSE by default - + // which used to be the case before this change as well. + newobj = CRemotingServices::CreateProxyOrObject(pMT,FALSE /*fIsCom*/,TRUE/*fIsNewObj*/); + + HELPER_METHOD_FRAME_END(); + return(OBJECTREFToObject(newobj)); +} +HCIMPLEND +#endif // FEATURE_REMOTING + +HCIMPL1(Object*, AllocObjectWrapper, MethodTable *pMT) +{ + CONTRACTL + { + FCALL_CHECK; + } + CONTRACTL_END; + + OBJECTREF newObj = NULL; + HELPER_METHOD_FRAME_BEGIN_RET_0(); // Set up a frame + newObj = AllocateObject(pMT); + HELPER_METHOD_FRAME_END(); + return OBJECTREFToObject(newObj); +} +HCIMPLEND + +/*********************************************************************/ +// This is a frameless helper for allocating an object whose type derives +// from marshalbyref. We check quickly to see if it is configured to +// have remote activation. If not, we use the superfast allocator to +// allocate the object. Otherwise, we take the slow path of allocating +// the object via remoting services. 
+#ifdef FEATURE_REMOTING +__declspec(naked) Object* F_CALL_CONV JIT_NewCrossContext(CORINFO_CLASS_HANDLE typeHnd_) +{ + STATIC_CONTRACT_SO_TOLERANT; + STATIC_CONTRACT_THROWS; + STATIC_CONTRACT_GC_TRIGGERS; + + _asm + { + // Check if remoting has been configured + push ARGUMENT_REG1 // save registers + push ARGUMENT_REG1 + call CRemotingServices::RequiresManagedActivation + test eax, eax + // Jump to the slow path + jne SpecialOrXCtxHelper +#ifdef _DEBUG + push LL_INFO10 + push LF_GCALLOC + call LoggingOn + test eax, eax + jne AllocWithLogHelper +#endif // _DEBUG + + // if the object doesn't have a finalizer and the size is small, jump to super fast asm helper + mov ARGUMENT_REG1, [esp] + call MethodTable::CannotUseSuperFastHelper + test eax, eax + jne FastHelper + + pop ARGUMENT_REG1 + // Jump to the super fast helper + jmp dword ptr [hlpDynamicFuncTable + DYNAMIC_CORINFO_HELP_NEWSFAST * SIZE VMHELPDEF]VMHELPDEF.pfnHelper + +FastHelper: + pop ARGUMENT_REG1 + // Jump to the helper + jmp JIT_New + +SpecialOrXCtxHelper: +#ifdef FEATURE_COMINTEROP + test eax, ComObjectType + jz XCtxHelper + pop ARGUMENT_REG1 + // Jump to the helper + jmp JIT_New + +XCtxHelper: +#endif // FEATURE_COMINTEROP + + pop ARGUMENT_REG1 + // Jump to the helper + jmp JIT_NewCrossContextHelper + +#ifdef _DEBUG +AllocWithLogHelper: + pop ARGUMENT_REG1 + // Jump to the helper + jmp AllocObjectWrapper +#endif // _DEBUG + } +} +#endif // FEATURE_REMOTING + + +/*********************************************************************/ +extern "C" void* g_TailCallFrameVptr; +void* g_TailCallFrameVptr; + +#ifdef FEATURE_HIJACK +extern "C" void STDCALL JIT_TailCallHelper(Thread * pThread); +void STDCALL JIT_TailCallHelper(Thread * pThread) +{ + CONTRACTL { + NOTHROW; + GC_NOTRIGGER; + SO_TOLERANT; + } CONTRACTL_END; + + pThread->UnhijackThread(); +} +#endif // FEATURE_HIJACK + +#if CHECK_APP_DOMAIN_LEAKS +HCIMPL1(void *, SetObjectAppDomain, Object *pObject) +{ + FCALL_CONTRACT; + DEBUG_ONLY_FUNCTION; + + HELPER_METHOD_FRAME_BEGIN_RET_ATTRIB_NOPOLL(Frame::FRAME_ATTR_CAPTURE_DEPTH_2|Frame::FRAME_ATTR_EXACT_DEPTH|Frame::FRAME_ATTR_NO_THREAD_ABORT); + pObject->SetAppDomain(); + HELPER_METHOD_FRAME_END(); + + return pObject; +} +HCIMPLEND +#endif // CHECK_APP_DOMAIN_LEAKS + + // emit code that adds MIN_OBJECT_SIZE to reg if reg is unaligned thus making it aligned +void JIT_TrialAlloc::EmitAlignmentRoundup(CPUSTUBLINKER *psl, X86Reg testAlignReg, X86Reg adjReg, Flags flags) +{ + STANDARD_VM_CONTRACT; + + _ASSERTE((MIN_OBJECT_SIZE & 7) == 4); // want to change alignment + + CodeLabel *AlreadyAligned = psl->NewCodeLabel(); + + // test reg, 7 + psl->Emit16(0xC0F7 | (static_cast(testAlignReg) << 8)); + psl->Emit32(0x7); + + // jz alreadyAligned + if (flags & ALIGN8OBJ) + { + psl->X86EmitCondJump(AlreadyAligned, X86CondCode::kJNZ); + } + else + { + psl->X86EmitCondJump(AlreadyAligned, X86CondCode::kJZ); + } + + psl->X86EmitAddReg(adjReg, MIN_OBJECT_SIZE); + // AlreadyAligned: + psl->EmitLabel(AlreadyAligned); +} + + // if 'reg' is unaligned, then set the dummy object at EAX and increment EAX past + // the dummy object +void JIT_TrialAlloc::EmitDummyObject(CPUSTUBLINKER *psl, X86Reg alignTestReg, Flags flags) +{ + STANDARD_VM_CONTRACT; + + CodeLabel *AlreadyAligned = psl->NewCodeLabel(); + + // test reg, 7 + psl->Emit16(0xC0F7 | (static_cast(alignTestReg) << 8)); + psl->Emit32(0x7); + + // jz alreadyAligned + if (flags & ALIGN8OBJ) + { + psl->X86EmitCondJump(AlreadyAligned, X86CondCode::kJNZ); + } + else + { + 
psl->X86EmitCondJump(AlreadyAligned, X86CondCode::kJZ); + } + + // Make the fake object + // mov EDX, [g_pObjectClass] + psl->Emit16(0x158B); + psl->Emit32((int)(size_t)&g_pObjectClass); + + // mov [EAX], EDX + psl->X86EmitOffsetModRM(0x89, kEDX, kEAX, 0); + +#if CHECK_APP_DOMAIN_LEAKS + EmitSetAppDomain(psl); +#endif + + // add EAX, MIN_OBJECT_SIZE + psl->X86EmitAddReg(kEAX, MIN_OBJECT_SIZE); + + // AlreadyAligned: + psl->EmitLabel(AlreadyAligned); +} + +void JIT_TrialAlloc::EmitCore(CPUSTUBLINKER *psl, CodeLabel *noLock, CodeLabel *noAlloc, Flags flags) +{ + STANDARD_VM_CONTRACT; + + // Upon entry here, ecx contains the method we are to try allocate memory for + // Upon exit, eax contains the allocated memory, edx is trashed, and ecx undisturbed + + if (flags & MP_ALLOCATOR) + { + if (flags & (ALIGN8 | SIZE_IN_EAX | ALIGN8OBJ)) + { + if (flags & ALIGN8OBJ) + { + // mov eax, [ecx]MethodTable.m_BaseSize + psl->X86EmitIndexRegLoad(kEAX, kECX, offsetof(MethodTable, m_BaseSize)); + } + + psl->X86EmitPushReg(kEBX); // we need a spare register + } + else + { + // mov eax, [ecx]MethodTable.m_BaseSize + psl->X86EmitIndexRegLoad(kEAX, kECX, offsetof(MethodTable, m_BaseSize)); + } + + assert( ((flags & ALIGN8)==0 || // EAX loaded by else statement + (flags & SIZE_IN_EAX) || // EAX already comes filled out + (flags & ALIGN8OBJ) ) // EAX loaded in the if (flags & ALIGN8OBJ) statement + && "EAX should contain size for allocation and it doesnt!!!"); + + // Fetch current thread into EDX, preserving EAX and ECX + psl->X86EmitCurrentThreadFetch(kEDX, (1<X86EmitOffsetModRM(0x8B, kEBX, kEDX, offsetof(Thread, m_alloc_context) + offsetof(alloc_context, alloc_ptr)); + // add EAX, EBX + psl->Emit16(0xC303); + if (flags & ALIGN8) + EmitAlignmentRoundup(psl, kEBX, kEAX, flags); // bump EAX up size by 12 if EBX unaligned (so that we are aligned) + } + else + { + // add eax, [edx]Thread.m_alloc_context.alloc_ptr + psl->X86EmitOffsetModRM(0x03, kEAX, kEDX, offsetof(Thread, m_alloc_context) + offsetof(alloc_context, alloc_ptr)); + } + + // cmp eax, [edx]Thread.m_alloc_context.alloc_limit + psl->X86EmitOffsetModRM(0x3b, kEAX, kEDX, offsetof(Thread, m_alloc_context) + offsetof(alloc_context, alloc_limit)); + + // ja noAlloc + psl->X86EmitCondJump(noAlloc, X86CondCode::kJA); + + // Fill in the allocation and get out. + + // mov [edx]Thread.m_alloc_context.alloc_ptr, eax + psl->X86EmitIndexRegStore(kEDX, offsetof(Thread, m_alloc_context) + offsetof(alloc_context, alloc_ptr), kEAX); + + if (flags & (ALIGN8 | SIZE_IN_EAX | ALIGN8OBJ)) + { + // mov EAX, EBX + psl->Emit16(0xC38B); + // pop EBX + psl->X86EmitPopReg(kEBX); + + if (flags & ALIGN8) + EmitDummyObject(psl, kEAX, flags); + } + else + { + // sub eax, [ecx]MethodTable.m_BaseSize + psl->X86EmitOffsetModRM(0x2b, kEAX, kECX, offsetof(MethodTable, m_BaseSize)); + } + + // mov dword ptr [eax], ecx + psl->X86EmitIndexRegStore(kEAX, 0, kECX); + } + else + { + // Take the GC lock (there is no lock prefix required - we will use JIT_TrialAllocSFastMP on an MP System). + // inc dword ptr [m_GCLock] + psl->Emit16(0x05ff); + psl->Emit32((int)(size_t)&m_GCLock); + + // jnz NoLock + psl->X86EmitCondJump(noLock, X86CondCode::kJNZ); + + if (flags & SIZE_IN_EAX) + { + // mov edx, eax + psl->Emit16(0xd08b); + } + else + { + // mov edx, [ecx]MethodTable.m_BaseSize + psl->X86EmitIndexRegLoad(kEDX, kECX, offsetof(MethodTable, m_BaseSize)); + } + + // mov eax, dword ptr [generation_table] + psl->Emit8(0xA1); + psl->Emit32((int)(size_t)&generation_table); + + // Try the allocation. 
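EmitCore above emits the classic bump-pointer fast path for the per-thread allocator: add the object size to alloc_ptr, compare against alloc_limit, then either commit the new pointer and stamp the MethodTable into the first word, or bail out to the framed helper. For orientation, here is a C++ sketch of that fast path over a simplified allocation context; the field names only loosely model alloc_context and this is not the runtime's code.

#include <cstddef>
#include <cstdint>

// Simplified per-thread allocation context (models alloc_ptr / alloc_limit).
struct AllocContext
{
    uint8_t* allocPtr;
    uint8_t* allocLimit;
};

// Fast-path allocation: returns the new object, or nullptr when the context
// is exhausted and the caller must take the slow (framed, GC-capable) helper.
void* TryFastAllocate(AllocContext& ctx, const void* methodTable, size_t baseSize)
{
    uint8_t* result = ctx.allocPtr;
    uint8_t* newPtr = result + baseSize;

    if (newPtr > ctx.allocLimit)            // the "ja noAlloc" in the emitted code
        return nullptr;

    ctx.allocPtr = newPtr;                  // commit the bump
    *reinterpret_cast<const void**>(result) = methodTable;  // MT goes in the first word
    return result;
}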
+ // add edx, eax + psl->Emit16(0xd003); + + if (flags & (ALIGN8 | ALIGN8OBJ)) + EmitAlignmentRoundup(psl, kEAX, kEDX, flags); // bump up EDX size by 12 if EAX unaligned (so that we are aligned) + + // cmp edx, dword ptr [generation_table+4] + psl->Emit16(0x153b); + psl->Emit32((int)(size_t)&generation_table + 4); + + // ja noAlloc + psl->X86EmitCondJump(noAlloc, X86CondCode::kJA); + + // Fill in the allocation and get out. + // mov dword ptr [generation_table], edx + psl->Emit16(0x1589); + psl->Emit32((int)(size_t)&generation_table); + + if (flags & (ALIGN8 | ALIGN8OBJ)) + EmitDummyObject(psl, kEAX, flags); + + // mov dword ptr [eax], ecx + psl->X86EmitIndexRegStore(kEAX, 0, kECX); + + // mov dword ptr [m_GCLock], 0FFFFFFFFh + psl->Emit16(0x05C7); + psl->Emit32((int)(size_t)&m_GCLock); + psl->Emit32(0xFFFFFFFF); + } + + +#ifdef INCREMENTAL_MEMCLR + // We're planning to get rid of this anyhow according to Patrick + _ASSERTE(!"NYI"); +#endif // INCREMENTAL_MEMCLR +} + +#if CHECK_APP_DOMAIN_LEAKS +void JIT_TrialAlloc::EmitSetAppDomain(CPUSTUBLINKER *psl) +{ + STANDARD_VM_CONTRACT; + + if (!g_pConfig->AppDomainLeaks()) + return; + + // At both entry & exit, eax contains the allocated object. + // ecx is preserved, edx is not. + + // + // Add in a call to SetAppDomain. (Note that this + // probably would have been easier to implement by just not using + // the generated helpers in a checked build, but we'd lose code + // coverage that way.) + // + + // Save ECX over function call + psl->X86EmitPushReg(kECX); + + // mov object to ECX + // mov ecx, eax + psl->Emit16(0xc88b); + + // SetObjectAppDomain pops its arg & returns object in EAX + psl->X86EmitCall(psl->NewExternalCodeLabel((LPVOID)SetObjectAppDomain), 4); + + psl->X86EmitPopReg(kECX); +} + +#endif // CHECK_APP_DOMAIN_LEAKS + + +void JIT_TrialAlloc::EmitNoAllocCode(CPUSTUBLINKER *psl, Flags flags) +{ + STANDARD_VM_CONTRACT; + + if (flags & MP_ALLOCATOR) + { + if (flags & (ALIGN8|SIZE_IN_EAX)) + psl->X86EmitPopReg(kEBX); + } + else + { + // mov dword ptr [m_GCLock], 0FFFFFFFFh + psl->Emit16(0x05c7); + psl->Emit32((int)(size_t)&m_GCLock); + psl->Emit32(0xFFFFFFFF); + } +} + +void *JIT_TrialAlloc::GenAllocSFast(Flags flags) +{ + STANDARD_VM_CONTRACT; + + CPUSTUBLINKER sl; + + CodeLabel *noLock = sl.NewCodeLabel(); + CodeLabel *noAlloc = sl.NewCodeLabel(); + + // Emit the main body of the trial allocator, be it SP or MP + EmitCore(&sl, noLock, noAlloc, flags); + +#if CHECK_APP_DOMAIN_LEAKS + EmitSetAppDomain(&sl); +#endif + + // Here we are at the end of the success case - just emit a ret + sl.X86EmitReturn(0); + + // Come here in case of no space + sl.EmitLabel(noAlloc); + + // Release the lock in the uniprocessor case + EmitNoAllocCode(&sl, flags); + + // Come here in case of failure to get the lock + sl.EmitLabel(noLock); + + // Jump to the framed helper + sl.X86EmitNearJump(sl.NewExternalCodeLabel((LPVOID)JIT_New)); + + Stub *pStub = sl.Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()); + + return (void *)pStub->GetEntryPoint(); +} + + +void *JIT_TrialAlloc::GenBox(Flags flags) +{ + STANDARD_VM_CONTRACT; + + CPUSTUBLINKER sl; + + CodeLabel *noLock = sl.NewCodeLabel(); + CodeLabel *noAlloc = sl.NewCodeLabel(); + + // Save address of value to be boxed + sl.X86EmitPushReg(kEBX); + sl.Emit16(0xda8b); + + // Save the MethodTable ptr + sl.X86EmitPushReg(kECX); + + // mov ecx, [ecx]MethodTable.m_pWriteableData + sl.X86EmitOffsetModRM(0x8b, kECX, kECX, offsetof(MethodTable, m_pWriteableData)); + + // Check whether the class has 
not been initialized + // test [ecx]MethodTableWriteableData.m_dwFlags,MethodTableWriteableData::enum_flag_Unrestored + sl.X86EmitOffsetModRM(0xf7, (X86Reg)0x0, kECX, offsetof(MethodTableWriteableData, m_dwFlags)); + sl.Emit32(MethodTableWriteableData::enum_flag_Unrestored); + + // Restore the MethodTable ptr in ecx + sl.X86EmitPopReg(kECX); + + // jne noAlloc + sl.X86EmitCondJump(noAlloc, X86CondCode::kJNE); + + // Emit the main body of the trial allocator + EmitCore(&sl, noLock, noAlloc, flags); + +#if CHECK_APP_DOMAIN_LEAKS + EmitSetAppDomain(&sl); +#endif + + // Here we are at the end of the success case + + // Check whether the object contains pointers + // test [ecx]MethodTable.m_dwFlags,MethodTable::enum_flag_ContainsPointers + sl.X86EmitOffsetModRM(0xf7, (X86Reg)0x0, kECX, offsetof(MethodTable, m_dwFlags)); + sl.Emit32(MethodTable::enum_flag_ContainsPointers); + + CodeLabel *pointerLabel = sl.NewCodeLabel(); + + // jne pointerLabel + sl.X86EmitCondJump(pointerLabel, X86CondCode::kJNE); + + // We have no pointers - emit a simple inline copy loop + + // mov ecx, [ecx]MethodTable.m_BaseSize + sl.X86EmitOffsetModRM(0x8b, kECX, kECX, offsetof(MethodTable, m_BaseSize)); + + // sub ecx,12 + sl.X86EmitSubReg(kECX, 12); + + CodeLabel *loopLabel = sl.NewCodeLabel(); + + sl.EmitLabel(loopLabel); + + // mov edx,[ebx+ecx] + sl.X86EmitOp(0x8b, kEDX, kEBX, 0, kECX, 1); + + // mov [eax+ecx+4],edx + sl.X86EmitOp(0x89, kEDX, kEAX, 4, kECX, 1); + + // sub ecx,4 + sl.X86EmitSubReg(kECX, 4); + + // jg loopLabel + sl.X86EmitCondJump(loopLabel, X86CondCode::kJGE); + + sl.X86EmitPopReg(kEBX); + + sl.X86EmitReturn(0); + + // Arrive at this label if there are pointers in the object + sl.EmitLabel(pointerLabel); + + // Do call to CopyValueClassUnchecked(object, data, pMT) + + // Pass pMT (still in ECX) + sl.X86EmitPushReg(kECX); + + // Pass data (still in EBX) + sl.X86EmitPushReg(kEBX); + + // Save the address of the object just allocated + // mov ebx,eax + sl.Emit16(0xD88B); + + + // Pass address of first user byte in the newly allocated object + sl.X86EmitAddReg(kEAX, 4); + sl.X86EmitPushReg(kEAX); + + // call CopyValueClass + sl.X86EmitCall(sl.NewExternalCodeLabel((LPVOID) CopyValueClassUnchecked), 12); + + // Restore the address of the newly allocated object and return it. + // mov eax,ebx + sl.Emit16(0xC38B); + + sl.X86EmitPopReg(kEBX); + + sl.X86EmitReturn(0); + + // Come here in case of no space + sl.EmitLabel(noAlloc); + + // Release the lock in the uniprocessor case + EmitNoAllocCode(&sl, flags); + + // Come here in case of failure to get the lock + sl.EmitLabel(noLock); + + // Restore the address of the value to be boxed + // mov edx,ebx + sl.Emit16(0xD38B); + + // pop ebx + sl.X86EmitPopReg(kEBX); + + // Jump to the slow version of JIT_Box + sl.X86EmitNearJump(sl.NewExternalCodeLabel((LPVOID) JIT_Box)); + + Stub *pStub = sl.Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()); + + return (void *)pStub->GetEntryPoint(); +} + + +HCIMPL2_RAW(Object*, UnframedAllocateObjectArray, /*TypeHandle*/PVOID ArrayType, DWORD cElements) +{ + // This isn't _really_ an FCALL and therefore shouldn't have the + // SO_TOLERANT part of the FCALL_CONTRACT b/c it is not entered + // from managed code. 
+ CONTRACTL { + THROWS; + GC_TRIGGERS; + MODE_COOPERATIVE; + SO_INTOLERANT; + } CONTRACTL_END; + + return OBJECTREFToObject(AllocateArrayEx(TypeHandle::FromPtr(ArrayType), + (INT32 *)(&cElements), + 1, + FALSE + DEBUG_ARG(FALSE))); +} +HCIMPLEND_RAW + + +HCIMPL2_RAW(Object*, UnframedAllocatePrimitiveArray, CorElementType type, DWORD cElements) +{ + // This isn't _really_ an FCALL and therefore shouldn't have the + // SO_TOLERANT part of the FCALL_CONTRACT b/c it is not entered + // from managed code. + CONTRACTL { + THROWS; + GC_TRIGGERS; + MODE_COOPERATIVE; + SO_INTOLERANT; + } CONTRACTL_END; + + return OBJECTREFToObject( AllocatePrimitiveArray(type, cElements, FALSE) ); +} +HCIMPLEND_RAW + + +void *JIT_TrialAlloc::GenAllocArray(Flags flags) +{ + STANDARD_VM_CONTRACT; + + CPUSTUBLINKER sl; + + CodeLabel *noLock = sl.NewCodeLabel(); + CodeLabel *noAlloc = sl.NewCodeLabel(); + + // We were passed a type descriptor in ECX, which contains the (shared) + // array method table and the element type. + + // If this is the allocator for use from unmanaged code, ECX contains the + // element type descriptor, or the CorElementType. + + // We need to save ECX for later + + // push ecx + sl.X86EmitPushReg(kECX); + + // The element count is in EDX - we need to save it for later. + + // push edx + sl.X86EmitPushReg(kEDX); + + if (flags & NO_FRAME) + { + if (flags & OBJ_ARRAY) + { + // we need to load the true method table from the type desc + sl.X86EmitIndexRegLoad(kECX, kECX, offsetof(ArrayTypeDesc,m_TemplateMT)-2); + } + else + { + // mov ecx,[g_pPredefinedArrayTypes+ecx*4] + sl.Emit8(0x8b); + sl.Emit16(0x8d0c); + sl.Emit32((int)(size_t)&g_pPredefinedArrayTypes); + + // test ecx,ecx + sl.Emit16(0xc985); + + // je noLock + sl.X86EmitCondJump(noLock, X86CondCode::kJZ); + + // we need to load the true method table from the type desc + sl.X86EmitIndexRegLoad(kECX, kECX, offsetof(ArrayTypeDesc,m_TemplateMT)); + } + } + else + { + // we need to load the true method table from the type desc + sl.X86EmitIndexRegLoad(kECX, kECX, offsetof(ArrayTypeDesc,m_TemplateMT)-2); + +#ifdef FEATURE_PREJIT + CodeLabel *indir = sl.NewCodeLabel(); + + // test cl,1 + sl.Emit16(0xC1F6); + sl.Emit8(0x01); + + // je indir + sl.X86EmitCondJump(indir, X86CondCode::kJZ); + + // mov ecx, [ecx-1] + sl.X86EmitIndexRegLoad(kECX, kECX, -1); + + sl.EmitLabel(indir); +#endif + } + + // Do a conservative check here. This is to avoid doing overflow checks within this function. We'll + // still have to do a size check before running through the body of EmitCore. The way we do the check + // against the allocation quantum there requires that we not overflow when adding the size to the + // current allocation context pointer. There is exactly LARGE_OBJECT_SIZE of headroom there, so do that + // check before we EmitCore. + // + // For reference types, we can just pick the correct value of maxElems and skip the second check. + // + // By the way, we use 258 as a "slack" value to ensure that we don't overflow because of the size of the + // array header or alignment. 
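To make the idea above concrete: rather than checking baseSize + count*componentSize for overflow exactly, the stub first rejects any element count that could not possibly stay under the large-object threshold (with a couple hundred elements of slack for the header and alignment), which keeps the later size arithmetic safe. The sketch below illustrates that clamp in C++ with placeholder constants; it deliberately drops the double-array threshold case the real stub also handles.

#include <cstddef>
#include <cstdint>

// Placeholder values for illustration only.
constexpr size_t kLargeObjectSize = 85000;   // stand-in for LARGE_OBJECT_SIZE
constexpr size_t kSlackElements   = 256;     // slack for header + alignment

// Returns true if 'numElements' is small enough that the later
// baseSize + numElements*componentSize arithmetic cannot overflow the
// checks performed against the allocation context.
bool ElementCountLooksSafe(uint32_t numElements, size_t componentSize)
{
    size_t maxElems = 0xffff - kSlackElements;              // generic cap used by the stub

    if (componentSize == sizeof(void*))                     // object arrays: exact cap
        maxElems = (kLargeObjectSize / sizeof(void*)) - kSlackElements;

    return numElements < maxElems;                          // otherwise: slow helper
}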
+ sl.Emit16(0xfa81); + + + // The large object heap is 8 byte aligned, so for double arrays we + // want to bias toward putting things in the large object heap + unsigned maxElems = 0xffff - 256; + + if ((flags & ALIGN8) && g_pConfig->GetDoubleArrayToLargeObjectHeapThreshold() < maxElems) + maxElems = g_pConfig->GetDoubleArrayToLargeObjectHeapThreshold(); + if (flags & OBJ_ARRAY) + { + //Since we know that the array elements are sizeof(OBJECTREF), set maxElems exactly here (use the + //same slack from above. + maxElems = min(maxElems, (LARGE_OBJECT_SIZE/sizeof(OBJECTREF)) - 256); + } + sl.Emit32(maxElems); + + + // jae noLock - seems tempting to jump to noAlloc, but we haven't taken the lock yet + sl.X86EmitCondJump(noLock, X86CondCode::kJAE); + + if (flags & OBJ_ARRAY) + { + // In this case we know the element size is sizeof(void *), or 4 for x86 + // This helps us in two ways - we can shift instead of multiplying, and + // there's no need to align the size either + + _ASSERTE(sizeof(void *) == 4); + + // mov eax, [ecx]MethodTable.m_BaseSize + sl.X86EmitIndexRegLoad(kEAX, kECX, offsetof(MethodTable, m_BaseSize)); + + // lea eax, [eax+edx*4] + sl.X86EmitOp(0x8d, kEAX, kEAX, 0, kEDX, 4); + } + else + { + // movzx eax, [ECX]MethodTable.m_dwFlags /* component size */ + sl.Emit8(0x0f); + sl.X86EmitOffsetModRM(0xb7, kEAX, kECX, offsetof(MethodTable, m_dwFlags /* component size */)); + + // mul eax, edx + sl.Emit16(0xe2f7); + + // add eax, [ecx]MethodTable.m_BaseSize + sl.X86EmitOffsetModRM(0x03, kEAX, kECX, offsetof(MethodTable, m_BaseSize)); + + // Since this is an array of value classes, we need an extra compare here to make sure we're still + // less than LARGE_OBJECT_SIZE. This is the last bit of arithmetic before we compare against the + // allocation context, so do it here. + + // cmp eax, LARGE_OBJECT_SIZE + // ja noLock + sl.Emit8(0x3d); + sl.Emit32(LARGE_OBJECT_SIZE); + sl.X86EmitCondJump(noLock, X86CondCode::kJA); + } + +#if DATA_ALIGNMENT == 4 + if (flags & OBJ_ARRAY) + { + // No need for rounding in this case - element size is 4, and m_BaseSize is guaranteed + // to be a multiple of 4. 
+ } + else +#endif // DATA_ALIGNMENT == 4 + { + // round the size to a multiple of 4 + + // add eax, 3 + sl.X86EmitAddReg(kEAX, (DATA_ALIGNMENT-1)); + + // and eax, ~3 + sl.Emit16(0xe083); + sl.Emit8(~(DATA_ALIGNMENT-1)); + } + + flags = (Flags)(flags | SIZE_IN_EAX); + + // Emit the main body of the trial allocator, be it SP or MP + EmitCore(&sl, noLock, noAlloc, flags); + + // Here we are at the end of the success case - store element count + // and possibly the element type descriptor and return + + // pop edx - element count + sl.X86EmitPopReg(kEDX); + + // pop ecx - array type descriptor + sl.X86EmitPopReg(kECX); + + // mov dword ptr [eax]ArrayBase.m_NumComponents, edx + sl.X86EmitIndexRegStore(kEAX, offsetof(ArrayBase,m_NumComponents), kEDX); + +#if CHECK_APP_DOMAIN_LEAKS + EmitSetAppDomain(&sl); +#endif + + // no stack parameters + sl.X86EmitReturn(0); + + // Come here in case of no space + sl.EmitLabel(noAlloc); + + // Release the lock in the uniprocessor case + EmitNoAllocCode(&sl, flags); + + // Come here in case of failure to get the lock + sl.EmitLabel(noLock); + + // pop edx - element count + sl.X86EmitPopReg(kEDX); + + // pop ecx - array type descriptor + sl.X86EmitPopReg(kECX); + + CodeLabel * target; + if (flags & NO_FRAME) + { + if (flags & OBJ_ARRAY) + { + // Jump to the unframed helper + target = sl.NewExternalCodeLabel((LPVOID)UnframedAllocateObjectArray); + _ASSERTE(target->e.m_pExternalAddress); + } + else + { + // Jump to the unframed helper + target = sl.NewExternalCodeLabel((LPVOID)UnframedAllocatePrimitiveArray); + _ASSERTE(target->e.m_pExternalAddress); + } + } + else + { + // Jump to the framed helper + target = sl.NewExternalCodeLabel((LPVOID)JIT_NewArr1); + _ASSERTE(target->e.m_pExternalAddress); + } + sl.X86EmitNearJump(target); + + Stub *pStub = sl.Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()); + + return (void *)pStub->GetEntryPoint(); +} + + +void *JIT_TrialAlloc::GenAllocString(Flags flags) +{ + STANDARD_VM_CONTRACT; + + CPUSTUBLINKER sl; + + CodeLabel *noLock = sl.NewCodeLabel(); + CodeLabel *noAlloc = sl.NewCodeLabel(); + + // We were passed the number of characters in ECX + + // push ecx + sl.X86EmitPushReg(kECX); + + // mov eax, ecx + sl.Emit16(0xc18b); + + // we need to load the method table for string from the global + + // mov ecx, [g_pStringMethodTable] + sl.Emit16(0x0d8b); + sl.Emit32((int)(size_t)&g_pStringClass); + + // Instead of doing elaborate overflow checks, we just limit the number of elements + // to (LARGE_OBJECT_SIZE - 256)/sizeof(WCHAR) or less. + // This will avoid all overflow problems, as well as making sure + // big string objects are correctly allocated in the big object heap. + + _ASSERTE(sizeof(WCHAR) == 2); + + // cmp edx,(LARGE_OBJECT_SIZE - 256)/sizeof(WCHAR) + sl.Emit16(0xf881); + sl.Emit32((LARGE_OBJECT_SIZE - 256)/sizeof(WCHAR)); + + // jae noLock - seems tempting to jump to noAlloc, but we haven't taken the lock yet + sl.X86EmitCondJump(noLock, X86CondCode::kJAE); + + // mov edx, [ecx]MethodTable.m_BaseSize + sl.X86EmitIndexRegLoad(kEDX, kECX, offsetof(MethodTable,m_BaseSize)); + + // Calculate the final size to allocate. + // We need to calculate baseSize + cnt*2, then round that up by adding 3 and anding ~3. 
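The size computation that follows is simply baseSize + 2*charCount rounded up to DATA_ALIGNMENT, folded into one lea and one and. The same arithmetic in C++, assuming DATA_ALIGNMENT is 4 as on x86 (for example, a 12-byte base size and 5 characters round up to 24 bytes):

#include <cstddef>

constexpr size_t kDataAlignment = 4;   // DATA_ALIGNMENT on x86

// baseSize + charCount * sizeof(WCHAR), rounded up to the data alignment.
// Example: ComputeStringAllocSize(12, 5) == 24  (12 + 10 = 22, rounded up to 24).
size_t ComputeStringAllocSize(size_t baseSize, size_t charCount)
{
    size_t size = baseSize + charCount * 2;                  // sizeof(WCHAR) == 2
    return (size + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
}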
+ + // lea eax, [edx+eax*2+5] + sl.X86EmitOp(0x8d, kEAX, kEDX, (DATA_ALIGNMENT-1), kEAX, 2); + + // and eax, ~3 + sl.Emit16(0xe083); + sl.Emit8(~(DATA_ALIGNMENT-1)); + + flags = (Flags)(flags | SIZE_IN_EAX); + + // Emit the main body of the trial allocator, be it SP or MP + EmitCore(&sl, noLock, noAlloc, flags); + + // Here we are at the end of the success case - store element count + // and possibly the element type descriptor and return + + // pop ecx - element count + sl.X86EmitPopReg(kECX); + + // mov dword ptr [eax]ArrayBase.m_StringLength, ecx + sl.X86EmitIndexRegStore(kEAX, offsetof(StringObject,m_StringLength), kECX); + +#if CHECK_APP_DOMAIN_LEAKS + EmitSetAppDomain(&sl); +#endif + + // no stack parameters + sl.X86EmitReturn(0); + + // Come here in case of no space + sl.EmitLabel(noAlloc); + + // Release the lock in the uniprocessor case + EmitNoAllocCode(&sl, flags); + + // Come here in case of failure to get the lock + sl.EmitLabel(noLock); + + // pop ecx - element count + sl.X86EmitPopReg(kECX); + + CodeLabel * target; + if (flags & NO_FRAME) + { + // Jump to the unframed helper + target = sl.NewExternalCodeLabel((LPVOID)UnframedAllocateString); + } + else + { + // Jump to the framed helper + target = sl.NewExternalCodeLabel((LPVOID)FramedAllocateString); + } + sl.X86EmitNearJump(target); + + Stub *pStub = sl.Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()); + + return (void *)pStub->GetEntryPoint(); +} + + +FastStringAllocatorFuncPtr fastStringAllocator = UnframedAllocateString; + +FastObjectArrayAllocatorFuncPtr fastObjectArrayAllocator = UnframedAllocateObjectArray; + +FastPrimitiveArrayAllocatorFuncPtr fastPrimitiveArrayAllocator = UnframedAllocatePrimitiveArray; + +// For this helper, +// If bCCtorCheck == true +// ECX contains the domain neutral module ID +// EDX contains the class domain ID, and the +// else +// ECX contains the domain neutral module ID +// EDX is junk +// shared static base is returned in EAX. + +// "init" should be the address of a routine which takes an argument of +// the module domain ID, the class domain ID, and returns the static base pointer +void EmitFastGetSharedStaticBase(CPUSTUBLINKER *psl, CodeLabel *init, bool bCCtorCheck, bool bGCStatic, bool bSingleAppDomain) +{ + STANDARD_VM_CONTRACT; + + CodeLabel *DoInit = 0; + if (bCCtorCheck) + { + DoInit = psl->NewCodeLabel(); + } + + // mov eax, ecx + psl->Emit8(0x89); + psl->Emit8(0xc8); + + if(!bSingleAppDomain) + { + // Check tag + CodeLabel *cctorCheck = psl->NewCodeLabel(); + + + // test eax, 1 + psl->Emit8(0xa9); + psl->Emit32(1); + + // jz cctorCheck + psl->X86EmitCondJump(cctorCheck, X86CondCode::kJZ); + + // mov eax GetAppDomain() + psl->X86EmitCurrentAppDomainFetch(kEAX, (1<m_sDomainLocalBlock.m_pModuleSlots] + psl->X86EmitIndexRegLoad(kEAX, kEAX, (__int32) AppDomain::GetOffsetOfModuleSlotsPointer()); + + // Note: weird address arithmetic effectively does: + // shift over 1 to remove tag bit (which is always 1), then multiply by 4. 
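Unpacking the note above: because the domain-neutral module ID carries its tag in the low bit, the slot offset ((id >> 1) * sizeof(void*)) can be computed directly as id*2 - 2, which is exactly what the [eax + ecx*2 - 2] addressing mode produces. A tiny self-check of that identity (the tagging scheme here is inferred from the comment, so treat it as illustrative):

#include <cassert>
#include <cstdint>

// Assume a tagged module ID stores the slot index shifted left by one with
// the low bit set: tagged = (index << 1) | 1.
int main()
{
    for (uint32_t index = 0; index < 1000; ++index)
    {
        uint32_t tagged = (index << 1) | 1;

        uint32_t viaShift = (tagged >> 1) * 4;   // remove tag bit, scale by pointer size
        uint32_t viaLea   = tagged * 2 - 2;      // what [base + tagged*2 - 2] computes

        assert(viaShift == viaLea && viaShift == index * 4);
    }
    return 0;
}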
+ // mov eax [eax + ecx*2 - 2] + psl->X86EmitOp(0x8b, kEAX, kEAX, -2, kECX, 2); + + // cctorCheck: + psl->EmitLabel(cctorCheck); + + } + + if (bCCtorCheck) + { + // test [eax + edx + offsetof(DomainLocalModule, m_pDataBlob], ClassInitFlags::INITIALIZED_FLAG // Is class inited + _ASSERTE(FitsInI1(ClassInitFlags::INITIALIZED_FLAG)); + _ASSERTE(FitsInI1(DomainLocalModule::GetOffsetOfDataBlob())); + + BYTE testClassInit[] = { 0xF6, 0x44, 0x10, + (BYTE) DomainLocalModule::GetOffsetOfDataBlob(), (BYTE)ClassInitFlags::INITIALIZED_FLAG }; + + psl->EmitBytes(testClassInit, sizeof(testClassInit)); + + // jz init // no, init it + psl->X86EmitCondJump(DoInit, X86CondCode::kJZ); + } + + if (bGCStatic) + { + // Indirect to get the pointer to the first GC Static + psl->X86EmitIndexRegLoad(kEAX, kEAX, (__int32) DomainLocalModule::GetOffsetOfGCStaticPointer()); + } + + // ret + psl->X86EmitReturn(0); + + if (bCCtorCheck) + { + // DoInit: + psl->EmitLabel(DoInit); + + // push edx (must be preserved) + psl->X86EmitPushReg(kEDX); + + // call init + psl->X86EmitCall(init, 0); + + // pop edx + psl->X86EmitPopReg(kEDX); + + // ret + psl->X86EmitReturn(0); + } + +} + +void *GenFastGetSharedStaticBase(bool bCheckCCtor, bool bGCStatic, bool bSingleAppDomain) +{ + STANDARD_VM_CONTRACT; + + CPUSTUBLINKER sl; + + CodeLabel *init; + if (bGCStatic) + { + init = sl.NewExternalCodeLabel((LPVOID)JIT_GetSharedGCStaticBase); + } + else + { + init = sl.NewExternalCodeLabel((LPVOID)JIT_GetSharedNonGCStaticBase); + } + + EmitFastGetSharedStaticBase(&sl, init, bCheckCCtor, bGCStatic, bSingleAppDomain); + + Stub *pStub = sl.Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()); + + return (void*) pStub->GetEntryPoint(); +} + + +#ifdef ENABLE_FAST_GCPOLL_HELPER +void EnableJitGCPoll() +{ + SetJitHelperFunction(CORINFO_HELP_POLL_GC, (void*)JIT_PollGC); +} +void DisableJitGCPoll() +{ + SetJitHelperFunction(CORINFO_HELP_POLL_GC, (void*)JIT_PollGC_Nop); +} +#endif + +#define NUM_WRITE_BARRIERS 6 + +static const BYTE c_rgWriteBarrierRegs[NUM_WRITE_BARRIERS] = { + 0, // EAX + 1, // ECX + 3, // EBX + 6, // ESI + 7, // EDI + 5, // EBP +}; + +static const void * const c_rgWriteBarriers[NUM_WRITE_BARRIERS] = { + (void *)JIT_WriteBarrierEAX, + (void *)JIT_WriteBarrierECX, + (void *)JIT_WriteBarrierEBX, + (void *)JIT_WriteBarrierESI, + (void *)JIT_WriteBarrierEDI, + (void *)JIT_WriteBarrierEBP, +}; + +#ifdef WRITE_BARRIER_CHECK +static const void * const c_rgDebugWriteBarriers[NUM_WRITE_BARRIERS] = { + (void *)JIT_DebugWriteBarrierEAX, + (void *)JIT_DebugWriteBarrierECX, + (void *)JIT_DebugWriteBarrierEBX, + (void *)JIT_DebugWriteBarrierESI, + (void *)JIT_DebugWriteBarrierEDI, + (void *)JIT_DebugWriteBarrierEBP, +}; +#endif // WRITE_BARRIER_CHECK + +#define DEBUG_RANDOM_BARRIER_CHECK DbgGetEXETimeStamp() % 7 == 4 + +/*********************************************************************/ +// Initialize the part of the JIT helpers that require very little of +// EE infrastructure to be in place. 
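+// This wires up the fast allocation stubs, the fast shared static base helpers,
+// and the per-register write barrier copies.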
+/*********************************************************************/ +void InitJITHelpers1() +{ + STANDARD_VM_CONTRACT; + +#define ETW_NUM_JIT_HELPERS 10 + static const LPCWSTR pHelperNames[ETW_NUM_JIT_HELPERS] = { + W("@NewObject"), + W("@NewObjectAlign8"), + W("@Box"), + W("@NewArray1Object"), + W("@NewArray1ValueType"), + W("@NewArray1ObjectAlign8"), + W("@StaticBaseObject"), + W("@StaticBaseNonObject"), + W("@StaticBaseObjectNoCCtor"), + W("@StaticBaseNonObjectNoCCtor") + }; + + PVOID pMethodAddresses[ETW_NUM_JIT_HELPERS]={0}; + + _ASSERTE(g_SystemInfo.dwNumberOfProcessors != 0); + + JIT_TrialAlloc::Flags flags = GCHeap::UseAllocationContexts() ? + JIT_TrialAlloc::MP_ALLOCATOR : JIT_TrialAlloc::NORMAL; + + // Get CPU features and check for SSE2 support. + // This code should eventually probably be moved into codeman.cpp, + // where we set the cpu feature flags for the JIT based on CPU type and features. + DWORD dwCPUFeaturesECX; + DWORD dwCPUFeaturesEDX; + + __asm + { + pushad + mov eax, 1 + cpuid + mov dwCPUFeaturesECX, ecx + mov dwCPUFeaturesEDX, edx + popad + } + + // If bit 26 (SSE2) is set, then we can use the SSE2 flavors + // and faster x87 implementation for the P4 of Dbl2Lng. + if (dwCPUFeaturesEDX & (1<<26)) + { + SetJitHelperFunction(CORINFO_HELP_DBL2INT, JIT_Dbl2IntSSE2); + if (dwCPUFeaturesECX & 1) // check SSE3 + { + SetJitHelperFunction(CORINFO_HELP_DBL2UINT, JIT_Dbl2LngSSE3); + SetJitHelperFunction(CORINFO_HELP_DBL2LNG, JIT_Dbl2LngSSE3); + } + else + { + SetJitHelperFunction(CORINFO_HELP_DBL2UINT, JIT_Dbl2LngP4x87); // SSE2 only for signed + SetJitHelperFunction(CORINFO_HELP_DBL2LNG, JIT_Dbl2LngP4x87); + } + } + + if (!(TrackAllocationsEnabled() + || LoggingOn(LF_GCALLOC, LL_INFO10) +#ifdef _DEBUG + || (g_pConfig->ShouldInjectFault(INJECTFAULT_GCHEAP) != 0) +#endif + ) + ) + { + // Replace the slow helpers with faster version + + pMethodAddresses[0] = JIT_TrialAlloc::GenAllocSFast(flags); + SetJitHelperFunction(CORINFO_HELP_NEWSFAST, pMethodAddresses[0]); + pMethodAddresses[1] = JIT_TrialAlloc::GenAllocSFast((JIT_TrialAlloc::Flags)(flags|JIT_TrialAlloc::ALIGN8 | JIT_TrialAlloc::ALIGN8OBJ)); + SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, pMethodAddresses[1]); + pMethodAddresses[2] = JIT_TrialAlloc::GenBox(flags); + SetJitHelperFunction(CORINFO_HELP_BOX, pMethodAddresses[2]); + pMethodAddresses[3] = JIT_TrialAlloc::GenAllocArray((JIT_TrialAlloc::Flags)(flags|JIT_TrialAlloc::OBJ_ARRAY)); + SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, pMethodAddresses[3]); + pMethodAddresses[4] = JIT_TrialAlloc::GenAllocArray(flags); + SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, pMethodAddresses[4]); + pMethodAddresses[5] = JIT_TrialAlloc::GenAllocArray((JIT_TrialAlloc::Flags)(flags|JIT_TrialAlloc::ALIGN8)); + SetJitHelperFunction(CORINFO_HELP_NEWARR_1_ALIGN8, pMethodAddresses[5]); + + fastObjectArrayAllocator = (FastObjectArrayAllocatorFuncPtr)JIT_TrialAlloc::GenAllocArray((JIT_TrialAlloc::Flags)(flags|JIT_TrialAlloc::NO_FRAME|JIT_TrialAlloc::OBJ_ARRAY)); + fastPrimitiveArrayAllocator = (FastPrimitiveArrayAllocatorFuncPtr)JIT_TrialAlloc::GenAllocArray((JIT_TrialAlloc::Flags)(flags|JIT_TrialAlloc::NO_FRAME)); + + // If allocation logging is on, then we divert calls to FastAllocateString to an Ecall method, not this + // generated method. Find this workaround in Ecall::Init() in ecall.cpp. 
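+        // Otherwise, generate the fast string allocator and point the FastAllocateString FCall at it.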
+ ECall::DynamicallyAssignFCallImpl((PCODE) JIT_TrialAlloc::GenAllocString(flags), ECall::FastAllocateString); + + // generate another allocator for use from unmanaged code (won't need a frame) + fastStringAllocator = (FastStringAllocatorFuncPtr) JIT_TrialAlloc::GenAllocString((JIT_TrialAlloc::Flags)(flags|JIT_TrialAlloc::NO_FRAME)); + //UnframedAllocateString; + } + + bool bSingleAppDomain = IsSingleAppDomain(); + + // Replace static helpers with faster assembly versions + pMethodAddresses[6] = GenFastGetSharedStaticBase(true, true, bSingleAppDomain); + SetJitHelperFunction(CORINFO_HELP_GETSHARED_GCSTATIC_BASE, pMethodAddresses[6]); + pMethodAddresses[7] = GenFastGetSharedStaticBase(true, false, bSingleAppDomain); + SetJitHelperFunction(CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE, pMethodAddresses[7]); + pMethodAddresses[8] = GenFastGetSharedStaticBase(false, true, bSingleAppDomain); + SetJitHelperFunction(CORINFO_HELP_GETSHARED_GCSTATIC_BASE_NOCTOR, pMethodAddresses[8]); + pMethodAddresses[9] = GenFastGetSharedStaticBase(false, false, bSingleAppDomain); + SetJitHelperFunction(CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR, pMethodAddresses[9]); + + ETW::MethodLog::StubsInitialized(pMethodAddresses, (PVOID *)pHelperNames, ETW_NUM_JIT_HELPERS); + +#ifdef ENABLE_FAST_GCPOLL_HELPER + // code:JIT_PollGC_Nop + SetJitHelperFunction(CORINFO_HELP_POLL_GC, (void*)JIT_PollGC_Nop); +#endif //ENABLE_FAST_GCPOLL_HELPER + + // All write barrier helpers should fit into one page. + // If you hit this assert on retail build, there is most likely problem with BBT script. + _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", (BYTE*)JIT_WriteBarrierLast - (BYTE*)JIT_WriteBarrierStart < PAGE_SIZE); + _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", (BYTE*)JIT_PatchedWriteBarrierLast - (BYTE*)JIT_PatchedWriteBarrierStart < PAGE_SIZE); + + // Copy the write barriers to their final resting place. + for (int iBarrier = 0; iBarrier < NUM_WRITE_BARRIERS; iBarrier++) + { + BYTE * pfunc = (BYTE *) JIT_WriteBarrierReg_PreGrow; + + BYTE * pBuf = (BYTE *)c_rgWriteBarriers[iBarrier]; + int reg = c_rgWriteBarrierRegs[iBarrier]; + + memcpy(pBuf, pfunc, 34); + + // assert the copied code ends in a ret to make sure we got the right length + _ASSERTE(pBuf[33] == 0xC3); + + // We need to adjust registers in a couple of instructions + // It would be nice to have the template contain all zeroes for + // the register fields (corresponding to EAX), but that doesn't + // work because then we get a smaller encoding for the compares + // that only works for EAX but not the other registers. + // So we always have to clear the register fields before updating them. 
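+        // For example, the template's 89 02 (mov [edx], eax) becomes 89 1A
+        // (mov [edx], ebx) once reg == 3 is OR'ed into bits 3..5 of the ModR/M byte.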
+
+        // First instruction to patch is a mov [edx], reg
+
+        _ASSERTE(pBuf[0] == 0x89);
+        // Update the reg field (bits 3..5) of the ModR/M byte of this instruction
+        pBuf[1] &= 0xc7;
+        pBuf[1] |= reg << 3;
+
+        // Second instruction to patch is cmp reg, imm32 (low bound)
+
+        _ASSERTE(pBuf[2] == 0x81);
+        // Here the lowest three bits in ModR/M field are the register
+        pBuf[3] &= 0xf8;
+        pBuf[3] |= reg;
+
+#ifdef WRITE_BARRIER_CHECK
+        // Don't do the fancy optimization, just jump to the old one
+        // Use the slow one from time to time in a debug build because
+        // there are some good asserts in the unoptimized one
+        if ((g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_BARRIERCHECK) || DEBUG_RANDOM_BARRIER_CHECK) {
+            pfunc = &pBuf[0];
+            *pfunc++ = 0xE9;                // JMP c_rgDebugWriteBarriers[iBarrier]
+            *((DWORD*) pfunc) = (BYTE*) c_rgDebugWriteBarriers[iBarrier] - (pfunc + sizeof(DWORD));
+        }
+#endif // WRITE_BARRIER_CHECK
+    }
+
+#ifndef CODECOVERAGE
+    ValidateWriteBarrierHelpers();
+#endif
+
+    // Leave the patched region writable for StompWriteBarrierEphemeral(), StompWriteBarrierResize()
+    // and CTPMethodTable::ActivatePrecodeRemotingThunk
+
+    // Initialize g_TailCallFrameVptr for JIT_TailCall helper
+    g_TailCallFrameVptr = (void*)TailCallFrame::GetMethodFrameVPtr();
+}
+
+// these constants are offsets into our write barrier helpers for values that get updated as the bounds of the managed heap change.
+// ephemeral region
+const int AnyGrow_EphemeralLowerBound = 4; // offset is the same for both pre and post grow functions
+const int PostGrow_EphemeralUpperBound = 12;
+
+// card table
+const int PreGrow_CardTableFirstLocation = 16;
+const int PreGrow_CardTableSecondLocation = 28;
+const int PostGrow_CardTableFirstLocation = 24;
+const int PostGrow_CardTableSecondLocation = 36;
+
+
+#ifndef CODECOVERAGE        // Deactivate alignment validation for code coverage builds
+                            // because the instrumented binaries will not preserve alignment constraints and we will fail.
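+// ValidateWriteBarrierHelpers below asserts that each offset above points at a
+// naturally aligned imm32 (still holding the 0xf0f0f0f0 placeholder) in the barrier code.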
+
+void ValidateWriteBarrierHelpers()
+{
+    // we have an invariant that the addresses of all the values that we update in our write barrier
+    // helpers must be naturally aligned; this is so that the update can happen atomically since there
+    // are places where we update these values while the EE is running
+
+#ifdef WRITE_BARRIER_CHECK
+    // write barrier checking uses the slower helpers that we don't bash so there is no need for validation
+    if ((g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_BARRIERCHECK) || DEBUG_RANDOM_BARRIER_CHECK)
+        return;
+#endif // WRITE_BARRIER_CHECK
+
+    // first validate the PreGrow helper
+    BYTE* pWriteBarrierFunc = reinterpret_cast<BYTE*>(JIT_WriteBarrierEAX);
+
+    // ephemeral region
+    DWORD* pLocation = reinterpret_cast<DWORD*>(&pWriteBarrierFunc[AnyGrow_EphemeralLowerBound]);
+    _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", (reinterpret_cast<size_t>(pLocation) & 0x3) == 0);
+    _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", *pLocation == 0xf0f0f0f0);
+
+    // card table
+    pLocation = reinterpret_cast<DWORD*>(&pWriteBarrierFunc[PreGrow_CardTableFirstLocation]);
+    _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", (reinterpret_cast<size_t>(pLocation) & 0x3) == 0);
+    _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", *pLocation == 0xf0f0f0f0);
+    pLocation = reinterpret_cast<DWORD*>(&pWriteBarrierFunc[PreGrow_CardTableSecondLocation]);
+    _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", (reinterpret_cast<size_t>(pLocation) & 0x3) == 0);
+    _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", *pLocation == 0xf0f0f0f0);
+
+    // now validate the PostGrow helper
+    pWriteBarrierFunc = reinterpret_cast<BYTE*>(JIT_WriteBarrierReg_PostGrow);
+
+    // ephemeral region
+    pLocation = reinterpret_cast<DWORD*>(&pWriteBarrierFunc[AnyGrow_EphemeralLowerBound]);
+    _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", (reinterpret_cast<size_t>(pLocation) & 0x3) == 0);
+    _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", *pLocation == 0xf0f0f0f0);
+    pLocation = reinterpret_cast<DWORD*>(&pWriteBarrierFunc[PostGrow_EphemeralUpperBound]);
+    _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", (reinterpret_cast<size_t>(pLocation) & 0x3) == 0);
+    _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", *pLocation == 0xf0f0f0f0);
+
+    // card table
+    pLocation = reinterpret_cast<DWORD*>(&pWriteBarrierFunc[PostGrow_CardTableFirstLocation]);
+    _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", (reinterpret_cast<size_t>(pLocation) & 0x3) == 0);
+    _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", *pLocation == 0xf0f0f0f0);
+    pLocation = reinterpret_cast<DWORD*>(&pWriteBarrierFunc[PostGrow_CardTableSecondLocation]);
+    _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", (reinterpret_cast<size_t>(pLocation) & 0x3) == 0);
+    _ASSERTE_ALL_BUILDS("clr/src/VM/i386/JITinterfaceX86.cpp", *pLocation == 0xf0f0f0f0);
+}
+
+#endif //CODECOVERAGE
+/*********************************************************************/
+
+#define WriteBarrierIsPreGrow() (((BYTE *)JIT_WriteBarrierEAX)[10] == 0xc1)
+
+
+/*********************************************************************/
+// When a GC happens, the upper and lower bounds of the ephemeral
+// generation change.  This routine updates the WriteBarrier thunks
+// with the new values.
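+// Only the imm32 bound values embedded in the barrier code are rewritten here;
+// the instruction bytes themselves are left unchanged.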
+void StompWriteBarrierEphemeral(bool /* isRuntimeSuspended */) +{ + CONTRACTL { + NOTHROW; + GC_NOTRIGGER; + } CONTRACTL_END; + +#ifdef WRITE_BARRIER_CHECK + // Don't do the fancy optimization if we are checking write barrier + if (((BYTE *)JIT_WriteBarrierEAX)[0] == 0xE9) // we are using slow write barrier + return; +#endif // WRITE_BARRIER_CHECK + + BOOL flushICache = FALSE; + + // Update the lower bound. + for (int iBarrier = 0; iBarrier < NUM_WRITE_BARRIERS; iBarrier++) + { + BYTE * pBuf = (BYTE *)c_rgWriteBarriers[iBarrier]; + + // assert there is in fact a cmp r/m32, imm32 there + _ASSERTE(pBuf[2] == 0x81); + + // Update the immediate which is the lower bound of the ephemeral generation + size_t *pfunc = (size_t *) &pBuf[AnyGrow_EphemeralLowerBound]; + //avoid trivial self modifying code + if (*pfunc != (size_t) g_ephemeral_low) + { + flushICache = TRUE; + *pfunc = (size_t) g_ephemeral_low; + } + if (!WriteBarrierIsPreGrow()) + { + // assert there is in fact a cmp r/m32, imm32 there + _ASSERTE(pBuf[10] == 0x81); + + // Update the upper bound if we are using the PostGrow thunk. + pfunc = (size_t *) &pBuf[PostGrow_EphemeralUpperBound]; + //avoid trivial self modifying code + if (*pfunc != (size_t) g_ephemeral_high) + { + flushICache = TRUE; + *pfunc = (size_t) g_ephemeral_high; + } + } + } + + if (flushICache) + FlushInstructionCache(GetCurrentProcess(), (void *)JIT_PatchedWriteBarrierStart, + (BYTE*)JIT_PatchedWriteBarrierLast - (BYTE*)JIT_PatchedWriteBarrierStart); +} + +/*********************************************************************/ +// When the GC heap grows, the ephemeral generation may no longer +// be after the older generations. If this happens, we need to switch +// to the PostGrow thunk that checks both upper and lower bounds. +// regardless we need to update the thunk with the +// card_table - lowest_address. +void StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) +{ + CONTRACTL { + NOTHROW; + if (GetThread()) {GC_TRIGGERS;} else {GC_NOTRIGGER;} + } CONTRACTL_END; + +#ifdef WRITE_BARRIER_CHECK + // Don't do the fancy optimization if we are checking write barrier + if (((BYTE *)JIT_WriteBarrierEAX)[0] == 0xE9) // we are using slow write barrier + return; +#endif // WRITE_BARRIER_CHECK + + bool bWriteBarrierIsPreGrow = WriteBarrierIsPreGrow(); + bool bStompWriteBarrierEphemeral = false; + + BOOL bEESuspendedHere = FALSE; + + for (int iBarrier = 0; iBarrier < NUM_WRITE_BARRIERS; iBarrier++) + { + BYTE * pBuf = (BYTE *)c_rgWriteBarriers[iBarrier]; + int reg = c_rgWriteBarrierRegs[iBarrier]; + + size_t *pfunc; + + // Check if we are still using the pre-grow version of the write barrier. + if (bWriteBarrierIsPreGrow) + { + // Check if we need to use the upper bounds checking barrier stub. 
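+            // Switching templates rewrites the barrier's code bytes, so the EE is
+            // suspended below to keep other threads off a half-patched helper.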
+ if (bReqUpperBoundsCheck) + { + GCX_MAYBE_COOP_NO_THREAD_BROKEN((GetThread()!=NULL)); + if( !isRuntimeSuspended && !bEESuspendedHere) { + ThreadSuspend::SuspendEE(ThreadSuspend::SUSPEND_FOR_GC_PREP); + bEESuspendedHere = TRUE; + } + + pfunc = (size_t *) JIT_WriteBarrierReg_PostGrow; + memcpy(pBuf, pfunc, 42); + + // assert the copied code ends in a ret to make sure we got the right length + _ASSERTE(pBuf[41] == 0xC3); + + // We need to adjust registers in a couple of instructions + // It would be nice to have the template contain all zeroes for + // the register fields (corresponding to EAX), but that doesn't + // work because then we get a smaller encoding for the compares + // that only works for EAX but not the other registers + // So we always have to clear the register fields before updating them. + + // First instruction to patch is a mov [edx], reg + + _ASSERTE(pBuf[0] == 0x89); + // Update the reg field (bits 3..5) of the ModR/M byte of this instruction + pBuf[1] &= 0xc7; + pBuf[1] |= reg << 3; + + // Second instruction to patch is cmp reg, imm32 (low bound) + + _ASSERTE(pBuf[2] == 0x81); + // Here the lowest three bits in ModR/M field are the register + pBuf[3] &= 0xf8; + pBuf[3] |= reg; + + // Third instruction to patch is another cmp reg, imm32 (high bound) + + _ASSERTE(pBuf[10] == 0x81); + // Here the lowest three bits in ModR/M field are the register + pBuf[11] &= 0xf8; + pBuf[11] |= reg; + + bStompWriteBarrierEphemeral = true; + // What we're trying to update is the offset field of a + + // cmp offset[edx], 0ffh instruction + _ASSERTE(pBuf[22] == 0x80); + pfunc = (size_t *) &pBuf[PostGrow_CardTableFirstLocation]; + *pfunc = (size_t) g_card_table; + + // What we're trying to update is the offset field of a + // mov offset[edx], 0ffh instruction + _ASSERTE(pBuf[34] == 0xC6); + pfunc = (size_t *) &pBuf[PostGrow_CardTableSecondLocation]; + + } + else + { + // What we're trying to update is the offset field of a + + // cmp offset[edx], 0ffh instruction + _ASSERTE(pBuf[14] == 0x80); + pfunc = (size_t *) &pBuf[PreGrow_CardTableFirstLocation]; + *pfunc = (size_t) g_card_table; + + // What we're trying to update is the offset field of a + + // mov offset[edx], 0ffh instruction + _ASSERTE(pBuf[26] == 0xC6); + pfunc = (size_t *) &pBuf[PreGrow_CardTableSecondLocation]; + } + } + else + { + // What we're trying to update is the offset field of a + + // cmp offset[edx], 0ffh instruction + _ASSERTE(pBuf[22] == 0x80); + pfunc = (size_t *) &pBuf[PostGrow_CardTableFirstLocation]; + *pfunc = (size_t) g_card_table; + + // What we're trying to update is the offset field of a + // mov offset[edx], 0ffh instruction + _ASSERTE(pBuf[34] == 0xC6); + pfunc = (size_t *) &pBuf[PostGrow_CardTableSecondLocation]; + } + + // Stick in the adjustment value. + *pfunc = (size_t) g_card_table; + } + + if (bStompWriteBarrierEphemeral) + { + _ASSERTE(isRuntimeSuspended || bEESuspendedHere); + StompWriteBarrierEphemeral(true); + } + else + { + FlushInstructionCache(GetCurrentProcess(), (void *)JIT_PatchedWriteBarrierStart, + (BYTE*)JIT_PatchedWriteBarrierLast - (BYTE*)JIT_PatchedWriteBarrierStart); + } + + if(bEESuspendedHere) + ThreadSuspend::RestartEE(FALSE, TRUE); +} + diff --git a/src/vm/i386/profiler.cpp b/src/vm/i386/profiler.cpp new file mode 100644 index 0000000000..11d4247aef --- /dev/null +++ b/src/vm/i386/profiler.cpp @@ -0,0 +1,336 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+// See the LICENSE file in the project root for more information.
+//
+// FILE: profiler.cpp
+//
+
+//
+
+//
+// ======================================================================================
+
+#include "common.h"
+
+#ifdef PROFILING_SUPPORTED
+#include "proftoeeinterfaceimpl.h"
+
+//
+// The following structure is the format on x86 builds of the data
+// being passed in platformSpecificHandle for ProfileEnter/Leave/Tailcall
+//
+typedef struct _PROFILE_PLATFORM_SPECIFIC_DATA
+{
+    FunctionID functionId;
+    DWORD    doubleBuffer1;
+    DWORD    doubleBuffer2;
+    DWORD    floatBuffer;
+    DWORD    floatingPointValuePresent;
+    UINT_PTR eax;       // eax and edx must be contiguous in this structure to make getting 64 bit return values easier.
+    UINT_PTR edx;
+    UINT_PTR ecx;
+    UINT_PTR esp;
+    UINT_PTR ip;
+} PROFILE_PLATFORM_SPECIFIC_DATA, *PPROFILE_PLATFORM_SPECIFIC_DATA;
+
+
+/*
+ * ProfileGetIPFromPlatformSpecificHandle
+ *
+ * This routine takes the platformSpecificHandle and retrieves from it the
+ * IP value.
+ *
+ * Parameters:
+ *    handle - the platformSpecificHandle passed to ProfileEnter/Leave/Tailcall
+ *
+ * Returns:
+ *    The IP value stored in the handle.
+ */
+UINT_PTR ProfileGetIPFromPlatformSpecificHandle(void *handle)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    return ((PROFILE_PLATFORM_SPECIFIC_DATA *)handle)->ip;
+}
+
+
+/*
+ * ProfileSetFunctionIDInPlatformSpecificHandle
+ *
+ * This routine takes the platformSpecificHandle and functionID, and assigns
+ * functionID to the functionId field of platformSpecificHandle.
+ *
+ * Parameters:
+ *    pPlatformSpecificHandle - the platformSpecificHandle passed to ProfileEnter/Leave/Tailcall
+ *    functionID - the FunctionID to be assigned
+ *
+ * Returns:
+ *    None
+ */
+void ProfileSetFunctionIDInPlatformSpecificHandle(void * pPlatformSpecificHandle, FunctionID functionID)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    _ASSERTE(pPlatformSpecificHandle != NULL);
+    _ASSERTE(functionID != NULL);
+
+    PROFILE_PLATFORM_SPECIFIC_DATA * pData = reinterpret_cast<PROFILE_PLATFORM_SPECIFIC_DATA *>(pPlatformSpecificHandle);
+    pData->functionId = functionID;
+}
+
+/*
+ * ProfileArgIterator::ProfileArgIterator
+ *
+ * Constructor. Initializes for arg iteration.
+ *
+ * Parameters:
+ *    pMetaSig - The signature of the method we are going to iterate over
+ *    platformSpecificHandle - the value passed to ProfileEnter/Leave/Tailcall
+ *
+ * Returns:
+ *    None.
+ */
+ProfileArgIterator::ProfileArgIterator(MetaSig * pMetaSig, void * platformSpecificHandle):
+    m_argIterator(pMetaSig)
+{
+    //
+    // It would be really nice to contract this, but the underlying functions are convolutedly
+    // contracted.  Basically everything should be loaded by the time the profiler gets a call
+    // back, so everything is NOTHROW/NOTRIGGER, but there is no mechanism for saying that the
+    // contracts in called functions should be for the best case, not the worst case, now.
+    //
+    WRAPPER_NO_CONTRACT;
+
+    m_handle = platformSpecificHandle;
+}
+
+/*
+ * ProfileArgIterator::~ProfileArgIterator
+ *
+ * Destructor, releases all resources.
+ *
+ */
+ProfileArgIterator::~ProfileArgIterator()
+{
+    LIMITED_METHOD_CONTRACT;
+}
+
+/*
+ * ProfileArgIterator::GetNextArgAddr
+ *
+ * After initialization, this method is called repeatedly until it
+ * returns NULL to get the address of each arg.  Note: this address
+ * could be anywhere on the stack.
+ *
+ * Returns:
+ *    Address of the argument, or NULL if iteration is complete.
+ */ +LPVOID ProfileArgIterator::GetNextArgAddr() +{ + // + // It would be really nice to contract this, but the underlying functions are convolutedly + // contracted. Basically everything should be loaded by the time the profiler gets a call + // back, so everything is NOTHROW/NOTRIGGER, but there is not mechanism for saying that the + // contracts in called functions should be for the best case, not the worst case, now. + // + WRAPPER_NO_CONTRACT; + + int argOffset = m_argIterator.GetNextOffset(); + + // + // Value is enregistered, figure out where and return that. + // + PROFILE_PLATFORM_SPECIFIC_DATA *pData = (PROFILE_PLATFORM_SPECIFIC_DATA *)m_handle; + + // + // Zero indicates the end of the args. + // + if (argOffset == TransitionBlock::InvalidOffset) + { + return NULL; + } + + if (pData == NULL) + { + // + // Something wrong. + // + _ASSERTE(!"Why do we have a NULL data pointer here?"); + return NULL; + } + + // + // If this is not enregistered, return the value + // + if (TransitionBlock::IsStackArgumentOffset(argOffset)) + { + return ((LPBYTE)pData->esp) + (argOffset - TransitionBlock::GetOffsetOfArgs()); + } + + switch (argOffset - TransitionBlock::GetOffsetOfArgumentRegisters()) + { + case offsetof(ArgumentRegisters, ECX): + return &(pData->ecx); + case offsetof(ArgumentRegisters, EDX): + return &(pData->edx); + } + + _ASSERTE(!"Arg is an unsaved register!"); + return NULL; +} + +/* + * ProfileArgIterator::GetHiddenArgValue + * + * Called after initialization, any number of times, to retrieve any + * hidden argument, so that resolution for Generics can be done. + * + * Parameters: + * None. + * + * Returns: + * Value of the hidden parameter, or NULL if none exists. + */ +LPVOID ProfileArgIterator::GetHiddenArgValue(void) +{ + // + // It would be really nice to contract this, but the underlying functions are convolutedly + // contracted. Basically everything should be loaded by the time the profiler gets a call + // back, so everything is NOTHROW/NOTRIGGER, but there is not mechanism for saying that the + // contracts in called functions should be for the best case, not the worst case, now. + // + WRAPPER_NO_CONTRACT; + + PROFILE_PLATFORM_SPECIFIC_DATA *pData = (PROFILE_PLATFORM_SPECIFIC_DATA *)m_handle; + + MethodDesc *pMethodDesc = FunctionIdToMethodDesc(pData->functionId); + + if (!pMethodDesc->RequiresInstArg()) + { + return NULL; + } + + // + // The ArgIterator::GetParamTypeOffset() can only be called after calling GetNextOffset until the + // entire signature has been walked, but *before* GetNextOffset returns TransitionBlock::InvalidOffset + // - indicating the end. + // + + // + // Get the offset of the hidden arg + // + int argOffset = m_argIterator.GetParamTypeArgOffset(); + + // + // If this is not enregistered, return the value + // + if (TransitionBlock::IsStackArgumentOffset(argOffset)) + { + return *(LPVOID *)(((LPBYTE)pData->esp) + (argOffset - TransitionBlock::GetOffsetOfArgs())); + } + + switch (argOffset - TransitionBlock::GetOffsetOfArgumentRegisters()) + { + case offsetof(ArgumentRegisters, ECX): + return (LPVOID)(pData->ecx); + case offsetof(ArgumentRegisters, EDX): + return (LPVOID)(pData->edx); + } + + _ASSERTE(!"Arg is an unsaved register!"); + return NULL; +} + +/* + * ProfileArgIterator::GetThis + * + * Called after initialization, any number of times, to retrieve the + * value of 'this'. + * + * Parameters: + * None. + * + * Returns: + * value of the 'this' parameter, or NULL if none exists. 
+ */ +LPVOID ProfileArgIterator::GetThis(void) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + + PROFILE_PLATFORM_SPECIFIC_DATA *pData = (PROFILE_PLATFORM_SPECIFIC_DATA *)m_handle; + + if (pData->ip == 0) + { + return NULL; + } + + if (!m_argIterator.HasThis()) + { + return NULL; + } + + switch (offsetof(ArgumentRegisters, THIS_REG)) + { + case offsetof(ArgumentRegisters, ECX): + return (LPVOID)pData->ecx; + + case offsetof(ArgumentRegisters, EDX): + return (LPVOID)pData->edx; + } + + _ASSERTE(!"This is an unsaved register!"); + return NULL; +} + + + +/* + * ProfileArgIterator::GetReturnBufferAddr + * + * Called after initialization, any number of times, to retrieve the + * address of the return buffer. NULL indicates no return value. + * + * Parameters: + * None. + * + * Returns: + * Address of the return buffer, or NULL if none exists. + */ +LPVOID ProfileArgIterator::GetReturnBufferAddr(void) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + + PROFILE_PLATFORM_SPECIFIC_DATA *pData = (PROFILE_PLATFORM_SPECIFIC_DATA *)m_handle; + + if (m_argIterator.HasRetBuffArg()) + { + return (void *)(pData->eax); + } + + switch (m_argIterator.GetSig()->GetReturnType()) + { + case ELEMENT_TYPE_R8: + _ASSERTE(pData->floatingPointValuePresent); + return (void *)(&(pData->doubleBuffer1)); + + case ELEMENT_TYPE_R4: + _ASSERTE(pData->floatingPointValuePresent); + return (void *)(&(pData->floatBuffer)); + + default: + return &(pData->eax); + } +} + +#endif // PROFILING_SUPPORTED + diff --git a/src/vm/i386/remotingx86.cpp b/src/vm/i386/remotingx86.cpp new file mode 100644 index 0000000000..3a9e891267 --- /dev/null +++ b/src/vm/i386/remotingx86.cpp @@ -0,0 +1,225 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// +// +// +// File: remotingx86.cpp +// + +// +// +// Purpose: Defines various remoting related functions for the x86 architecture +// + +// +// + +// + +#include "common.h" + +#ifdef FEATURE_REMOTING + +#include "excep.h" +#include "comdelegate.h" +#include "remoting.h" +#include "field.h" +#include "siginfo.hpp" +#include "stackbuildersink.h" +#include "threads.h" +#include "method.hpp" +#include "asmconstants.h" +#include "interoputil.h" +#include "virtualcallstub.h" + +#ifdef FEATURE_COMINTEROP +#include "comcallablewrapper.h" +#include "comcache.h" +#endif // FEATURE_COMINTEROP + +//+---------------------------------------------------------------------------- +// +// Method: CTPMethodTable::CreateThunkForVirtualMethod private +// +// Synopsis: Creates the thunk that pushes the supplied slot number and jumps +// to TP Stub +// +//+---------------------------------------------------------------------------- +PCODE CTPMethodTable::CreateThunkForVirtualMethod(DWORD dwSlot, BYTE *startaddr) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + PRECONDITION(CheckPointer(startaddr)); + } + CONTRACTL_END; + + BYTE *pCode = startaddr; + + // 0000 B8 67 45 23 01 MOV EAX, dwSlot + // 0005 E9 ?? ?? ?? ?? JMP TransparentProxyStub + *pCode++ = 0xB8; + *((DWORD *) pCode) = dwSlot; + pCode += sizeof(DWORD); + *pCode++ = 0xE9; + // self-relative call, based on the start of the next instruction. 
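+    // i.e. rel32 = target - (address of the byte just past the 4-byte displacement),
+    // matching the subtraction of (pCode + sizeof(LONG)) below.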
+ *((LONG *) pCode) = (LONG)((size_t)GetTPStubEntryPoint() - (size_t) (pCode + sizeof(LONG))); + + _ASSERTE(CVirtualThunkMgr::IsThunkByASM((PCODE)startaddr)); + + return (PCODE)startaddr; +} + + +//+---------------------------------------------------------------------------- +// +// Method: CTPMethodTable::ActivatePrecodeRemotingThunk private +// +// Synopsis: Patch the precode remoting thunk to begin interception +// +//+---------------------------------------------------------------------------- +void CTPMethodTable::ActivatePrecodeRemotingThunk() +{ + CONTRACTL + { + THROWS; + GC_TRIGGERS; + MODE_ANY; + } + CONTRACTL_END; + + // Before activation: + // 0000 C3 ret + // 0001 90 nop + + // After activation: + // 0000 85 C9 test ecx,ecx + + // 0002 74 XX je RemotingDone + // 0004 81 39 XX XX XX XX cmp dword ptr [ecx],11111111h + // 000A 74 XX je RemotingCheck + + // Switch offset and size of patch based on the jump opcode used. + BYTE* pCode = (BYTE*)PrecodeRemotingThunk; + + SIZE_T mtOffset = 0x0006; + SIZE_T size = 0x000A; + + // Patch "ret + nop" to "test ecx,ecx" + *(UINT16 *)pCode = 0xC985; + + // Replace placeholder value with the actual address of TP method table + _ASSERTE(*(PVOID*)(pCode+mtOffset) == (PVOID*)0x11111111); + *(PVOID*)(pCode+mtOffset) = GetMethodTable(); + + FlushInstructionCache(GetCurrentProcess(), pCode, size); +} + +//+---------------------------------------------------------------------------- +// +// Method: CVirtualThunkMgr::DoTraceStub public +// +// Synopsis: Traces the stub given the starting address +// +//+---------------------------------------------------------------------------- +BOOL CVirtualThunkMgr::DoTraceStub(PCODE stubStartAddress, TraceDestination *trace) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + PRECONDITION(stubStartAddress != NULL); + PRECONDITION(CheckPointer(trace)); + } + CONTRACTL_END; + + BOOL bIsStub = FALSE; + + // Find a thunk whose code address matching the starting address + LPBYTE pThunk = FindThunk((LPBYTE)stubStartAddress); + if(NULL != pThunk) + { + LPBYTE pbAddr = NULL; + LONG destAddress = 0; + if((LPBYTE)stubStartAddress == pThunk) + { + + // Extract the long which gives the self relative address + // of the destination + pbAddr = pThunk + sizeof(BYTE) + sizeof(DWORD) + sizeof(BYTE); + destAddress = *(LONG *)pbAddr; + + // Calculate the absolute address by adding the offset of the next + // instruction after the call instruction + destAddress += (LONG)(size_t)(pbAddr + sizeof(LONG)); + + } + + // We cannot tell where the stub will end up until OnCall is reached. 
+ // So we tell the debugger to run till OnCall is reached and then + // come back and ask us again for the actual destination address of + // the call + + Stub *stub = Stub::RecoverStub((TADDR)destAddress); + + trace->InitForFramePush(stub->GetPatchAddress()); + bIsStub = TRUE; + } + + return bIsStub; +} + +//+---------------------------------------------------------------------------- +// +// Method: CVirtualThunkMgr::IsThunkByASM public +// +// Synopsis: Check assembly to see if this one of our thunks +// +//+---------------------------------------------------------------------------- +BOOL CVirtualThunkMgr::IsThunkByASM(PCODE startaddr) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + PRECONDITION(startaddr != NULL); + } + CONTRACTL_END; + + PTR_BYTE pbCode = PTR_BYTE(startaddr); + + return ((pbCode[0] == 0xB8) && + (pbCode[5] == 0xe9) && + (rel32Decode((TADDR)(pbCode + 6)) == CTPMethodTable::GetTPStubEntryPoint())); +} + +//+---------------------------------------------------------------------------- +// +// Method: CVirtualThunkMgr::GetMethodDescByASM public +// +// Synopsis: Parses MethodDesc out of assembly code +// +//+---------------------------------------------------------------------------- +MethodDesc *CVirtualThunkMgr::GetMethodDescByASM(PCODE startaddr, MethodTable *pMT) +{ + CONTRACT (MethodDesc*) + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + PRECONDITION(startaddr != NULL); + PRECONDITION(CheckPointer(pMT)); + POSTCONDITION(CheckPointer(RETVAL)); + } + CONTRACT_END; + + RETURN (pMT->GetMethodDescForSlot(*((DWORD *) (startaddr + 1)))); +} + +#endif// FEATURE_REMOTING + diff --git a/src/vm/i386/stublinkerx86.cpp b/src/vm/i386/stublinkerx86.cpp new file mode 100644 index 0000000000..0037a7d3e6 --- /dev/null +++ b/src/vm/i386/stublinkerx86.cpp @@ -0,0 +1,6806 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + + +// NOTE on Frame Size C_ASSERT usage in this file +// if the frame size changes then the stubs have to be revisited for correctness +// kindly revist the logic and then update the constants so that the C_ASSERT will again fire +// if someone changes the frame size. 
You are expected to keep this hard coded constant +// up to date so that changes in the frame size trigger errors at compile time if the code is not altered + +// Precompiled Header + +#include "common.h" + +#include "field.h" +#include "stublink.h" + +#include "tls.h" +#include "frames.h" +#include "excep.h" +#include "dllimport.h" +#include "log.h" +#include "security.h" +#include "comdelegate.h" +#include "array.h" +#include "jitinterface.h" +#include "codeman.h" +#ifdef FEATURE_REMOTING +#include "remoting.h" +#endif +#include "dbginterface.h" +#include "eeprofinterfaces.h" +#include "eeconfig.h" +#include "securitydeclarative.h" +#ifdef _TARGET_X86_ +#include "asmconstants.h" +#endif // _TARGET_X86_ +#include "class.h" +#include "stublink.inl" + +#ifdef FEATURE_COMINTEROP +#include "comtoclrcall.h" +#include "runtimecallablewrapper.h" +#include "comcache.h" +#include "olevariant.h" +#include "notifyexternals.h" +#endif // FEATURE_COMINTEROP + +#ifdef FEATURE_PREJIT +#include "compile.h" +#endif + +#if defined(_DEBUG) && defined(STUBLINKER_GENERATES_UNWIND_INFO) +#include +#endif + + +#ifndef DACCESS_COMPILE + +extern "C" VOID __cdecl StubRareEnable(Thread *pThread); +#ifdef FEATURE_COMINTEROP +extern "C" HRESULT __cdecl StubRareDisableHR(Thread *pThread); +#endif // FEATURE_COMINTEROP +extern "C" VOID __cdecl StubRareDisableTHROW(Thread *pThread, Frame *pFrame); + +extern "C" VOID __cdecl ArrayOpStubNullException(void); +extern "C" VOID __cdecl ArrayOpStubRangeException(void); +extern "C" VOID __cdecl ArrayOpStubTypeMismatchException(void); + +#if defined(_TARGET_AMD64_) +#define EXCEPTION_HELPERS(base) \ + extern "C" VOID __cdecl base##_RSIRDI_ScratchArea(void); \ + extern "C" VOID __cdecl base##_ScratchArea(void); \ + extern "C" VOID __cdecl base##_RSIRDI(void); \ + extern "C" VOID __cdecl base(void) +EXCEPTION_HELPERS(ArrayOpStubNullException); +EXCEPTION_HELPERS(ArrayOpStubRangeException); +EXCEPTION_HELPERS(ArrayOpStubTypeMismatchException); +#undef EXCEPTION_HELPERS + +#if defined(_DEBUG) +extern "C" VOID __cdecl DebugCheckStubUnwindInfo(); +#endif +#endif // _TARGET_AMD64_ + +// Presumably this code knows what it is doing with TLS. If we are hiding these +// services from normal code, reveal them here. 
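+// (The TlsGetValue macro is removed below so the name refers to the raw OS API in this file.)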
+#ifdef TlsGetValue +#undef TlsGetValue +#endif + +#ifdef FEATURE_COMINTEROP +Thread* __stdcall CreateThreadBlockReturnHr(ComMethodFrame *pFrame); +#endif + + + +#ifdef _TARGET_AMD64_ + +BOOL IsPreservedReg (X86Reg reg) +{ + UINT16 PreservedRegMask = + (1 << kRBX) + | (1 << kRBP) + | (1 << kRSI) + | (1 << kRDI) + | (1 << kR12) + | (1 << kR13) + | (1 << kR14) + | (1 << kR15); + return PreservedRegMask & (1 << reg); +} + +#endif // _TARGET_AMD64_ + +#ifdef _TARGET_AMD64_ +//----------------------------------------------------------------------- +// InstructionFormat for near Jump and short Jump +//----------------------------------------------------------------------- + +//X64EmitTailcallWithRSPAdjust +class X64NearJumpSetup : public InstructionFormat +{ + public: + X64NearJumpSetup() : InstructionFormat( InstructionFormat::k8|InstructionFormat::k32 + | InstructionFormat::k64Small | InstructionFormat::k64 + ) + { + LIMITED_METHOD_CONTRACT; + } + + virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode) + { + LIMITED_METHOD_CONTRACT + switch (refsize) + { + case k8: + return 0; + + case k32: + return 0; + + case k64Small: + return 5; + + case k64: + return 10; + + default: + _ASSERTE(!"unexpected refsize"); + return 0; + + } + } + + virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer) + { + LIMITED_METHOD_CONTRACT + if (k8 == refsize) + { + // do nothing, X64NearJump will take care of this + } + else if (k32 == refsize) + { + // do nothing, X64NearJump will take care of this + } + else if (k64Small == refsize) + { + UINT64 TargetAddress = (INT64)pOutBuffer + fixedUpReference + GetSizeOfInstruction(refsize, variationCode); + _ASSERTE(FitsInU4(TargetAddress)); + + // mov eax, imm32 ; zero-extended + pOutBuffer[0] = 0xB8; + *((UINT32*)&pOutBuffer[1]) = (UINT32)TargetAddress; + } + else if (k64 == refsize) + { + // mov rax, imm64 + pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT; + pOutBuffer[1] = 0xB8; + *((UINT64*)&pOutBuffer[2]) = (UINT64)(((INT64)pOutBuffer) + fixedUpReference + GetSizeOfInstruction(refsize, variationCode)); + } + else + { + _ASSERTE(!"unreached"); + } + } + + virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset) + { + STATIC_CONTRACT_NOTHROW; + STATIC_CONTRACT_GC_NOTRIGGER; + STATIC_CONTRACT_FORBID_FAULT; + + + if (fExternal) + { + switch (refsize) + { + case InstructionFormat::k8: + // For external, we don't have enough info to predict + // the offset. + return FALSE; + + case InstructionFormat::k32: + return sizeof(PVOID) <= sizeof(UINT32); + + case InstructionFormat::k64Small: + return FitsInI4(offset); + + case InstructionFormat::k64: + // intentional fallthru + case InstructionFormat::kAllowAlways: + return TRUE; + + default: + _ASSERTE(0); + return FALSE; + } + } + else + { + switch (refsize) + { + case InstructionFormat::k8: + return FitsInI1(offset); + + case InstructionFormat::k32: + return FitsInI4(offset); + + case InstructionFormat::k64Small: + // EmitInstruction emits a non-relative jmp for + // k64Small. We don't have enough info to predict the + // target address. (Even if we did, this would only + // handle the set of unsigned offsets with bit 31 set + // and no higher bits set, too uncommon/hard to test.) 
+ return FALSE; + + case InstructionFormat::k64: + // intentional fallthru + case InstructionFormat::kAllowAlways: + return TRUE; + default: + _ASSERTE(0); + return FALSE; + } + } + } +}; + +class X64NearJumpExecute : public InstructionFormat +{ + public: + X64NearJumpExecute() : InstructionFormat( InstructionFormat::k8|InstructionFormat::k32 + | InstructionFormat::k64Small | InstructionFormat::k64 + ) + { + LIMITED_METHOD_CONTRACT; + } + + virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode) + { + LIMITED_METHOD_CONTRACT + switch (refsize) + { + case k8: + return 2; + + case k32: + return 5; + + case k64Small: + return 3; + + case k64: + return 3; + + default: + _ASSERTE(!"unexpected refsize"); + return 0; + + } + } + + virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer) + { + LIMITED_METHOD_CONTRACT + if (k8 == refsize) + { + pOutBuffer[0] = 0xeb; + *((__int8*)(pOutBuffer+1)) = (__int8)fixedUpReference; + } + else if (k32 == refsize) + { + pOutBuffer[0] = 0xe9; + *((__int32*)(pOutBuffer+1)) = (__int32)fixedUpReference; + } + else if (k64Small == refsize) + { + // REX.W jmp rax + pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT; + pOutBuffer[1] = 0xFF; + pOutBuffer[2] = 0xE0; + } + else if (k64 == refsize) + { + // REX.W jmp rax + pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT; + pOutBuffer[1] = 0xFF; + pOutBuffer[2] = 0xE0; + } + else + { + _ASSERTE(!"unreached"); + } + } + + virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset) + { + STATIC_CONTRACT_NOTHROW; + STATIC_CONTRACT_GC_NOTRIGGER; + STATIC_CONTRACT_FORBID_FAULT; + + + if (fExternal) + { + switch (refsize) + { + case InstructionFormat::k8: + // For external, we don't have enough info to predict + // the offset. + return FALSE; + + case InstructionFormat::k32: + return sizeof(PVOID) <= sizeof(UINT32); + + case InstructionFormat::k64Small: + return FitsInI4(offset); + + case InstructionFormat::k64: + // intentional fallthru + case InstructionFormat::kAllowAlways: + return TRUE; + + default: + _ASSERTE(0); + return FALSE; + } + } + else + { + switch (refsize) + { + case InstructionFormat::k8: + return FitsInI1(offset); + + case InstructionFormat::k32: + return FitsInI4(offset); + + case InstructionFormat::k64Small: + // EmitInstruction emits a non-relative jmp for + // k64Small. We don't have enough info to predict the + // target address. (Even if we did, this would only + // handle the set of unsigned offsets with bit 31 set + // and no higher bits set, too uncommon/hard to test.) 
+ return FALSE; + + case InstructionFormat::k64: + // intentional fallthru + case InstructionFormat::kAllowAlways: + return TRUE; + default: + _ASSERTE(0); + return FALSE; + } + } + } +}; + +#endif + +//----------------------------------------------------------------------- +// InstructionFormat for near Jump and short Jump +//----------------------------------------------------------------------- +class X86NearJump : public InstructionFormat +{ + public: + X86NearJump() : InstructionFormat( InstructionFormat::k8|InstructionFormat::k32 +#ifdef _TARGET_AMD64_ + | InstructionFormat::k64Small | InstructionFormat::k64 +#endif // _TARGET_AMD64_ + ) + { + LIMITED_METHOD_CONTRACT; + } + + virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode) + { + LIMITED_METHOD_CONTRACT + switch (refsize) + { + case k8: + return 2; + + case k32: + return 5; +#ifdef _TARGET_AMD64_ + case k64Small: + return 5 + 2; + + case k64: + return 12; +#endif // _TARGET_AMD64_ + default: + _ASSERTE(!"unexpected refsize"); + return 0; + + } + } + + virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer) + { + LIMITED_METHOD_CONTRACT + if (k8 == refsize) + { + pOutBuffer[0] = 0xeb; + *((__int8*)(pOutBuffer+1)) = (__int8)fixedUpReference; + } + else if (k32 == refsize) + { + pOutBuffer[0] = 0xe9; + *((__int32*)(pOutBuffer+1)) = (__int32)fixedUpReference; + } +#ifdef _TARGET_AMD64_ + else if (k64Small == refsize) + { + UINT64 TargetAddress = (INT64)pOutBuffer + fixedUpReference + GetSizeOfInstruction(refsize, variationCode); + _ASSERTE(FitsInU4(TargetAddress)); + + // mov eax, imm32 ; zero-extended + pOutBuffer[0] = 0xB8; + *((UINT32*)&pOutBuffer[1]) = (UINT32)TargetAddress; + + // jmp rax + pOutBuffer[5] = 0xFF; + pOutBuffer[6] = 0xE0; + } + else if (k64 == refsize) + { + // mov rax, imm64 + pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT; + pOutBuffer[1] = 0xB8; + *((UINT64*)&pOutBuffer[2]) = (UINT64)(((INT64)pOutBuffer) + fixedUpReference + GetSizeOfInstruction(refsize, variationCode)); + + // jmp rax + pOutBuffer[10] = 0xFF; + pOutBuffer[11] = 0xE0; + } +#endif // _TARGET_AMD64_ + else + { + _ASSERTE(!"unreached"); + } + } + + virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset) + { + STATIC_CONTRACT_NOTHROW; + STATIC_CONTRACT_GC_NOTRIGGER; + STATIC_CONTRACT_FORBID_FAULT; + + + if (fExternal) + { + switch (refsize) + { + case InstructionFormat::k8: + // For external, we don't have enough info to predict + // the offset. + return FALSE; + + case InstructionFormat::k32: + return sizeof(PVOID) <= sizeof(UINT32); + +#ifdef _TARGET_AMD64_ + case InstructionFormat::k64Small: + return FitsInI4(offset); + + case InstructionFormat::k64: + // intentional fallthru +#endif + case InstructionFormat::kAllowAlways: + return TRUE; + + default: + _ASSERTE(0); + return FALSE; + } + } + else + { + switch (refsize) + { + case InstructionFormat::k8: + return FitsInI1(offset); + + case InstructionFormat::k32: +#ifdef _TARGET_AMD64_ + return FitsInI4(offset); +#else + return TRUE; +#endif + +#ifdef _TARGET_AMD64_ + case InstructionFormat::k64Small: + // EmitInstruction emits a non-relative jmp for + // k64Small. We don't have enough info to predict the + // target address. (Even if we did, this would only + // handle the set of unsigned offsets with bit 31 set + // and no higher bits set, too uncommon/hard to test.) 
+ return FALSE; + + case InstructionFormat::k64: + // intentional fallthru +#endif + case InstructionFormat::kAllowAlways: + return TRUE; + default: + _ASSERTE(0); + return FALSE; + } + } + } +}; + + +//----------------------------------------------------------------------- +// InstructionFormat for conditional jump. Set the variationCode +// to members of X86CondCode. +//----------------------------------------------------------------------- +class X86CondJump : public InstructionFormat +{ + public: + X86CondJump(UINT allowedSizes) : InstructionFormat(allowedSizes) + { + LIMITED_METHOD_CONTRACT; + } + + virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode) + { + LIMITED_METHOD_CONTRACT + return (refsize == k8 ? 2 : 6); + } + + virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer) + { + LIMITED_METHOD_CONTRACT + if (refsize == k8) + { + pOutBuffer[0] = static_cast(0x70 | variationCode); + *((__int8*)(pOutBuffer+1)) = (__int8)fixedUpReference; + } + else + { + pOutBuffer[0] = 0x0f; + pOutBuffer[1] = static_cast(0x80 | variationCode); + *((__int32*)(pOutBuffer+2)) = (__int32)fixedUpReference; + } + } +}; + + +//----------------------------------------------------------------------- +// InstructionFormat for near call. +//----------------------------------------------------------------------- +class X86Call : public InstructionFormat +{ + public: + X86Call () + : InstructionFormat( InstructionFormat::k32 +#ifdef _TARGET_AMD64_ + | InstructionFormat::k64Small | InstructionFormat::k64 +#endif // _TARGET_AMD64_ + ) + { + LIMITED_METHOD_CONTRACT; + } + + virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode) + { + LIMITED_METHOD_CONTRACT; + + switch (refsize) + { + case k32: + return 5; + +#ifdef _TARGET_AMD64_ + case k64Small: + return 5 + 2; + + case k64: + return 10 + 2; +#endif // _TARGET_AMD64_ + + default: + _ASSERTE(!"unexpected refsize"); + return 0; + } + } + + virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer) + { + LIMITED_METHOD_CONTRACT + + switch (refsize) + { + case k32: + pOutBuffer[0] = 0xE8; + *((__int32*)(1+pOutBuffer)) = (__int32)fixedUpReference; + break; + +#ifdef _TARGET_AMD64_ + case k64Small: + UINT64 TargetAddress; + + TargetAddress = (INT64)pOutBuffer + fixedUpReference + GetSizeOfInstruction(refsize, variationCode); + _ASSERTE(FitsInU4(TargetAddress)); + + // mov eax, ; zero-extends + pOutBuffer[0] = 0xB8; + *((UINT32*)&pOutBuffer[1]) = (UINT32)TargetAddress; + + // call rax + pOutBuffer[5] = 0xff; + pOutBuffer[6] = 0xd0; + break; + + case k64: + // mov rax, + pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT; + pOutBuffer[1] = 0xB8; + *((UINT64*)&pOutBuffer[2]) = (UINT64)(((INT64)pOutBuffer) + fixedUpReference + GetSizeOfInstruction(refsize, variationCode)); + + // call rax + pOutBuffer[10] = 0xff; + pOutBuffer[11] = 0xd0; + break; +#endif // _TARGET_AMD64_ + + default: + _ASSERTE(!"unreached"); + break; + } + } + +// For x86, the default CanReach implementation will suffice. It only needs +// to handle k32. +#ifdef _TARGET_AMD64_ + virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset) + { + if (fExternal) + { + switch (refsize) + { + case InstructionFormat::k32: + // For external, we don't have enough info to predict + // the offset. 
+ return FALSE; + + case InstructionFormat::k64Small: + return FitsInI4(offset); + + case InstructionFormat::k64: + // intentional fallthru + case InstructionFormat::kAllowAlways: + return TRUE; + + default: + _ASSERTE(0); + return FALSE; + } + } + else + { + switch (refsize) + { + case InstructionFormat::k32: + return FitsInI4(offset); + + case InstructionFormat::k64Small: + // EmitInstruction emits a non-relative jmp for + // k64Small. We don't have enough info to predict the + // target address. (Even if we did, this would only + // handle the set of unsigned offsets with bit 31 set + // and no higher bits set, too uncommon/hard to test.) + return FALSE; + + case InstructionFormat::k64: + // intentional fallthru + case InstructionFormat::kAllowAlways: + return TRUE; + default: + _ASSERTE(0); + return FALSE; + } + } + } +#endif // _TARGET_AMD64_ +}; + + +//----------------------------------------------------------------------- +// InstructionFormat for push imm32. +//----------------------------------------------------------------------- +class X86PushImm32 : public InstructionFormat +{ + public: + X86PushImm32(UINT allowedSizes) : InstructionFormat(allowedSizes) + { + LIMITED_METHOD_CONTRACT; + } + + virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode) + { + LIMITED_METHOD_CONTRACT; + + return 5; + } + + virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer) + { + LIMITED_METHOD_CONTRACT; + + pOutBuffer[0] = 0x68; + // only support absolute pushimm32 of the label address. The fixedUpReference is + // the offset to the label from the current point, so add to get address + *((__int32*)(1+pOutBuffer)) = (__int32)(fixedUpReference); + } +}; + +#if defined(_TARGET_AMD64_) +//----------------------------------------------------------------------- +// InstructionFormat for lea reg, [RIP relative]. +//----------------------------------------------------------------------- +class X64LeaRIP : public InstructionFormat +{ + public: + X64LeaRIP() : InstructionFormat(InstructionFormat::k64Small) + { + LIMITED_METHOD_CONTRACT; + } + + virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode) + { + LIMITED_METHOD_CONTRACT; + + return 7; + } + + virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset) + { + if (fExternal) + { + switch (refsize) + { + case InstructionFormat::k64Small: + // For external, we don't have enough info to predict + // the offset. + return FALSE; + + case InstructionFormat::k64: + // intentional fallthru + case InstructionFormat::kAllowAlways: + return TRUE; + + default: + _ASSERTE(0); + return FALSE; + } + } + else + { + switch (refsize) + { + case InstructionFormat::k64Small: + return FitsInI4(offset); + + case InstructionFormat::k64: + // intentional fallthru + case InstructionFormat::kAllowAlways: + return TRUE; + + default: + _ASSERTE(0); + return FALSE; + } + } + } + + virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer) + { + LIMITED_METHOD_CONTRACT; + + X86Reg reg = (X86Reg)variationCode; + BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT; + + if (reg >= kR8) + { + rex |= REX_MODRM_REG_EXT; + reg = X86RegFromAMD64Reg(reg); + } + + pOutBuffer[0] = rex; + pOutBuffer[1] = 0x8D; + pOutBuffer[2] = 0x05 | (reg << 3); + // only support absolute pushimm32 of the label address. 
The fixedUpReference is + // the offset to the label from the current point, so add to get address + *((__int32*)(3+pOutBuffer)) = (__int32)(fixedUpReference); + } +}; + +#endif // _TARGET_AMD64_ + +#if defined(_TARGET_AMD64_) +static BYTE gX64NearJumpSetup[sizeof(X64NearJumpSetup)]; +static BYTE gX64NearJumpExecute[sizeof(X64NearJumpExecute)]; +static BYTE gX64LeaRIP[sizeof(X64LeaRIP)]; +#endif + +static BYTE gX86NearJump[sizeof(X86NearJump)]; +static BYTE gX86CondJump[sizeof(X86CondJump)]; +static BYTE gX86Call[sizeof(X86Call)]; +static BYTE gX86PushImm32[sizeof(X86PushImm32)]; + +/* static */ void StubLinkerCPU::Init() +{ + CONTRACTL + { + THROWS; + GC_NOTRIGGER; + INJECT_FAULT(COMPlusThrowOM();); + } + CONTRACTL_END; + new (gX86NearJump) X86NearJump(); + new (gX86CondJump) X86CondJump( InstructionFormat::k8|InstructionFormat::k32); + new (gX86Call) X86Call(); + new (gX86PushImm32) X86PushImm32(InstructionFormat::k32); + +#if defined(_TARGET_AMD64_) + new (gX64NearJumpSetup) X64NearJumpSetup(); + new (gX64NearJumpExecute) X64NearJumpExecute(); + new (gX64LeaRIP) X64LeaRIP(); +#endif +} + +//--------------------------------------------------------------- +// Emits: +// mov destReg, srcReg +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitMovRegReg(X86Reg destReg, X86Reg srcReg) +{ + STANDARD_VM_CONTRACT; + +#ifdef _TARGET_AMD64_ + BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT; + + if (destReg >= kR8) + { + rex |= REX_MODRM_RM_EXT; + destReg = X86RegFromAMD64Reg(destReg); + } + if (srcReg >= kR8) + { + rex |= REX_MODRM_REG_EXT; + srcReg = X86RegFromAMD64Reg(srcReg); + } + Emit8(rex); +#endif + + Emit8(0x89); + Emit8(static_cast(0xC0 | (srcReg << 3) | destReg)); +} + +//--------------------------------------------------------------- + +VOID StubLinkerCPU::X86EmitMovSPReg(X86Reg srcReg) +{ + STANDARD_VM_CONTRACT; + const X86Reg kESP = (X86Reg)4; + X86EmitMovRegReg(kESP, srcReg); +} + +VOID StubLinkerCPU::X86EmitMovRegSP(X86Reg destReg) +{ + STANDARD_VM_CONTRACT; + const X86Reg kESP = (X86Reg)4; + X86EmitMovRegReg(destReg, kESP); +} + + +//--------------------------------------------------------------- +// Emits: +// PUSH +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitPushReg(X86Reg reg) +{ + STANDARD_VM_CONTRACT; + +#ifdef STUBLINKER_GENERATES_UNWIND_INFO + X86Reg origReg = reg; +#endif + +#ifdef _TARGET_AMD64_ + if (reg >= kR8) + { + Emit8(REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT | REX_OPCODE_REG_EXT); + reg = X86RegFromAMD64Reg(reg); + } +#endif + Emit8(static_cast(0x50 + reg)); + +#ifdef STUBLINKER_GENERATES_UNWIND_INFO + if (IsPreservedReg(origReg)) + { + UnwindPushedReg(origReg); + } + else +#endif + { + Push(sizeof(void*)); + } +} + + +//--------------------------------------------------------------- +// Emits: +// POP +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitPopReg(X86Reg reg) +{ + STANDARD_VM_CONTRACT; + +#ifdef _TARGET_AMD64_ + if (reg >= kR8) + { + Emit8(REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT | REX_OPCODE_REG_EXT); + reg = X86RegFromAMD64Reg(reg); + } +#endif // _TARGET_AMD64_ + + Emit8(static_cast(0x58 + reg)); + Pop(sizeof(void*)); +} + +//--------------------------------------------------------------- +// Emits: +// PUSH +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitPushImm32(UINT32 value) +{ + STANDARD_VM_CONTRACT; + + Emit8(0x68); + Emit32(value); + Push(sizeof(void*)); +} + 
+ +//--------------------------------------------------------------- +// Emits: +// PUSH +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitPushImm32(CodeLabel &target) +{ + STANDARD_VM_CONTRACT; + + EmitLabelRef(&target, reinterpret_cast(gX86PushImm32), 0); +} + + +//--------------------------------------------------------------- +// Emits: +// PUSH +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitPushImm8(BYTE value) +{ + STANDARD_VM_CONTRACT; + + Emit8(0x6a); + Emit8(value); + Push(sizeof(void*)); +} + + +//--------------------------------------------------------------- +// Emits: +// PUSH +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitPushImmPtr(LPVOID value WIN64_ARG(X86Reg tmpReg /*=kR10*/)) +{ + STANDARD_VM_CONTRACT; + +#ifdef _TARGET_AMD64_ + X86EmitRegLoad(tmpReg, (UINT_PTR) value); + X86EmitPushReg(tmpReg); +#else + X86EmitPushImm32((UINT_PTR) value); +#endif +} + +//--------------------------------------------------------------- +// Emits: +// XOR , +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitZeroOutReg(X86Reg reg) +{ + STANDARD_VM_CONTRACT; + +#ifdef _TARGET_AMD64_ + // 32-bit results are zero-extended, so we only need the REX byte if + // it's an extended register. + if (reg >= kR8) + { + Emit8(REX_PREFIX_BASE | REX_MODRM_REG_EXT | REX_MODRM_RM_EXT); + reg = X86RegFromAMD64Reg(reg); + } +#endif + Emit8(0x33); + Emit8(static_cast(0xc0 | (reg << 3) | reg)); +} + +//--------------------------------------------------------------- +// Emits: +// jmp [reg] +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitJumpReg(X86Reg reg) +{ + CONTRACTL + { + STANDARD_VM_CHECK; + } + CONTRACTL_END; + + Emit8(0xff); + Emit8(static_cast(0xe0) | static_cast(reg)); +} + +//--------------------------------------------------------------- +// Emits: +// CMP ,imm32 +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitCmpRegImm32(X86Reg reg, INT32 imm32) +{ + CONTRACTL + { + STANDARD_VM_CHECK; + PRECONDITION((int) reg < NumX86Regs); + } + CONTRACTL_END; + +#ifdef _TARGET_AMD64_ + BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT; + + if (reg >= kR8) + { + rex |= REX_OPCODE_REG_EXT; + reg = X86RegFromAMD64Reg(reg); + } + Emit8(rex); +#endif + + if (FitsInI1(imm32)) { + Emit8(0x83); + Emit8(static_cast(0xF8 | reg)); + Emit8((INT8)imm32); + } else { + Emit8(0x81); + Emit8(static_cast(0xF8 | reg)); + Emit32(imm32); + } +} + +#ifdef _TARGET_AMD64_ +//--------------------------------------------------------------- +// Emits: +// CMP [reg+offs], imm32 +// CMP [reg], imm32 +//--------------------------------------------------------------- +VOID StubLinkerCPU:: X86EmitCmpRegIndexImm32(X86Reg reg, INT32 offs, INT32 imm32) +{ + STANDARD_VM_CONTRACT; + + BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT; + + if (reg >= kR8) + { + rex |= REX_OPCODE_REG_EXT; + reg = X86RegFromAMD64Reg(reg); + } + Emit8(rex); + + X64EmitCmp32RegIndexImm32(reg, offs, imm32); +} + +VOID StubLinkerCPU:: X64EmitCmp32RegIndexImm32(X86Reg reg, INT32 offs, INT32 imm32) +#else // _TARGET_AMD64_ +VOID StubLinkerCPU:: X86EmitCmpRegIndexImm32(X86Reg reg, INT32 offs, INT32 imm32) +#endif // _TARGET_AMD64_ +{ + CONTRACTL + { + STANDARD_VM_CHECK; + PRECONDITION((int) reg < NumX86Regs); + } + CONTRACTL_END; + + // + // The binary representation of "cmp [mem], imm32" is : + // 
1000-00sw mod11-1r/m + // + + unsigned wBit = (FitsInI1(imm32) ? 0 : 1); + Emit8(static_cast(0x80 | wBit)); + + unsigned modBits; + if (offs == 0) + modBits = 0; + else if (FitsInI1(offs)) + modBits = 1; + else + modBits = 2; + + Emit8(static_cast((modBits << 6) | 0x38 | reg)); + + if (offs) + { + if (FitsInI1(offs)) + Emit8((INT8)offs); + else + Emit32(offs); + } + + if (FitsInI1(imm32)) + Emit8((INT8)imm32); + else + Emit32(imm32); +} + +//--------------------------------------------------------------- +// Emits: +#if defined(_TARGET_AMD64_) +// mov rax, +// add rsp, imm32 +// jmp rax +#else +// add rsp, imm32 +// jmp +#endif +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitTailcallWithESPAdjust(CodeLabel *pTarget, INT32 imm32) +{ + STANDARD_VM_CONTRACT; + +#if defined(_TARGET_AMD64_) + EmitLabelRef(pTarget, reinterpret_cast(gX64NearJumpSetup), 0); + X86EmitAddEsp(imm32); + EmitLabelRef(pTarget, reinterpret_cast(gX64NearJumpExecute), 0); +#else + X86EmitAddEsp(imm32); + X86EmitNearJump(pTarget); +#endif +} + +//--------------------------------------------------------------- +// Emits: +#if defined(_TARGET_AMD64_) +// mov rax, +// pop reg +// jmp rax +#else +// pop reg +// jmp +#endif +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitTailcallWithSinglePop(CodeLabel *pTarget, X86Reg reg) +{ + STANDARD_VM_CONTRACT; + +#if defined(_TARGET_AMD64_) + EmitLabelRef(pTarget, reinterpret_cast(gX64NearJumpSetup), 0); + X86EmitPopReg(reg); + EmitLabelRef(pTarget, reinterpret_cast(gX64NearJumpExecute), 0); +#else + X86EmitPopReg(reg); + X86EmitNearJump(pTarget); +#endif +} + +//--------------------------------------------------------------- +// Emits: +// JMP or +// JMP (gX86NearJump), 0); +} + + +//--------------------------------------------------------------- +// Emits: +// Jcc or +// Jcc +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitCondJump(CodeLabel *target, X86CondCode::cc condcode) +{ + STANDARD_VM_CONTRACT; + EmitLabelRef(target, reinterpret_cast(gX86CondJump), condcode); +} + + +//--------------------------------------------------------------- +// Emits: +// call +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitCall(CodeLabel *target, int iArgBytes) +{ + STANDARD_VM_CONTRACT; + + EmitLabelRef(target, reinterpret_cast(gX86Call), 0); + + INDEBUG(Emit8(0x90)); // Emit a nop after the call in debug so that + // we know that this is a call that can directly call + // managed code +#ifndef _TARGET_AMD64_ + Pop(iArgBytes); +#endif // !_TARGET_AMD64_ +} + + +//--------------------------------------------------------------- +// Emits: +// ret n +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitReturn(WORD wArgBytes) +{ + CONTRACTL + { + STANDARD_VM_CHECK; +#ifdef _TARGET_AMD64_ + PRECONDITION(wArgBytes == 0); +#endif + + } + CONTRACTL_END; + + if (wArgBytes == 0) + Emit8(0xc3); + else + { + Emit8(0xc2); + Emit16(wArgBytes); + } + + Pop(wArgBytes); +} + +#ifdef _TARGET_AMD64_ +//--------------------------------------------------------------- +// Emits: +// JMP or +// JMP (gX64LeaRIP), reg); +} +#endif // _TARGET_AMD64_ + + + +VOID StubLinkerCPU::X86EmitPushRegs(unsigned regSet) +{ + STANDARD_VM_CONTRACT; + + for (X86Reg r = kEAX; r <= NumX86Regs; r = (X86Reg)(r+1)) + if (regSet & (1U<= kEAX; r = (X86Reg)(r-1)) + if (regSet & (1U<, [ + ] 
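+//
+// (Encoding note: this is opcode 8B /r, with the ModRM mod field selecting the
+// displacement size - 00b none, 01b disp8, 10b disp32 - so, for example,
+// "mov eax, [esi+8]" assembles to 8B 46 08.)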
+//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitIndexRegLoad(X86Reg dstreg, + X86Reg srcreg, + __int32 ofs) +{ + STANDARD_VM_CONTRACT; + X86EmitOffsetModRM(0x8b, dstreg, srcreg, ofs); +} + + +//--------------------------------------------------------------- +// Emits: +// mov [ + ], +// +// Note: If you intend to use this to perform 64bit moves to a RSP +// based offset, then this method may not work. Consider +// using X86EmitIndexRegStoreRSP. +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitIndexRegStore(X86Reg dstreg, + __int32 ofs, + X86Reg srcreg) +{ + STANDARD_VM_CONTRACT; + + if (dstreg != kESP_Unsafe) + X86EmitOffsetModRM(0x89, srcreg, dstreg, ofs); + else + X86EmitOp(0x89, srcreg, (X86Reg)kESP_Unsafe, ofs); +} + +#if defined(_TARGET_AMD64_) +//--------------------------------------------------------------- +// Emits: +// mov [RSP + ], +// +// It marks the instruction has 64bit so that the processor +// performs a 8byte data move to a RSP based stack location. +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitIndexRegStoreRSP(__int32 ofs, + X86Reg srcreg) +{ + STANDARD_VM_CONTRACT; + + X86EmitOp(0x89, srcreg, (X86Reg)kESP_Unsafe, ofs, (X86Reg)0, 0, k64BitOp); +} + +//--------------------------------------------------------------- +// Emits: +// mov [R12 + ], +// +// It marks the instruction has 64bit so that the processor +// performs a 8byte data move to a R12 based stack location. +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitIndexRegStoreR12(__int32 ofs, + X86Reg srcreg) +{ + STANDARD_VM_CONTRACT; + + X86EmitOp(0x89, srcreg, (X86Reg)kR12, ofs, (X86Reg)0, 0, k64BitOp); +} +#endif // defined(_TARGET_AMD64_) + +//--------------------------------------------------------------- +// Emits: +// push dword ptr [ + ] +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitIndexPush(X86Reg srcreg, __int32 ofs) +{ + STANDARD_VM_CONTRACT; + + if(srcreg != kESP_Unsafe) + X86EmitOffsetModRM(0xff, (X86Reg)0x6, srcreg, ofs); + else + X86EmitOp(0xff,(X86Reg)0x6, srcreg, ofs); + + Push(sizeof(void*)); +} + +//--------------------------------------------------------------- +// Emits: +// push dword ptr [ + * + ] +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitBaseIndexPush( + X86Reg baseReg, + X86Reg indexReg, + __int32 scale, + __int32 ofs) +{ + STANDARD_VM_CONTRACT; + + X86EmitOffsetModRmSIB(0xff, (X86Reg)0x6, baseReg, indexReg, scale, ofs); + Push(sizeof(void*)); +} + +//--------------------------------------------------------------- +// Emits: +// push dword ptr [ESP + ] +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitSPIndexPush(__int32 ofs) +{ + STANDARD_VM_CONTRACT; + + __int8 ofs8 = (__int8) ofs; + if (ofs == (__int32) ofs8) + { + // The offset can be expressed in a byte (can use the byte + // form of the push esp instruction) + + BYTE code[] = {0xff, 0x74, 0x24, ofs8}; + EmitBytes(code, sizeof(code)); + } + else + { + // The offset requires 4 bytes (need to use the long form + // of the push esp instruction) + + BYTE code[] = {0xff, 0xb4, 0x24, 0x0, 0x0, 0x0, 0x0}; + *(__int32 *)(&code[3]) = ofs; + EmitBytes(code, sizeof(code)); + } + + Push(sizeof(void*)); +} + + +//--------------------------------------------------------------- +// Emits: +// pop dword ptr [ + ] 
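+//
+// (Encoding note: this is opcode 8F /0; "pop dword ptr [esi+8]" assembles to
+// 8F 46 08. An ESP base cannot be expressed with a bare ModRM byte, which is
+// why the kESP_Unsafe case below routes through X86EmitOp to get a SIB byte.)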
+//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitIndexPop(X86Reg srcreg, __int32 ofs) +{ + STANDARD_VM_CONTRACT; + + if(srcreg != kESP_Unsafe) + X86EmitOffsetModRM(0x8f, (X86Reg)0x0, srcreg, ofs); + else + X86EmitOp(0x8f,(X86Reg)0x0, srcreg, ofs); + + Pop(sizeof(void*)); +} + +//--------------------------------------------------------------- +// Emits: +// lea , [ + +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitIndexLea(X86Reg dstreg, X86Reg srcreg, __int32 ofs) +{ + CONTRACTL + { + STANDARD_VM_CHECK; + PRECONDITION((int) dstreg < NumX86Regs); + PRECONDITION((int) srcreg < NumX86Regs); + } + CONTRACTL_END; + + X86EmitOffsetModRM(0x8d, dstreg, srcreg, ofs); +} + +#if defined(_TARGET_AMD64_) +VOID StubLinkerCPU::X86EmitIndexLeaRSP(X86Reg dstreg, X86Reg srcreg, __int32 ofs) +{ + STANDARD_VM_CONTRACT; + + X86EmitOp(0x8d, dstreg, (X86Reg)kESP_Unsafe, ofs, (X86Reg)0, 0, k64BitOp); +} +#endif // defined(_TARGET_AMD64_) + +//--------------------------------------------------------------- +// Emits: +// sub esp, IMM +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitSubEsp(INT32 imm32) +{ + STANDARD_VM_CONTRACT; + + if (imm32 < 0x1000-100) + { + // As long as the esp size is less than 1 page plus a small + // safety fudge factor, we can just bump esp. + X86EmitSubEspWorker(imm32); + } + else + { + // Otherwise, must touch at least one byte for each page. + while (imm32 >= 0x1000) + { + + X86EmitSubEspWorker(0x1000-4); + X86EmitPushReg(kEAX); + + imm32 -= 0x1000; + } + if (imm32 < 500) + { + X86EmitSubEspWorker(imm32); + } + else + { + // If the remainder is large, touch the last byte - again, + // as a fudge factor. + X86EmitSubEspWorker(imm32-4); + X86EmitPushReg(kEAX); + } + } +} + + +//--------------------------------------------------------------- +// Emits: +// sub esp, IMM +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitSubEspWorker(INT32 imm32) +{ + CONTRACTL + { + STANDARD_VM_CHECK; + + // On Win32, stacks must be faulted in one page at a time. 
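+        // Windows grows the committed stack by touching a guard page; dropping
+        // ESP by more than a page without touching the skipped memory can land
+        // a later access beyond the guard page and fault instead of growing the
+        // stack. That is why X86EmitSubEsp above interleaves a push with each
+        // page-sized adjustment (roughly "sub esp, 0FFCh / push eax" per 4K).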
+ PRECONDITION(imm32 < 0x1000); + } + CONTRACTL_END; + + if (!imm32) + { + // nop + } + else + { + X86_64BitOperands(); + + if (FitsInI1(imm32)) + { + Emit16(0xec83); + Emit8((INT8)imm32); + } + else + { + Emit16(0xec81); + Emit32(imm32); + } + + Push(imm32); + } +} + + +//--------------------------------------------------------------- +// Emits: +// add esp, IMM +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitAddEsp(INT32 imm32) +{ + STANDARD_VM_CONTRACT; + + if (!imm32) + { + // nop + } + else + { + X86_64BitOperands(); + + if (FitsInI1(imm32)) + { + Emit16(0xc483); + Emit8((INT8)imm32); + } + else + { + Emit16(0xc481); + Emit32(imm32); + } + } + Pop(imm32); +} + +VOID StubLinkerCPU::X86EmitAddReg(X86Reg reg, INT32 imm32) +{ + CONTRACTL + { + STANDARD_VM_CHECK; + PRECONDITION((int) reg < NumX86Regs); + } + CONTRACTL_END; + + if (imm32 == 0) + return; + +#ifdef _TARGET_AMD64_ + BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT; + + if (reg >= kR8) + { + rex |= REX_OPCODE_REG_EXT; + reg = X86RegFromAMD64Reg(reg); + } + Emit8(rex); +#endif + + if (FitsInI1(imm32)) { + Emit8(0x83); + Emit8(static_cast(0xC0 | reg)); + Emit8(static_cast(imm32)); + } else { + Emit8(0x81); + Emit8(static_cast(0xC0 | reg)); + Emit32(imm32); + } +} + +//--------------------------------------------------------------- +// Emits: add destReg, srcReg +//--------------------------------------------------------------- + +VOID StubLinkerCPU::X86EmitAddRegReg(X86Reg destReg, X86Reg srcReg) +{ + STANDARD_VM_CONTRACT; + + X86EmitR2ROp(0x01, srcReg, destReg); +} + + + + +VOID StubLinkerCPU::X86EmitSubReg(X86Reg reg, INT32 imm32) +{ + CONTRACTL + { + STANDARD_VM_CHECK; + PRECONDITION((int) reg < NumX86Regs); + } + CONTRACTL_END; + +#ifdef _TARGET_AMD64_ + BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT; + + if (reg >= kR8) + { + rex |= REX_OPCODE_REG_EXT; + reg = X86RegFromAMD64Reg(reg); + } + Emit8(rex); +#endif + + if (FitsInI1(imm32)) { + Emit8(0x83); + Emit8(static_cast(0xE8 | reg)); + Emit8(static_cast(imm32)); + } else { + Emit8(0x81); + Emit8(static_cast(0xE8 | reg)); + Emit32(imm32); + } +} + +//--------------------------------------------------------------- +// Emits: sub destReg, srcReg +//--------------------------------------------------------------- + +VOID StubLinkerCPU::X86EmitSubRegReg(X86Reg destReg, X86Reg srcReg) +{ + STANDARD_VM_CONTRACT; + + X86EmitR2ROp(0x29, srcReg, destReg); +} + +#if defined(_TARGET_AMD64_) + +//--------------------------------------------------------------- +// movdqa destXmmreg, srcXmmReg +//--------------------------------------------------------------- +VOID StubLinkerCPU::X64EmitMovXmmXmm(X86Reg destXmmreg, X86Reg srcXmmReg) +{ + STANDARD_VM_CONTRACT; + // There are several that could be used to mov xmm registers. MovAps is + // what C++ compiler uses so let's use it here too. 
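+    // (MOVAPS xmm,xmm is the two-byte 0F 28 /r form - e.g. "movaps xmm1, xmm2"
+    // is 0F 28 CA - whereas MOVDQA/MOVAPD need an extra 66 prefix; for a plain
+    // register-to-register copy the data type does not matter, so the shorter
+    // encoding is used.)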
+ X86EmitR2ROp(X86_INSTR_MOVAPS_R_RM, destXmmreg, srcXmmReg, k32BitOp); +} + +//--------------------------------------------------------------- +// movdqa XmmN, [baseReg + offset] +//--------------------------------------------------------------- +VOID StubLinkerCPU::X64EmitMovdqaFromMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs) +{ + STANDARD_VM_CONTRACT; + X64EmitMovXmmWorker(0x66, 0x6F, Xmmreg, baseReg, ofs); +} + +//--------------------------------------------------------------- +// movdqa [baseReg + offset], XmmN +//--------------------------------------------------------------- +VOID StubLinkerCPU::X64EmitMovdqaToMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs) +{ + STANDARD_VM_CONTRACT; + X64EmitMovXmmWorker(0x66, 0x7F, Xmmreg, baseReg, ofs); +} + +//--------------------------------------------------------------- +// movsd XmmN, [baseReg + offset] +//--------------------------------------------------------------- +VOID StubLinkerCPU::X64EmitMovSDFromMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs) +{ + STANDARD_VM_CONTRACT; + X64EmitMovXmmWorker(0xF2, 0x10, Xmmreg, baseReg, ofs); +} + +//--------------------------------------------------------------- +// movsd [baseReg + offset], XmmN +//--------------------------------------------------------------- +VOID StubLinkerCPU::X64EmitMovSDToMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs) +{ + STANDARD_VM_CONTRACT; + X64EmitMovXmmWorker(0xF2, 0x11, Xmmreg, baseReg, ofs); +} + +//--------------------------------------------------------------- +// movss XmmN, [baseReg + offset] +//--------------------------------------------------------------- +VOID StubLinkerCPU::X64EmitMovSSFromMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs) +{ + STANDARD_VM_CONTRACT; + X64EmitMovXmmWorker(0xF3, 0x10, Xmmreg, baseReg, ofs); +} + +//--------------------------------------------------------------- +// movss [baseReg + offset], XmmN +//--------------------------------------------------------------- +VOID StubLinkerCPU::X64EmitMovSSToMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs) +{ + STANDARD_VM_CONTRACT; + X64EmitMovXmmWorker(0xF3, 0x11, Xmmreg, baseReg, ofs); +} + +//--------------------------------------------------------------- +// Helper method for emitting of XMM from/to memory moves +//--------------------------------------------------------------- +VOID StubLinkerCPU::X64EmitMovXmmWorker(BYTE prefix, BYTE opcode, X86Reg Xmmreg, X86Reg baseReg, __int32 ofs) +{ + STANDARD_VM_CONTRACT; + + BYTE codeBuffer[10]; + unsigned int nBytes = 0; + + // Setup the legacyPrefix for movsd + codeBuffer[nBytes++] = prefix; + + // By default, assume we dont have to emit the REX byte. + bool fEmitRex = false; + + BYTE rex = REX_PREFIX_BASE; + + if (baseReg >= kR8) + { + rex |= REX_MODRM_RM_EXT; + baseReg = X86RegFromAMD64Reg(baseReg); + fEmitRex = true; + } + if (Xmmreg >= kXMM8) + { + rex |= REX_MODRM_REG_EXT; + Xmmreg = X86RegFromAMD64Reg(Xmmreg); + fEmitRex = true; + } + + if (fEmitRex == true) + { + codeBuffer[nBytes++] = rex; + } + + // Next, specify the two byte opcode - first byte is always 0x0F. + codeBuffer[nBytes++] = 0x0F; + codeBuffer[nBytes++] = opcode; + + BYTE modrm = static_cast((Xmmreg << 3) | baseReg); + bool fOffsetFitsInSignedByte = FitsInI1(ofs)?true:false; + + if (fOffsetFitsInSignedByte) + codeBuffer[nBytes++] = 0x40|modrm; + else + codeBuffer[nBytes++] = 0x80|modrm; + + // If we are dealing with RSP or R12 as the baseReg, we need to emit the SIB byte. 
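+    // (In ModRM, an r/m field of 100b does not name a base register at all; it
+    // means "a SIB byte follows". RSP and R12 both have 100b as their low three
+    // bits, so addressing off either of them needs the SIB byte 0x24 emitted
+    // below: scale 1, index "none", base 100b.)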
+ if ((baseReg == (X86Reg)4 /*kRSP*/) || (baseReg == kR12)) + { + codeBuffer[nBytes++] = 0x24; + } + + // Finally, specify the offset + if (fOffsetFitsInSignedByte) + { + codeBuffer[nBytes++] = (BYTE)ofs; + } + else + { + *((__int32*)(codeBuffer+nBytes)) = ofs; + nBytes += 4; + } + + _ASSERTE(nBytes <= _countof(codeBuffer)); + + // Lastly, emit the encoded bytes + EmitBytes(codeBuffer, nBytes); +} + +#endif // defined(_TARGET_AMD64_) + +//--------------------------------------------------------------- +// Emits a MOD/RM for accessing a dword at [ + ofs32] +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitOffsetModRM(BYTE opcode, X86Reg opcodereg, X86Reg indexreg, __int32 ofs) +{ + STANDARD_VM_CONTRACT; + + BYTE codeBuffer[7]; + BYTE* code = codeBuffer; + int nBytes = 0; +#ifdef _TARGET_AMD64_ + code++; + // + // code points to base X86 instruction, + // codeBuffer points to full AMD64 instruction + // + BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT; + + if (indexreg >= kR8) + { + rex |= REX_MODRM_RM_EXT; + indexreg = X86RegFromAMD64Reg(indexreg); + } + if (opcodereg >= kR8) + { + rex |= REX_MODRM_REG_EXT; + opcodereg = X86RegFromAMD64Reg(opcodereg); + } + + nBytes++; + code[-1] = rex; +#endif + code[0] = opcode; + nBytes++; + BYTE modrm = static_cast((opcodereg << 3) | indexreg); + if (ofs == 0 && indexreg != kEBP) + { + code[1] = modrm; + nBytes++; + EmitBytes(codeBuffer, nBytes); + } + else if (FitsInI1(ofs)) + { + code[1] = 0x40|modrm; + code[2] = (BYTE)ofs; + nBytes += 2; + EmitBytes(codeBuffer, nBytes); + } + else + { + code[1] = 0x80|modrm; + *((__int32*)(2+code)) = ofs; + nBytes += 5; + EmitBytes(codeBuffer, nBytes); + } +} + +//--------------------------------------------------------------- +// Emits a MOD/RM for accessing a dword at [ + * + ofs32] +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitOffsetModRmSIB(BYTE opcode, X86Reg opcodeOrReg, X86Reg baseReg, X86Reg indexReg, __int32 scale, __int32 ofs) +{ + CONTRACTL + { + STANDARD_VM_CHECK; + PRECONDITION(scale == 1 || scale == 2 || scale == 4 || scale == 8); + PRECONDITION(indexReg != kESP_Unsafe); + } + CONTRACTL_END; + + BYTE codeBuffer[8]; + BYTE* code = codeBuffer; + int nBytes = 0; + +#ifdef _TARGET_AMD64_ + _ASSERTE(!"NYI"); +#endif + code[0] = opcode; + nBytes++; + + BYTE scaleEnc = 0; + switch(scale) + { + case 1: scaleEnc = 0; break; + case 2: scaleEnc = 1; break; + case 4: scaleEnc = 2; break; + case 8: scaleEnc = 3; break; + default: _ASSERTE(!"Unexpected"); + } + + BYTE sib = static_cast((scaleEnc << 6) | (indexReg << 3) | baseReg); + + if (FitsInI1(ofs)) + { + code[1] = static_cast(0x44 | (opcodeOrReg << 3)); + code[2] = sib; + code[3] = (BYTE)ofs; + nBytes += 3; + EmitBytes(codeBuffer, nBytes); + } + else + { + code[1] = static_cast(0x84 | (opcodeOrReg << 3)); + code[2] = sib; + *(__int32*)(&code[3]) = ofs; + nBytes += 6; + EmitBytes(codeBuffer, nBytes); + } +} + + + +VOID StubLinkerCPU::X86EmitRegLoad(X86Reg reg, UINT_PTR imm) +{ + STANDARD_VM_CONTRACT; + + if (!imm) + { + X86EmitZeroOutReg(reg); + return; + } + + UINT cbimm = sizeof(void*); + +#ifdef _TARGET_AMD64_ + // amd64 zero-extends all 32-bit operations. If the immediate will fit in + // 32 bits, use the smaller encoding. 
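+    // (For example "mov eax, imm32" is B8+rd imm32 - five bytes - and still
+    // clears the upper half of RAX, while the full "mov rax, imm64" form is
+    // REX.W B8+rd imm64, ten bytes.)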
+ + if (reg >= kR8 || !FitsInU4(imm)) + { + BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT; + if (reg >= kR8) + { + rex |= REX_MODRM_RM_EXT; + reg = X86RegFromAMD64Reg(reg); + } + Emit8(rex); + } + else + { + // amd64 is little endian, so the &imm below will correctly read off + // the low 4 bytes. + cbimm = sizeof(UINT32); + } +#endif // _TARGET_AMD64_ + Emit8(0xB8 | (BYTE)reg); + EmitBytes((BYTE*)&imm, cbimm); +} + + +//--------------------------------------------------------------- +// Emits the most efficient form of the operation: +// +// opcode altreg, [basereg + scaledreg*scale + ofs] +// +// or +// +// opcode [basereg + scaledreg*scale + ofs], altreg +// +// (the opcode determines which comes first.) +// +// +// Limitations: +// +// scale must be 0,1,2,4 or 8. +// if scale == 0, scaledreg is ignored. +// basereg and altreg may be equal to 4 (ESP) but scaledreg cannot +// for some opcodes, "altreg" may actually select an operation +// rather than a second register argument. +// if basereg is EBP, scale must be 0. +// +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitOp(WORD opcode, + X86Reg altreg, + X86Reg basereg, + __int32 ofs /*=0*/, + X86Reg scaledreg /*=0*/, + BYTE scale /*=0*/ + AMD64_ARG(X86OperandSize OperandSize /*= k32BitOp*/)) +{ + CONTRACTL + { + STANDARD_VM_CHECK; + + // All 2-byte opcodes start with 0x0f. + PRECONDITION(!(opcode >> 8) || (opcode & 0xff) == 0x0f); + + PRECONDITION(scale == 0 || scale == 1 || scale == 2 || scale == 4 || scale == 8); + PRECONDITION(scaledreg != (X86Reg)4); + PRECONDITION(!(basereg == kEBP && scale != 0)); + + PRECONDITION( ((UINT)basereg) < NumX86Regs ); + PRECONDITION( ((UINT)scaledreg) < NumX86Regs ); + PRECONDITION( ((UINT)altreg) < NumX86Regs ); + } + CONTRACTL_END; + +#ifdef _TARGET_AMD64_ + if ( k64BitOp == OperandSize + || altreg >= kR8 + || basereg >= kR8 + || scaledreg >= kR8) + { + BYTE rex = REX_PREFIX_BASE; + + if (k64BitOp == OperandSize) + rex |= REX_OPERAND_SIZE_64BIT; + + if (altreg >= kR8) + { + rex |= REX_MODRM_REG_EXT; + altreg = X86RegFromAMD64Reg(altreg); + } + + if (basereg >= kR8) + { + // basereg might be in the modrm or sib fields. This will be + // decided below, but the encodings are the same either way. + _ASSERTE(REX_SIB_BASE_EXT == REX_MODRM_RM_EXT); + rex |= REX_SIB_BASE_EXT; + basereg = X86RegFromAMD64Reg(basereg); + } + + if (scaledreg >= kR8) + { + rex |= REX_SIB_INDEX_EXT; + scaledreg = X86RegFromAMD64Reg(scaledreg); + } + + Emit8(rex); + } +#endif // _TARGET_AMD64_ + + BYTE modrmbyte = static_cast(altreg << 3); + BOOL fNeedSIB = FALSE; + BYTE SIBbyte = 0; + BYTE ofssize; + BYTE scaleselect= 0; + + if (ofs == 0 && basereg != kEBP) + { + ofssize = 0; // Don't change this constant! + } + else if (FitsInI1(ofs)) + { + ofssize = 1; // Don't change this constant! + } + else + { + ofssize = 2; // Don't change this constant! 
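+        // (The three ofssize values are the ModRM mod-field encodings - 00b no
+        // displacement, 01b disp8, 10b disp32 - and are OR'ed into the ModRM
+        // byte below as (ofssize << 6), hence "don't change".)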
+ } + + switch (scale) + { + case 1: scaleselect = 0; break; + case 2: scaleselect = 1; break; + case 4: scaleselect = 2; break; + case 8: scaleselect = 3; break; + } + + if (scale == 0 && basereg != (X86Reg)4 /*ESP*/) + { + // [basereg + ofs] + modrmbyte |= basereg | (ofssize << 6); + } + else if (scale == 0) + { + // [esp + ofs] + _ASSERTE(basereg == (X86Reg)4); + fNeedSIB = TRUE; + SIBbyte = 0044; + + modrmbyte |= 4 | (ofssize << 6); + } + else + { + + //[basereg + scaledreg*scale + ofs] + + modrmbyte |= 0004 | (ofssize << 6); + fNeedSIB = TRUE; + SIBbyte = static_cast((scaleselect << 6) | (scaledreg << 3) | basereg); + + } + + //Some sanity checks: + _ASSERTE(!(fNeedSIB && basereg == kEBP)); // EBP not valid as a SIB base register. + _ASSERTE(!( (!fNeedSIB) && basereg == (X86Reg)4 )) ; // ESP addressing requires SIB byte + + Emit8((BYTE)opcode); + + if (opcode >> 8) + Emit8(opcode >> 8); + + Emit8(modrmbyte); + if (fNeedSIB) + { + Emit8(SIBbyte); + } + switch (ofssize) + { + case 0: break; + case 1: Emit8( (__int8)ofs ); break; + case 2: Emit32( ofs ); break; + default: _ASSERTE(!"Can't get here."); + } +} + + +// Emits +// +// opcode altreg, modrmreg +// +// or +// +// opcode modrmreg, altreg +// +// (the opcode determines which one comes first) +// +// For single-operand opcodes, "altreg" actually selects +// an operation rather than a register. + +VOID StubLinkerCPU::X86EmitR2ROp (WORD opcode, + X86Reg altreg, + X86Reg modrmreg + AMD64_ARG(X86OperandSize OperandSize /*= k64BitOp*/) + ) +{ + CONTRACTL + { + STANDARD_VM_CHECK; + + // All 2-byte opcodes start with 0x0f. + PRECONDITION(!(opcode >> 8) || (opcode & 0xff) == 0x0f); + + PRECONDITION( ((UINT)altreg) < NumX86Regs ); + PRECONDITION( ((UINT)modrmreg) < NumX86Regs ); + } + CONTRACTL_END; + +#ifdef _TARGET_AMD64_ + BYTE rex = 0; + + if (modrmreg >= kR8) + { + rex |= REX_MODRM_RM_EXT; + modrmreg = X86RegFromAMD64Reg(modrmreg); + } + + if (altreg >= kR8) + { + rex |= REX_MODRM_REG_EXT; + altreg = X86RegFromAMD64Reg(altreg); + } + + if (k64BitOp == OperandSize) + rex |= REX_OPERAND_SIZE_64BIT; + + if (rex) + Emit8(REX_PREFIX_BASE | rex); +#endif // _TARGET_AMD64_ + + Emit8((BYTE)opcode); + + if (opcode >> 8) + Emit8(opcode >> 8); + + Emit8(static_cast(0300 | (altreg << 3) | modrmreg)); +} + + +//--------------------------------------------------------------- +// Emits: +// op altreg, [esp+ofs] +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitEspOffset(BYTE opcode, + X86Reg altreg, + __int32 ofs + AMD64_ARG(X86OperandSize OperandSize /*= k64BitOp*/) + ) +{ + STANDARD_VM_CONTRACT; + + BYTE codeBuffer[8]; + BYTE *code = codeBuffer; + int nBytes; + +#ifdef _TARGET_AMD64_ + BYTE rex = 0; + + if (k64BitOp == OperandSize) + rex |= REX_OPERAND_SIZE_64BIT; + + if (altreg >= kR8) + { + rex |= REX_MODRM_REG_EXT; + altreg = X86RegFromAMD64Reg(altreg); + } + + if (rex) + { + *code = (REX_PREFIX_BASE | rex); + code++; + nBytes = 1; + } + else +#endif // _TARGET_AMD64_ + { + nBytes = 0; + } + + code[0] = opcode; + BYTE modrm = static_cast((altreg << 3) | 004); + if (ofs == 0) + { + code[1] = modrm; + code[2] = 0044; + EmitBytes(codeBuffer, 3 + nBytes); + } + else if (FitsInI1(ofs)) + { + code[1] = 0x40|modrm; + code[2] = 0044; + code[3] = (BYTE)ofs; + EmitBytes(codeBuffer, 4 + nBytes); + } + else + { + code[1] = 0x80|modrm; + code[2] = 0044; + *((__int32*)(3+code)) = ofs; + EmitBytes(codeBuffer, 7 + nBytes); + } + +} + +//--------------------------------------------------------------- + +VOID 
StubLinkerCPU::X86EmitPushEBPframe() +{ + STANDARD_VM_CONTRACT; + + // push ebp + X86EmitPushReg(kEBP); + // mov ebp,esp + X86EmitMovRegSP(kEBP); +} + +#ifdef _DEBUG +//--------------------------------------------------------------- +// Emits: +// mov ,0xcccccccc +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitDebugTrashReg(X86Reg reg) +{ + STANDARD_VM_CONTRACT; + +#ifdef _TARGET_AMD64_ + BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT; + + if (reg >= kR8) + { + rex |= REX_OPCODE_REG_EXT; + reg = X86RegFromAMD64Reg(reg); + } + Emit8(rex); + Emit8(0xb8|reg); + Emit64(0xcccccccccccccccc); +#else + Emit8(static_cast(0xb8 | reg)); + Emit32(0xcccccccc); +#endif +} +#endif //_DEBUG + + +// Get X86Reg indexes of argument registers based on offset into ArgumentRegister +X86Reg GetX86ArgumentRegisterFromOffset(size_t ofs) +{ + CONTRACT(X86Reg) + { + NOTHROW; + GC_NOTRIGGER; + + } + CONTRACT_END; + + #define ARGUMENT_REGISTER(reg) if (ofs == offsetof(ArgumentRegisters, reg)) RETURN k##reg ; + ENUM_ARGUMENT_REGISTERS(); + #undef ARGUMENT_REGISTER + + _ASSERTE(0);//Can't get here. + RETURN kEBP; +} + + +#ifdef _TARGET_AMD64_ +static const X86Reg c_argRegs[] = { + #define ARGUMENT_REGISTER(regname) k##regname, + ENUM_ARGUMENT_REGISTERS() + #undef ARGUMENT_REGISTER +}; +#endif + + +#ifndef CROSSGEN_COMPILE + +#if defined(_DEBUG) && (defined(_TARGET_AMD64_) || defined(_TARGET_X86_)) && !defined(FEATURE_PAL) +void StubLinkerCPU::EmitJITHelperLoggingThunk(PCODE pJitHelper, LPVOID helperFuncCount) +{ + STANDARD_VM_CONTRACT; + + VMHELPCOUNTDEF* pHelperFuncCount = (VMHELPCOUNTDEF*)helperFuncCount; +/* + push rcx + mov rcx, &(pHelperFuncCount->count) + lock inc [rcx] + pop rcx +#ifdef _TARGET_AMD64_ + mov rax, + jmp rax +#else + jmp +#endif +*/ + + // push rcx + // mov rcx, &(pHelperFuncCount->count) + X86EmitPushReg(kECX); + X86EmitRegLoad(kECX, (UINT_PTR)(&(pHelperFuncCount->count))); + + // lock inc [rcx] + BYTE lock_inc_RCX[] = { 0xf0, 0xff, 0x01 }; + EmitBytes(lock_inc_RCX, sizeof(lock_inc_RCX)); + +#if defined(_TARGET_AMD64_) + // mov rax, + // pop rcx + // jmp rax +#else + // pop rcx + // jmp +#endif + X86EmitTailcallWithSinglePop(NewExternalCodeLabel(pJitHelper), kECX); +} +#endif // _DEBUG && (_TARGET_AMD64_ || _TARGET_X86_) && !FEATURE_PAL + +#ifndef FEATURE_IMPLICIT_TLS +//--------------------------------------------------------------- +// Emit code to store the current Thread structure in dstreg +// preservedRegSet is a set of registers to be preserved +// TRASHES EAX, EDX, ECX unless they are in preservedRegSet. 
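+// (On 32-bit Windows the fast path reads the slot straight out of the TEB,
+// roughly "mov <dstreg>, fs:[TlsSlots + idx*4]" for a slot index below
+// TLS_MINIMUM_AVAILABLE; the generic path falls back to calling TlsGetValue.)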
+// RESULTS dstreg = current Thread +//--------------------------------------------------------------- +VOID StubLinkerCPU::X86EmitTLSFetch(DWORD idx, X86Reg dstreg, unsigned preservedRegSet) +{ + CONTRACTL + { + STANDARD_VM_CHECK; + + // It doesn't make sense to have the destination register be preserved + PRECONDITION((preservedRegSet & (1<m_pvDatum->m_pMD) + X86EmitIndexRegLoad(kECX, regFrame, UMThkCallFrame::GetOffsetOfDatum()); + X86EmitIndexRegLoad(kECX, kECX, UMEntryThunk::GetOffsetOfMethodDesc()); + + // Push arguments and notify profiler + X86EmitPushImm32(COR_PRF_TRANSITION_CALL); // Reason + X86EmitPushReg(kECX); // MethodDesc* + X86EmitCall(NewExternalCodeLabel((LPVOID) ProfilerUnmanagedToManagedTransitionMD), 2*sizeof(void*)); + } + +#ifdef FEATURE_COMINTEROP + else if (pFrameVptr == ComMethodFrame::GetMethodFrameVPtr()) + { + // Load the methoddesc into ECX (Frame->m_pvDatum->m_pMD) + X86EmitIndexRegLoad(kECX, regFrame, ComMethodFrame::GetOffsetOfDatum()); + X86EmitIndexRegLoad(kECX, kECX, ComCallMethodDesc::GetOffsetOfMethodDesc()); + + // Push arguments and notify profiler + X86EmitPushImm32(COR_PRF_TRANSITION_CALL); // Reason + X86EmitPushReg(kECX); // MethodDesc* + X86EmitCall(NewExternalCodeLabel((LPVOID) ProfilerUnmanagedToManagedTransitionMD), 2*sizeof(void*)); + } +#endif // FEATURE_COMINTEROP + + // Unrecognized frame vtbl + else + { + _ASSERTE(!"Unrecognized vtble passed to EmitComMethodStubProlog with profiling turned on."); + } +} + + +VOID StubLinkerCPU::EmitProfilerComCallEpilog(TADDR pFrameVptr, X86Reg regFrame) +{ + CONTRACTL + { + STANDARD_VM_CHECK; +#ifdef FEATURE_COMINTEROP + PRECONDITION(pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr() || pFrameVptr == ComMethodFrame::GetMethodFrameVPtr()); +#else + PRECONDITION(pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr()); +#endif // FEATURE_COMINTEROP + } + CONTRACTL_END; + + if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr()) + { + // Load the methoddesc into ECX (UMThkCallFrame->m_pvDatum->m_pMD) + X86EmitIndexRegLoad(kECX, regFrame, UMThkCallFrame::GetOffsetOfDatum()); + X86EmitIndexRegLoad(kECX, kECX, UMEntryThunk::GetOffsetOfMethodDesc()); + + // Push arguments and notify profiler + X86EmitPushImm32(COR_PRF_TRANSITION_RETURN); // Reason + X86EmitPushReg(kECX); // MethodDesc* + X86EmitCall(NewExternalCodeLabel((LPVOID) ProfilerManagedToUnmanagedTransitionMD), 2*sizeof(void*)); + } + +#ifdef FEATURE_COMINTEROP + else if (pFrameVptr == ComMethodFrame::GetMethodFrameVPtr()) + { + // Load the methoddesc into ECX (Frame->m_pvDatum->m_pMD) + X86EmitIndexRegLoad(kECX, regFrame, ComMethodFrame::GetOffsetOfDatum()); + X86EmitIndexRegLoad(kECX, kECX, ComCallMethodDesc::GetOffsetOfMethodDesc()); + + // Push arguments and notify profiler + X86EmitPushImm32(COR_PRF_TRANSITION_RETURN); // Reason + X86EmitPushReg(kECX); // MethodDesc* + X86EmitCall(NewExternalCodeLabel((LPVOID) ProfilerManagedToUnmanagedTransitionMD), 2*sizeof(void*)); + } +#endif // FEATURE_COMINTEROP + + // Unrecognized frame vtbl + else + { + _ASSERTE(!"Unrecognized vtble passed to EmitComMethodStubEpilog with profiling turned on."); + } +} +#endif // PROFILING_SUPPORTED + + +//======================================================================== +// Prolog for entering managed code from COM +// pushes the appropriate frame ptr +// sets up a thread and returns a label that needs to be emitted by the caller +// At the end: +// ESI will hold the pointer to the ComMethodFrame or UMThkCallFrame +// EBX will hold the result of GetThread() +// EDI will 
hold the previous Frame ptr + +void StubLinkerCPU::EmitComMethodStubProlog(TADDR pFrameVptr, + CodeLabel** rgRareLabels, + CodeLabel** rgRejoinLabels, + BOOL bShouldProfile) +{ + CONTRACTL + { + STANDARD_VM_CHECK; + + PRECONDITION(rgRareLabels != NULL); + PRECONDITION(rgRareLabels[0] != NULL && rgRareLabels[1] != NULL && rgRareLabels[2] != NULL); + PRECONDITION(rgRejoinLabels != NULL); + PRECONDITION(rgRejoinLabels[0] != NULL && rgRejoinLabels[1] != NULL && rgRejoinLabels[2] != NULL); + } + CONTRACTL_END; + + // push ebp ;; save callee-saved register + // push ebx ;; save callee-saved register + // push esi ;; save callee-saved register + // push edi ;; save callee-saved register + X86EmitPushEBPframe(); + + X86EmitPushReg(kEBX); + X86EmitPushReg(kESI); + X86EmitPushReg(kEDI); + + // push eax ; datum + X86EmitPushReg(kEAX); + + // push edx ;leave room for m_next (edx is an arbitrary choice) + X86EmitPushReg(kEDX); + + // push IMM32 ; push Frame vptr + X86EmitPushImmPtr((LPVOID) pFrameVptr); + + X86EmitPushImmPtr((LPVOID)GetProcessGSCookie()); + + // lea esi, [esp+4] ;; set ESI -> new frame + X86EmitEspOffset(0x8d, kESI, 4); // lea ESI, [ESP+4] + + if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr()) + { + // Preserve argument registers for thiscall/fastcall + X86EmitPushReg(kECX); + X86EmitPushReg(kEDX); + } + + // Emit Setup thread + EmitSetup(rgRareLabels[0]); // rareLabel for rare setup + EmitLabel(rgRejoinLabels[0]); // rejoin label for rare setup + +#ifdef PROFILING_SUPPORTED + // If profiling is active, emit code to notify profiler of transition + // Must do this before preemptive GC is disabled, so no problem if the + // profiler blocks. + if (CORProfilerTrackTransitions() && bShouldProfile) + { + EmitProfilerComCallProlog(pFrameVptr, /*Frame*/ kESI); + } +#endif // PROFILING_SUPPORTED + + //----------------------------------------------------------------------- + // Generate the inline part of disabling preemptive GC. It is critical + // that this part happen before we link in the frame. That's because + // we won't be able to unlink the frame from preemptive mode. And during + // shutdown, we cannot switch to cooperative mode under some circumstances + //----------------------------------------------------------------------- + EmitDisable(rgRareLabels[1], /*fCallIn=*/TRUE, kEBX); // rare disable gc + EmitLabel(rgRejoinLabels[1]); // rejoin for rare disable gc + + // If we take an SO after installing the new frame but before getting the exception + // handlers in place, we will have a corrupt frame stack. So probe-by-touch first for + // sufficient stack space to erect the handler. Because we know we will be touching + // that stack right away when install the handler, this probe-by-touch will not incur + // unnecessary cache misses. And this allows us to do the probe with one instruction. + + // Note that for Win64, the personality routine will handle unlinking the frame, so + // we don't need to probe in the Win64 stubs. The exception is ComToCLRWorker + // where we don't setup a personality routine. However, we push the frame inside + // that function and it is probe-protected with an entry point probe first, so we are + // OK there too. + + // We push two registers to setup the EH handler and none to setup the frame + // so probe for double that to give ourselves a small margin for error. 
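+    // (The single "mov eax, [esp-20h]" emitted next is that probe: reading just
+    // below ESP commits the page now, so the pushes that build the SEH record a
+    // few instructions later cannot take a stack-overflow fault after the Frame
+    // has already been linked in.)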
+ // mov eax, [esp+n] ;; probe for sufficient stack to setup EH + X86EmitEspOffset(0x8B, kEAX, -0x20); + // mov edi,[ebx + Thread.GetFrame()] ;; get previous frame + X86EmitIndexRegLoad(kEDI, kEBX, Thread::GetOffsetOfCurrentFrame()); + + // mov [esi + Frame.m_next], edi + X86EmitIndexRegStore(kESI, Frame::GetOffsetOfNextLink(), kEDI); + + // mov [ebx + Thread.GetFrame()], esi + X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kESI); + + if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr()) + { + // push UnmanagedToManagedExceptHandler + X86EmitPushImmPtr((LPVOID)UMThunkPrestubHandler); + + // mov eax, fs:[0] + static const BYTE codeSEH1[] = { 0x64, 0xA1, 0x0, 0x0, 0x0, 0x0}; + EmitBytes(codeSEH1, sizeof(codeSEH1)); + + // push eax + X86EmitPushReg(kEAX); + + // mov dword ptr fs:[0], esp + static const BYTE codeSEH2[] = { 0x64, 0x89, 0x25, 0x0, 0x0, 0x0, 0x0}; + EmitBytes(codeSEH2, sizeof(codeSEH2)); + } + +#if _DEBUG + if (Frame::ShouldLogTransitions()) + { + // call LogTransition + X86EmitPushReg(kESI); + X86EmitCall(NewExternalCodeLabel((LPVOID) Frame::LogTransition), sizeof(void*)); + } +#endif +} + +//======================================================================== +// Epilog for stubs that enter managed code from COM +// +// At this point of the stub, the state should be as follows: +// ESI holds the ComMethodFrame or UMThkCallFrame ptr +// EBX holds the result of GetThread() +// EDI holds the previous Frame ptr +// +void StubLinkerCPU::EmitComMethodStubEpilog(TADDR pFrameVptr, + CodeLabel** rgRareLabels, + CodeLabel** rgRejoinLabels, + BOOL bShouldProfile) +{ + CONTRACTL + { + STANDARD_VM_CHECK; + + PRECONDITION(rgRareLabels != NULL); + PRECONDITION(rgRareLabels[0] != NULL && rgRareLabels[1] != NULL && rgRareLabels[2] != NULL); + PRECONDITION(rgRejoinLabels != NULL); + PRECONDITION(rgRejoinLabels[0] != NULL && rgRejoinLabels[1] != NULL && rgRejoinLabels[2] != NULL); + } + CONTRACTL_END; + + EmitCheckGSCookie(kESI, UnmanagedToManagedFrame::GetOffsetOfGSCookie()); + + if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr()) + { + // if we are using exceptions, unlink the SEH + // mov ecx,[esp] ;;pointer to the next exception record + X86EmitEspOffset(0x8b, kECX, 0); + + // mov dword ptr fs:[0], ecx + static const BYTE codeSEH[] = { 0x64, 0x89, 0x0D, 0x0, 0x0, 0x0, 0x0 }; + EmitBytes(codeSEH, sizeof(codeSEH)); + + X86EmitAddEsp(sizeof(EXCEPTION_REGISTRATION_RECORD)); + } + + // mov [ebx + Thread.GetFrame()], edi ;; restore previous frame + X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kEDI); + + //----------------------------------------------------------------------- + // Generate the inline part of disabling preemptive GC + //----------------------------------------------------------------------- + EmitEnable(rgRareLabels[2]); // rare gc + EmitLabel(rgRejoinLabels[2]); // rejoin for rare gc + + if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr()) + { + // Restore argument registers for thiscall/fastcall + X86EmitPopReg(kEDX); + X86EmitPopReg(kECX); + } + + // add esp, popstack + X86EmitAddEsp(sizeof(GSCookie) + UnmanagedToManagedFrame::GetOffsetOfCalleeSavedRegisters()); + + // pop edi ; restore callee-saved registers + // pop esi + // pop ebx + // pop ebp + X86EmitPopReg(kEDI); + X86EmitPopReg(kESI); + X86EmitPopReg(kEBX); + X86EmitPopReg(kEBP); + + // jmp eax //reexecute! + X86EmitR2ROp(0xff, (X86Reg)4, kEAX); + + // ret + // This will never be executed. 
It is just to help out stack-walking logic + // which disassembles the epilog to unwind the stack. A "ret" instruction + // indicates that no more code needs to be disassembled, if the stack-walker + // keeps on going past the previous "jmp eax". + X86EmitReturn(0); + + //----------------------------------------------------------------------- + // The out-of-line portion of enabling preemptive GC - rarely executed + //----------------------------------------------------------------------- + EmitLabel(rgRareLabels[2]); // label for rare enable gc + EmitRareEnable(rgRejoinLabels[2]); // emit rare enable gc + + //----------------------------------------------------------------------- + // The out-of-line portion of disabling preemptive GC - rarely executed + //----------------------------------------------------------------------- + EmitLabel(rgRareLabels[1]); // label for rare disable gc + EmitRareDisable(rgRejoinLabels[1]); // emit rare disable gc + + //----------------------------------------------------------------------- + // The out-of-line portion of setup thread - rarely executed + //----------------------------------------------------------------------- + EmitLabel(rgRareLabels[0]); // label for rare setup thread + EmitRareSetup(rgRejoinLabels[0], /*fThrow*/ TRUE); // emit rare setup thread +} + +//--------------------------------------------------------------- +// Emit code to store the setup current Thread structure in eax. +// TRASHES eax,ecx&edx. +// RESULTS ebx = current Thread +//--------------------------------------------------------------- +VOID StubLinkerCPU::EmitSetup(CodeLabel *pForwardRef) +{ + STANDARD_VM_CONTRACT; + +#ifdef FEATURE_IMPLICIT_TLS + DWORD idx = 0; + TLSACCESSMODE mode = TLSACCESS_GENERIC; +#else + DWORD idx = GetThreadTLSIndex(); + TLSACCESSMODE mode = GetTLSAccessMode(idx); +#endif + +#ifdef _DEBUG + { + static BOOL f = TRUE; + f = !f; + if (f) + { + mode = TLSACCESS_GENERIC; + } + } +#endif + + switch (mode) + { + case TLSACCESS_WNT: + { + unsigned __int32 tlsofs = offsetof(TEB, TlsSlots) + (idx * sizeof(void*)); + + static const BYTE code[] = {0x64,0x8b,0x1d}; // mov ebx, dword ptr fs:[IMM32] + EmitBytes(code, sizeof(code)); + Emit32(tlsofs); + } + break; + + case TLSACCESS_GENERIC: +#ifdef FEATURE_IMPLICIT_TLS + X86EmitCall(NewExternalCodeLabel((LPVOID) GetThread), sizeof(void*)); +#else + X86EmitPushImm32(idx); + + // call TLSGetValue + X86EmitCall(NewExternalCodeLabel((LPVOID) TlsGetValue), sizeof(void*)); +#endif + // mov ebx,eax + Emit16(0xc389); + break; + default: + _ASSERTE(0); + } + + // cmp ebx, 0 + static const BYTE b[] = { 0x83, 0xFB, 0x0}; + + EmitBytes(b, sizeof(b)); + + // jz RarePath + X86EmitCondJump(pForwardRef, X86CondCode::kJZ); + +#ifdef _DEBUG + X86EmitDebugTrashReg(kECX); + X86EmitDebugTrashReg(kEDX); +#endif + +} + +VOID StubLinkerCPU::EmitRareSetup(CodeLabel *pRejoinPoint, BOOL fThrow) +{ + STANDARD_VM_CONTRACT; + +#ifndef FEATURE_COMINTEROP + _ASSERTE(fThrow); +#else // !FEATURE_COMINTEROP + if (!fThrow) + { + X86EmitPushReg(kESI); + X86EmitCall(NewExternalCodeLabel((LPVOID) CreateThreadBlockReturnHr), sizeof(void*)); + } + else +#endif // !FEATURE_COMINTEROP + { + X86EmitCall(NewExternalCodeLabel((LPVOID) CreateThreadBlockThrow), 0); + } + + // mov ebx,eax + Emit16(0xc389); + X86EmitNearJump(pRejoinPoint); +} + +//======================================================================== +#endif // _TARGET_X86_ +//======================================================================== +#if defined(FEATURE_COMINTEROP) && 
defined(_TARGET_X86_) +//======================================================================== +// Epilog for stubs that enter managed code from COM +// +// On entry, ESI points to the Frame +// ESP points to below FramedMethodFrame::m_vc5Frame +// EBX hold GetThread() +// EDI holds the previous Frame + +void StubLinkerCPU::EmitSharedComMethodStubEpilog(TADDR pFrameVptr, + CodeLabel** rgRareLabels, + CodeLabel** rgRejoinLabels, + unsigned offsetRetThunk, + BOOL bShouldProfile) +{ + CONTRACTL + { + STANDARD_VM_CHECK; + + PRECONDITION(rgRareLabels != NULL); + PRECONDITION(rgRareLabels[0] != NULL && rgRareLabels[1] != NULL && rgRareLabels[2] != NULL); + PRECONDITION(rgRejoinLabels != NULL); + PRECONDITION(rgRejoinLabels[0] != NULL && rgRejoinLabels[1] != NULL && rgRejoinLabels[2] != NULL); + } + CONTRACTL_END; + + CodeLabel *NoEntryLabel; + NoEntryLabel = NewCodeLabel(); + + EmitCheckGSCookie(kESI, UnmanagedToManagedFrame::GetOffsetOfGSCookie()); + + // mov [ebx + Thread.GetFrame()], edi ;; restore previous frame + X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kEDI); + + //----------------------------------------------------------------------- + // Generate the inline part of enabling preemptive GC + //----------------------------------------------------------------------- + EmitLabel(NoEntryLabel); // need to enable preemp mode even when we fail the disable as rare disable will return in coop mode + + EmitEnable(rgRareLabels[2]); // rare enable gc + EmitLabel(rgRejoinLabels[2]); // rejoin for rare enable gc + +#ifdef PROFILING_SUPPORTED + // If profiling is active, emit code to notify profiler of transition + if (CORProfilerTrackTransitions() && bShouldProfile) + { + // Save return value + X86EmitPushReg(kEAX); + X86EmitPushReg(kEDX); + + EmitProfilerComCallEpilog(pFrameVptr, kESI); + + // Restore return value + X86EmitPopReg(kEDX); + X86EmitPopReg(kEAX); + } +#endif // PROFILING_SUPPORTED + + X86EmitAddEsp(sizeof(GSCookie) + UnmanagedToManagedFrame::GetOffsetOfDatum()); + + // pop ecx + X86EmitPopReg(kECX); // pop the MethodDesc* + + // pop edi ; restore callee-saved registers + // pop esi + // pop ebx + // pop ebp + X86EmitPopReg(kEDI); + X86EmitPopReg(kESI); + X86EmitPopReg(kEBX); + X86EmitPopReg(kEBP); + + // add ecx, offsetRetThunk + X86EmitAddReg(kECX, offsetRetThunk); + + // jmp ecx + // This will jump to the "ret cbStackArgs" instruction in COMMETHOD_PREPAD. + static const BYTE bjmpecx[] = { 0xff, 0xe1 }; + EmitBytes(bjmpecx, sizeof(bjmpecx)); + + // ret + // This will never be executed. It is just to help out stack-walking logic + // which disassembles the epilog to unwind the stack. A "ret" instruction + // indicates that no more code needs to be disassembled, if the stack-walker + // keeps on going past the previous "jmp ecx". 
+ X86EmitReturn(0); + + //----------------------------------------------------------------------- + // The out-of-line portion of enabling preemptive GC - rarely executed + //----------------------------------------------------------------------- + EmitLabel(rgRareLabels[2]); // label for rare enable gc + EmitRareEnable(rgRejoinLabels[2]); // emit rare enable gc + + //----------------------------------------------------------------------- + // The out-of-line portion of disabling preemptive GC - rarely executed + //----------------------------------------------------------------------- + EmitLabel(rgRareLabels[1]); // label for rare disable gc + EmitRareDisableHRESULT(rgRejoinLabels[1], NoEntryLabel); + + //----------------------------------------------------------------------- + // The out-of-line portion of setup thread - rarely executed + //----------------------------------------------------------------------- + EmitLabel(rgRareLabels[0]); // label for rare setup thread + EmitRareSetup(rgRejoinLabels[0],/*fThrow*/ FALSE); // emit rare setup thread +} + +//======================================================================== +#endif // defined(FEATURE_COMINTEROP) && defined(_TARGET_X86_) + +#ifndef FEATURE_STUBS_AS_IL +/*============================================================================== + Pushes a TransitionFrame on the stack + If you make any changes to the prolog instruction sequence, be sure + to update UpdateRegdisplay, too!! This service should only be called from + within the runtime. It should not be called for any unmanaged -> managed calls in. + + At the end of the generated prolog stub code: + pFrame is in ESI/RSI. + the previous pFrame is in EDI/RDI + The current Thread* is in EBX/RBX. + For x86, ESP points to TransitionFrame + For amd64, ESP points to the space reserved for the outgoing argument registers +*/ + +VOID StubLinkerCPU::EmitMethodStubProlog(TADDR pFrameVptr, int transitionBlockOffset) +{ + STANDARD_VM_CONTRACT; + +#ifdef _TARGET_AMD64_ + X86EmitPushReg(kR15); // CalleeSavedRegisters + X86EmitPushReg(kR14); + X86EmitPushReg(kR13); + X86EmitPushReg(kR12); + X86EmitPushReg(kRBP); + X86EmitPushReg(kRBX); + X86EmitPushReg(kRSI); + X86EmitPushReg(kRDI); + + // Push m_datum + X86EmitPushReg(SCRATCH_REGISTER_X86REG); + + // push edx ;leave room for m_next (edx is an arbitrary choice) + X86EmitPushReg(kEDX); + + // push Frame vptr + X86EmitPushImmPtr((LPVOID) pFrameVptr); + + // mov rsi, rsp + X86EmitR2ROp(0x8b, kRSI, (X86Reg)4 /*kESP*/); + UnwindSetFramePointer(kRSI); + + // Save ArgumentRegisters + #define ARGUMENT_REGISTER(regname) X86EmitRegSave(k##regname, SecureDelegateFrame::GetOffsetOfTransitionBlock() + \ + sizeof(TransitionBlock) + offsetof(ArgumentRegisters, regname)); + ENUM_ARGUMENT_REGISTERS(); + #undef ARGUMENT_REGISTER + + _ASSERTE(((Frame*)&pFrameVptr)->GetGSCookiePtr() == PTR_GSCookie(PBYTE(&pFrameVptr) - sizeof(GSCookie))); + X86EmitPushImmPtr((LPVOID)GetProcessGSCookie()); + + // sub rsp, 4*sizeof(void*) ;; allocate callee scratch area and ensure rsp is 16-byte-aligned + const INT32 padding = sizeof(ArgumentRegisters) + ((sizeof(FramedMethodFrame) % (2 * sizeof(LPVOID))) ? 
0 : sizeof(LPVOID)); + X86EmitSubEsp(padding); +#endif // _TARGET_AMD64_ + +#ifdef _TARGET_X86_ + // push ebp ;; save callee-saved register + // mov ebp,esp + // push ebx ;; save callee-saved register + // push esi ;; save callee-saved register + // push edi ;; save callee-saved register + X86EmitPushEBPframe(); + + X86EmitPushReg(kEBX); + X86EmitPushReg(kESI); + X86EmitPushReg(kEDI); + + // Push & initialize ArgumentRegisters + #define ARGUMENT_REGISTER(regname) X86EmitPushReg(k##regname); + ENUM_ARGUMENT_REGISTERS(); + #undef ARGUMENT_REGISTER + + // Push m_datum + X86EmitPushReg(kEAX); + + // push edx ;leave room for m_next (edx is an arbitrary choice) + X86EmitPushReg(kEDX); + + // push Frame vptr + X86EmitPushImmPtr((LPVOID) pFrameVptr); + + // mov esi,esp + X86EmitMovRegSP(kESI); + + X86EmitPushImmPtr((LPVOID)GetProcessGSCookie()); +#endif // _TARGET_X86_ + + // ebx <-- GetThread() + // Trashes X86TLSFetch_TRASHABLE_REGS + X86EmitCurrentThreadFetch(kEBX, 0); + +#if _DEBUG + + // call ObjectRefFlush +#ifdef _TARGET_AMD64_ + + // mov rcx, rbx + X86EmitR2ROp(0x8b, kECX, kEBX); // arg in reg + +#else // !_TARGET_AMD64_ + X86EmitPushReg(kEBX); // arg on stack +#endif // _TARGET_AMD64_ + + // Make the call + X86EmitCall(NewExternalCodeLabel((LPVOID) Thread::ObjectRefFlush), sizeof(void*)); + +#endif // _DEBUG + + // mov edi,[ebx + Thread.GetFrame()] ;; get previous frame + X86EmitIndexRegLoad(kEDI, kEBX, Thread::GetOffsetOfCurrentFrame()); + + // mov [esi + Frame.m_next], edi + X86EmitIndexRegStore(kESI, Frame::GetOffsetOfNextLink(), kEDI); + + // mov [ebx + Thread.GetFrame()], esi + X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kESI); + +#if _DEBUG + + if (Frame::ShouldLogTransitions()) + { + // call LogTransition +#ifdef _TARGET_AMD64_ + + // mov rcx, rsi + X86EmitR2ROp(0x8b, kECX, kESI); // arg in reg + +#else // !_TARGET_AMD64_ + X86EmitPushReg(kESI); // arg on stack +#endif // _TARGET_AMD64_ + + X86EmitCall(NewExternalCodeLabel((LPVOID) Frame::LogTransition), sizeof(void*)); + +#ifdef _TARGET_AMD64_ + // Reload parameter registers + // mov r, [esp+offs] + #define ARGUMENT_REGISTER(regname) X86EmitEspOffset(0x8b, k##regname, sizeof(ArgumentRegisters) + \ + sizeof(TransitionFrame) + offsetof(ArgumentRegisters, regname)); + ENUM_ARGUMENT_REGISTERS(); + #undef ARGUMENT_REGISTER + +#endif // _TARGET_AMD64_ + } + +#endif // _DEBUG + + +#ifdef _TARGET_AMD64_ + // OK for the debugger to examine the new frame now + // (Note that if it's not OK yet for some stub, another patch label + // can be emitted later which will override this one.) + EmitPatchLabel(); +#else + // For x86, the patch label can be specified only after the GSCookie is pushed + // Otherwise the debugger will see a Frame without a valid GSCookie +#endif +} + +/*============================================================================== + EmitMethodStubEpilog generates the part of the stub that will pop off the + Frame + + restoreArgRegs - indicates whether the argument registers need to be + restored from m_argumentRegisters + + At this point of the stub: + pFrame is in ESI/RSI. + the previous pFrame is in EDI/RDI + The current Thread* is in EBX/RBX. 
+ For x86, ESP points to the FramedMethodFrame::NegInfo +*/ + +VOID StubLinkerCPU::EmitMethodStubEpilog(WORD numArgBytes, int transitionBlockOffset) +{ + STANDARD_VM_CONTRACT; + + // mov [ebx + Thread.GetFrame()], edi ;; restore previous frame + X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kEDI); + +#ifdef _TARGET_X86_ + // deallocate Frame + X86EmitAddEsp(sizeof(GSCookie) + transitionBlockOffset + TransitionBlock::GetOffsetOfCalleeSavedRegisters()); + +#elif defined(_TARGET_AMD64_) + // lea rsp, [rsi + ] + X86EmitOffsetModRM(0x8d, (X86Reg)4 /*kRSP*/, kRSI, transitionBlockOffset + TransitionBlock::GetOffsetOfCalleeSavedRegisters()); +#endif // _TARGET_AMD64_ + + // pop edi ; restore callee-saved registers + // pop esi + // pop ebx + // pop ebp + X86EmitPopReg(kEDI); + X86EmitPopReg(kESI); + X86EmitPopReg(kEBX); + X86EmitPopReg(kEBP); + +#ifdef _TARGET_AMD64_ + X86EmitPopReg(kR12); + X86EmitPopReg(kR13); + X86EmitPopReg(kR14); + X86EmitPopReg(kR15); +#endif + +#ifdef _TARGET_AMD64_ + // Caller deallocates argument space. (Bypasses ASSERT in + // X86EmitReturn.) + numArgBytes = 0; +#endif + + X86EmitReturn(numArgBytes); +} + + +// On entry, ESI should be pointing to the Frame + +VOID StubLinkerCPU::EmitCheckGSCookie(X86Reg frameReg, int gsCookieOffset) +{ + STANDARD_VM_CONTRACT; + +#ifdef _DEBUG + // cmp dword ptr[frameReg-gsCookieOffset], gsCookie +#ifdef _TARGET_X86_ + X86EmitCmpRegIndexImm32(frameReg, gsCookieOffset, GetProcessGSCookie()); +#else + X64EmitCmp32RegIndexImm32(frameReg, gsCookieOffset, (INT32)GetProcessGSCookie()); +#endif + + CodeLabel * pLabel = NewCodeLabel(); + X86EmitCondJump(pLabel, X86CondCode::kJE); + + X86EmitCall(NewExternalCodeLabel((LPVOID) JIT_FailFast), 0); + + EmitLabel(pLabel); +#endif +} +#endif // !FEATURE_STUBS_AS_IL + + +// This method unboxes the THIS pointer and then calls pRealMD +// If it's shared code for a method in a generic value class, then also extract the vtable pointer +// and pass it as an extra argument. 
Thus this stub generator really covers both +// - Unboxing, non-instantiating stubs +// - Unboxing, method-table-instantiating stubs +VOID StubLinkerCPU::EmitUnboxMethodStub(MethodDesc* pUnboxMD) +{ + CONTRACTL + { + STANDARD_VM_CHECK; + PRECONDITION(!pUnboxMD->IsStatic()); + } + CONTRACTL_END; + +#ifdef FEATURE_STUBS_AS_IL + _ASSERTE(!pUnboxMD->RequiresInstMethodTableArg()); +#else + if (pUnboxMD->RequiresInstMethodTableArg()) + { + EmitInstantiatingMethodStub(pUnboxMD, NULL); + return; + } +#endif + + // + // unboxing a value class simply means adding sizeof(void*) to the THIS pointer + // +#ifdef _TARGET_AMD64_ + X86EmitAddReg(THIS_kREG, sizeof(void*)); + + // Use direct call if possible + if (pUnboxMD->HasStableEntryPoint()) + { + X86EmitRegLoad(kRAX, pUnboxMD->GetStableEntryPoint());// MOV RAX, DWORD + } + else + { + X86EmitRegLoad(kRAX, (UINT_PTR)pUnboxMD->GetAddrOfSlot()); // MOV RAX, DWORD + + X86EmitIndexRegLoad(kRAX, kRAX); // MOV RAX, [RAX] + } + + Emit16(X86_INSTR_JMP_EAX); // JMP EAX +#else // _TARGET_AMD64_ + X86EmitAddReg(THIS_kREG, sizeof(void*)); + + // Use direct call if possible + if (pUnboxMD->HasStableEntryPoint()) + { + X86EmitNearJump(NewExternalCodeLabel((LPVOID) pUnboxMD->GetStableEntryPoint())); + } + else + { + // jmp [slot] + Emit16(0x25ff); + Emit32((DWORD)(size_t)pUnboxMD->GetAddrOfSlot()); + } +#endif //_TARGET_AMD64_ +} + + +#if defined(FEATURE_SHARE_GENERIC_CODE) && !defined(FEATURE_STUBS_AS_IL) +// The stub generated by this method passes an extra dictionary argument before jumping to +// shared-instantiation generic code. +// +// pMD is either +// * An InstantiatedMethodDesc for a generic method whose code is shared across instantiations. +// In this case, the extra argument is the InstantiatedMethodDesc for the instantiation-specific stub itself. +// or * A MethodDesc for a static method in a generic class whose code is shared across instantiations. +// In this case, the extra argument is the MethodTable pointer of the instantiated type. +// or * A MethodDesc for unboxing stub. In this case, the extra argument is null. +VOID StubLinkerCPU::EmitInstantiatingMethodStub(MethodDesc* pMD, void* extra) +{ + CONTRACTL + { + STANDARD_VM_CHECK; + PRECONDITION(pMD->RequiresInstArg()); + } + CONTRACTL_END; + + MetaSig msig(pMD); + ArgIterator argit(&msig); + +#ifdef _TARGET_AMD64_ + int paramTypeArgOffset = argit.GetParamTypeArgOffset(); + int paramTypeArgIndex = TransitionBlock::GetArgumentIndexFromOffset(paramTypeArgOffset); + + CorElementType argTypes[5]; + + int firstRealArg = paramTypeArgIndex + 1; + int argNum = firstRealArg; + + // + // Compute types of the 4 register args and first stack arg + // + + CorElementType sigType; + while ((sigType = msig.NextArgNormalized()) != ELEMENT_TYPE_END) + { + argTypes[argNum++] = sigType; + if (argNum > 4) + break; + } + msig.Reset(); + + BOOL fUseInstantiatingMethodStubWorker = FALSE; + + if (argNum > 4) + { + // + // We will need to go through assembly helper. + // + fUseInstantiatingMethodStubWorker = TRUE; + + // Allocate space for frame before pushing the arguments for the assembly helper + X86EmitSubEsp((INT32)(AlignUp(sizeof(void *) /* extra stack param */ + sizeof(GSCookie) + sizeof(StubHelperFrame), 16) - sizeof(void *) /* return address */)); + + // + // Store extra arg stack arg param for the helper. + // + CorElementType argType = argTypes[--argNum]; + switch (argType) + { + case ELEMENT_TYPE_R4: + // movss dword ptr [rsp], xmm? 
+ X64EmitMovSSToMem(kXMM3, (X86Reg)4 /*kRSP*/); + break; + case ELEMENT_TYPE_R8: + // movsd qword ptr [rsp], xmm? + X64EmitMovSDToMem(kXMM3, (X86Reg)4 /*kRSP*/); + break; + default: + X86EmitIndexRegStoreRSP(0, kR9); + break; + } + } + + // + // Shuffle the register arguments + // + while (argNum > firstRealArg) + { + CorElementType argType = argTypes[--argNum]; + + switch (argType) + { + case ELEMENT_TYPE_R4: + case ELEMENT_TYPE_R8: + // mov xmm#, xmm#-1 + X64EmitMovXmmXmm((X86Reg)argNum, (X86Reg)(argNum - 1)); + break; + default: + //mov reg#, reg#-1 + X86EmitMovRegReg(c_argRegs[argNum], c_argRegs[argNum-1]); + break; + } + } + + // + // Setup the hidden instantiation argument + // + if (extra != NULL) + { + X86EmitRegLoad(c_argRegs[paramTypeArgIndex], (UINT_PTR)extra); + } + else + { + X86EmitIndexRegLoad(c_argRegs[paramTypeArgIndex], THIS_kREG); + + X86EmitAddReg(THIS_kREG, sizeof(void*)); + } + + // Use direct call if possible + if (pMD->HasStableEntryPoint()) + { + X86EmitRegLoad(kRAX, pMD->GetStableEntryPoint());// MOV RAX, DWORD + } + else + { + X86EmitRegLoad(kRAX, (UINT_PTR)pMD->GetAddrOfSlot()); // MOV RAX, DWORD + + X86EmitIndexRegLoad(kRAX, kRAX); // MOV RAX, [RAX] + } + + if (fUseInstantiatingMethodStubWorker) + { + X86EmitPushReg(kRAX); + + UINT cbStack = argit.SizeOfArgStack(); + _ASSERTE(cbStack > 0); + + X86EmitPushImm32((AlignUp(cbStack, 16) / sizeof(void*)) - 1); // -1 for extra stack arg + + X86EmitRegLoad(kRAX, GetEEFuncEntryPoint(InstantiatingMethodStubWorker));// MOV RAX, DWORD + } + else + { + _ASSERTE(argit.SizeOfArgStack() == 0); + } + + Emit16(X86_INSTR_JMP_EAX); + +#else + int paramTypeArgOffset = argit.GetParamTypeArgOffset(); + + // It's on the stack + if (TransitionBlock::IsStackArgumentOffset(paramTypeArgOffset)) + { + // Pop return address into AX + X86EmitPopReg(kEAX); + + if (extra != NULL) + { + // Push extra dictionary argument + X86EmitPushImmPtr(extra); + } + else + { + // Push the vtable pointer from "this" + X86EmitIndexPush(THIS_kREG, 0); + } + + // Put return address back + X86EmitPushReg(kEAX); + } + // It's in a register + else + { + X86Reg paramReg = GetX86ArgumentRegisterFromOffset(paramTypeArgOffset - TransitionBlock::GetOffsetOfArgumentRegisters()); + + if (extra != NULL) + { + X86EmitRegLoad(paramReg, (UINT_PTR)extra); + } + else + { + // Just extract the vtable pointer from "this" + X86EmitIndexRegLoad(paramReg, THIS_kREG); + } + } + + if (extra == NULL) + { + // Unboxing stub case. + X86EmitAddReg(THIS_kREG, sizeof(void*)); + } + + // Use direct call if possible + if (pMD->HasStableEntryPoint()) + { + X86EmitNearJump(NewExternalCodeLabel((LPVOID) pMD->GetStableEntryPoint())); + } + else + { + // jmp [slot] + Emit16(0x25ff); + Emit32((DWORD)(size_t)pMD->GetAddrOfSlot()); + } +#endif // +} +#endif // FEATURE_SHARE_GENERIC_CODE && FEATURE_STUBS_AS_IL + + +#if defined(_DEBUG) && defined(STUBLINKER_GENERATES_UNWIND_INFO) + +typedef BOOL GetModuleInformationProc( + HANDLE hProcess, + HMODULE hModule, + LPMODULEINFO lpmodinfo, + DWORD cb +); + +GetModuleInformationProc *g_pfnGetModuleInformation = NULL; + +extern "C" VOID __cdecl DebugCheckStubUnwindInfoWorker (CONTEXT *pStubContext) +{ + BEGIN_ENTRYPOINT_VOIDRET; + + LOG((LF_STUBS, LL_INFO1000000, "checking stub unwind info:\n")); + + // + // Make a copy of the CONTEXT. RtlVirtualUnwind will modify this copy. + // DebugCheckStubUnwindInfo will need to restore registers from the + // original CONTEXT. 
+ // + CONTEXT ctx = *pStubContext; + ctx.ContextFlags = (CONTEXT_CONTROL | CONTEXT_INTEGER); + + // + // Find the upper bound of the stack and address range of KERNEL32. This + // is where we expect the unwind to stop. + // + void *pvStackTop = GetThread()->GetCachedStackBase(); + + if (!g_pfnGetModuleInformation) + { + HMODULE hmodPSAPI = WszGetModuleHandle(W("PSAPI.DLL")); + + if (!hmodPSAPI) + { + hmodPSAPI = WszLoadLibrary(W("PSAPI.DLL")); + if (!hmodPSAPI) + { + _ASSERTE(!"unable to load PSAPI.DLL"); + goto ErrExit; + } + } + + g_pfnGetModuleInformation = (GetModuleInformationProc*)GetProcAddress(hmodPSAPI, "GetModuleInformation"); + if (!g_pfnGetModuleInformation) + { + _ASSERTE(!"can't find PSAPI!GetModuleInformation"); + goto ErrExit; + } + + // Intentionally leak hmodPSAPI. We don't want to + // LoadLibrary/FreeLibrary every time, this is slow + produces lots of + // debugger spew. This is just debugging code after all... + } + + HMODULE hmodKERNEL32 = WszGetModuleHandle(W("KERNEL32")); + _ASSERTE(hmodKERNEL32); + + MODULEINFO modinfoKERNEL32; + if (!g_pfnGetModuleInformation(GetCurrentProcess(), hmodKERNEL32, &modinfoKERNEL32, sizeof(modinfoKERNEL32))) + { + _ASSERTE(!"unable to get bounds of KERNEL32"); + goto ErrExit; + } + + // + // Unwind until IP is 0, sp is at the stack top, and callee IP is in kernel32. + // + + for (;;) + { + ULONG64 ControlPc = (ULONG64)GetIP(&ctx); + + LOG((LF_STUBS, LL_INFO1000000, "pc %p, sp %p\n", ControlPc, GetSP(&ctx))); + + ULONG64 ImageBase; + T_RUNTIME_FUNCTION *pFunctionEntry = RtlLookupFunctionEntry( + ControlPc, + &ImageBase, + NULL); + if (pFunctionEntry) + { + PVOID HandlerData; + ULONG64 EstablisherFrame; + + RtlVirtualUnwind( + 0, + ImageBase, + ControlPc, + pFunctionEntry, + &ctx, + &HandlerData, + &EstablisherFrame, + NULL); + + ULONG64 NewControlPc = (ULONG64)GetIP(&ctx); + + LOG((LF_STUBS, LL_INFO1000000, "function %p, image %p, new pc %p, new sp %p\n", pFunctionEntry, ImageBase, NewControlPc, GetSP(&ctx))); + + if (!NewControlPc) + { + if (dac_cast(GetSP(&ctx)) < (BYTE*)pvStackTop - 0x100) + { + _ASSERTE(!"SP did not end up at top of stack"); + goto ErrExit; + } + + if (!( ControlPc > (ULONG64)modinfoKERNEL32.lpBaseOfDll + && ControlPc < (ULONG64)modinfoKERNEL32.lpBaseOfDll + modinfoKERNEL32.SizeOfImage)) + { + _ASSERTE(!"PC did not end up in KERNEL32"); + goto ErrExit; + } + + break; + } + } + else + { + // Nested functions that do not use any stack space or nonvolatile + // registers are not required to have unwind info (ex. + // USER32!ZwUserCreateWindowEx). + ctx.Rip = *(ULONG64*)(ctx.Rsp); + ctx.Rsp += sizeof(ULONG64); + } + } +ErrExit: + + END_ENTRYPOINT_VOIDRET; + return; +} + +//virtual +VOID StubLinkerCPU::EmitUnwindInfoCheckWorker (CodeLabel *pCheckLabel) +{ + STANDARD_VM_CONTRACT; + X86EmitCall(pCheckLabel, 0); +} + +//virtual +VOID StubLinkerCPU::EmitUnwindInfoCheckSubfunction() +{ + STANDARD_VM_CONTRACT; + +#ifdef _TARGET_AMD64_ + // X86EmitCall will generate "mov rax, target/jmp rax", so we have to save + // rax on the stack. DO NOT use X86EmitPushReg. That will induce infinite + // recursion, since the push may require more unwind info. This "push rax" + // will be accounted for by DebugCheckStubUnwindInfo's unwind info + // (considered part of its locals), so there doesn't have to be unwind + // info for it. 
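+    // (0x50 is the one-byte encoding of "push rax"; emitting the raw byte
+    // keeps this push out of the stub's unwind-info bookkeeping.)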
+ Emit8(0x50); +#endif + + X86EmitNearJump(NewExternalCodeLabel(DebugCheckStubUnwindInfo)); +} + +#endif // defined(_DEBUG) && defined(STUBLINKER_GENERATES_UNWIND_INFO) + + +#ifdef _TARGET_X86_ + +//----------------------------------------------------------------------- +// Generates the inline portion of the code to enable preemptive GC. Hopefully, +// the inline code is all that will execute most of the time. If this code +// path is entered at certain times, however, it will need to jump out to +// a separate out-of-line path which is more expensive. The "pForwardRef" +// label indicates the start of the out-of-line path. +// +// Assumptions: +// ebx = Thread +// Preserves +// all registers except ecx. +// +//----------------------------------------------------------------------- +VOID StubLinkerCPU::EmitEnable(CodeLabel *pForwardRef) +{ + CONTRACTL + { + STANDARD_VM_CHECK; + + PRECONDITION(4 == sizeof( ((Thread*)0)->m_State )); + PRECONDITION(4 == sizeof( ((Thread*)0)->m_fPreemptiveGCDisabled )); + } + CONTRACTL_END; + + // move byte ptr [ebx + Thread.m_fPreemptiveGCDisabled],0 + X86EmitOffsetModRM(0xc6, (X86Reg)0, kEBX, Thread::GetOffsetOfGCFlag()); + Emit8(0); + + _ASSERTE(FitsInI1(Thread::TS_CatchAtSafePoint)); + + // test byte ptr [ebx + Thread.m_State], TS_CatchAtSafePoint + X86EmitOffsetModRM(0xf6, (X86Reg)0, kEBX, Thread::GetOffsetOfState()); + Emit8(Thread::TS_CatchAtSafePoint); + + // jnz RarePath + X86EmitCondJump(pForwardRef, X86CondCode::kJNZ); + +#ifdef _DEBUG + X86EmitDebugTrashReg(kECX); +#endif + +} + + +//----------------------------------------------------------------------- +// Generates the out-of-line portion of the code to enable preemptive GC. +// After the work is done, the code jumps back to the "pRejoinPoint" +// which should be emitted right after the inline part is generated. +// +// Assumptions: +// ebx = Thread +// Preserves +// all registers except ecx. +// +//----------------------------------------------------------------------- +VOID StubLinkerCPU::EmitRareEnable(CodeLabel *pRejoinPoint) +{ + STANDARD_VM_CONTRACT; + + X86EmitCall(NewExternalCodeLabel((LPVOID) StubRareEnable), 0); +#ifdef _DEBUG + X86EmitDebugTrashReg(kECX); +#endif + if (pRejoinPoint) + { + X86EmitNearJump(pRejoinPoint); + } + +} + + +//----------------------------------------------------------------------- +// Generates the inline portion of the code to disable preemptive GC. Hopefully, +// the inline code is all that will execute most of the time. If this code +// path is entered at certain times, however, it will need to jump out to +// a separate out-of-line path which is more expensive. The "pForwardRef" +// label indicates the start of the out-of-line path. +// +// Assumptions: +// ebx = Thread +// Preserves +// all registers except ecx. +// +//----------------------------------------------------------------------- +VOID StubLinkerCPU::EmitDisable(CodeLabel *pForwardRef, BOOL fCallIn, X86Reg ThreadReg) +{ + CONTRACTL + { + STANDARD_VM_CHECK; + + PRECONDITION(4 == sizeof( ((Thread*)0)->m_fPreemptiveGCDisabled )); + PRECONDITION(4 == sizeof(g_TrapReturningThreads)); + } + CONTRACTL_END; + +#if defined(FEATURE_COMINTEROP) && defined(MDA_SUPPORTED) + // If we are checking whether the current thread is already holds the loader lock, vector + // such cases to the rare disable pathway, where we can check again. 
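+    // (The probe below is only reached when the thread is already in
+    // cooperative mode, i.e. m_fPreemptiveGCDisabled is set; otherwise the
+    // emitted code jumps straight to the NotReentrant label.)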
+ if (fCallIn && (NULL != MDA_GET_ASSISTANT(Reentrancy))) + { + CodeLabel *pNotReentrantLabel = NewCodeLabel(); + + // test byte ptr [ebx + Thread.m_fPreemptiveGCDisabled],1 + X86EmitOffsetModRM(0xf6, (X86Reg)0, ThreadReg, Thread::GetOffsetOfGCFlag()); + Emit8(1); + + // jz NotReentrant + X86EmitCondJump(pNotReentrantLabel, X86CondCode::kJZ); + + X86EmitPushReg(kEAX); + X86EmitPushReg(kEDX); + X86EmitPushReg(kECX); + + X86EmitCall(NewExternalCodeLabel((LPVOID) HasIllegalReentrancy), 0); + + // If the probe fires, we go ahead and allow the call anyway. At this point, there could be + // GC heap corruptions. So the probe detects the illegal case, but doesn't prevent it. + + X86EmitPopReg(kECX); + X86EmitPopReg(kEDX); + X86EmitPopReg(kEAX); + + EmitLabel(pNotReentrantLabel); + } +#endif + + // move byte ptr [ebx + Thread.m_fPreemptiveGCDisabled],1 + X86EmitOffsetModRM(0xc6, (X86Reg)0, ThreadReg, Thread::GetOffsetOfGCFlag()); + Emit8(1); + + // cmp dword ptr g_TrapReturningThreads, 0 + Emit16(0x3d83); + EmitPtr((void *)&g_TrapReturningThreads); + Emit8(0); + + // jnz RarePath + X86EmitCondJump(pForwardRef, X86CondCode::kJNZ); + +#if defined(FEATURE_COMINTEROP) && !defined(FEATURE_CORESYSTEM) + // If we are checking whether the current thread holds the loader lock, vector + // such cases to the rare disable pathway, where we can check again. + if (fCallIn && ShouldCheckLoaderLock()) + { + X86EmitPushReg(kEAX); + X86EmitPushReg(kEDX); + + if (ThreadReg == kECX) + X86EmitPushReg(kECX); + + // BOOL AuxUlibIsDLLSynchronizationHeld(BOOL *IsHeld) + // + // So we need to be sure that both the return value and the passed BOOL are both TRUE. + // If either is FALSE, then the call failed or the lock is not held. Either way, the + // probe should not fire. + + X86EmitPushReg(kEDX); // BOOL temp + Emit8(0x54); // push ESP because arg is &temp + X86EmitCall(NewExternalCodeLabel((LPVOID) AuxUlibIsDLLSynchronizationHeld), 0); + + // callee has popped. + X86EmitPopReg(kEDX); // recover temp + + CodeLabel *pPopLabel = NewCodeLabel(); + + Emit16(0xc085); // test eax, eax + X86EmitCondJump(pPopLabel, X86CondCode::kJZ); + + Emit16(0xd285); // test edx, edx + + EmitLabel(pPopLabel); // retain the conditional flags across the pops + + if (ThreadReg == kECX) + X86EmitPopReg(kECX); + + X86EmitPopReg(kEDX); + X86EmitPopReg(kEAX); + + X86EmitCondJump(pForwardRef, X86CondCode::kJNZ); + } +#endif + +#ifdef _DEBUG + if (ThreadReg != kECX) + X86EmitDebugTrashReg(kECX); +#endif + +} + + +//----------------------------------------------------------------------- +// Generates the out-of-line portion of the code to disable preemptive GC. +// After the work is done, the code jumps back to the "pRejoinPoint" +// which should be emitted right after the inline part is generated. However, +// if we cannot execute managed code at this time, an exception is thrown +// which cannot be caught by managed code. +// +// Assumptions: +// ebx = Thread +// Preserves +// all registers except ecx, eax. +// +//----------------------------------------------------------------------- +VOID StubLinkerCPU::EmitRareDisable(CodeLabel *pRejoinPoint) +{ + STANDARD_VM_CONTRACT; + + X86EmitCall(NewExternalCodeLabel((LPVOID) StubRareDisableTHROW), 0); + +#ifdef _DEBUG + X86EmitDebugTrashReg(kECX); +#endif + X86EmitNearJump(pRejoinPoint); +} + +#ifdef FEATURE_COMINTEROP +//----------------------------------------------------------------------- +// Generates the out-of-line portion of the code to disable preemptive GC. 
+// After the work is done, the code normally jumps back to the "pRejoinPoint" +// which should be emitted right after the inline part is generated. However, +// if we cannot execute managed code at this time, an HRESULT is returned +// via the ExitPoint. +// +// Assumptions: +// ebx = Thread +// Preserves +// all registers except ecx, eax. +// +//----------------------------------------------------------------------- +VOID StubLinkerCPU::EmitRareDisableHRESULT(CodeLabel *pRejoinPoint, CodeLabel *pExitPoint) +{ + STANDARD_VM_CONTRACT; + + X86EmitCall(NewExternalCodeLabel((LPVOID) StubRareDisableHR), 0); + +#ifdef _DEBUG + X86EmitDebugTrashReg(kECX); +#endif + + // test eax, eax ;; test the result of StubRareDisableHR + Emit16(0xc085); + + // JZ pRejoinPoint + X86EmitCondJump(pRejoinPoint, X86CondCode::kJZ); + + X86EmitNearJump(pExitPoint); +} +#endif // FEATURE_COMINTEROP + +#endif // _TARGET_X86_ + +#endif // CROSSGEN_COMPILE + + +VOID StubLinkerCPU::EmitShuffleThunk(ShuffleEntry *pShuffleEntryArray) +{ + STANDARD_VM_CONTRACT; + +#ifdef _TARGET_AMD64_ + + // mov SCRATCHREG,rsp + X86_64BitOperands(); + Emit8(0x8b); + Emit8(0304 | (SCRATCH_REGISTER_X86REG << 3)); + + // save the real target in r11, will jump to it later. r10 is used below. + // Windows: mov r11, rcx + // Unix: mov r11, rdi + X86EmitMovRegReg(kR11, THIS_kREG); + +#ifdef UNIX_AMD64_ABI + for (ShuffleEntry* pEntry = pShuffleEntryArray; pEntry->srcofs != ShuffleEntry::SENTINEL; pEntry++) + { + if (pEntry->srcofs & ShuffleEntry::REGMASK) + { + // If source is present in register then destination must also be a register + _ASSERTE(pEntry->dstofs & ShuffleEntry::REGMASK); + // Both the srcofs and dstofs must be of the same kind of registers - float or general purpose. + _ASSERTE((pEntry->dstofs & ShuffleEntry::FPREGMASK) == (pEntry->srcofs & ShuffleEntry::FPREGMASK)); + + int dstRegIndex = pEntry->dstofs & ShuffleEntry::OFSREGMASK; + int srcRegIndex = pEntry->srcofs & ShuffleEntry::OFSREGMASK; + + if (pEntry->srcofs & ShuffleEntry::FPREGMASK) + { + // movdqa dstReg, srcReg + X64EmitMovXmmXmm((X86Reg)(kXMM0 + dstRegIndex), (X86Reg)(kXMM0 + srcRegIndex)); + } + else + { + // mov dstReg, srcReg + X86EmitMovRegReg(c_argRegs[dstRegIndex], c_argRegs[srcRegIndex]); + } + } + else if (pEntry->dstofs & ShuffleEntry::REGMASK) + { + // source must be on the stack + _ASSERTE(!(pEntry->srcofs & ShuffleEntry::REGMASK)); + + int dstRegIndex = pEntry->dstofs & ShuffleEntry::OFSREGMASK; + int srcOffset = (pEntry->srcofs + 1) * sizeof(void*); + + if (pEntry->dstofs & ShuffleEntry::FPREGMASK) + { + if (pEntry->dstofs & ShuffleEntry::FPSINGLEMASK) + { + // movss dstReg, [rax + src] + X64EmitMovSSFromMem((X86Reg)(kXMM0 + dstRegIndex), SCRATCH_REGISTER_X86REG, srcOffset); + } + else + { + // movsd dstReg, [rax + src] + X64EmitMovSDFromMem((X86Reg)(kXMM0 + dstRegIndex), SCRATCH_REGISTER_X86REG, srcOffset); + } + } + else + { + // mov dstreg, [rax + src] + X86EmitIndexRegLoad(c_argRegs[dstRegIndex], SCRATCH_REGISTER_X86REG, srcOffset); + } + } + else + { + // source must be on the stack + _ASSERTE(!(pEntry->srcofs & ShuffleEntry::REGMASK)); + + // dest must be on the stack + _ASSERTE(!(pEntry->dstofs & ShuffleEntry::REGMASK)); + + // mov r10, [rax + src] + X86EmitIndexRegLoad (kR10, SCRATCH_REGISTER_X86REG, (pEntry->srcofs + 1) * sizeof(void*)); + + // mov [rax + dst], r10 + X86EmitIndexRegStore (SCRATCH_REGISTER_X86REG, (pEntry->dstofs + 1) * sizeof(void*), kR10); + } + } +#else // UNIX_AMD64_ABI + UINT step = 1; + + if (pShuffleEntryArray->argtype == 
ELEMENT_TYPE_END) + { + // Special handling of open instance methods with return buffer. Move "this" + // by two slots, and leave the "retbufptr" between the two slots intact. + + // mov rcx, r8 + X86EmitMovRegReg(kRCX, kR8); + + // Skip this entry + pShuffleEntryArray++; + + // Skip this entry and leave retbufptr intact + step += 2; + } + + // Now shuffle the args by one position: + // steps 1-3 : reg args (rcx, rdx, r8) + // step 4 : stack->reg arg (r9) + // step >4 : stack args + + for(; + pShuffleEntryArray->srcofs != ShuffleEntry::SENTINEL; + step++, pShuffleEntryArray++) + { + switch (step) + { + case 1: + case 2: + case 3: + switch (pShuffleEntryArray->argtype) + { + case ELEMENT_TYPE_R4: + case ELEMENT_TYPE_R8: + // mov xmm-1#, xmm# + X64EmitMovXmmXmm((X86Reg)(step - 1), (X86Reg)(step)); + break; + default: + // mov argRegs[step-1], argRegs[step] + X86EmitMovRegReg(c_argRegs[step-1], c_argRegs[step]); + break; + } + break; + + case 4: + { + switch (pShuffleEntryArray->argtype) + { + case ELEMENT_TYPE_R4: + X64EmitMovSSFromMem(kXMM3, kRAX, 0x28); + break; + + case ELEMENT_TYPE_R8: + X64EmitMovSDFromMem(kXMM3, kRAX, 0x28); + break; + + default: + // mov r9, [rax + 28h] + X86EmitIndexRegLoad (kR9, SCRATCH_REGISTER_X86REG, 5*sizeof(void*)); + } + break; + } + default: + + // mov r10, [rax + (step+1)*sizeof(void*)] + X86EmitIndexRegLoad (kR10, SCRATCH_REGISTER_X86REG, (step+1)*sizeof(void*)); + + // mov [rax + step*sizeof(void*)], r10 + X86EmitIndexRegStore (SCRATCH_REGISTER_X86REG, step*sizeof(void*), kR10); + } + } +#endif // UNIX_AMD64_ABI + + // mov r10, [r11 + Delegate._methodptraux] + X86EmitIndexRegLoad(kR10, kR11, DelegateObject::GetOffsetOfMethodPtrAux()); + // add r11, DelegateObject::GetOffsetOfMethodPtrAux() - load the indirection cell into r11 + X86EmitAddReg(kR11, DelegateObject::GetOffsetOfMethodPtrAux()); + // Now jump to real target + // jmp r10 + X86EmitR2ROp(0xff, (X86Reg)4, kR10); + +#else // _TARGET_AMD64_ + + UINT espadjust = 0; + BOOL haveMemMemMove = FALSE; + + ShuffleEntry *pWalk = NULL; + for (pWalk = pShuffleEntryArray; pWalk->srcofs != ShuffleEntry::SENTINEL; pWalk++) + { + if (!(pWalk->dstofs & ShuffleEntry::REGMASK) && + !(pWalk->srcofs & ShuffleEntry::REGMASK) && + pWalk->srcofs != pWalk->dstofs) + { + haveMemMemMove = TRUE; + espadjust = sizeof(void*); + break; + } + } + + if (haveMemMemMove) + { + // push ecx + X86EmitPushReg(THIS_kREG); + } + else + { + // mov eax, ecx + Emit8(0x8b); + Emit8(0300 | SCRATCH_REGISTER_X86REG << 3 | THIS_kREG); + } + + UINT16 emptySpot = 0x4 | ShuffleEntry::REGMASK; + + while (true) + { + for (pWalk = pShuffleEntryArray; pWalk->srcofs != ShuffleEntry::SENTINEL; pWalk++) + if (pWalk->dstofs == emptySpot) + break; + + if (pWalk->srcofs == ShuffleEntry::SENTINEL) + break; + + if ((pWalk->dstofs & ShuffleEntry::REGMASK)) + { + if (pWalk->srcofs & ShuffleEntry::REGMASK) + { + // mov , + Emit8(0x8b); + Emit8(static_cast(0300 | + (GetX86ArgumentRegisterFromOffset( pWalk->dstofs & ShuffleEntry::OFSMASK ) << 3) | + (GetX86ArgumentRegisterFromOffset( pWalk->srcofs & ShuffleEntry::OFSMASK )))); + } + else + { + X86EmitEspOffset(0x8b, GetX86ArgumentRegisterFromOffset( pWalk->dstofs & ShuffleEntry::OFSMASK ), pWalk->srcofs+espadjust); + } + } + else + { + // if the destination is not a register, the source shouldn't be either. 
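+            // The memory-to-memory case goes through the scratch register,
+            // emitting roughly:
+            //     mov eax, [esp + srcofs]
+            //     mov [esp + dstofs], eax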
+ _ASSERTE(!(pWalk->srcofs & ShuffleEntry::REGMASK)); + if (pWalk->srcofs != pWalk->dstofs) + { + X86EmitEspOffset(0x8b, kEAX, pWalk->srcofs+espadjust); + X86EmitEspOffset(0x89, kEAX, pWalk->dstofs+espadjust); + } + } + emptySpot = pWalk->srcofs; + } + + // Capture the stacksizedelta while we're at the end of the list. + _ASSERTE(pWalk->srcofs == ShuffleEntry::SENTINEL); + + if (haveMemMemMove) + X86EmitPopReg(SCRATCH_REGISTER_X86REG); + + if (pWalk->stacksizedelta) + X86EmitAddEsp(pWalk->stacksizedelta); + + // Now jump to real target + // JMP [SCRATCHREG] + // we need to jump indirect so that for virtual delegates eax contains a pointer to the indirection cell + X86EmitAddReg(SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtrAux()); + static const BYTE bjmpeax[] = { 0xff, 0x20 }; + EmitBytes(bjmpeax, sizeof(bjmpeax)); + +#endif // _TARGET_AMD64_ +} + + +#if !defined(CROSSGEN_COMPILE) && !defined(FEATURE_STUBS_AS_IL) + +//=========================================================================== +// Computes hash code for MulticastDelegate.Invoke() +UINT_PTR StubLinkerCPU::HashMulticastInvoke(MetaSig* pSig) +{ + CONTRACTL + { + THROWS; + GC_TRIGGERS; + } + CONTRACTL_END; + + ArgIterator argit(pSig); + + UINT numStackBytes = argit.SizeOfArgStack(); + + if (numStackBytes > 0x7FFF) + COMPlusThrow(kNotSupportedException, W("NotSupported_TooManyArgs")); + +#ifdef _TARGET_AMD64_ + // Generate a hash key as follows: + // UINT Arg0Type:2; // R4 (1), R8 (2), other (3) + // UINT Arg1Type:2; // R4 (1), R8 (2), other (3) + // UINT Arg2Type:2; // R4 (1), R8 (2), other (3) + // UINT Arg3Type:2; // R4 (1), R8 (2), other (3) + // UINT NumArgs:24; // number of arguments + // (This should cover all the prestub variations) + + _ASSERTE(!(numStackBytes & 7)); + UINT hash = (numStackBytes / sizeof(void*)) << 8; + + UINT argNum = 0; + + // NextArg() doesn't take into account the "this" pointer. + // That's why we have to special case it here. 
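+    // Example (illustrative): a delegate Invoke(float, double) returning void,
+    // with no return buffer and no stack arguments, hashes to 0x27 on AMD64:
+    // "this" sets bits 0-1 to 3, the float sets bits 2-3 to 1, the double sets
+    // bits 4-5 to 2, and bits 8 and up hold zero stack slots.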
+ if (argit.HasThis()) + { + hash |= 3 << (2*argNum); + argNum++; + } + + if (argit.HasRetBuffArg()) + { + hash |= 3 << (2*argNum); + argNum++; + } + + for (; argNum < 4; argNum++) + { + switch (pSig->NextArgNormalized()) + { + case ELEMENT_TYPE_END: + argNum = 4; + break; + case ELEMENT_TYPE_R4: + hash |= 1 << (2*argNum); + break; + case ELEMENT_TYPE_R8: + hash |= 2 << (2*argNum); + break; + default: + hash |= 3 << (2*argNum); + break; + } + } + +#else // _TARGET_AMD64_ + + // check if the function is returning a float, in which case the stub has to take + // care of popping the floating point stack except for the last invocation + + _ASSERTE(!(numStackBytes & 3)); + + UINT hash = numStackBytes; + + if (CorTypeInfo::IsFloat(pSig->GetReturnType())) + { + hash |= 2; + } +#endif // _TARGET_AMD64_ + + return hash; +} + +#ifdef _TARGET_X86_ +//=========================================================================== +// Emits code for MulticastDelegate.Invoke() +VOID StubLinkerCPU::EmitDelegateInvoke() +{ + STANDARD_VM_CONTRACT; + + CodeLabel *pNullLabel = NewCodeLabel(); + + // test THISREG, THISREG + X86EmitR2ROp(0x85, THIS_kREG, THIS_kREG); + + // jz null + X86EmitCondJump(pNullLabel, X86CondCode::kJZ); + + // mov SCRATCHREG, [THISREG + Delegate.FP] ; Save target stub in register + X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfMethodPtr()); + + // mov THISREG, [THISREG + Delegate.OR] ; replace "this" pointer + X86EmitIndexRegLoad(THIS_kREG, THIS_kREG, DelegateObject::GetOffsetOfTarget()); + + // jmp SCRATCHREG + Emit16(0xe0ff | (SCRATCH_REGISTER_X86REG<<8)); + + // Do a null throw + EmitLabel(pNullLabel); + + // mov ECX, CORINFO_NullReferenceException + Emit8(0xb8+kECX); + Emit32(CORINFO_NullReferenceException); + + X86EmitCall(NewExternalCodeLabel(GetEEFuncEntryPoint(JIT_InternalThrowFromHelper)), 0); + + X86EmitReturn(0); +} +#endif // _TARGET_X86_ + +VOID StubLinkerCPU::EmitMulticastInvoke(UINT_PTR hash) +{ + STANDARD_VM_CONTRACT; + + int thisRegOffset = MulticastFrame::GetOffsetOfTransitionBlock() + + TransitionBlock::GetOffsetOfArgumentRegisters() + offsetof(ArgumentRegisters, THIS_REG); + + // push the methoddesc on the stack + // mov eax, [ecx + offsetof(_methodAuxPtr)] + X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfMethodPtrAux()); + + // Push a MulticastFrame on the stack. + EmitMethodStubProlog(MulticastFrame::GetMethodFrameVPtr(), MulticastFrame::GetOffsetOfTransitionBlock()); + +#ifdef _TARGET_X86_ + // Frame is ready to be inspected by debugger for patch location + EmitPatchLabel(); +#else // _TARGET_AMD64_ + + // Save register arguments in their home locations. + // Non-FP registers are already saved by EmitMethodStubProlog. + // (Assumes Sig.NextArg() does not enum RetBuffArg or "this".) + + int argNum = 0; + __int32 argOfs = MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs(); + CorElementType argTypes[4]; + CorElementType argType; + + // 'this' + argOfs += sizeof(void*); + argTypes[argNum] = ELEMENT_TYPE_I8; + argNum++; + + do + { + argType = ELEMENT_TYPE_END; + + switch ((hash >> (2 * argNum)) & 3) + { + case 0: + argType = ELEMENT_TYPE_END; + break; + case 1: + argType = ELEMENT_TYPE_R4; + + // movss dword ptr [rsp + argOfs], xmm? + X64EmitMovSSToMem((X86Reg)argNum, kRSI, argOfs); + break; + case 2: + argType = ELEMENT_TYPE_R8; + + // movsd qword ptr [rsp + argOfs], xmm? 
+ X64EmitMovSDToMem((X86Reg)argNum, kRSI, argOfs); + break; + default: + argType = ELEMENT_TYPE_I; + break; + } + + argOfs += sizeof(void*); + argTypes[argNum] = argType; + argNum++; + } + while (argNum < 4 && ELEMENT_TYPE_END != argType); + + _ASSERTE(4 == argNum || ELEMENT_TYPE_END == argTypes[argNum-1]); + +#endif // _TARGET_AMD64_ + + // TODO: on AMD64, pick different regs for locals so don't need the pushes + + // push edi ;; Save EDI (want to use it as loop index) + X86EmitPushReg(kEDI); + + // xor edi,edi ;; Loop counter: EDI=0,1,2... + X86EmitZeroOutReg(kEDI); + + CodeLabel *pLoopLabel = NewCodeLabel(); + CodeLabel *pEndLoopLabel = NewCodeLabel(); + + EmitLabel(pLoopLabel); + + // Entry: + // EDI == iteration counter + + // mov ecx, [esi + this] ;; get delegate + X86EmitIndexRegLoad(THIS_kREG, kESI, thisRegOffset); + + // cmp edi,[ecx]._invocationCount + X86EmitOp(0x3b, kEDI, THIS_kREG, DelegateObject::GetOffsetOfInvocationCount()); + + // je ENDLOOP + X86EmitCondJump(pEndLoopLabel, X86CondCode::kJZ); + +#ifdef _TARGET_AMD64_ + + INT32 numStackBytes = (INT32)((hash >> 8) * sizeof(void *)); + + INT32 stackUsed, numStackArgs, ofs; + + // Push any stack args, plus an extra location + // for rsp alignment if needed + + numStackArgs = numStackBytes / sizeof(void*); + + // 1 push above, so stack is currently misaligned + const unsigned STACK_ALIGN_ADJUST = 8; + + if (!numStackArgs) + { + // sub rsp, 28h ;; 4 reg arg home locs + rsp alignment + stackUsed = 0x20 + STACK_ALIGN_ADJUST; + X86EmitSubEsp(stackUsed); + } + else + { + stackUsed = numStackArgs * sizeof(void*); + + // If the stack is misaligned, then an odd number of arguments + // will naturally align the stack. + if ( ((numStackArgs & 1) == 0) + != (STACK_ALIGN_ADJUST == 0)) + { + X86EmitPushReg(kRAX); + stackUsed += sizeof(void*); + } + + ofs = MulticastFrame::GetOffsetOfTransitionBlock() + + TransitionBlock::GetOffsetOfArgs() + sizeof(ArgumentRegisters) + numStackBytes; + + while (numStackArgs--) + { + ofs -= sizeof(void*); + + // push [rsi + ofs] ;; Push stack args + X86EmitIndexPush(kESI, ofs); + } + + // sub rsp, 20h ;; Create 4 reg arg home locations + X86EmitSubEsp(0x20); + + stackUsed += 0x20; + } + + for( + argNum = 0, argOfs = MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs(); + argNum < 4 && argTypes[argNum] != ELEMENT_TYPE_END; + argNum++, argOfs += sizeof(void*) + ) + { + switch (argTypes[argNum]) + { + case ELEMENT_TYPE_R4: + // movss xmm?, dword ptr [rsi + argOfs] + X64EmitMovSSFromMem((X86Reg)argNum, kRSI, argOfs); + break; + case ELEMENT_TYPE_R8: + // movsd xmm?, qword ptr [rsi + argOfs] + X64EmitMovSDFromMem((X86Reg)argNum, kRSI, argOfs); + break; + default: + if (c_argRegs[argNum] != THIS_kREG) + { + // mov r*, [rsi + dstOfs] + X86EmitIndexRegLoad(c_argRegs[argNum], kESI,argOfs); + } + break; + } // switch + } + + // mov SCRATCHREG, [rcx+Delegate._invocationList] ;;fetch invocation list + X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationList()); + + // mov SCRATCHREG, [SCRATCHREG+m_Array+rdi*8] ;; index into invocation list + X86EmitOp(0x8b, kEAX, SCRATCH_REGISTER_X86REG, static_cast(PtrArray::GetDataOffset()), kEDI, sizeof(void*), k64BitOp); + + // mov THISREG, [SCRATCHREG+Delegate.object] ;;replace "this" pointer + X86EmitIndexRegLoad(THIS_kREG, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfTarget()); + + // call [SCRATCHREG+Delegate.target] ;; call current subscriber + X86EmitOffsetModRM(0xff, (X86Reg)2, 
SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtr()); + + // add rsp, stackUsed ;; Clean up stack + X86EmitAddEsp(stackUsed); + + // inc edi + Emit16(0xC7FF); + +#else // _TARGET_AMD64_ + + UINT16 numStackBytes = static_cast(hash & ~3); + + // ..repush & reenregister args.. + INT32 ofs = numStackBytes + MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs(); + while (ofs != MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs()) + { + ofs -= sizeof(void*); + X86EmitIndexPush(kESI, ofs); + } + + #define ARGUMENT_REGISTER(regname) if (k##regname != THIS_kREG) { X86EmitIndexRegLoad(k##regname, kESI, \ + offsetof(ArgumentRegisters, regname) + MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgumentRegisters()); } + + ENUM_ARGUMENT_REGISTERS_BACKWARD(); + + #undef ARGUMENT_REGISTER + + // mov SCRATCHREG, [ecx+Delegate._invocationList] ;;fetch invocation list + X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationList()); + + // mov SCRATCHREG, [SCRATCHREG+m_Array+edi*4] ;; index into invocation list + X86EmitOp(0x8b, kEAX, SCRATCH_REGISTER_X86REG, PtrArray::GetDataOffset(), kEDI, sizeof(void*)); + + // mov THISREG, [SCRATCHREG+Delegate.object] ;;replace "this" pointer + X86EmitIndexRegLoad(THIS_kREG, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfTarget()); + + // call [SCRATCHREG+Delegate.target] ;; call current subscriber + X86EmitOffsetModRM(0xff, (X86Reg)2, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtr()); + INDEBUG(Emit8(0x90)); // Emit a nop after the call in debug so that + // we know that this is a call that can directly call + // managed code + + // inc edi + Emit8(0x47); + + if (hash & 2) // CorTypeInfo::IsFloat(pSig->GetReturnType()) + { + // if the return value is a float/double check if we just did the last call - if not, + // emit the pop of the float stack + + // mov SCRATCHREG, [esi + this] ;; get delegate + X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, kESI, thisRegOffset); + + // cmp edi,[SCRATCHREG]._invocationCount + X86EmitOffsetModRM(0x3b, kEDI, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfInvocationCount()); + + CodeLabel *pNoFloatStackPopLabel = NewCodeLabel(); + + // je NOFLOATSTACKPOP + X86EmitCondJump(pNoFloatStackPopLabel, X86CondCode::kJZ); + + // fstp 0 + Emit16(0xd8dd); + + // NoFloatStackPopLabel: + EmitLabel(pNoFloatStackPopLabel); + } + +#endif // _TARGET_AMD64_ + + // The debugger may need to stop here, so grab the offset of this code. + EmitPatchLabel(); + + // jmp LOOP + X86EmitNearJump(pLoopLabel); + + //ENDLOOP: + EmitLabel(pEndLoopLabel); + + // pop edi ;; Restore edi + X86EmitPopReg(kEDI); + + EmitCheckGSCookie(kESI, MulticastFrame::GetOffsetOfGSCookie()); + + // Epilog + EmitMethodStubEpilog(numStackBytes, MulticastFrame::GetOffsetOfTransitionBlock()); +} + +VOID StubLinkerCPU::EmitSecureDelegateInvoke(UINT_PTR hash) +{ + STANDARD_VM_CONTRACT; + + int thisRegOffset = SecureDelegateFrame::GetOffsetOfTransitionBlock() + + TransitionBlock::GetOffsetOfArgumentRegisters() + offsetof(ArgumentRegisters, THIS_REG); + + // push the methoddesc on the stack + // mov eax, [ecx + offsetof(_invocationCount)] + X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationCount()); + + // Push a SecureDelegateFrame on the stack. 
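+    // (EmitMethodStubProlog saves the callee-saved registers and links the
+    // new frame into the thread's frame chain; the EmitMethodStubEpilog call
+    // at the end of this stub undoes both.)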
+ EmitMethodStubProlog(SecureDelegateFrame::GetMethodFrameVPtr(), SecureDelegateFrame::GetOffsetOfTransitionBlock()); + +#ifdef _TARGET_X86_ + // Frame is ready to be inspected by debugger for patch location + EmitPatchLabel(); +#else // _TARGET_AMD64_ + + // Save register arguments in their home locations. + // Non-FP registers are already saved by EmitMethodStubProlog. + // (Assumes Sig.NextArg() does not enum RetBuffArg or "this".) + + int argNum = 0; + __int32 argOfs = SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs(); + CorElementType argTypes[4]; + CorElementType argType; + + // 'this' + argOfs += sizeof(void*); + argTypes[argNum] = ELEMENT_TYPE_I8; + argNum++; + + do + { + argType = ELEMENT_TYPE_END; + + switch ((hash >> (2 * argNum)) & 3) + { + case 0: + argType = ELEMENT_TYPE_END; + break; + case 1: + argType = ELEMENT_TYPE_R4; + + // movss dword ptr [rsp + argOfs], xmm? + X64EmitMovSSToMem((X86Reg)argNum, kRSI, argOfs); + break; + case 2: + argType = ELEMENT_TYPE_R8; + + // movsd qword ptr [rsp + argOfs], xmm? + X64EmitMovSSToMem((X86Reg)argNum, kRSI, argOfs); + break; + default: + argType = ELEMENT_TYPE_I; + break; + } + + argOfs += sizeof(void*); + argTypes[argNum] = argType; + argNum++; + } + while (argNum < 4 && ELEMENT_TYPE_END != argType); + + _ASSERTE(4 == argNum || ELEMENT_TYPE_END == argTypes[argNum-1]); + +#endif // _TARGET_AMD64_ + + // mov ecx, [esi + this] ;; get delegate + X86EmitIndexRegLoad(THIS_kREG, kESI, thisRegOffset); + +#ifdef _TARGET_AMD64_ + + INT32 numStackBytes = (INT32)((hash >> 8) * sizeof(void *)); + + INT32 stackUsed, numStackArgs, ofs; + + // Push any stack args, plus an extra location + // for rsp alignment if needed + + numStackArgs = numStackBytes / sizeof(void*); + + // 1 push above, so stack is currently misaligned + const unsigned STACK_ALIGN_ADJUST = 0; + + if (!numStackArgs) + { + // sub rsp, 28h ;; 4 reg arg home locs + rsp alignment + stackUsed = 0x20 + STACK_ALIGN_ADJUST; + X86EmitSubEsp(stackUsed); + } + else + { + stackUsed = numStackArgs * sizeof(void*); + + // If the stack is misaligned, then an odd number of arguments + // will naturally align the stack. 
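+        // (Here STACK_ALIGN_ADJUST is 0, so the extra "push rax" below is
+        // emitted only when the number of stack-arg pushes is odd.)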
+ if ( ((numStackArgs & 1) == 0) + != (STACK_ALIGN_ADJUST == 0)) + { + X86EmitPushReg(kRAX); + stackUsed += sizeof(void*); + } + + ofs = SecureDelegateFrame::GetOffsetOfTransitionBlock() + + TransitionBlock::GetOffsetOfArgs() + sizeof(ArgumentRegisters) + numStackBytes; + + while (numStackArgs--) + { + ofs -= sizeof(void*); + + // push [rsi + ofs] ;; Push stack args + X86EmitIndexPush(kESI, ofs); + } + + // sub rsp, 20h ;; Create 4 reg arg home locations + X86EmitSubEsp(0x20); + + stackUsed += 0x20; + } + + int thisArgNum = 0; + + for( + argNum = 0, argOfs = SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs(); + argNum < 4 && argTypes[argNum] != ELEMENT_TYPE_END; + argNum++, argOfs += sizeof(void*) + ) + { + switch (argTypes[argNum]) + { + case ELEMENT_TYPE_R4: + // movss xmm?, dword ptr [rsi + argOfs] + X64EmitMovSSFromMem((X86Reg)argNum, kRSI, argOfs); + break; + case ELEMENT_TYPE_R8: + // movsd xmm?, qword ptr [rsi + argOfs] + X64EmitMovSDFromMem((X86Reg)argNum, kRSI, argOfs); + break; + default: + if (c_argRegs[argNum] != THIS_kREG) + { + // mov r*, [rsi + dstOfs] + X86EmitIndexRegLoad(c_argRegs[argNum], kESI,argOfs); + } + break; + } // switch + } + + // mov SCRATCHREG, [rcx+Delegate._invocationList] ;;fetch the inner delegate + X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationList()); + + // mov THISREG, [SCRATCHREG+Delegate.object] ;;replace "this" pointer + X86EmitIndexRegLoad(c_argRegs[thisArgNum], SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfTarget()); + + // call [SCRATCHREG+Delegate.target] ;; call current subscriber + X86EmitOffsetModRM(0xff, (X86Reg)2, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtr()); + + // add rsp, stackUsed ;; Clean up stack + X86EmitAddEsp(stackUsed); + +#else // _TARGET_AMD64_ + + UINT16 numStackBytes = static_cast(hash & ~3); + + // ..repush & reenregister args.. + INT32 ofs = numStackBytes + SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs(); + while (ofs != SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs()) + { + ofs -= sizeof(void*); + X86EmitIndexPush(kESI, ofs); + } + + #define ARGUMENT_REGISTER(regname) if (k##regname != THIS_kREG) { X86EmitIndexRegLoad(k##regname, kESI, \ + offsetof(ArgumentRegisters, regname) + SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgumentRegisters()); } + + ENUM_ARGUMENT_REGISTERS_BACKWARD(); + + #undef ARGUMENT_REGISTER + + // mov SCRATCHREG, [ecx+Delegate._invocationList] ;;fetch the inner delegate + X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationList()); + + // mov THISREG, [SCRATCHREG+Delegate.object] ;;replace "this" pointer + X86EmitIndexRegLoad(THIS_kREG, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfTarget()); + + // call [SCRATCHREG+Delegate.target] ;; call current subscriber + X86EmitOffsetModRM(0xff, (X86Reg)2, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtr()); + INDEBUG(Emit8(0x90)); // Emit a nop after the call in debug so that + // we know that this is a call that can directly call + // managed code + +#endif // _TARGET_AMD64_ + + // The debugger may need to stop here, so grab the offset of this code. 
+ EmitPatchLabel(); + + EmitCheckGSCookie(kESI, SecureDelegateFrame::GetOffsetOfGSCookie()); + + // Epilog + EmitMethodStubEpilog(numStackBytes, SecureDelegateFrame::GetOffsetOfTransitionBlock()); +} + +#ifndef FEATURE_ARRAYSTUB_AS_IL + +// Little helper to generate code to move nbytes bytes of non Ref memory + +void generate_noref_copy (unsigned nbytes, StubLinkerCPU* sl) +{ + CONTRACTL + { + THROWS; + GC_NOTRIGGER; + INJECT_FAULT(COMPlusThrowOM();); + } + CONTRACTL_END; + + // If the size is pointer-aligned, we'll use movsd + if (IS_ALIGNED(nbytes, sizeof(void*))) + { + // If there are less than 4 pointers to copy, "unroll" the "rep movsd" + if (nbytes <= 3*sizeof(void*)) + { + while (nbytes > 0) + { + // movsd + sl->X86_64BitOperands(); + sl->Emit8(0xa5); + + nbytes -= sizeof(void*); + } + } + else + { + // mov ECX, size / 4 + sl->Emit8(0xb8+kECX); + sl->Emit32(nbytes / sizeof(void*)); + + // repe movsd + sl->Emit8(0xf3); + sl->X86_64BitOperands(); + sl->Emit8(0xa5); + } + } + else + { + // mov ECX, size + sl->Emit8(0xb8+kECX); + sl->Emit32(nbytes); + + // repe movsb + sl->Emit16(0xa4f3); + } +} + + +X86Reg LoadArrayOpArg ( + UINT32 idxloc, + StubLinkerCPU *psl, + X86Reg kRegIfFromMem, + UINT ofsadjust + AMD64_ARG(StubLinkerCPU::X86OperandSize OperandSize = StubLinkerCPU::k64BitOp) + ) +{ + STANDARD_VM_CONTRACT; + + if (!TransitionBlock::IsStackArgumentOffset(idxloc)) + return GetX86ArgumentRegisterFromOffset(idxloc - TransitionBlock::GetOffsetOfArgumentRegisters()); + + psl->X86EmitEspOffset(0x8b, kRegIfFromMem, idxloc + ofsadjust AMD64_ARG(OperandSize)); + return kRegIfFromMem; +} + +VOID StubLinkerCPU::EmitArrayOpStubThrow(unsigned exConst, unsigned cbRetArg) +{ + STANDARD_VM_CONTRACT; + + //ArrayOpStub*Exception + X86EmitPopReg(kESI); + X86EmitPopReg(kEDI); + + //mov CORINFO_NullReferenceException_ASM, %ecx + Emit8(0xb8 | kECX); + Emit32(exConst); + //InternalExceptionWorker + + X86EmitPopReg(kEDX); + // add pArrayOpScript->m_cbretpop, %esp (was add %eax, %esp) + Emit8(0x81); + Emit8(0xc0 | 0x4); + Emit32(cbRetArg); + X86EmitPushReg(kEDX); + X86EmitNearJump(NewExternalCodeLabel((PVOID)JIT_InternalThrow)); +} + +//=========================================================================== +// Emits code to do an array operation. +#ifdef _PREFAST_ +#pragma warning(push) +#pragma warning(disable:21000) // Suppress PREFast warning about overly large function +#endif +VOID StubLinkerCPU::EmitArrayOpStub(const ArrayOpScript* pArrayOpScript) +{ + STANDARD_VM_CONTRACT; + + // This is the offset to the parameters/what's already pushed on the stack: + // return address. + const INT locsize = sizeof(void*); + + // ArrayOpScript's stack offsets are built using ArgIterator, which + // assumes a TransitionBlock has been pushed, which is not the case + // here. rsp + ofsadjust should point at the first argument. Any further + // stack modifications below need to adjust ofsadjust appropriately. + // baseofsadjust needs to be the stack adjustment at the entry point - + // this is used further below to compute how much stack space was used. 
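+    // For example, on x86 the two callee-saved pushes emitted below add
+    // 2*sizeof(void*) back into ofsadjust so that esp + ofsadjust keeps
+    // pointing at the first argument.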
+ + INT ofsadjust = locsize - (INT)sizeof(TransitionBlock); + + // Register usage + // + // x86 AMD64 + // Inputs: + // managed array THIS_kREG (ecx) THIS_kREG (rcx) + // index 0 edx rdx + // index 1/value r8 + // index 2/value r9 + // expected element type for LOADADDR eax rax rdx + // Working registers: + // total (accumulates unscaled offset) edi r10 + // factor (accumulates the slice factor) esi r11 + X86Reg kArrayRefReg = THIS_kREG; +#ifdef _TARGET_AMD64_ + const X86Reg kArrayMTReg = kR10; + const X86Reg kTotalReg = kR10; + const X86Reg kFactorReg = kR11; +#else + const X86Reg kArrayMTReg = kESI; + const X86Reg kTotalReg = kEDI; + const X86Reg kFactorReg = kESI; +#endif + +#ifdef _TARGET_AMD64_ + // Simplifying assumption for fNeedPrologue. + _ASSERTE(!pArrayOpScript->m_gcDesc || (pArrayOpScript->m_flags & ArrayOpScript::NEEDSWRITEBARRIER)); + // Simplifying assumption for saving rsi and rdi. + _ASSERTE(!(pArrayOpScript->m_flags & ArrayOpScript::HASRETVALBUFFER) || ArgIterator::IsArgPassedByRef(pArrayOpScript->m_elemsize)); + + // Cases where we need to make calls + BOOL fNeedScratchArea = ( (pArrayOpScript->m_flags & (ArrayOpScript::NEEDSTYPECHECK | ArrayOpScript::NEEDSWRITEBARRIER)) + && ( pArrayOpScript->m_op == ArrayOpScript::STORE + || ( pArrayOpScript->m_op == ArrayOpScript::LOAD + && (pArrayOpScript->m_flags & ArrayOpScript::HASRETVALBUFFER)))); + + // Cases where we need to copy large values + BOOL fNeedRSIRDI = ( ArgIterator::IsArgPassedByRef(pArrayOpScript->m_elemsize) + && ArrayOpScript::LOADADDR != pArrayOpScript->m_op); + + BOOL fNeedPrologue = ( fNeedScratchArea + || fNeedRSIRDI); +#endif + + X86Reg kValueReg; + + CodeLabel *Epilog = NewCodeLabel(); + CodeLabel *Inner_nullexception = NewCodeLabel(); + CodeLabel *Inner_rangeexception = NewCodeLabel(); + CodeLabel *Inner_typeMismatchexception = NULL; + + // + // Set up the stack frame. + // + // + // x86: + // value + // + // ... + // + // return address + // saved edi + // esp -> saved esi + // + // + // AMD64: + // value, if rank > 2 + // ... + // + 0x48 more indices + // + 0x40 r9 home + // + 0x38 r8 home + // + 0x30 rdx home + // + 0x28 rcx home + // + 0x20 return address + // + 0x18 scratch area (callee's r9) + // + 0x10 scratch area (callee's r8) + // + 8 scratch area (callee's rdx) + // rsp -> scratch area (callee's rcx) + // + // If the element type is a value class w/ object references, then rsi + // and rdi will also be saved above the scratch area: + // + // ... + // + 0x28 saved rsi + // + 0x20 saved rdi + // + 0x18 scratch area (callee's r9) + // + 0x10 scratch area (callee's r8) + // + 8 scratch area (callee's rdx) + // rsp -> scratch area (callee's rcx) + // + // And if no call or movsb is necessary, then the scratch area sits + // directly under the MethodDesc*. + + BOOL fSavedESI = FALSE; + BOOL fSavedEDI = FALSE; + +#ifdef _TARGET_AMD64_ + if (fNeedPrologue) + { + // Save argument registers if we'll be making a call before using + // them. Note that in this case the element value will always be an + // object type, and never be in an xmm register. 
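+        // (The stores below spill rcx, rdx, r8 (and r9 when rank >= 2) into
+        // their caller-allocated home slots at [rsp+08h]..[rsp+20h]; rsp
+        // still points at the return address at this point.)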
+ + if ( (pArrayOpScript->m_flags & ArrayOpScript::NEEDSTYPECHECK) + && ArrayOpScript::STORE == pArrayOpScript->m_op) + { + // mov [rsp+0x08], rcx + X86EmitEspOffset(0x89, kRCX, 0x08); + X86EmitEspOffset(0x89, kRDX, 0x10); + X86EmitEspOffset(0x89, kR8, 0x18); + + if (pArrayOpScript->m_rank >= 2) + X86EmitEspOffset(0x89, kR9, 0x20); + } + + if (fNeedRSIRDI) + { + X86EmitPushReg(kRSI); + X86EmitPushReg(kRDI); + + fSavedESI = fSavedEDI = TRUE; + + ofsadjust += 0x10; + } + + if (fNeedScratchArea) + { + // Callee scratch area (0x8 for aligned esp) + X86EmitSubEsp(sizeof(ArgumentRegisters) + 0x8); + ofsadjust += sizeof(ArgumentRegisters) + 0x8; + } + } +#else + // Preserve the callee-saved registers + // NOTE: if you change the sequence of these pushes, you must also update: + // ArrayOpStubNullException + // ArrayOpStubRangeException + // ArrayOpStubTypeMismatchException + _ASSERTE( kTotalReg == kEDI); + X86EmitPushReg(kTotalReg); + _ASSERTE( kFactorReg == kESI); + X86EmitPushReg(kFactorReg); + + fSavedESI = fSavedEDI = TRUE; + + ofsadjust += 2*sizeof(void*); +#endif + + // Check for null. + X86EmitR2ROp(0x85, kArrayRefReg, kArrayRefReg); // TEST ECX, ECX + X86EmitCondJump(Inner_nullexception, X86CondCode::kJZ); // jz Inner_nullexception + + // Do Type Check if needed + if (pArrayOpScript->m_flags & ArrayOpScript::NEEDSTYPECHECK) + { + if (pArrayOpScript->m_op == ArrayOpScript::STORE) + { + // Get the value to be stored. + kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, kEAX, ofsadjust); + + X86EmitR2ROp(0x85, kValueReg, kValueReg); // TEST kValueReg, kValueReg + CodeLabel *CheckPassed = NewCodeLabel(); + X86EmitCondJump(CheckPassed, X86CondCode::kJZ); // storing NULL is OK + + // mov EAX, element type ; possibly trashes kValueReg + X86EmitOp(0x8b, kArrayMTReg, kArrayRefReg, 0 AMD64_ARG(k64BitOp)); // mov ESI/R10, [kArrayRefReg] + + X86EmitOp(0x8b, kEAX, kValueReg, 0 AMD64_ARG(k64BitOp)); // mov EAX, [kValueReg] ; possibly trashes kValueReg + // cmp EAX, [ESI/R10+m_ElementType] + + X86EmitOp(0x3b, kEAX, kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle() AMD64_ARG(k64BitOp)); + X86EmitCondJump(CheckPassed, X86CondCode::kJZ); // Exact match is OK + + X86EmitRegLoad(kEAX, (UINT_PTR)g_pObjectClass); // mov EAX, g_pObjectMethodTable + // cmp EAX, [ESI/R10+m_ElementType] + + X86EmitOp(0x3b, kEAX, kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle() AMD64_ARG(k64BitOp)); + X86EmitCondJump(CheckPassed, X86CondCode::kJZ); // Assigning to array of object is OK + + // Try to call the fast helper first ( ObjIsInstanceOfNoGC ). + // If that fails we will fall back to calling the slow helper ( ArrayStoreCheck ) that erects a frame. 
+ // See also JitInterfaceX86::JIT_Stelem_Ref + +#ifdef _TARGET_AMD64_ + // RCX contains pointer to object to check (Object*) + // RDX contains array type handle + + // mov RCX, [rsp+offsetToObject] ; RCX = Object* + X86EmitEspOffset(0x8b, kRCX, ofsadjust + pArrayOpScript->m_fValLoc); + + // get Array TypeHandle + // mov RDX, [RSP+offsetOfTypeHandle] + + X86EmitEspOffset(0x8b, kRDX, ofsadjust + + TransitionBlock::GetOffsetOfArgumentRegisters() + + FIELD_OFFSET(ArgumentRegisters, THIS_REG)); + + // mov RDX, [kArrayMTReg+offsetof(MethodTable, m_ElementType)] + X86EmitIndexRegLoad(kRDX, kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle()); + +#else + X86EmitPushReg(kEDX); // Save EDX + X86EmitPushReg(kECX); // Pass array object + + X86EmitIndexPush(kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle()); // push [kArrayMTReg + m_ElementType] ; Array element type handle + + // get address of value to store + _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc)); // on x86, value will never get a register + X86EmitSPIndexPush(pArrayOpScript->m_fValLoc + ofsadjust + 3*sizeof(void*)); // push [ESP+offset] ; the object pointer + +#endif //_AMD64 + + + // emit a call to the fast helper + // One side effect of this is that we are going to generate a "jnz Epilog" and we DON'T need it + // in the fast path, however there are no side effects in emitting + // it in the fast path anyway. the reason for that is that it makes + // the cleanup code much easier ( we have only 1 place to cleanup the stack and + // restore it to the original state ) + X86EmitCall(NewExternalCodeLabel((LPVOID)ObjIsInstanceOfNoGC), 0); + X86EmitCmpRegImm32( kEAX, TypeHandle::CanCast); // CMP EAX, CanCast ; if ObjIsInstanceOfNoGC returns CanCast, we will go the fast path + CodeLabel * Cleanup = NewCodeLabel(); + X86EmitCondJump(Cleanup, X86CondCode::kJZ); + +#ifdef _TARGET_AMD64_ + // get address of value to store + // lea rcx, [rsp+offs] + X86EmitEspOffset(0x8d, kRCX, ofsadjust + pArrayOpScript->m_fValLoc); + + // get address of 'this'/rcx + // lea rdx, [rsp+offs] + X86EmitEspOffset(0x8d, kRDX, ofsadjust + + TransitionBlock::GetOffsetOfArgumentRegisters() + + FIELD_OFFSET(ArgumentRegisters, THIS_REG)); + +#else + // The stack is already setup correctly for the slow helper. 
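+            // Both arguments end up being passed by address: ecx gets the
+            // address of the value slot on the stack and edx gets the address
+            // of the saved array reference pushed above.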
+ _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc)); // on x86, value will never get a register + X86EmitEspOffset(0x8d, kECX, pArrayOpScript->m_fValLoc + ofsadjust + 2*sizeof(void*)); // lea ECX, [ESP+offset] + + // get address of 'this' + X86EmitEspOffset(0x8d, kEDX, 0); // lea EDX, [ESP] ; (address of ECX) + + +#endif + AMD64_ONLY(_ASSERTE(fNeedScratchArea)); + X86EmitCall(NewExternalCodeLabel((LPVOID)ArrayStoreCheck), 0); + + EmitLabel(Cleanup); +#ifdef _TARGET_AMD64_ + X86EmitEspOffset(0x8b, kRCX, 0x00 + ofsadjust + TransitionBlock::GetOffsetOfArgumentRegisters()); + X86EmitEspOffset(0x8b, kRDX, 0x08 + ofsadjust + TransitionBlock::GetOffsetOfArgumentRegisters()); + X86EmitEspOffset(0x8b, kR8, 0x10 + ofsadjust + TransitionBlock::GetOffsetOfArgumentRegisters()); + + if (pArrayOpScript->m_rank >= 2) + X86EmitEspOffset(0x8b, kR9, 0x18 + ofsadjust + TransitionBlock::GetOffsetOfArgumentRegisters()); +#else + X86EmitPopReg(kECX); // restore regs + X86EmitPopReg(kEDX); + + + X86EmitR2ROp(0x3B, kEAX, kEAX); // CMP EAX, EAX + X86EmitCondJump(Epilog, X86CondCode::kJNZ); // This branch never taken, but epilog walker uses it +#endif + + EmitLabel(CheckPassed); + } + else + { + _ASSERTE(pArrayOpScript->m_op == ArrayOpScript::LOADADDR); + + // Load up the hidden type parameter into 'typeReg' + X86Reg typeReg = LoadArrayOpArg(pArrayOpScript->m_typeParamOffs, this, kEAX, ofsadjust); + + // 'typeReg' holds the typeHandle for the ARRAY. This must be a ArrayTypeDesc*, so + // mask off the low two bits to get the TypeDesc* + X86EmitR2ROp(0x83, (X86Reg)4, typeReg); // AND typeReg, 0xFFFFFFFC + Emit8(0xFC); + + // If 'typeReg' is NULL then we're executing the readonly ::Address and no type check is + // needed. + CodeLabel *Inner_passedTypeCheck = NewCodeLabel(); + + X86EmitCondJump(Inner_passedTypeCheck, X86CondCode::kJZ); + + // Get the parameter of the parameterize type + // mov typeReg, [typeReg.m_Arg] + X86EmitOp(0x8b, typeReg, typeReg, offsetof(ParamTypeDesc, m_Arg) AMD64_ARG(k64BitOp)); + + // Compare this against the element type of the array. + // mov ESI/R10, [kArrayRefReg] + X86EmitOp(0x8b, kArrayMTReg, kArrayRefReg, 0 AMD64_ARG(k64BitOp)); + // cmp typeReg, [ESI/R10+m_ElementType]; + X86EmitOp(0x3b, typeReg, kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle() AMD64_ARG(k64BitOp)); + + // Throw error if not equal + Inner_typeMismatchexception = NewCodeLabel(); + X86EmitCondJump(Inner_typeMismatchexception, X86CondCode::kJNZ); + EmitLabel(Inner_passedTypeCheck); + } + } + + CodeLabel* DoneCheckLabel = 0; + if (pArrayOpScript->m_rank == 1 && pArrayOpScript->m_fHasLowerBounds) + { + DoneCheckLabel = NewCodeLabel(); + CodeLabel* NotSZArrayLabel = NewCodeLabel(); + + // for rank1 arrays, we might actually have two different layouts depending on + // if we are ELEMENT_TYPE_ARRAY or ELEMENT_TYPE_SZARRAY. + + // mov EAX, [ARRAY] // EAX holds the method table + X86_64BitOperands(); + X86EmitOp(0x8b, kEAX, kArrayRefReg); + + // test [EAX + m_dwFlags], enum_flag_Category_IfArrayThenSzArray + X86_64BitOperands(); + X86EmitOffsetModRM(0xf7, (X86Reg)0, kEAX, MethodTable::GetOffsetOfFlags()); + Emit32(MethodTable::GetIfArrayThenSzArrayFlag()); + + // jz NotSZArrayLabel + X86EmitCondJump(NotSZArrayLabel, X86CondCode::kJZ); + + //Load the passed-in index into the scratch register. 
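+        // (An SZARRAY has a single zero-based index, so it is range-checked
+        // directly against the component count here; the shared loop below
+        // handles lower bounds for the general case.)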
+ const ArrayOpIndexSpec *pai = pArrayOpScript->GetArrayOpIndexSpecs(); + X86Reg idxReg = LoadArrayOpArg(pai->m_idxloc, this, SCRATCH_REGISTER_X86REG, ofsadjust); + + // cmp idxReg, [kArrayRefReg + LENGTH] + X86EmitOp(0x3b, idxReg, kArrayRefReg, ArrayBase::GetOffsetOfNumComponents()); + + // jae Inner_rangeexception + X86EmitCondJump(Inner_rangeexception, X86CondCode::kJAE); + + // if we cared efficiency of this, this move can be optimized + X86EmitR2ROp(0x8b, kTotalReg, idxReg AMD64_ARG(k32BitOp)); + + // sub ARRAY. 8 // 8 is accounts for the Lower bound and Dim count in the ARRAY + X86EmitSubReg(kArrayRefReg, 8); // adjust this pointer so that indexing works out for SZARRAY + + X86EmitNearJump(DoneCheckLabel); + EmitLabel(NotSZArrayLabel); + } + + // For each index, range-check and mix into accumulated total. + UINT idx = pArrayOpScript->m_rank; + BOOL firstTime = TRUE; + while (idx--) + { + const ArrayOpIndexSpec *pai = pArrayOpScript->GetArrayOpIndexSpecs() + idx; + + //Load the passed-in index into the scratch register. + X86Reg srcreg = LoadArrayOpArg(pai->m_idxloc, this, SCRATCH_REGISTER_X86REG, ofsadjust AMD64_ARG(k32BitOp)); + if (SCRATCH_REGISTER_X86REG != srcreg) + X86EmitR2ROp(0x8b, SCRATCH_REGISTER_X86REG, srcreg AMD64_ARG(k32BitOp)); + + // sub SCRATCH, dword ptr [kArrayRefReg + LOWERBOUND] + if (pArrayOpScript->m_fHasLowerBounds) + { + X86EmitOp(0x2b, SCRATCH_REGISTER_X86REG, kArrayRefReg, pai->m_lboundofs); + } + + // cmp SCRATCH, dword ptr [kArrayRefReg + LENGTH] + X86EmitOp(0x3b, SCRATCH_REGISTER_X86REG, kArrayRefReg, pai->m_lengthofs); + + // jae Inner_rangeexception + X86EmitCondJump(Inner_rangeexception, X86CondCode::kJAE); + + + // SCRATCH == idx - LOWERBOUND + // + // imul SCRATCH, FACTOR + if (!firstTime) + { + //Can skip the first time since FACTOR==1 + X86EmitR2ROp(0xaf0f, SCRATCH_REGISTER_X86REG, kFactorReg AMD64_ARG(k32BitOp)); + } + + // TOTAL += SCRATCH + if (firstTime) + { + // First time, we must zero-init TOTAL. Since + // zero-initing and then adding is just equivalent to a + // "mov", emit a "mov" + // mov TOTAL, SCRATCH + X86EmitR2ROp(0x8b, kTotalReg, SCRATCH_REGISTER_X86REG AMD64_ARG(k32BitOp)); + } + else + { + // add TOTAL, SCRATCH + X86EmitR2ROp(0x03, kTotalReg, SCRATCH_REGISTER_X86REG AMD64_ARG(k32BitOp)); + } + + // FACTOR *= [kArrayRefReg + LENGTH] + if (idx != 0) + { + // No need to update FACTOR on the last iteration + // since we won't use it again + + if (firstTime) + { + // must init FACTOR to 1 first: hence, + // the "imul" becomes a "mov" + // mov FACTOR, [kArrayRefReg + LENGTH] + X86EmitOp(0x8b, kFactorReg, kArrayRefReg, pai->m_lengthofs); + } + else + { + // imul FACTOR, [kArrayRefReg + LENGTH] + X86EmitOp(0xaf0f, kFactorReg, kArrayRefReg, pai->m_lengthofs); + } + } + + firstTime = FALSE; + } + + if (DoneCheckLabel != 0) + EmitLabel(DoneCheckLabel); + + // Pass these values to X86EmitArrayOp() to generate the element address. + X86Reg elemBaseReg = kArrayRefReg; + X86Reg elemScaledReg = kTotalReg; + UINT32 elemSize = pArrayOpScript->m_elemsize; + UINT32 elemOfs = pArrayOpScript->m_ofsoffirst; + + if (!(elemSize == 1 || elemSize == 2 || elemSize == 4 || elemSize == 8)) + { + switch (elemSize) + { + // No way to express this as a SIB byte. Fold the scale + // into TOTAL. 
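+        // (SIB scale factors are limited to 1, 2, 4 or 8, so larger element
+        // sizes are applied to TOTAL directly and elemScale collapses to 1.)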
+ + case 16: + // shl TOTAL,4 + X86EmitR2ROp(0xc1, (X86Reg)4, kTotalReg AMD64_ARG(k32BitOp)); + Emit8(4); + break; + + case 32: + // shl TOTAL,5 + X86EmitR2ROp(0xc1, (X86Reg)4, kTotalReg AMD64_ARG(k32BitOp)); + Emit8(5); + break; + + case 64: + // shl TOTAL,6 + X86EmitR2ROp(0xc1, (X86Reg)4, kTotalReg AMD64_ARG(k32BitOp)); + Emit8(6); + break; + + default: + // imul TOTAL, elemScale + X86EmitR2ROp(0x69, kTotalReg, kTotalReg AMD64_ARG(k32BitOp)); + Emit32(elemSize); + break; + } + elemSize = 1; + } + + _ASSERTE(FitsInU1(elemSize)); + BYTE elemScale = static_cast(elemSize); + + // Now, do the operation: + + switch (pArrayOpScript->m_op) + { + case ArrayOpScript::LOADADDR: + // lea eax, ELEMADDR + X86EmitOp(0x8d, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale AMD64_ARG(k64BitOp)); + break; + + case ArrayOpScript::LOAD: + if (pArrayOpScript->m_flags & ArrayOpScript::HASRETVALBUFFER) + { + // Ensure that these registers have been saved! + _ASSERTE(fSavedESI && fSavedEDI); + + //lea esi, ELEMADDR + X86EmitOp(0x8d, kESI, elemBaseReg, elemOfs, elemScaledReg, elemScale AMD64_ARG(k64BitOp)); + + _ASSERTE(!TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fRetBufLoc)); + // mov edi, retbufptr + X86EmitR2ROp(0x8b, kEDI, GetX86ArgumentRegisterFromOffset(pArrayOpScript->m_fRetBufLoc - TransitionBlock::GetOffsetOfArgumentRegisters())); + +COPY_VALUE_CLASS: + { + size_t size = pArrayOpScript->m_elemsize; + size_t total = 0; + if(pArrayOpScript->m_gcDesc) + { + CGCDescSeries* cur = pArrayOpScript->m_gcDesc->GetHighestSeries(); + if ((cur->startoffset-elemOfs) > 0) + generate_noref_copy ((unsigned) (cur->startoffset - elemOfs), this); + total += cur->startoffset - elemOfs; + + SSIZE_T cnt = (SSIZE_T) pArrayOpScript->m_gcDesc->GetNumSeries(); + // special array encoding + _ASSERTE(cnt < 0); + + for (SSIZE_T __i = 0; __i > cnt; __i--) + { + HALF_SIZE_T skip = cur->val_serie[__i].skip; + HALF_SIZE_T nptrs = cur->val_serie[__i].nptrs; + total += nptrs*sizeof (DWORD*); + do + { + AMD64_ONLY(_ASSERTE(fNeedScratchArea)); + + X86EmitCall(NewExternalCodeLabel((LPVOID) JIT_ByRefWriteBarrier), 0); + } while (--nptrs); + if (skip > 0) + { + //check if we are at the end of the series + if (__i == (cnt + 1)) + skip = skip - (HALF_SIZE_T)(cur->startoffset - elemOfs); + if (skip > 0) + generate_noref_copy (skip, this); + } + total += skip; + } + + _ASSERTE (size == total); + } + else + { + // no ref anywhere, just copy the bytes. + _ASSERTE (size); + generate_noref_copy ((unsigned)size, this); + } + } + } + else + { + switch (pArrayOpScript->m_elemsize) + { + case 1: + // mov[zs]x eax, byte ptr ELEMADDR + X86EmitOp(pArrayOpScript->m_signed ? 0xbe0f : 0xb60f, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale); + break; + + case 2: + // mov[zs]x eax, word ptr ELEMADDR + X86EmitOp(pArrayOpScript->m_signed ? 
0xbf0f : 0xb70f, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale); + break; + + case 4: + if (pArrayOpScript->m_flags & ArrayOpScript::ISFPUTYPE) + { +#ifdef _TARGET_AMD64_ + // movss xmm0, dword ptr ELEMADDR + Emit8(0xf3); + X86EmitOp(0x100f, (X86Reg)0, elemBaseReg, elemOfs, elemScaledReg, elemScale); +#else // !_TARGET_AMD64_ + // fld dword ptr ELEMADDR + X86EmitOp(0xd9, (X86Reg)0, elemBaseReg, elemOfs, elemScaledReg, elemScale); +#endif // !_TARGET_AMD64_ + } + else + { + // mov eax, ELEMADDR + X86EmitOp(0x8b, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale); + } + break; + + case 8: + if (pArrayOpScript->m_flags & ArrayOpScript::ISFPUTYPE) + { +#ifdef _TARGET_AMD64_ + // movsd xmm0, qword ptr ELEMADDR + Emit8(0xf2); + X86EmitOp(0x100f, (X86Reg)0, elemBaseReg, elemOfs, elemScaledReg, elemScale); +#else // !_TARGET_AMD64_ + // fld qword ptr ELEMADDR + X86EmitOp(0xdd, (X86Reg)0, elemBaseReg, elemOfs, elemScaledReg, elemScale); +#endif // !_TARGET_AMD64_ + } + else + { + // mov eax, ELEMADDR + X86EmitOp(0x8b, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale AMD64_ARG(k64BitOp)); +#ifdef _TARGET_X86_ + // mov edx, ELEMADDR + 4 + X86EmitOp(0x8b, kEDX, elemBaseReg, elemOfs + 4, elemScaledReg, elemScale); +#endif + } + break; + + default: + _ASSERTE(0); + } + } + + break; + + case ArrayOpScript::STORE: + + switch (pArrayOpScript->m_elemsize) + { + case 1: + // mov SCRATCH, [esp + valoffset] + kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust); + // mov byte ptr ELEMADDR, SCRATCH.b + X86EmitOp(0x88, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale); + break; + case 2: + // mov SCRATCH, [esp + valoffset] + kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust); + // mov word ptr ELEMADDR, SCRATCH.w + Emit8(0x66); + X86EmitOp(0x89, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale); + break; + case 4: +#ifndef _TARGET_AMD64_ + if (pArrayOpScript->m_flags & ArrayOpScript::NEEDSWRITEBARRIER) + { + // mov SCRATCH, [esp + valoffset] + kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust); + + _ASSERTE(SCRATCH_REGISTER_X86REG == kEAX); // value to store is already in EAX where we want it. + // lea edx, ELEMADDR + X86EmitOp(0x8d, kEDX, elemBaseReg, elemOfs, elemScaledReg, elemScale); + + // call JIT_Writeable_Thunks_Buf.WriteBarrierReg[0] (== EAX) + X86EmitCall(NewExternalCodeLabel((LPVOID) &JIT_WriteBarrierEAX), 0); + } + else +#else // _TARGET_AMD64_ + if (pArrayOpScript->m_flags & ArrayOpScript::ISFPUTYPE) + { + if (!TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc)) + { + kValueReg = (X86Reg)TransitionBlock::GetArgumentIndexFromOffset(pArrayOpScript->m_fValLoc); + } + else + { + kValueReg = (X86Reg)0; // xmm0 + + // movss xmm0, dword ptr [rsp+??] + Emit8(0xf3); + X86EmitOp(0x100f, kValueReg, (X86Reg)4 /*rsp*/, ofsadjust + pArrayOpScript->m_fValLoc); + } + + // movss dword ptr ELEMADDR, xmm? 
+ Emit8(0xf3); + X86EmitOp(0x110f, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale); + } + else +#endif // _TARGET_AMD64_ + { + // mov SCRATCH, [esp + valoffset] + kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust AMD64_ARG(k32BitOp)); + + // mov ELEMADDR, SCRATCH + X86EmitOp(0x89, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale); + } + break; + + case 8: + + if (!(pArrayOpScript->m_flags & ArrayOpScript::NEEDSWRITEBARRIER)) + { +#ifdef _TARGET_AMD64_ + if (pArrayOpScript->m_flags & ArrayOpScript::ISFPUTYPE) + { + if (!TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc)) + { + kValueReg = (X86Reg)TransitionBlock::GetArgumentIndexFromOffset(pArrayOpScript->m_fValLoc); + } + else + { + kValueReg = (X86Reg)0; // xmm0 + + // movsd xmm0, qword ptr [rsp+??] + Emit8(0xf2); + X86EmitOp(0x100f, kValueReg, (X86Reg)4 /*rsp*/, ofsadjust + pArrayOpScript->m_fValLoc); + } + + // movsd qword ptr ELEMADDR, xmm? + Emit8(0xf2); + X86EmitOp(0x110f, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale); + } + else + { + // mov SCRATCH, [esp + valoffset] + kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust); + + // mov ELEMADDR, SCRATCH + X86EmitOp(0x89, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale, k64BitOp); + } +#else // !_TARGET_AMD64_ + _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc)); // on x86, value will never get a register: so too lazy to implement that case + // mov SCRATCH, [esp + valoffset] + X86EmitEspOffset(0x8b, SCRATCH_REGISTER_X86REG, pArrayOpScript->m_fValLoc + ofsadjust); + // mov ELEMADDR, SCRATCH + X86EmitOp(0x89, SCRATCH_REGISTER_X86REG, elemBaseReg, elemOfs, elemScaledReg, elemScale); + + _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc)); // on x86, value will never get a register: so too lazy to implement that case + // mov SCRATCH, [esp + valoffset + 4] + X86EmitEspOffset(0x8b, SCRATCH_REGISTER_X86REG, pArrayOpScript->m_fValLoc + ofsadjust + 4); + // mov ELEMADDR+4, SCRATCH + X86EmitOp(0x89, SCRATCH_REGISTER_X86REG, elemBaseReg, elemOfs+4, elemScaledReg, elemScale); +#endif // !_TARGET_AMD64_ + break; + } +#ifdef _TARGET_AMD64_ + else + { + _ASSERTE(SCRATCH_REGISTER_X86REG == kEAX); // value to store is already in EAX where we want it. + // lea rcx, ELEMADDR + X86EmitOp(0x8d, kRCX, elemBaseReg, elemOfs, elemScaledReg, elemScale, k64BitOp); + + // mov rdx, [rsp + valoffset] + kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, kRDX, ofsadjust); + _ASSERT(kRCX != kValueReg); + if (kRDX != kValueReg) + X86EmitR2ROp(0x8b, kRDX, kValueReg); + + _ASSERTE(fNeedScratchArea); + X86EmitCall(NewExternalCodeLabel((PVOID)JIT_WriteBarrier), 0); + break; + } +#endif // _TARGET_AMD64_ + // FALL THROUGH (on x86) + default: + // Ensure that these registers have been saved! 
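+            // (ESI/EDI are needed because this path sets them up as the source and
+            // destination pointers and then jumps back to the shared COPY_VALUE_CLASS
+            // copy loop used by the LOAD case.)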
+ _ASSERTE(fSavedESI && fSavedEDI); + +#ifdef _TARGET_AMD64_ + // mov rsi, [rsp + valoffset] + kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, kRSI, ofsadjust); + if (kRSI != kValueReg) + X86EmitR2ROp(0x8b, kRSI, kValueReg); +#else // !_TARGET_AMD64_ + _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc)); + // lea esi, [esp + valoffset] + X86EmitEspOffset(0x8d, kESI, pArrayOpScript->m_fValLoc + ofsadjust); +#endif // !_TARGET_AMD64_ + + // lea edi, ELEMADDR + X86EmitOp(0x8d, kEDI, elemBaseReg, elemOfs, elemScaledReg, elemScale AMD64_ARG(k64BitOp)); + goto COPY_VALUE_CLASS; + } + break; + + default: + _ASSERTE(0); + } + + EmitLabel(Epilog); + +#ifdef _TARGET_AMD64_ + if (fNeedPrologue) + { + if (fNeedScratchArea) + { + // Throw away scratch area + X86EmitAddEsp(sizeof(ArgumentRegisters) + 0x8); + } + + if (fSavedEDI) + X86EmitPopReg(kRDI); + + if (fSavedESI) + X86EmitPopReg(kRSI); + } + + X86EmitReturn(0); +#else // !_TARGET_AMD64_ + // Restore the callee-saved registers + X86EmitPopReg(kFactorReg); + X86EmitPopReg(kTotalReg); + + // ret N + X86EmitReturn(pArrayOpScript->m_cbretpop); +#endif // !_TARGET_AMD64_ + + // Exception points must clean up the stack for all those extra args. + // kFactorReg and kTotalReg will be popped by the jump targets. + + void *pvExceptionThrowFn; + +#if defined(_TARGET_AMD64_) +#define ARRAYOP_EXCEPTION_HELPERS(base) { (PVOID)base, (PVOID)base##_RSIRDI, (PVOID)base##_ScratchArea, (PVOID)base##_RSIRDI_ScratchArea } + static void *rgNullExceptionHelpers[] = ARRAYOP_EXCEPTION_HELPERS(ArrayOpStubNullException); + static void *rgRangeExceptionHelpers[] = ARRAYOP_EXCEPTION_HELPERS(ArrayOpStubRangeException); + static void *rgTypeMismatchExceptionHelpers[] = ARRAYOP_EXCEPTION_HELPERS(ArrayOpStubTypeMismatchException); +#undef ARRAYOP_EXCEPTION_HELPERS + + UINT iExceptionHelper = (fNeedRSIRDI ? 1 : 0) + (fNeedScratchArea ? 
2 : 0); +#endif // defined(_TARGET_AMD64_) + + EmitLabel(Inner_nullexception); + +#ifndef _TARGET_AMD64_ + pvExceptionThrowFn = (LPVOID)ArrayOpStubNullException; + + Emit8(0xb8); // mov EAX, + Emit32(pArrayOpScript->m_cbretpop); +#else //_TARGET_AMD64_ + pvExceptionThrowFn = rgNullExceptionHelpers[iExceptionHelper]; +#endif //!_TARGET_AMD64_ + X86EmitNearJump(NewExternalCodeLabel(pvExceptionThrowFn)); + + EmitLabel(Inner_rangeexception); +#ifndef _TARGET_AMD64_ + pvExceptionThrowFn = (LPVOID)ArrayOpStubRangeException; + Emit8(0xb8); // mov EAX, + Emit32(pArrayOpScript->m_cbretpop); +#else //_TARGET_AMD64_ + pvExceptionThrowFn = rgRangeExceptionHelpers[iExceptionHelper]; +#endif //!_TARGET_AMD64_ + X86EmitNearJump(NewExternalCodeLabel(pvExceptionThrowFn)); + + if (Inner_typeMismatchexception != NULL) + { + EmitLabel(Inner_typeMismatchexception); +#ifndef _TARGET_AMD64_ + pvExceptionThrowFn = (LPVOID)ArrayOpStubTypeMismatchException; + Emit8(0xb8); // mov EAX, + Emit32(pArrayOpScript->m_cbretpop); +#else //_TARGET_AMD64_ + pvExceptionThrowFn = rgTypeMismatchExceptionHelpers[iExceptionHelper]; +#endif //!_TARGET_AMD64_ + X86EmitNearJump(NewExternalCodeLabel(pvExceptionThrowFn)); + } +} +#ifdef _PREFAST_ +#pragma warning(pop) +#endif + +#endif // FEATURE_ARRAYSTUB_AS_IL + +//=========================================================================== +// Emits code to break into debugger +VOID StubLinkerCPU::EmitDebugBreak() +{ + STANDARD_VM_CONTRACT; + + // int3 + Emit8(0xCC); +} + +#if defined(FEATURE_COMINTEROP) && defined(_TARGET_X86_) + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning (disable : 4740) // There is inline asm code in this function, which disables + // global optimizations. +#pragma warning (disable : 4731) +#endif // _MSC_VER +Thread* __stdcall CreateThreadBlockReturnHr(ComMethodFrame *pFrame) +{ + + WRAPPER_NO_CONTRACT; + + Thread *pThread = NULL; + + HRESULT hr = S_OK; + + // This means that a thread is FIRST coming in from outside the EE. + BEGIN_ENTRYPOINT_THROWS; + pThread = SetupThreadNoThrow(&hr); + END_ENTRYPOINT_THROWS; + + if (pThread == NULL) { + // Unwind stack, and return hr + // NOTE: assumes __stdcall + // Note that this code does not handle the rare COM signatures that do not return HRESULT + // compute the callee pop stack bytes + UINT numArgStackBytes = pFrame->GetNumCallerStackBytes(); + unsigned frameSize = sizeof(Frame) + sizeof(LPVOID); + LPBYTE iEsp = ((LPBYTE)pFrame) + ComMethodFrame::GetOffsetOfCalleeSavedRegisters(); + __asm + { + mov eax, hr + mov edx, numArgStackBytes + //***************************************** + // reset the stack pointer + // none of the locals above can be used in the asm below + // if we wack the stack pointer + mov esp, iEsp + // pop callee saved registers + pop edi + pop esi + pop ebx + pop ebp + pop ecx ; //return address + // pop the callee cleanup stack args + add esp, edx ;// callee cleanup of args + jmp ecx; // jump to the address to continue execution + + // We will never get here. This "ret" is just so that code-disassembling + // profilers know to stop disassembling any further + ret + } + } + + return pThread; +} +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +#endif // defined(FEATURE_COMINTEROP) && defined(_TARGET_X86_) + +#endif // !defined(CROSSGEN_COMPILE) && !defined(FEATURE_STUBS_AS_IL) + +#endif // !DACCESS_COMPILE + + +#ifdef _TARGET_AMD64_ + +// +// TailCallFrame Object Scanning +// +// This handles scanning/promotion of GC objects that were +// protected by the TailCallHelper routine. 
Note that the objects +// being protected is somewhat dynamic and is dependent upon the +// the callee... +// + +void TailCallFrame::GcScanRoots(promote_func *fn, ScanContext* sc) +{ + WRAPPER_NO_CONTRACT; + + if (m_pGCLayout != NULL) + { + struct FrameOffsetDecoder { + private: + TADDR prevOffset; + TADDR rangeEnd; + BOOL maybeInterior; + BOOL atEnd; + PTR_SBYTE pbOffsets; + + DWORD ReadNumber() { + signed char i; + DWORD offset = 0; + while ((i = *pbOffsets++) >= 0) + { + offset = (offset << 7) | i; + } + offset = (offset << 7) | (i & 0x7F); + return offset; + } + + public: + FrameOffsetDecoder(PTR_GSCookie _base, TADDR offsets) + : prevOffset(dac_cast(_base)), rangeEnd(~0LL), atEnd(FALSE), pbOffsets(dac_cast(offsets)) { maybeInterior = FALSE;} + + bool MoveNext() { + LIMITED_METHOD_CONTRACT; + + if (rangeEnd < prevOffset) + { + prevOffset -= sizeof(void*); + return true; + } + if (atEnd) return false; + DWORD offset = ReadNumber(); + atEnd = (offset & 1); + BOOL range = (offset & 2); + maybeInterior = (offset & 0x80000000); + + offset &= 0x7FFFFFFC; + +#ifdef _WIN64 + offset <<= 1; +#endif + offset += sizeof(void*); + _ASSERTE(prevOffset > offset); + prevOffset -= offset; + + if (range) + { + _ASSERTE(!atEnd); + _ASSERTE(!maybeInterior); + DWORD offsetEnd = ReadNumber(); + atEnd = (offsetEnd & 1); + offsetEnd = (offsetEnd & ~1) << 1; + // range encoding starts with a range of 3 (2 is better to encode as + // 2 offsets), so 0 == 2 (the last offset in the range) + offsetEnd += sizeof(void*) * 2; + rangeEnd = prevOffset - offsetEnd; + } + + return true; + } + + BOOL MaybeInterior() const { return maybeInterior; } + + PTR_PTR_Object Current() const { return PTR_PTR_Object(prevOffset); } + + } decoder(GetGSCookiePtr(), m_pGCLayout); + + while (decoder.MoveNext()) + { + PTR_PTR_Object ppRef = decoder.Current(); + + LOG((LF_GC, INFO3, "Tail Call Frame Promoting" FMT_ADDR "to", + DBG_ADDR(OBJECTREF_TO_UNCHECKED_OBJECTREF(*ppRef)) )); + if (decoder.MaybeInterior()) + PromoteCarefully(fn, ppRef, sc, GC_CALL_INTERIOR|CHECK_APP_DOMAIN); + else + (*fn)(ppRef, sc, 0); + LOG((LF_GC, INFO3, FMT_ADDR "\n", DBG_ADDR(OBJECTREF_TO_UNCHECKED_OBJECTREF(*ppRef)) )); + } + } +} + +#ifndef DACCESS_COMPILE +static void EncodeOneGCOffset(CPUSTUBLINKER *pSl, ULONG delta, BOOL maybeInterior, BOOL range, BOOL last) +{ + CONTRACTL + { + THROWS; // From the stublinker + MODE_ANY; + GC_NOTRIGGER; + } + CONTRACTL_END; + + // Everything should be pointer aligned + // but we use a high bit for interior, and the 0 bit to denote the end of the list + // we use the 1 bit to denote a range + _ASSERTE((delta % sizeof(void*)) == 0); + +#if defined(_WIN64) + // For 64-bit, we have 3 bits of alignment, so we allow larger frames + // by shifting and gaining a free high-bit. + ULONG encodedDelta = delta >> 1; +#else + // For 32-bit, we just limit our frame size to <2GB. (I know, such a bummer!) 
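+    // The encoded value is emitted below as 7-bit groups, most-significant group
+    // first; the final byte has its high bit set, which is what ReadNumber() in
+    // TailCallFrame::GcScanRoots uses to find the end of each number.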
+ ULONG encodedDelta = delta; +#endif + _ASSERTE((encodedDelta & 0x80000003) == 0); + if (last) + { + encodedDelta |= 1; + } + + if (range) + { + encodedDelta |= 2; + } + else if (maybeInterior) + { + _ASSERTE(!range); + encodedDelta |= 0x80000000; + } + + BYTE bytes[5]; + UINT index = 5; + bytes[--index] = (BYTE)((encodedDelta & 0x7F) | 0x80); + encodedDelta >>= 7; + while (encodedDelta > 0) + { + bytes[--index] = (BYTE)(encodedDelta & 0x7F); + encodedDelta >>= 7; + } + pSl->EmitBytes(&bytes[index], 5 - index); +} + +static void EncodeGCOffsets(CPUSTUBLINKER *pSl, /* const */ ULONGARRAY & gcOffsets) +{ + CONTRACTL + { + THROWS; + MODE_ANY; + GC_NOTRIGGER; + } + CONTRACTL_END; + + _ASSERTE(gcOffsets.Count() > 0); + + ULONG prevOffset = 0; + int i = 0; + BOOL last = FALSE; + do { + ULONG offset = gcOffsets[i]; + // Everything should be pointer aligned + // but we use the 0-bit to mean maybeInterior, for byrefs. + _ASSERTE(((offset % sizeof(void*)) == 0) || ((offset % sizeof(void*)) == 1)); + BOOL maybeInterior = (offset & 1); + offset &= ~1; + + // Encode just deltas because they're smaller (and the list should be sorted) + _ASSERTE(offset >= (prevOffset + sizeof(void*))); + ULONG delta = offset - (prevOffset + sizeof(void*)); + if (!maybeInterior && gcOffsets.Count() > i + 2) + { + // Check for a potential range. + // Only do it if we have 3 or more pointers in a row + ULONG rangeOffset = offset; + int j = i + 1; + do { + ULONG nextOffset = gcOffsets[j]; + // interior pointers can't be in ranges + if (nextOffset & 1) + break; + // ranges must be saturated + if (nextOffset != (rangeOffset + sizeof(void*))) + break; + j++; + rangeOffset = nextOffset; + } while(j < gcOffsets.Count()); + + if (j > (i + 2)) + { + EncodeOneGCOffset(pSl, delta, FALSE, TRUE, last); + i = j - 1; + _ASSERTE(rangeOffset >= (offset + (sizeof(void*) * 2))); + delta = rangeOffset - (offset + (sizeof(void*) * 2)); + offset = rangeOffset; + } + } + last = (++i == gcOffsets.Count()); + + + EncodeOneGCOffset(pSl, delta, maybeInterior, FALSE, last); + + prevOffset = offset; + } while (!last); +} + +static void AppendGCLayout(ULONGARRAY &gcLayout, size_t baseOffset, BOOL fIsTypedRef, TypeHandle VMClsHnd) +{ + STANDARD_VM_CONTRACT; + + _ASSERTE((baseOffset % 16) == 0); + _ASSERTE(FitsInU4(baseOffset)); + + if (fIsTypedRef) + { + *gcLayout.AppendThrowing() = (ULONG)(baseOffset | 1); // "| 1" to mark it as an interior pointer + } + else if (!VMClsHnd.IsNativeValueType()) + { + MethodTable* pMT = VMClsHnd.GetMethodTable(); + _ASSERTE(pMT); + _ASSERTE(pMT->IsValueType()); + + // walk the GC descriptors, reporting the correct offsets + if (pMT->ContainsPointers()) + { + // size of instance when unboxed must be adjusted for the syncblock + // index and the VTable pointer. + DWORD size = pMT->GetBaseSize(); + + // we don't include this term in our 'ppstop' calculation below. + _ASSERTE(pMT->GetComponentSize() == 0); + + CGCDesc* map = CGCDesc::GetCGCDescFromMT(pMT); + CGCDescSeries* cur = map->GetLowestSeries(); + CGCDescSeries* last = map->GetHighestSeries(); + + _ASSERTE(cur <= last); + do + { + // offset to embedded references in this series must be + // adjusted by the VTable pointer, when in the unboxed state. 
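+                // (GetSeriesOffset() is measured from the start of the boxed object,
+                // whose first slot is the MethodTable pointer; hence the
+                // sizeof(void *) adjustment on the next line.)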
+ size_t adjustOffset = cur->GetSeriesOffset() - sizeof(void *); + + _ASSERTE(baseOffset >= adjustOffset); + size_t start = baseOffset - adjustOffset; + size_t stop = start - (cur->GetSeriesSize() + size); + for (size_t off = stop + sizeof(void*); off <= start; off += sizeof(void*)) + { + _ASSERTE(gcLayout.Count() == 0 || off > gcLayout[gcLayout.Count() - 1]); + _ASSERTE(FitsInU4(off)); + *gcLayout.AppendThrowing() = (ULONG)off; + } + cur++; + + } while (cur <= last); + } + } +} + +Stub * StubLinkerCPU::CreateTailCallCopyArgsThunk(CORINFO_SIG_INFO * pSig, + CorInfoHelperTailCallSpecialHandling flags) +{ + STANDARD_VM_CONTRACT; + + CPUSTUBLINKER sl; + CPUSTUBLINKER* pSl = &sl; + + // Generates a function that looks like this: + // size_t CopyArguments(va_list args, (RCX) + // CONTEXT *pCtx, (RDX) + // DWORD64 *pvStack, (R8) + // size_t cbStack) (R9) + // { + // if (pCtx != NULL) { + // foreach (arg in args) { + // copy into pCtx or pvStack + // } + // } + // return ; + // } + // + + CodeLabel *pNullLabel = pSl->NewCodeLabel(); + + // test rdx, rdx + pSl->X86EmitR2ROp(0x85, kRDX, kRDX); + + // jz NullLabel + pSl->X86EmitCondJump(pNullLabel, X86CondCode::kJZ); + + UINT nArgSlot = 0; + UINT totalArgs = pSig->totalILArgs() + ((pSig->isVarArg() || pSig->hasTypeArg()) ? 1 : 0); + bool fR10Loaded = false; + UINT cbArg; + static const UINT rgcbArgRegCtxtOffsets[4] = { offsetof(CONTEXT, Rcx), offsetof(CONTEXT, Rdx), + offsetof(CONTEXT, R8), offsetof(CONTEXT, R9) }; + static const UINT rgcbFpArgRegCtxtOffsets[4] = { offsetof(CONTEXT, Xmm0.Low), offsetof(CONTEXT, Xmm1.Low), + offsetof(CONTEXT, Xmm2.Low), offsetof(CONTEXT, Xmm3.Low) }; + + ULONGARRAY gcLayout; + + // On input to the function R9 contains the size of the buffer + // The first time this macro runs, R10 is loaded with the 'top' of the Frame + // and R9 is changed to point to the 'top' of the copy buffer. + // Then both R9 and R10 are decremented by the size of the struct we're copying + // So R10 is the value to put in the argument slot, and R9 is where the data + // should be copied to (or zeroed out in the case of the return buffer). +#define LOAD_STRUCT_OFFSET_IF_NEEDED(cbSize) \ + { \ + _ASSERTE(cbSize > 0); \ + _ASSERTE(FitsInI4(cbSize)); \ + __int32 offset = (__int32)cbSize; \ + if (!fR10Loaded) { \ + /* mov r10, [rdx + offset of RSP] */ \ + pSl->X86EmitIndexRegLoad(kR10, kRDX, offsetof(CONTEXT, Rsp)); \ + /* add an extra 8 because RSP is pointing at the return address */ \ + offset -= 8; \ + /* add r10, r9 */ \ + pSl->X86EmitAddRegReg(kR10, kR9); \ + /* add r9, r8 */ \ + pSl->X86EmitAddRegReg(kR9, kR8); \ + fR10Loaded = true; \ + } \ + /* sub r10, offset */ \ + pSl->X86EmitSubReg(kR10, offset); \ + /* sub r9, cbSize */ \ + pSl->X86EmitSubReg(kR9, cbSize); \ + } + + + if (flags & CORINFO_TAILCALL_STUB_DISPATCH_ARG) { + // This is set for stub dispatch + // The JIT placed an extra argument in the list that needs to + // get shoved into R11, and not counted. 
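+        // (On AMD64 the stub-dispatch calling convention passes the indirection
+        // cell address in R11, so it has to be restored into the context rather
+        // than counted as a normal argument slot.)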
+ // pCtx->R11 = va_arg(args, DWORD64); + + // mov rax, [rcx] + pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0); + // add rcx, 8 + pSl->X86EmitAddReg(kRCX, 8); + // mov [rdx + offset of R11], rax + pSl->X86EmitIndexRegStore(kRDX, offsetof(CONTEXT, R11), kRAX); + } + + ULONG cbStructOffset = 0; + + // First comes the 'this' pointer + if (pSig->hasThis()) { + // mov rax, [rcx] + pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0); + // add rcx, 8 + pSl->X86EmitAddReg(kRCX, 8); + // mov [rdx + offset of RCX/RDX], rax + pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot++], kRAX); + } + + // Next the return buffer + cbArg = 0; + TypeHandle th(pSig->retTypeClass); + if ((pSig->retType == CORINFO_TYPE_REFANY) || (pSig->retType == CORINFO_TYPE_VALUECLASS)) { + cbArg = th.GetSize(); + } + + if (ArgIterator::IsArgPassedByRef(cbArg)) { + totalArgs++; + + // We always reserve space for the return buffer, and we always zero it out, + // so the GC won't complain, but if it's already pointing above the frame, + // then we need to pass it in (so it will get passed out). + // Otherwise we assume the caller is returning void, so we just pass in + // dummy space to be overwritten. + UINT cbUsed = (cbArg + 0xF) & ~0xF; + LOAD_STRUCT_OFFSET_IF_NEEDED(cbUsed); + // now emit a 'memset(r9, 0, cbUsed)' + { + // xorps xmm0, xmm0 + pSl->X86EmitR2ROp(X86_INSTR_XORPS, kXMM0, kXMM0); + if (cbUsed <= 4 * 16) { + // movaps [r9], xmm0 + pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 0); + if (16 < cbUsed) { + // movaps [r9 + 16], xmm0 + pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 16); + if (32 < cbUsed) { + // movaps [r9 + 32], xmm0 + pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 32); + if (48 < cbUsed) { + // movaps [r9 + 48], xmm0 + pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 48); + } + } + } + } + else { + // a loop (one double-quadword at a time) + pSl->X86EmitZeroOutReg(kR11); + // LoopLabel: + CodeLabel *pLoopLabel = pSl->NewCodeLabel(); + pSl->EmitLabel(pLoopLabel); + // movaps [r9 + r11], xmm0 + pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 0, kR11, 1); + // add r11, 16 + pSl->X86EmitAddReg(kR11, 16); + // cmp r11, cbUsed + pSl->X86EmitCmpRegImm32(kR11, cbUsed); + // jl LoopLabel + pSl->X86EmitCondJump(pLoopLabel, X86CondCode::kJL); + } + } + cbStructOffset += cbUsed; + AppendGCLayout(gcLayout, cbStructOffset, pSig->retType == CORINFO_TYPE_REFANY, th); + + // mov rax, [rcx] + pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0); + // add rcx, 8 + pSl->X86EmitAddReg(kRCX, 8); + // cmp rax, [rdx + offset of R12] + pSl->X86EmitOffsetModRM(0x3B, kRAX, kRDX, offsetof(CONTEXT, R12)); + + CodeLabel *pSkipLabel = pSl->NewCodeLabel(); + // jnb SkipLabel + pSl->X86EmitCondJump(pSkipLabel, X86CondCode::kJNB); + + // Also check the lower bound of the stack in case the return buffer is on the GC heap + // and the GC heap is below the stack + // cmp rax, rsp + pSl->X86EmitR2ROp(0x3B, kRAX, (X86Reg)4 /*kRSP*/); + // jna SkipLabel + pSl->X86EmitCondJump(pSkipLabel, X86CondCode::kJB); + // mov rax, r10 + pSl->X86EmitMovRegReg(kRAX, kR10); + // SkipLabel: + pSl->EmitLabel(pSkipLabel); + // mov [rdx + offset of RCX], rax + pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot++], kRAX); + } + + // VarArgs Cookie *or* Generics Instantiation Parameter + if (pSig->hasTypeArg() || pSig->isVarArg()) { + // mov rax, [rcx] + pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0); + // add rcx, 8 + pSl->X86EmitAddReg(kRCX, 8); + // mov [rdx + offset of RCX/RDX], rax + pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot++], kRAX); + 
} + + _ASSERTE(nArgSlot <= 4); + + // Now for *all* the 'real' arguments + SigPointer ptr((PCCOR_SIGNATURE)pSig->args); + Module * module = GetModule(pSig->scope); + Instantiation classInst((TypeHandle*)pSig->sigInst.classInst, pSig->sigInst.classInstCount); + Instantiation methodInst((TypeHandle*)pSig->sigInst.methInst, pSig->sigInst.methInstCount); + SigTypeContext typeCtxt(classInst, methodInst); + + for( ;nArgSlot < totalArgs; ptr.SkipExactlyOne()) { + CorElementType et = ptr.PeekElemTypeNormalized(module, &typeCtxt); + if (et == ELEMENT_TYPE_SENTINEL) + continue; + + // mov rax, [rcx] + pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0); + // add rcx, 8 + pSl->X86EmitAddReg(kRCX, 8); + switch (et) { + case ELEMENT_TYPE_INTERNAL: + // TODO + _ASSERTE(!"Shouldn't see ELEMENT_TYPE_INTERNAL"); + break; + case ELEMENT_TYPE_TYPEDBYREF: + case ELEMENT_TYPE_VALUETYPE: + th = ptr.GetTypeHandleThrowing(module, &typeCtxt, ClassLoader::LoadTypes, CLASS_LOAD_UNRESTOREDTYPEKEY); + _ASSERTE(!th.IsNull()); + g_IBCLogger.LogEEClassAndMethodTableAccess(th.GetMethodTable()); + cbArg = (UINT)th.GetSize(); + if (ArgIterator::IsArgPassedByRef(cbArg)) { + UINT cbUsed = (cbArg + 0xF) & ~0xF; + LOAD_STRUCT_OFFSET_IF_NEEDED(cbUsed); + // rax has the source pointer + // r9 has the intermediate copy location + // r10 has the final destination + if (nArgSlot < 4) { + pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot++], kR10); + } + else { + pSl->X86EmitIndexRegStore(kR8, 8 * nArgSlot++, kR10); + } + // now emit a 'memcpy(rax, r9, cbUsed)' + // These structs are supposed to be 16-byte aligned, but + // Reflection puts them on the GC heap, which is only 8-byte + // aligned. It also means we have to be careful about not + // copying too much (because we might cross a page boundary) + UINT cbUsed16 = (cbArg + 7) & ~0xF; + _ASSERTE((cbUsed16 == cbUsed) || ((cbUsed16 + 16) == cbUsed)); + + if (cbArg <= 192) { + // Unrolled version (6 x 16 bytes in parallel) + UINT offset = 0; + while (offset < cbUsed16) { + // movups xmm0, [rax + offset] + pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM0, kRAX, offset); + if (offset + 16 < cbUsed16) { + // movups xmm1, [rax + offset + 16] + pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM1, kRAX, offset + 16); + if (offset + 32 < cbUsed16) { + // movups xmm2, [rax + offset + 32] + pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM2, kRAX, offset + 32); + if (offset + 48 < cbUsed16) { + // movups xmm3, [rax + offset + 48] + pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM3, kRAX, offset + 48); + if (offset + 64 < cbUsed16) { + // movups xmm4, [rax + offset + 64] + pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM4, kRAX, offset + 64); + if (offset + 80 < cbUsed16) { + // movups xmm5, [rax + offset + 80] + pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM5, kRAX, offset + 80); + } + } + } + } + } + // movaps [r9 + offset], xmm0 + pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, offset); + offset += 16; + if (offset < cbUsed16) { + // movaps [r9 + 16], xmm1 + pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM1, kR9, offset); + offset += 16; + if (offset < cbUsed16) { + // movaps [r9 + 32], xmm2 + pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM2, kR9, offset); + offset += 16; + if (offset < cbUsed16) { + // movaps [r9 + 48], xmm3 + pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM3, kR9, offset); + offset += 16; + if (offset < cbUsed16) { + // movaps [r9 + 64], xmm4 + pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM4, kR9, offset); + offset += 16; + if (offset < cbUsed16) { + // movaps [r9 + 80], xmm5 + pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM5, 
kR9, offset); + offset += 16; + } + } + } + } + } + } + // Copy the last 8 bytes if needed + if (cbUsed > cbUsed16) { + _ASSERTE(cbUsed16 < cbArg); + // movlps xmm0, [rax + offset] + pSl->X86EmitOp(X86_INSTR_MOVLPS_R_RM, kXMM0, kRAX, offset); + // movlps [r9 + offset], xmm0 + pSl->X86EmitOp(X86_INSTR_MOVLPS_RM_R, kXMM0, kR9, offset); + } + } + else { + // a loop (one double-quadword at a time) + pSl->X86EmitZeroOutReg(kR11); + // LoopLabel: + CodeLabel *pLoopLabel = pSl->NewCodeLabel(); + pSl->EmitLabel(pLoopLabel); + // movups xmm0, [rax + r11] + pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM0, kRAX, 0, kR11, 1); + // movaps [r9 + r11], xmm0 + pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 0, kR11, 1); + // add r11, 16 + pSl->X86EmitAddReg(kR11, 16); + // cmp r11, cbUsed16 + pSl->X86EmitCmpRegImm32(kR11, cbUsed16); + // jl LoopLabel + pSl->X86EmitCondJump(pLoopLabel, X86CondCode::kJL); + if (cbArg > cbUsed16) { + _ASSERTE(cbUsed16 + 8 >= cbArg); + // movlps xmm0, [rax + r11] + pSl->X86EmitOp(X86_INSTR_MOVLPS_R_RM, kXMM0, kRAX, 0, kR11, 1); + // movlps [r9 + r11], xmm0 + pSl->X86EmitOp(X86_INSTR_MOVLPS_RM_R, kXMM0, kR9, 0, kR11, 1); + } + } + cbStructOffset += cbUsed; + AppendGCLayout(gcLayout, cbStructOffset, et == ELEMENT_TYPE_TYPEDBYREF, th); + break; + } + + // + // Explicit Fall-Through for non-IsArgPassedByRef + // + + default: + if (nArgSlot < 4) { + pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot], kRAX); + if ((et == ELEMENT_TYPE_R4) || (et == ELEMENT_TYPE_R8)) { + pSl->X86EmitIndexRegStore(kRDX, rgcbFpArgRegCtxtOffsets[nArgSlot], kRAX); + } + } + else { + pSl->X86EmitIndexRegStore(kR8, 8 * nArgSlot, kRAX); + } + nArgSlot++; + break; + } + } + +#undef LOAD_STRUCT_OFFSET_IF_NEEDED + + // Keep our 4 shadow slots and even number of slots (to keep 16-byte aligned) + if (nArgSlot < 4) + nArgSlot = 4; + else if (nArgSlot & 1) + nArgSlot++; + + _ASSERTE((cbStructOffset % 16) == 0); + + // xor eax, eax + pSl->X86EmitZeroOutReg(kRAX); + // ret + pSl->X86EmitReturn(0); + + // NullLabel: + pSl->EmitLabel(pNullLabel); + + CodeLabel *pGCLayoutLabel = NULL; + if (gcLayout.Count() == 0) { + // xor eax, eax + pSl->X86EmitZeroOutReg(kRAX); + } + else { + // lea rax, [rip + offset to gclayout] + pGCLayoutLabel = pSl->NewCodeLabel(); + pSl->X86EmitLeaRIP(pGCLayoutLabel, kRAX); + } + // mov [r9], rax + pSl->X86EmitIndexRegStore(kR9, 0, kRAX); + // mov rax, cbStackNeeded + pSl->X86EmitRegLoad(kRAX, cbStructOffset + nArgSlot * 8); + // ret + pSl->X86EmitReturn(0); + + if (gcLayout.Count() > 0) { + // GCLayout: + pSl->EmitLabel(pGCLayoutLabel); + EncodeGCOffsets(pSl, gcLayout); + } + + return pSl->Link(); +} +#endif // DACCESS_COMPILE + +#endif // _TARGET_AMD64_ + + +#ifdef HAS_FIXUP_PRECODE + +#ifdef HAS_FIXUP_PRECODE_CHUNKS +TADDR FixupPrecode::GetMethodDesc() +{ + LIMITED_METHOD_CONTRACT; + SUPPORTS_DAC; + + // This lookup is also manually inlined in PrecodeFixupThunk assembly code + TADDR base = *PTR_TADDR(GetBase()); + if (base == NULL) + return NULL; + return base + (m_MethodDescChunkIndex * MethodDesc::ALIGNMENT); +} +#endif + +#ifdef DACCESS_COMPILE +void FixupPrecode::EnumMemoryRegions(CLRDataEnumMemoryFlags flags) +{ + SUPPORTS_DAC; + DacEnumMemoryRegion(dac_cast(this), sizeof(FixupPrecode)); + + DacEnumMemoryRegion(GetBase(), sizeof(TADDR)); +} +#endif // DACCESS_COMPILE + +#endif // HAS_FIXUP_PRECODE + +#ifndef DACCESS_COMPILE + +BOOL rel32SetInterlocked(/*PINT32*/ PVOID pRel32, TADDR target, TADDR expected, MethodDesc* pMD) +{ + CONTRACTL + { + THROWS; // Creating a JumpStub 
could throw OutOfMemory + GC_TRIGGERS; + } + CONTRACTL_END; + + BYTE* callAddrAdj = (BYTE*)pRel32 + 4; + INT32 expectedRel32 = static_cast((BYTE*)expected - callAddrAdj); + + INT32 targetRel32 = rel32UsingJumpStub((INT32*)pRel32, target, pMD); + + _ASSERTE(IS_ALIGNED(pRel32, sizeof(INT32))); + return FastInterlockCompareExchange((LONG*)pRel32, (LONG)targetRel32, (LONG)expectedRel32) == (LONG)expectedRel32; +} + +void StubPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator /* = NULL */, + BYTE type /* = StubPrecode::Type */, TADDR target /* = NULL */) +{ + WRAPPER_NO_CONTRACT; + + IN_WIN64(m_movR10 = X86_INSTR_MOV_R10_IMM64); // mov r10, pMethodDesc + IN_WIN32(m_movEAX = X86_INSTR_MOV_EAX_IMM32); // mov eax, pMethodDesc + m_pMethodDesc = (TADDR)pMD; + IN_WIN32(m_mov_rm_r = X86_INSTR_MOV_RM_R); // mov reg,reg + m_type = type; + m_jmp = X86_INSTR_JMP_REL32; // jmp rel32 + + if (pLoaderAllocator != NULL) + { + // Use pMD == NULL in all precode initialization methods to allocate the initial jump stub in non-dynamic heap + // that has the same lifetime like as the precode itself + if (target == NULL) + target = GetPreStubEntryPoint(); + m_rel32 = rel32UsingJumpStub(&m_rel32, target, NULL /* pMD */, pLoaderAllocator); + } +} + +#ifdef HAS_NDIRECT_IMPORT_PRECODE + +void NDirectImportPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator) +{ + WRAPPER_NO_CONTRACT; + StubPrecode::Init(pMD, pLoaderAllocator, NDirectImportPrecode::Type, GetEEFuncEntryPoint(NDirectImportThunk)); +} + +#endif // HAS_NDIRECT_IMPORT_PRECODE + + +#ifdef HAS_REMOTING_PRECODE + +void RemotingPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator /* = NULL */) +{ + WRAPPER_NO_CONTRACT; + + IN_WIN64(m_movR10 = X86_INSTR_MOV_R10_IMM64); // mov r10, pMethodDesc + IN_WIN32(m_movEAX = X86_INSTR_MOV_EAX_IMM32); // mov eax, pMethodDesc + m_pMethodDesc = (TADDR)pMD; + m_type = PRECODE_REMOTING; // nop + m_call = X86_INSTR_CALL_REL32; + m_jmp = X86_INSTR_JMP_REL32; // jmp rel32 + + if (pLoaderAllocator != NULL) + { + m_callRel32 = rel32UsingJumpStub(&m_callRel32, + GetEEFuncEntryPoint(PrecodeRemotingThunk), NULL /* pMD */, pLoaderAllocator); + m_rel32 = rel32UsingJumpStub(&m_rel32, + GetPreStubEntryPoint(), NULL /* pMD */, pLoaderAllocator); + } +} + +#endif // HAS_REMOTING_PRECODE + + +#ifdef HAS_FIXUP_PRECODE +void FixupPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator, int iMethodDescChunkIndex /*=0*/, int iPrecodeChunkIndex /*=0*/) +{ + WRAPPER_NO_CONTRACT; + + m_op = X86_INSTR_CALL_REL32; // call PrecodeFixupThunk + m_type = FixupPrecode::TypePrestub; + + // Initialize chunk indices only if they are not initialized yet. This is necessary to make MethodDesc::Reset work. 
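+    // (Reset re-runs Init on a precode that is already placed in its chunk, so the
+    // indices that locate the MethodDescChunk base relative to this precode must
+    // survive re-initialization.)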
+ if (m_PrecodeChunkIndex == 0) + { + _ASSERTE(FitsInU1(iPrecodeChunkIndex)); + m_PrecodeChunkIndex = static_cast(iPrecodeChunkIndex); + } + + if (iMethodDescChunkIndex != -1) + { + if (m_MethodDescChunkIndex == 0) + { + _ASSERTE(FitsInU1(iMethodDescChunkIndex)); + m_MethodDescChunkIndex = static_cast(iMethodDescChunkIndex); + } + + if (*(void**)GetBase() == NULL) + *(void**)GetBase() = (BYTE*)pMD - (iMethodDescChunkIndex * MethodDesc::ALIGNMENT); + } + + _ASSERTE(GetMethodDesc() == (TADDR)pMD); + + if (pLoaderAllocator != NULL) + { + m_rel32 = rel32UsingJumpStub(&m_rel32, + GetEEFuncEntryPoint(PrecodeFixupThunk), NULL /* pMD */, pLoaderAllocator); + } +} + +BOOL FixupPrecode::SetTargetInterlocked(TADDR target, TADDR expected) +{ + CONTRACTL + { + THROWS; // Creating a JumpStub could throw OutOfMemory + GC_TRIGGERS; + } + CONTRACTL_END; + + INT64 oldValue = *(INT64*)this; + BYTE* pOldValue = (BYTE*)&oldValue; + + if (pOldValue[OFFSETOF_PRECODE_TYPE_CALL_OR_JMP] != FixupPrecode::TypePrestub) + return FALSE; + + MethodDesc * pMD = (MethodDesc*)GetMethodDesc(); + g_IBCLogger.LogMethodPrecodeWriteAccess(pMD); + + INT64 newValue = oldValue; + BYTE* pNewValue = (BYTE*)&newValue; + + pNewValue[OFFSETOF_PRECODE_TYPE_CALL_OR_JMP] = FixupPrecode::Type; + + pOldValue[offsetof(FixupPrecode,m_op)] = X86_INSTR_CALL_REL32; + pNewValue[offsetof(FixupPrecode,m_op)] = X86_INSTR_JMP_REL32; + + *(INT32*)(&pNewValue[offsetof(FixupPrecode,m_rel32)]) = rel32UsingJumpStub(&m_rel32, target, pMD); + + _ASSERTE(IS_ALIGNED(this, sizeof(INT64))); + EnsureWritableExecutablePages(this, sizeof(INT64)); + return FastInterlockCompareExchangeLong((INT64*) this, newValue, oldValue) == oldValue; +} + +#ifdef FEATURE_NATIVE_IMAGE_GENERATION +// Partial initialization. Used to save regrouped chunks. +void FixupPrecode::InitForSave(int iPrecodeChunkIndex) +{ + m_op = X86_INSTR_CALL_REL32; // call PrecodeFixupThunk + m_type = FixupPrecode::TypePrestub; + + _ASSERTE(FitsInU1(iPrecodeChunkIndex)); + m_PrecodeChunkIndex = static_cast(iPrecodeChunkIndex); + + // The rest is initialized in code:FixupPrecode::Fixup +} + +void FixupPrecode::Fixup(DataImage *image, MethodDesc * pMD) +{ + STANDARD_VM_CONTRACT; + + // Note that GetMethodDesc() does not return the correct value because of + // regrouping of MethodDescs into hot and cold blocks. That's why the caller + // has to supply the actual MethodDesc + + SSIZE_T mdChunkOffset; + ZapNode * pMDChunkNode = image->GetNodeForStructure(pMD, &mdChunkOffset); + ZapNode * pHelperThunk = image->GetHelperThunk(CORINFO_HELP_EE_PRECODE_FIXUP); + + image->FixupFieldToNode(this, offsetof(FixupPrecode, m_rel32), + pHelperThunk, 0, IMAGE_REL_BASED_REL32); + + // Set the actual chunk index + FixupPrecode * pNewPrecode = (FixupPrecode *)image->GetImagePointer(this); + + size_t mdOffset = mdChunkOffset - sizeof(MethodDescChunk); + size_t chunkIndex = mdOffset / MethodDesc::ALIGNMENT; + _ASSERTE(FitsInU1(chunkIndex)); + pNewPrecode->m_MethodDescChunkIndex = (BYTE) chunkIndex; + + // Fixup the base of MethodDescChunk + if (m_PrecodeChunkIndex == 0) + { + image->FixupFieldToNode(this, (BYTE *)GetBase() - (BYTE *)this, + pMDChunkNode, sizeof(MethodDescChunk)); + } +} +#endif // FEATURE_NATIVE_IMAGE_GENERATION + +#endif // HAS_FIXUP_PRECODE + +#endif // !DACCESS_COMPILE + + +#ifdef HAS_THISPTR_RETBUF_PRECODE + +// rel32 jmp target that points back to the jump (infinite loop). 
+// Used to mark uninitialized ThisPtrRetBufPrecode target +#define REL32_JMP_SELF (-5) + +#ifndef DACCESS_COMPILE +void ThisPtrRetBufPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator) +{ + WRAPPER_NO_CONTRACT; + + IN_WIN64(m_nop1 = X86_INSTR_NOP;) // nop +#ifdef UNIX_AMD64_ABI + m_prefix1 = 0x48; + m_movScratchArg0 = 0xC78B; // mov rax,rdi + m_prefix2 = 0x48; + m_movArg0Arg1 = 0xFE8B; // mov rdi,rsi + m_prefix3 = 0x48; + m_movArg1Scratch = 0xF08B; // mov rsi,rax +#else + IN_WIN64(m_prefix1 = 0x48;) + m_movScratchArg0 = 0xC889; // mov r/eax,r/ecx + IN_WIN64(m_prefix2 = 0x48;) + m_movArg0Arg1 = 0xD189; // mov r/ecx,r/edx + IN_WIN64(m_prefix3 = 0x48;) + m_movArg1Scratch = 0xC289; // mov r/edx,r/eax +#endif + m_nop2 = X86_INSTR_NOP; // nop + m_jmp = X86_INSTR_JMP_REL32; // jmp rel32 + m_pMethodDesc = (TADDR)pMD; + + // This precode is never patched lazily - avoid unnecessary jump stub allocation + m_rel32 = REL32_JMP_SELF; +} + +BOOL ThisPtrRetBufPrecode::SetTargetInterlocked(TADDR target, TADDR expected) +{ + CONTRACTL + { + THROWS; + GC_TRIGGERS; + } + CONTRACTL_END; + + // This precode is never patched lazily - the interlocked semantics is not required. + _ASSERTE(m_rel32 == REL32_JMP_SELF); + + // Use pMD == NULL to allocate the jump stub in non-dynamic heap that has the same lifetime as the precode itself + m_rel32 = rel32UsingJumpStub(&m_rel32, target, NULL /* pMD */, ((MethodDesc *)GetMethodDesc())->GetLoaderAllocatorForCode()); + + return TRUE; +} +#endif // !DACCESS_COMPILE + +PCODE ThisPtrRetBufPrecode::GetTarget() +{ + LIMITED_METHOD_DAC_CONTRACT; + + // This precode is never patched lazily - pretend that the uninitialized m_rel32 points to prestub + if (m_rel32 == REL32_JMP_SELF) + return GetPreStubEntryPoint(); + + return rel32Decode(PTR_HOST_MEMBER_TADDR(ThisPtrRetBufPrecode, this, m_rel32)); +} + +#endif // HAS_THISPTR_RETBUF_PRECODE diff --git a/src/vm/i386/stublinkerx86.h b/src/vm/i386/stublinkerx86.h new file mode 100644 index 0000000000..237fc794d4 --- /dev/null +++ b/src/vm/i386/stublinkerx86.h @@ -0,0 +1,781 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +#ifndef STUBLINKERX86_H_ +#define STUBLINKERX86_H_ + +#include "stublink.h" + +struct ArrayOpScript; +class MetaSig; + +//======================================================================= + +#define X86_INSTR_CALL_REL32 0xE8 // call rel32 +#define X86_INSTR_CALL_IND 0x15FF // call dword ptr[addr32] +#define X86_INSTR_CALL_IND_EAX 0x10FF // call dword ptr[eax] +#define X86_INSTR_CALL_IND_EAX_OFFSET 0x50FF // call dword ptr[eax + offset] ; where offset follows these 2 bytes +#define X86_INSTR_CALL_EAX 0xD0FF // call eax +#define X86_INSTR_JMP_REL32 0xE9 // jmp rel32 +#define X86_INSTR_JMP_IND 0x25FF // jmp dword ptr[addr32] +#define X86_INSTR_JMP_EAX 0xE0FF // jmp eax +#define X86_INSTR_MOV_EAX_IMM32 0xB8 // mov eax, imm32 +#define X86_INSTR_MOV_EAX_ECX_IND 0x018b // mov eax, [ecx] +#define X86_INSTR_CMP_IND_ECX_IMM32 0x3981 // cmp [ecx], imm32 +#define X86_INSTR_MOV_RM_R 0x89 // mov r/m,reg + +#define X86_INSTR_MOV_AL 0xB0 // mov al, imm8 +#define X86_INSTR_JMP_REL8 0xEB // jmp short rel8 + +#define X86_INSTR_NOP 0x90 // nop +#define X86_INSTR_NOP3_1 0x9090 // 1st word of 3-byte nop +#define X86_INSTR_NOP3_3 0x90 // 3rd byte of 3-byte nop +#define X86_INSTR_INT3 0xCC // int 3 +#define X86_INSTR_HLT 0xF4 // hlt + +#define X86_INSTR_MOVAPS_R_RM 0x280F // movaps xmm1, xmm2/mem128 +#define X86_INSTR_MOVAPS_RM_R 0x290F // movaps xmm1/mem128, xmm2 +#define X86_INSTR_MOVLPS_R_RM 0x120F // movlps xmm1, xmm2/mem128 +#define X86_INSTR_MOVLPS_RM_R 0x130F // movlps xmm1/mem128, xmm2 +#define X86_INSTR_MOVUPS_R_RM 0x100F // movups xmm1, xmm2/mem128 +#define X86_INSTR_MOVUPS_RM_R 0x110F // movups xmm1/mem128, xmm2 +#define X86_INSTR_XORPS 0x570F // xorps xmm1, xmm2/mem128 + +#ifdef _TARGET_AMD64_ +#define X86_INSTR_MOV_R10_IMM64 0xBA49 // mov r10, imm64 +#endif + +//---------------------------------------------------------------------- +// Encodes X86 registers. The numbers are chosen to match Intel's opcode +// encoding. +//---------------------------------------------------------------------- +enum X86Reg +{ + kEAX = 0, + kECX = 1, + kEDX = 2, + kEBX = 3, + // kESP intentionally omitted because of its irregular treatment in MOD/RM + kEBP = 5, + kESI = 6, + kEDI = 7, + +#ifdef _TARGET_X86_ + NumX86Regs = 8, +#endif // _TARGET_X86_ + + kXMM0 = 0, + kXMM1 = 1, + kXMM2 = 2, + kXMM3 = 3, + kXMM4 = 4, + kXMM5 = 5, +#if defined(_TARGET_AMD64_) + kXMM6 = 6, + kXMM7 = 7, + kXMM8 = 8, + kXMM9 = 9, + kXMM10 = 10, + kXMM11 = 11, + kXMM12 = 12, + kXMM13 = 13, + kXMM14 = 14, + kXMM15 = 15, + // Integer registers commence here + kRAX = 0, + kRCX = 1, + kRDX = 2, + kRBX = 3, + // kRSP intentionally omitted because of its irregular treatment in MOD/RM + kRBP = 5, + kRSI = 6, + kRDI = 7, + kR8 = 8, + kR9 = 9, + kR10 = 10, + kR11 = 11, + kR12 = 12, + kR13 = 13, + kR14 = 14, + kR15 = 15, + NumX86Regs = 16, + +#endif // _TARGET_AMD64_ + + // We use "push ecx" instead of "sub esp, sizeof(LPVOID)" + kDummyPushReg = kECX +}; + + +// Use this only if you are absolutely sure that the instruction format +// handles it. This is not declared as X86Reg so that users are forced +// to add a cast and think about what exactly they are doing. +const int kESP_Unsafe = 4; + +//---------------------------------------------------------------------- +// Encodes X86 conditional jumps. The numbers are chosen to match +// Intel's opcode encoding. 
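+// (For example, kJZ == 0x4 corresponds to the one-byte form 0x74 and the
+// rel32 form 0x0F 0x84.)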
+//---------------------------------------------------------------------- +class X86CondCode { + public: + enum cc { + kJA = 0x7, + kJAE = 0x3, + kJB = 0x2, + kJBE = 0x6, + kJC = 0x2, + kJE = 0x4, + kJZ = 0x4, + kJG = 0xf, + kJGE = 0xd, + kJL = 0xc, + kJLE = 0xe, + kJNA = 0x6, + kJNAE = 0x2, + kJNB = 0x3, + kJNBE = 0x7, + kJNC = 0x3, + kJNE = 0x5, + kJNG = 0xe, + kJNGE = 0xc, + kJNL = 0xd, + kJNLE = 0xf, + kJNO = 0x1, + kJNP = 0xb, + kJNS = 0x9, + kJNZ = 0x5, + kJO = 0x0, + kJP = 0xa, + kJPE = 0xa, + kJPO = 0xb, + kJS = 0x8, + }; +}; + +//---------------------------------------------------------------------- +// StubLinker with extensions for generating X86 code. +//---------------------------------------------------------------------- +class StubLinkerCPU : public StubLinker +{ + public: + +#ifdef _TARGET_AMD64_ + enum X86OperandSize + { + k32BitOp, + k64BitOp, + }; +#endif + + VOID X86EmitAddReg(X86Reg reg, INT32 imm32); + VOID X86EmitAddRegReg(X86Reg destreg, X86Reg srcReg); + VOID X86EmitSubReg(X86Reg reg, INT32 imm32); + VOID X86EmitSubRegReg(X86Reg destreg, X86Reg srcReg); + + VOID X86EmitMovRegReg(X86Reg destReg, X86Reg srcReg); + VOID X86EmitMovSPReg(X86Reg srcReg); + VOID X86EmitMovRegSP(X86Reg destReg); + + VOID X86EmitPushReg(X86Reg reg); + VOID X86EmitPopReg(X86Reg reg); + VOID X86EmitPushRegs(unsigned regSet); + VOID X86EmitPopRegs(unsigned regSet); + VOID X86EmitPushImm32(UINT value); + VOID X86EmitPushImm32(CodeLabel &pTarget); + VOID X86EmitPushImm8(BYTE value); + VOID X86EmitPushImmPtr(LPVOID value WIN64_ARG(X86Reg tmpReg = kR10)); + + VOID X86EmitCmpRegImm32(X86Reg reg, INT32 imm32); // cmp reg, imm32 + VOID X86EmitCmpRegIndexImm32(X86Reg reg, INT32 offs, INT32 imm32); // cmp [reg+offs], imm32 +#ifdef _TARGET_AMD64_ + VOID X64EmitCmp32RegIndexImm32(X86Reg reg, INT32 offs, INT32 imm32); // cmp dword ptr [reg+offs], imm32 + + VOID X64EmitMovXmmXmm(X86Reg destXmmreg, X86Reg srcXmmReg); + VOID X64EmitMovdqaFromMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs = 0); + VOID X64EmitMovdqaToMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs = 0); + VOID X64EmitMovSDFromMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs = 0); + VOID X64EmitMovSDToMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs = 0); + VOID X64EmitMovSSFromMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs = 0); + VOID X64EmitMovSSToMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs = 0); + + VOID X64EmitMovXmmWorker(BYTE prefix, BYTE opcode, X86Reg Xmmreg, X86Reg baseReg, __int32 ofs = 0); +#endif + + VOID X86EmitZeroOutReg(X86Reg reg); + VOID X86EmitJumpReg(X86Reg reg); + + VOID X86EmitOffsetModRM(BYTE opcode, X86Reg altreg, X86Reg indexreg, __int32 ofs); + VOID X86EmitOffsetModRmSIB(BYTE opcode, X86Reg opcodeOrReg, X86Reg baseReg, X86Reg indexReg, __int32 scale, __int32 ofs); + + VOID X86EmitTailcallWithESPAdjust(CodeLabel *pTarget, INT32 imm32); + VOID X86EmitTailcallWithSinglePop(CodeLabel *pTarget, X86Reg reg); + + VOID X86EmitNearJump(CodeLabel *pTarget); + VOID X86EmitCondJump(CodeLabel *pTarget, X86CondCode::cc condcode); + VOID X86EmitCall(CodeLabel *target, int iArgBytes); + VOID X86EmitReturn(WORD wArgBytes); +#ifdef _TARGET_AMD64_ + VOID X86EmitLeaRIP(CodeLabel *target, X86Reg reg); +#endif + + static const unsigned X86TLSFetch_TRASHABLE_REGS = (1<COM+ interop or N/Direct + VOID EmitProfilerComCallProlog(TADDR pFrameVptr, X86Reg regFrame); + VOID EmitProfilerComCallEpilog(TADDR pFrameVptr, X86Reg regFrame); + + + + // Emits the most efficient form of the operation: + // + // opcode altreg, [basereg + scaledreg*scale + 
ofs] + // + // or + // + // opcode [basereg + scaledreg*scale + ofs], altreg + // + // (the opcode determines which comes first.) + // + // + // Limitations: + // + // scale must be 0,1,2,4 or 8. + // if scale == 0, scaledreg is ignored. + // basereg and altreg may be equal to 4 (ESP) but scaledreg cannot + // for some opcodes, "altreg" may actually select an operation + // rather than a second register argument. + // + + VOID X86EmitOp(WORD opcode, + X86Reg altreg, + X86Reg basereg, + __int32 ofs = 0, + X86Reg scaledreg = (X86Reg)0, + BYTE scale = 0 + AMD64_ARG(X86OperandSize OperandSize = k32BitOp) + ); + +#ifdef _TARGET_AMD64_ + FORCEINLINE + VOID X86EmitOp(WORD opcode, + X86Reg altreg, + X86Reg basereg, + __int32 ofs, + X86OperandSize OperandSize + ) + { + X86EmitOp(opcode, altreg, basereg, ofs, (X86Reg)0, 0, OperandSize); + } +#endif // _TARGET_AMD64_ + + // Emits + // + // opcode altreg, modrmreg + // + // or + // + // opcode modrmreg, altreg + // + // (the opcode determines which one comes first) + // + // For single-operand opcodes, "altreg" actually selects + // an operation rather than a register. + + VOID X86EmitR2ROp(WORD opcode, + X86Reg altreg, + X86Reg modrmreg + AMD64_ARG(X86OperandSize OperandSize = k64BitOp) + ); + + VOID X86EmitRegLoad(X86Reg reg, UINT_PTR imm); + + VOID X86EmitRegSave(X86Reg altreg, __int32 ofs) + { + LIMITED_METHOD_CONTRACT; + X86EmitEspOffset(0x89, altreg, ofs); + // X86Reg values never are outside a byte. + UnwindSavedReg(static_cast(altreg), ofs); + } + + VOID X86_64BitOperands () + { + WRAPPER_NO_CONTRACT; +#ifdef _TARGET_AMD64_ + Emit8(0x48); +#endif + } + + VOID EmitEnable(CodeLabel *pForwardRef); + VOID EmitRareEnable(CodeLabel *pRejoinPoint); + + VOID EmitDisable(CodeLabel *pForwardRef, BOOL fCallIn, X86Reg ThreadReg); + VOID EmitRareDisable(CodeLabel *pRejoinPoint); + VOID EmitRareDisableHRESULT(CodeLabel *pRejoinPoint, CodeLabel *pExitPoint); + + VOID EmitSetup(CodeLabel *pForwardRef); + VOID EmitRareSetup(CodeLabel* pRejoinPoint, BOOL fThrow); + VOID EmitCheckGSCookie(X86Reg frameReg, int gsCookieOffset); + +#ifdef _TARGET_X86_ + void EmitComMethodStubProlog(TADDR pFrameVptr, CodeLabel** rgRareLabels, + CodeLabel** rgRejoinLabels, BOOL bShouldProfile); + + void EmitComMethodStubEpilog(TADDR pFrameVptr, CodeLabel** rgRareLabels, + CodeLabel** rgRejoinLabels, BOOL bShouldProfile); +#endif + + VOID EmitMethodStubProlog(TADDR pFrameVptr, int transitionBlockOffset); + VOID EmitMethodStubEpilog(WORD numArgBytes, int transitionBlockOffset); + + VOID EmitUnboxMethodStub(MethodDesc* pRealMD); +#if defined(FEATURE_SHARE_GENERIC_CODE) + VOID EmitInstantiatingMethodStub(MethodDesc* pSharedMD, void* extra); +#endif // FEATURE_SHARE_GENERIC_CODE + +#if defined(FEATURE_COMINTEROP) && defined(_TARGET_X86_) + //======================================================================== + // shared Epilog for stubs that enter managed code from COM + // uses a return thunk within the method desc + void EmitSharedComMethodStubEpilog(TADDR pFrameVptr, + CodeLabel** rgRareLabels, + CodeLabel** rgRejoinLabels, + unsigned offsetReturnThunk, + BOOL bShouldProfile); +#endif // FEATURE_COMINTEROP && _TARGET_X86_ + + //=========================================================================== + // Computes hash code for MulticastDelegate.Invoke() + static UINT_PTR HashMulticastInvoke(MetaSig* pSig); + + //=========================================================================== + // Emits code for Delegate.Invoke() any delegate type + VOID EmitDelegateInvoke(); + + 
//=========================================================================== + // Emits code for MulticastDelegate.Invoke() - sig specific + VOID EmitMulticastInvoke(UINT_PTR hash); + + //=========================================================================== + // Emits code for Delegate.Invoke() on delegates that recorded creator assembly + VOID EmitSecureDelegateInvoke(UINT_PTR hash); + + //=========================================================================== + // Emits code to adjust for a static delegate target. + VOID EmitShuffleThunk(struct ShuffleEntry *pShuffleEntryArray); + + + //=========================================================================== + // Emits code to do an array operation. + VOID EmitArrayOpStub(const ArrayOpScript*); + + //Worker function to emit throw helpers for array ops. + VOID EmitArrayOpStubThrow(unsigned exConst, unsigned cbRetArg); + + //=========================================================================== + // Emits code to break into debugger + VOID EmitDebugBreak(); + +#if defined(_DEBUG) && (defined(_TARGET_AMD64_) || defined(_TARGET_X86_)) && !defined(FEATURE_PAL) + //=========================================================================== + // Emits code to log JITHelper access + void EmitJITHelperLoggingThunk(PCODE pJitHelper, LPVOID helperFuncCount); +#endif + +#ifdef _DEBUG + VOID X86EmitDebugTrashReg(X86Reg reg); +#endif + +#if defined(_DEBUG) && defined(STUBLINKER_GENERATES_UNWIND_INFO) && !defined(CROSSGEN_COMPILE) + virtual VOID EmitUnwindInfoCheckWorker (CodeLabel *pCheckLabel); + virtual VOID EmitUnwindInfoCheckSubfunction(); +#endif + +#ifdef _TARGET_AMD64_ + + static Stub * CreateTailCallCopyArgsThunk(CORINFO_SIG_INFO * pSig, + CorInfoHelperTailCallSpecialHandling flags); + +#endif // _TARGET_AMD64_ + + private: + VOID X86EmitSubEspWorker(INT32 imm32); + + public: + static void Init(); + +}; + +inline TADDR rel32Decode(/*PTR_INT32*/ TADDR pRel32) +{ + LIMITED_METHOD_CONTRACT; + SUPPORTS_DAC; + return pRel32 + 4 + *PTR_INT32(pRel32); +} + +BOOL rel32SetInterlocked(/*PINT32*/ PVOID pRel32, TADDR target, TADDR expected, MethodDesc* pMD); + +//------------------------------------------------------------------------ +// +// Precode definitions +// +//------------------------------------------------------------------------ + +EXTERN_C VOID STDCALL PrecodeFixupThunk(); + +#ifdef _WIN64 + +#define OFFSETOF_PRECODE_TYPE 0 +#define OFFSETOF_PRECODE_TYPE_CALL_OR_JMP 5 +#define OFFSETOF_PRECODE_TYPE_MOV_R10 10 + +#define SIZEOF_PRECODE_BASE 16 + +#else + +EXTERN_C VOID STDCALL PrecodeRemotingThunk(); + +#define OFFSETOF_PRECODE_TYPE 5 +#define OFFSETOF_PRECODE_TYPE_CALL_OR_JMP 5 +#define OFFSETOF_PRECODE_TYPE_MOV_RM_R 6 + +#define SIZEOF_PRECODE_BASE 8 + +#endif // _WIN64 + + +#include + +// Invalid precode type +struct InvalidPrecode { + // int3 + static const int Type = 0xCC; +}; + + +// Regular precode +struct StubPrecode { + +#ifdef _WIN64 + static const BYTE Type = 0x40; + // mov r10,pMethodDesc + // inc eax + // jmp Stub +#else + static const BYTE Type = 0xED; + // mov eax,pMethodDesc + // mov ebp,ebp + // jmp Stub +#endif // _WIN64 + + IN_WIN64(USHORT m_movR10;) + IN_WIN32(BYTE m_movEAX;) + TADDR m_pMethodDesc; + IN_WIN32(BYTE m_mov_rm_r;) + BYTE m_type; + BYTE m_jmp; + INT32 m_rel32; + + void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator = NULL, BYTE type = StubPrecode::Type, TADDR target = NULL); + + TADDR GetMethodDesc() + { + LIMITED_METHOD_DAC_CONTRACT; + + return m_pMethodDesc; + } + + PCODE 
GetTarget() + { + LIMITED_METHOD_DAC_CONTRACT; + + return rel32Decode(PTR_HOST_MEMBER_TADDR(StubPrecode, this, m_rel32)); + } + + BOOL SetTargetInterlocked(TADDR target, TADDR expected) + { + CONTRACTL + { + THROWS; + GC_TRIGGERS; + } + CONTRACTL_END; + + EnsureWritableExecutablePages(&m_rel32); + return rel32SetInterlocked(&m_rel32, target, expected, (MethodDesc*)GetMethodDesc()); + } +}; +IN_WIN64(static_assert_no_msg(offsetof(StubPrecode, m_movR10) == OFFSETOF_PRECODE_TYPE);) +IN_WIN64(static_assert_no_msg(offsetof(StubPrecode, m_type) == OFFSETOF_PRECODE_TYPE_MOV_R10);) +IN_WIN32(static_assert_no_msg(offsetof(StubPrecode, m_mov_rm_r) == OFFSETOF_PRECODE_TYPE);) +IN_WIN32(static_assert_no_msg(offsetof(StubPrecode, m_type) == OFFSETOF_PRECODE_TYPE_MOV_RM_R);) +typedef DPTR(StubPrecode) PTR_StubPrecode; + + +#ifdef HAS_NDIRECT_IMPORT_PRECODE + +// NDirect import precode +// (This is fake precode. VTable slot does not point to it.) +struct NDirectImportPrecode : StubPrecode { + +#ifdef _WIN64 + static const int Type = 0x48; + // mov r10,pMethodDesc + // dec eax + // jmp NDirectImportThunk +#else + static const int Type = 0xC0; + // mov eax,pMethodDesc + // mov eax,eax + // jmp NDirectImportThunk +#endif // _WIN64 + + void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator); + + LPVOID GetEntrypoint() + { + LIMITED_METHOD_CONTRACT; + return this; + } +}; +typedef DPTR(NDirectImportPrecode) PTR_NDirectImportPrecode; + +#endif // HAS_NDIRECT_IMPORT_PRECODE + + +#ifdef HAS_REMOTING_PRECODE + +// Precode with embedded remoting interceptor +struct RemotingPrecode { + +#ifdef _WIN64 + static const int Type = XXX; // NYI + // mov r10,pMethodDesc + // call PrecodeRemotingThunk + // jmp Prestub/Stub/NativeCode +#else + static const int Type = 0x90; + // mov eax,pMethodDesc + // nop + // call PrecodeRemotingThunk + // jmp Prestub/Stub/NativeCode +#endif // _WIN64 + + IN_WIN64(USHORT m_movR10;) + IN_WIN32(BYTE m_movEAX;) + TADDR m_pMethodDesc; + BYTE m_type; + BYTE m_call; + INT32 m_callRel32; + BYTE m_jmp; + INT32 m_rel32; + + void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator = NULL); + + TADDR GetMethodDesc() + { + LIMITED_METHOD_CONTRACT; + SUPPORTS_DAC; + + return m_pMethodDesc; + } + + PCODE GetTarget() + { + LIMITED_METHOD_DAC_CONTRACT; + + return rel32Decode(PTR_HOST_MEMBER_TADDR(RemotingPrecode, this, m_rel32)); + } + + BOOL SetTargetInterlocked(TADDR target, TADDR expected) + { + CONTRACTL + { + THROWS; + GC_TRIGGERS; + } + CONTRACTL_END; + + EnsureWritableExecutablePages(&m_rel32); + return rel32SetInterlocked(&m_rel32, target, expected, (MethodDesc*)GetMethodDesc()); + } +}; +IN_WIN64(static_assert_no_msg(offsetof(RemotingPrecode, m_movR10) == OFFSETOF_PRECODE_TYPE);) +IN_WIN64(static_assert_no_msg(offsetof(RemotingPrecode, m_type) == OFFSETOF_PRECODE_TYPE_MOV_R10);) +IN_WIN32(static_assert_no_msg(offsetof(RemotingPrecode, m_type) == OFFSETOF_PRECODE_TYPE);) +typedef DPTR(RemotingPrecode) PTR_RemotingPrecode; + +#endif // HAS_REMOTING_PRECODE + + +#ifdef HAS_FIXUP_PRECODE + +// Fixup precode is used in ngen images when the prestub does just one time fixup. +// The fixup precode is simple jump once patched. It does not have the two instruction overhead of regular precode. +struct FixupPrecode { + + static const int TypePrestub = 0x5E; + // The entrypoint has to be 8-byte aligned so that the "call PrecodeFixupThunk" can be patched to "jmp NativeCode" atomically. 
+ // call PrecodeFixupThunk + // db TypePrestub (pop esi) + // db MethodDescChunkIndex + // db PrecodeChunkIndex + + static const int Type = 0x5F; + // After it has been patched to point to native code + // jmp NativeCode + // db Type (pop edi) + + BYTE m_op; + INT32 m_rel32; + BYTE m_type; + BYTE m_MethodDescChunkIndex; + BYTE m_PrecodeChunkIndex; +#ifdef HAS_FIXUP_PRECODE_CHUNKS + // Fixup precode chunk is associated with MethodDescChunk. The layout of the fixup precode chunk is: + // + // FixupPrecode Entrypoint PrecodeChunkIndex = 2 + // FixupPrecode Entrypoint PrecodeChunkIndex = 1 + // FixupPrecode Entrypoint PrecodeChunkIndex = 0 + // TADDR Base of MethodDescChunk +#else + TADDR m_pMethodDesc; +#endif + + void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator, int iMethodDescChunkIndex = 0, int iPrecodeChunkIndex = 0); + +#ifdef HAS_FIXUP_PRECODE_CHUNKS + TADDR GetBase() + { + LIMITED_METHOD_CONTRACT; + SUPPORTS_DAC; + + return dac_cast(this) + (m_PrecodeChunkIndex + 1) * sizeof(FixupPrecode); + } + + TADDR GetMethodDesc(); +#else // HAS_FIXUP_PRECODE_CHUNKS + TADDR GetMethodDesc() + { + LIMITED_METHOD_CONTRACT; + return m_pMethodDesc; + } +#endif // HAS_FIXUP_PRECODE_CHUNKS + + PCODE GetTarget() + { + LIMITED_METHOD_DAC_CONTRACT; + + return rel32Decode(PTR_HOST_MEMBER_TADDR(FixupPrecode, this, m_rel32)); + } + + BOOL SetTargetInterlocked(TADDR target, TADDR expected); + + static BOOL IsFixupPrecodeByASM(TADDR addr) + { + LIMITED_METHOD_CONTRACT; + + return *dac_cast(addr) == X86_INSTR_JMP_REL32; + } + +#ifdef FEATURE_PREJIT + // Partial initialization. Used to save regrouped chunks. + void InitForSave(int iPrecodeChunkIndex); + + void Fixup(DataImage *image, MethodDesc * pMD); +#endif + +#ifdef DACCESS_COMPILE + void EnumMemoryRegions(CLRDataEnumMemoryFlags flags); +#endif +}; +IN_WIN32(static_assert_no_msg(offsetof(FixupPrecode, m_type) == OFFSETOF_PRECODE_TYPE)); +IN_WIN64(static_assert_no_msg(offsetof(FixupPrecode, m_op) == OFFSETOF_PRECODE_TYPE);) +IN_WIN64(static_assert_no_msg(offsetof(FixupPrecode, m_type) == OFFSETOF_PRECODE_TYPE_CALL_OR_JMP);) + +typedef DPTR(FixupPrecode) PTR_FixupPrecode; + +#endif // HAS_FIXUP_PRECODE + +#ifdef HAS_THISPTR_RETBUF_PRECODE + +// Precode to stuffle this and retbuf for closed delegates over static methods with return buffer +struct ThisPtrRetBufPrecode { + +#ifdef _WIN64 + static const int Type = 0x90; +#else + static const int Type = 0xC2; +#endif // _WIN64 + + // mov regScratch,regArg0 + // mov regArg0,regArg1 + // mov regArg1,regScratch + // nop + // jmp EntryPoint + // dw pMethodDesc + + IN_WIN64(BYTE m_nop1;) + IN_WIN64(BYTE m_prefix1;) + WORD m_movScratchArg0; + IN_WIN64(BYTE m_prefix2;) + WORD m_movArg0Arg1; + IN_WIN64(BYTE m_prefix3;) + WORD m_movArg1Scratch; + BYTE m_nop2; + BYTE m_jmp; + INT32 m_rel32; + TADDR m_pMethodDesc; + + void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator); + + TADDR GetMethodDesc() + { + LIMITED_METHOD_CONTRACT; + SUPPORTS_DAC; + + return m_pMethodDesc; + } + + PCODE GetTarget(); + + BOOL SetTargetInterlocked(TADDR target, TADDR expected); +}; +IN_WIN32(static_assert_no_msg(offsetof(ThisPtrRetBufPrecode, m_movArg1Scratch) + 1 == OFFSETOF_PRECODE_TYPE);) +typedef DPTR(ThisPtrRetBufPrecode) PTR_ThisPtrRetBufPrecode; + +#endif // HAS_THISPTR_RETBUF_PRECODE + +#include + +#endif // STUBLINKERX86_H_ diff --git a/src/vm/i386/virtualcallstubcpu.hpp b/src/vm/i386/virtualcallstubcpu.hpp new file mode 100644 index 0000000000..33ce8199b9 --- /dev/null +++ b/src/vm/i386/virtualcallstubcpu.hpp @@ -0,0 
+1,1077 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// +// File: virtualcallstubcpu.hpp +// + + +// + +// +// ============================================================================ + +#ifndef _VIRTUAL_CALL_STUB_X86_H +#define _VIRTUAL_CALL_STUB_X86_H + +#ifdef DECLARE_DATA +#include "asmconstants.h" +#ifdef FEATURE_REMOTING +#include "remoting.h" +#endif +#endif + +#include // Since we are placing code, we want byte packing of the structs + +#define USES_LOOKUP_STUBS 1 + +/********************************************************************************************* +Stubs that contain code are all part of larger structs called Holders. There is a +Holder for each kind of stub, i.e XXXStub is contained with XXXHolder. Holders are +essentially an implementation trick that allowed rearranging the code sequences more +easily while trying out different alternatives, and for dealing with any alignment +issues in a way that was mostly immune to the actually code sequences. These Holders +should be revisited when the stub code sequences are fixed, since in many cases they +add extra space to a stub that is not really needed. + +Stubs are placed in cache and hash tables. Since unaligned access of data in memory +is very slow, the keys used in those tables should be aligned. The things used as keys +typically also occur in the generated code, e.g. a token as an immediate part of an instruction. +For now, to avoid alignment computations as different code strategies are tried out, the key +fields are all in the Holders. Eventually, many of these fields should be dropped, and the instruction +streams aligned so that the immediate fields fall on aligned boundaries. +*/ + +#if USES_LOOKUP_STUBS + +struct LookupStub; +struct LookupHolder; + +/*LookupStub************************************************************************************** +Virtual and interface call sites are initially setup to point at LookupStubs. +This is because the runtime type of the pointer is not yet known, +so the target cannot be resolved. Note: if the jit is able to determine the runtime type +of the pointer, it should be generating a direct call not a virtual or interface call. +This stub pushes a lookup token onto the stack to identify the sought after method, and then +jumps into the EE (VirtualCallStubManager::ResolveWorkerStub) to effectuate the lookup and +transfer of control to the appropriate target method implementation, perhaps patching of the call site +along the way to point to a more appropriate stub. Hence callsites that point to LookupStubs +get quickly changed to point to another kind of stub. 
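+
+For reference, the non-logging code sequence described by the fields below (and filled
+in by LookupHolder::InitializeStatic/Initialize later in this file) is just eleven bytes:
+
+    50                  push eax            ; save siteAddrForRegisterIndirect
+    68 xx xx xx xx      push dispatchToken  ; _token
+    e9 xx xx xx xx      jmp  resolveWorker  ; _resolveWorkerDispl, pc-relative
+
+Note that a lookup stub never examines the object itself; it only identifies the call
+site and token and hands control to the resolver.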
+*/ +struct LookupStub +{ + inline PCODE entryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; } + inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; } + inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(LookupStub); } + +private: + friend struct LookupHolder; + + // DispatchStub:: _entryPoint expects: + // ecx: object (the "this" pointer) + // eax: siteAddrForRegisterIndirect if this is a RegisterIndirect dispatch call + BYTE _entryPoint [2]; // 50 push eax ;save siteAddrForRegisterIndirect - this may be an indirect call + // 68 push + size_t _token; // xx xx xx xx 32-bit constant +#ifdef STUB_LOGGING + BYTE cntr2[2]; // ff 05 inc + size_t* c_lookup; // xx xx xx xx [call_lookup_counter] +#endif //STUB_LOGGING + BYTE part2 [1]; // e9 jmp + DISPL _resolveWorkerDispl;// xx xx xx xx pc-rel displ +}; + +/* LookupHolders are the containers for LookupStubs, they provide for any alignment of +stubs as necessary. In the case of LookupStubs, alignment is necessary since +LookupStubs are placed in a hash table keyed by token. */ +struct LookupHolder +{ + static void InitializeStatic(); + + void Initialize(PCODE resolveWorkerTarget, size_t dispatchToken); + + LookupStub* stub() { LIMITED_METHOD_CONTRACT; return &_stub; } + + static LookupHolder* FromLookupEntry(PCODE lookupEntry); + +private: + friend struct LookupStub; + + BYTE align[(sizeof(void*)-(offsetof(LookupStub,_token)%sizeof(void*)))%sizeof(void*)]; + LookupStub _stub; + BYTE pad[sizeof(void*) - + ((sizeof(void*)-(offsetof(LookupStub,_token)%sizeof(void*))) + + (sizeof(LookupStub)) + ) % sizeof(void*)]; //complete DWORD + + static_assert_no_msg((sizeof(void*) - + ((sizeof(void*)-(offsetof(LookupStub,_token)%sizeof(void*))) + + (sizeof(LookupStub)) + ) % sizeof(void*)) != 0); +}; + +#endif // USES_LOOKUP_STUBS + +struct DispatchStub; +struct DispatchHolder; + +/*DispatchStub************************************************************************************** +Monomorphic and mostly monomorphic call sites eventually point to DispatchStubs. +A dispatch stub has an expected type (expectedMT), target address (target) and fail address (failure). +If the calling frame does in fact have the type be of the expected type, then +control is transfered to the target address, the method implementation. If not, +then control is transfered to the fail address, a fail stub (see below) where a polymorphic +lookup is done to find the correct address to go to. + +implementation note: Order, choice of instructions, and branch directions +should be carefully tuned since it can have an inordinate effect on performance. Particular +attention needs to be paid to the effects on the BTB and branch prediction, both in the small +and in the large, i.e. it needs to run well in the face of BTB overflow--using static predictions. +Note that since this stub is only used for mostly monomorphic callsites (ones that are not, get patched +to something else), therefore the conditional jump "jne failure" is mostly not taken, and hence it is important +that the branch prediction staticly predict this, which means it must be a forward jump. The alternative +is to reverse the order of the jumps and make sure that the resulting conditional jump "je implTarget" +is statically predicted as taken, i.e a backward jump. The current choice was taken since it was easier +to control the placement of the stubs than control the placement of the jitted code and the stubs. 
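+
+Stripped of encodings, the non-logging stub below is simply:
+
+    cmp  dword ptr [ecx], _expectedMT   ; doubles as the null-"this" fault point
+    jne  failTarget                     ; forward jump, statically predicted not-taken
+    jmp  implTarget
+
+i.e. one inlined type check guarding a direct jump to the method implementation.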
*/ +struct DispatchStub +{ + inline PCODE entryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; } + + inline size_t expectedMT() { LIMITED_METHOD_CONTRACT; return _expectedMT; } + inline PCODE implTarget() { LIMITED_METHOD_CONTRACT; return (PCODE) &_implDispl + sizeof(DISPL) + _implDispl; } + inline PCODE failTarget() { LIMITED_METHOD_CONTRACT; return (PCODE) &_failDispl + sizeof(DISPL) + _failDispl; } + inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(DispatchStub); } + +private: + friend struct DispatchHolder; + + // DispatchStub:: _entryPoint expects: + // ecx: object (the "this" pointer) + // eax: siteAddrForRegisterIndirect if this is a RegisterIndirect dispatch call +#ifndef STUB_LOGGING + BYTE _entryPoint [2]; // 81 39 cmp [ecx], ; This is the place where we are going to fault on null this. + size_t _expectedMT; // xx xx xx xx expectedMT ; If you change it, change also AdjustContextForVirtualStub in excep.cpp!!! + BYTE jmpOp1[2]; // 0f 85 jne + DISPL _failDispl; // xx xx xx xx failEntry ;must be forward jmp for perf reasons + BYTE jmpOp2; // e9 jmp + DISPL _implDispl; // xx xx xx xx implTarget +#else //STUB_LOGGING + BYTE _entryPoint [2]; // ff 05 inc + size_t* d_call; // xx xx xx xx [call_mono_counter] + BYTE cmpOp [2]; // 81 39 cmp [ecx], + size_t _expectedMT; // xx xx xx xx expectedMT + BYTE jmpOp1[2]; // 0f 84 je + DISPL _implDispl; // xx xx xx xx implTarget ;during logging, perf is not so important + BYTE fail [2]; // ff 05 inc + size_t* d_miss; // xx xx xx xx [miss_mono_counter] + BYTE jmpFail; // e9 jmp + DISPL _failDispl; // xx xx xx xx failEntry +#endif //STUB_LOGGING +}; + +/* DispatchHolders are the containers for DispatchStubs, they provide for any alignment of +stubs as necessary. DispatchStubs are placed in a hashtable and in a cache. The keys for both +are the pair expectedMT and token. Efficiency of the of the hash table is not a big issue, +since lookups in it are fairly rare. Efficiency of the cache is paramount since it is accessed frequently +o(see ResolveStub below). Currently we are storing both of these fields in the DispatchHolder to simplify +alignment issues. If inlineMT in the stub itself was aligned, then it could be the expectedMT field. +While the token field can be logically gotten by following the failure target to the failEntryPoint +of the ResolveStub and then to the token over there, for perf reasons of cache access, it is duplicated here. +This allows us to use DispatchStubs in the cache. The alternative is to provide some other immutable struct +for the cache composed of the triplet (expectedMT, token, target) and some sort of reclaimation scheme when +they are thrown out of the cache via overwrites (since concurrency will make the obvious approaches invalid). +*/ + +/* @workaround for ee resolution - Since the EE does not currently have a resolver function that +does what we want, see notes in implementation of VirtualCallStubManager::Resolver, we are +using dispatch stubs to siumulate what we want. That means that inlineTarget, which should be immutable +is in fact written. Hence we have moved target out into the holder and aligned it so we can +atomically update it. 
When we get a resolver function that does what we want, we can drop this field, +and live with just the inlineTarget field in the stub itself, since immutability will hold.*/ +struct DispatchHolder +{ + static void InitializeStatic(); + + void Initialize(PCODE implTarget, PCODE failTarget, size_t expectedMT); + + DispatchStub* stub() { LIMITED_METHOD_CONTRACT; return &_stub; } + + static DispatchHolder* FromDispatchEntry(PCODE dispatchEntry); + +private: + //force expectedMT to be aligned since used as key in hash tables. +#ifndef STUB_LOGGING + BYTE align[(sizeof(void*)-(offsetof(DispatchStub,_expectedMT)%sizeof(void*)))%sizeof(void*)]; +#endif + DispatchStub _stub; + BYTE pad[(sizeof(void*)-(sizeof(DispatchStub)%sizeof(void*))+offsetof(DispatchStub,_expectedMT))%sizeof(void*)]; //complete DWORD +}; + +struct ResolveStub; +struct ResolveHolder; + +/*ResolveStub************************************************************************************** +Polymorphic call sites and monomorphic calls that fail end up in a ResolverStub. There is only +one resolver stub built for any given token, even though there may be many call sites that +use that token and many distinct types that are used in the calling call frames. A resolver stub +actually has two entry points, one for polymorphic call sites and one for dispatch stubs that fail on their +expectedMT test. There is a third part of the resolver stub that enters the ee when a decision should +be made about changing the callsite. Therefore, we have defined the resolver stub as three distinct pieces, +even though they are actually allocated as a single contiguous block of memory. These pieces are: + +A ResolveStub has two entry points: + +FailEntry - where the dispatch stub goes if the expected MT test fails. This piece of the stub does +a check to see how often we are actually failing. If failures are frequent, control transfers to the +patch piece to cause the call site to be changed from a mostly monomorphic callsite +(calls dispatch stub) to a polymorphic callsize (calls resolve stub). If failures are rare, control +transfers to the resolve piece (see ResolveStub). The failEntryPoint decrements a counter +every time it is entered. The ee at various times will add a large chunk to the counter. + +ResolveEntry - does a lookup via in a cache by hashing the actual type of the calling frame s + and the token identifying the (contract,method) pair desired. If found, control is transfered +to the method implementation. If not found in the cache, the token is pushed and the ee is entered via +the ResolveWorkerStub to do a full lookup and eventual transfer to the correct method implementation. Since +there is a different resolve stub for every token, the token can be inlined and the token can be pre-hashed. +The effectiveness of this approach is highly sensitive to the effectiveness of the hashing algorithm used, +as well as its speed. It turns out it is very important to make the hash function sensitive to all +of the bits of the method table, as method tables are laid out in memory in a very non-random way. Before +making any changes to the code sequences here, it is very important to measure and tune them as perf +can vary greatly, in unexpected ways, with seeming minor changes. + +Implementation note - Order, choice of instructions, and branch directions +should be carefully tuned since it can have an inordinate effect on performance. 
Particular +attention needs to be paid to the effects on the BTB and branch prediction, both in the small +and in the large, i.e. it needs to run well in the face of BTB overflow--using static predictions. +Note that this stub is called in highly polymorphic cases, but the cache should have been sized +and the hash function chosen to maximize the cache hit case. Hence the cmp/jcc instructions should +mostly be going down the cache hit route, and it is important that this be statically predicted as so. +Hence the 3 jcc instrs need to be forward jumps. As structured, there is only one jmp/jcc that typically +gets put in the BTB since all the others typically fall straight thru. Minimizing potential BTB entries +is important. */ + +struct ResolveStub +{ + inline PCODE failEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_failEntryPoint[0]; } + inline PCODE resolveEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_resolveEntryPoint[0]; } + inline PCODE slowEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_slowEntryPoint[0]; } + + inline INT32* pCounter() { LIMITED_METHOD_CONTRACT; return _pCounter; } + inline UINT32 hashedToken() { LIMITED_METHOD_CONTRACT; return _hashedToken >> LOG2_PTRSIZE; } + inline size_t cacheAddress() { LIMITED_METHOD_CONTRACT; return _cacheAddress; } + inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; } + inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(ResolveStub); } + +private: + friend struct ResolveHolder; + + // ResolveStub::_failEntryPoint expects: + // ecx: object (the "this" pointer) + // eax: siteAddrForRegisterIndirect if this is a RegisterIndirect dispatch call + BYTE _failEntryPoint [2]; // 83 2d sub + INT32* _pCounter; // xx xx xx xx [counter], + BYTE part0 [2]; // 01 01 + // 7c jl + BYTE toPatcher; // xx backpatcher ;must be forward jump, for perf reasons + // ;fall into the resolver stub + + // ResolveStub::_resolveEntryPoint expects: + // ecx: object (the "this" pointer) + // eax: siteAddrForRegisterIndirect if this is a RegisterIndirect dispatch call + BYTE _resolveEntryPoint[6]; // 50 push eax ;save siteAddrForRegisterIndirect - this may be an indirect call + // 8b 01 mov eax,[ecx] ;get the method table from the "this" pointer. This is the place + // ; where we are going to fault on null this. If you change it, + // ; change also AdjustContextForVirtualStub in excep.cpp!!! 
+ // 52 push edx + // 8b d0 mov edx, eax + BYTE part1 [6]; // c1 e8 0C shr eax,12 ;we are adding upper bits into lower bits of mt + // 03 c2 add eax,edx + // 35 xor eax, + UINT32 _hashedToken; // xx xx xx xx hashedToken ;along with pre-hashed token + BYTE part2 [1]; // 25 and eax, + size_t mask; // xx xx xx xx cache_mask + BYTE part3 [2]; // 8b 80 mov eax, [eax+ + size_t _cacheAddress; // xx xx xx xx lookupCache] +#ifdef STUB_LOGGING + BYTE cntr1[2]; // ff 05 inc + size_t* c_call; // xx xx xx xx [call_cache_counter] +#endif //STUB_LOGGING + BYTE part4 [2]; // 3b 10 cmp edx,[eax+ + // BYTE mtOffset; // ResolverCacheElem.pMT] + BYTE part5 [1]; // 75 jne + BYTE toMiss1; // xx miss ;must be forward jump, for perf reasons + BYTE part6 [2]; // 81 78 cmp [eax+ + BYTE tokenOffset; // xx ResolverCacheElem.token], + size_t _token; // xx xx xx xx token + BYTE part7 [1]; // 75 jne + BYTE toMiss2; // xx miss ;must be forward jump, for perf reasons + BYTE part8 [2]; // 8B 40 xx mov eax,[eax+ + BYTE targetOffset; // ResolverCacheElem.target] + BYTE part9 [6]; // 5a pop edx + // 83 c4 04 add esp,4 ;throw away siteAddrForRegisterIndirect - we don't need it now + // ff e0 jmp eax + // miss: + BYTE miss [1]; // 5a pop edx ; don't pop siteAddrForRegisterIndirect - leave it on the stack for use by ResolveWorkerChainLookupAsmStub and/or ResolveWorkerAsmStub + BYTE _slowEntryPoint[1]; // 68 push + size_t _tokenPush; // xx xx xx xx token +#ifdef STUB_LOGGING + BYTE cntr2[2]; // ff 05 inc + size_t* c_miss; // xx xx xx xx [miss_cache_counter] +#endif //STUB_LOGGING + BYTE part10 [1]; // e9 jmp + DISPL _resolveWorkerDispl; // xx xx xx xx resolveWorker == ResolveWorkerChainLookupAsmStub or ResolveWorkerAsmStub + BYTE patch[1]; // e8 call + DISPL _backpatcherDispl; // xx xx xx xx backpatcherWorker == BackPatchWorkerAsmStub + BYTE part11 [1]; // eb jmp + BYTE toResolveStub; // xx resolveStub, i.e. go back to _resolveEntryPoint +}; + +/* ResolveHolders are the containers for ResolveStubs, They provide +for any alignment of the stubs as necessary. The stubs are placed in a hash table keyed by +the token for which they are built. Efficiency of access requires that this token be aligned. +For now, we have copied that field into the ResolveHolder itself, if the resolve stub is arranged such that +any of its inlined tokens (non-prehashed) is aligned, then the token field in the ResolveHolder +is not needed. */ +struct ResolveHolder +{ + static void InitializeStatic(); + + void Initialize(PCODE resolveWorkerTarget, PCODE patcherTarget, + size_t dispatchToken, UINT32 hashedToken, + void * cacheAddr, INT32 * counterAddr); + + ResolveStub* stub() { LIMITED_METHOD_CONTRACT; return &_stub; } + + static ResolveHolder* FromFailEntry(PCODE failEntry); + static ResolveHolder* FromResolveEntry(PCODE resolveEntry); + +private: + //align _token in resolve stub + + BYTE align[(sizeof(void*)-((offsetof(ResolveStub,_token))%sizeof(void*)))%sizeof(void*) +#ifdef STUB_LOGGING // This turns out to be zero-sized in stub_logging case, and is an error. So round up. + +sizeof(void*) +#endif + ]; + + ResolveStub _stub; + +//#ifdef STUB_LOGGING // This turns out to be zero-sized in non stub_logging case, and is an error. 
So remove + BYTE pad[(sizeof(void*)-((sizeof(ResolveStub))%sizeof(void*))+offsetof(ResolveStub,_token))%sizeof(void*)]; //fill out DWORD +//#endif +}; +#include + + +#ifdef DECLARE_DATA + +#ifndef DACCESS_COMPILE + +#ifdef _MSC_VER + +#ifdef CHAIN_LOOKUP +/* This will perform a chained lookup of the entry if the initial cache lookup fails + + Entry stack: + dispatch token + siteAddrForRegisterIndirect (used only if this is a RegisterIndirect dispatch call) + return address of caller to stub + Also, EAX contains the pointer to the first ResolveCacheElem pointer for the calculated + bucket in the cache table. +*/ +__declspec (naked) void ResolveWorkerChainLookupAsmStub() +{ + enum + { + e_token_size = 4, + e_indirect_addr_size = 4, + e_caller_ret_addr_size = 4, + }; + enum + { + // this is the part of the stack that is present as we enter this function: + e_token = 0, + e_indirect_addr = e_token + e_token_size, + e_caller_ret_addr = e_indirect_addr + e_indirect_addr_size, + e_ret_esp = e_caller_ret_addr + e_caller_ret_addr_size, + }; + enum + { + e_spilled_reg_size = 8, + }; + + // main loop setup + __asm { +#ifdef STUB_LOGGING + inc g_chained_lookup_call_counter +#endif + // spill regs + push edx + push ecx + // move the token into edx + mov edx,[esp+e_spilled_reg_size+e_token] + // move the MT into ecx + mov ecx,[ecx] + } + main_loop: + __asm { + // get the next entry in the chain (don't bother checking the first entry again) + mov eax,[eax+e_resolveCacheElem_offset_next] + // test if we hit a terminating NULL + test eax,eax + jz fail + // compare the MT of the ResolveCacheElem + cmp ecx,[eax+e_resolveCacheElem_offset_mt] + jne main_loop + // compare the token of the ResolveCacheElem + cmp edx,[eax+e_resolveCacheElem_offset_token] + jne main_loop + // success + // decrement success counter and move entry to start if necessary + sub g_dispatch_cache_chain_success_counter,1 + //@TODO: Perhaps this should be a jl for better branch prediction? + jge nopromote + // be quick to reset the counter so we don't get a bunch of contending threads + add g_dispatch_cache_chain_success_counter,CALL_STUB_CACHE_INITIAL_SUCCESS_COUNT + // promote the entry to the beginning of the chain + mov ecx,eax + call VirtualCallStubManager::PromoteChainEntry + } + nopromote: + __asm { + // clean up the stack and jump to the target + pop ecx + pop edx + add esp,(e_caller_ret_addr - e_token) + mov eax,[eax+e_resolveCacheElem_offset_target] + jmp eax + } + fail: + __asm { +#ifdef STUB_LOGGING + inc g_chained_lookup_miss_counter +#endif + // restore registers + pop ecx + pop edx + jmp ResolveWorkerAsmStub + } +} +#endif + +/* Call the resolver, it will return where we are supposed to go. + There is a little stack magic here, in that we are entered with one + of the arguments for the resolver (the token) on the stack already. + We just push the other arguments, in the call frame and the call site pointer, + and call the resolver. + + On return we have the stack frame restored to the way it was when the ResolveStub + was called, i.e. as it was at the actual call site. The return value from + the resolver is the address we need to transfer control to, simulating a direct + call from the original call site. If we get passed back NULL, it means that the + resolution failed, an unimpelemented method is being called. 
+ + Entry stack: + dispatch token + siteAddrForRegisterIndirect (used only if this is a RegisterIndirect dispatch call) + return address of caller to stub + + Call stack: + pointer to TransitionBlock + call site + dispatch token + TransitionBlock + ArgumentRegisters (ecx, edx) + CalleeSavedRegisters (ebp, ebx, esi, edi) + return address of caller to stub + */ +__declspec (naked) void ResolveWorkerAsmStub() +{ + CANNOT_HAVE_CONTRACT; + + __asm { + // + // The stub arguments are where we want to setup the TransitionBlock. We will + // setup the TransitionBlock later once we can trash them + // + // push ebp-frame + // push ebp + // mov ebp,esp + + // save CalleeSavedRegisters + // push ebx + + push esi + push edi + + // push ArgumentRegisters + push ecx + push edx + + mov esi, esp + + push [esi + 4*4] // dispatch token + push [esi + 5*4] // siteAddrForRegisterIndirect + push esi // pTransitionBlock + + // Setup up proper EBP frame now that the stub arguments can be trashed + mov [esi + 4*4],ebx + mov [esi + 5*4],ebp + lea ebp, [esi + 5*4] + + // Make the call + call VSD_ResolveWorker + + // From here on, mustn't trash eax + + // pop ArgumentRegisters + pop edx + pop ecx + + // pop CalleeSavedRegisters + pop edi + pop esi + pop ebx + pop ebp + + // Now jump to the target + jmp eax // continue on into the method + } +} + +#ifdef FEATURE_REMOTING +/* For an in-context dispatch, we will find the target. This + is the slow path, and erects a MachState structure for + creating a HelperMethodFrame + + Entry stack: + dispatch token + return address of caller to stub + + Call stack: + pointer to StubDispatchFrame + call site + dispatch token + StubDispatchFrame + GSCookie + negspace + vptr + datum + ArgumentRegisters (ecx, edx) + CalleeSavedRegisters (ebp, ebx, esi, edi) + return address of caller to stub +*/ +__declspec (naked) void InContextTPDispatchAsmStub() +{ + CANNOT_HAVE_CONTRACT; + + __asm { + // Pop dispatch token + pop eax + + // push ebp-frame + push ebp + mov ebp,esp + + // save CalleeSavedRegisters + push ebx + push esi + push edi + + // push ArgumentRegisters + push ecx + push edx + + mov esi, esp + + push eax // token + push esi // pTransitionContext + + // Make the call + call VSD_GetTargetForTPWorker + + // From here on, mustn't trash eax + + // pop ArgumentRegisters + pop edx + pop ecx + + // pop CalleeSavedRegisters + pop edi + pop esi + pop ebx + pop ebp + + // Now jump to the target + jmp eax // continue on into the method + } +} + +/* For an in-context dispatch, we will try to find the target in + the resolve cache. If this fails, we will jump to the full + version of InContextTPDispatchAsmStub + + Entry stack: + dispatch slot number of interface MD + caller return address + ECX: this object +*/ +__declspec (naked) void InContextTPQuickDispatchAsmStub() +{ + CANNOT_HAVE_CONTRACT; + + __asm { + // Spill registers + push ecx + push edx + + // Arg 2 - token + mov eax, [esp + 8] + push eax + + // Arg 1 - this + push ecx + + // Make the call + call VSD_GetTargetForTPWorkerQuick + + // Restore registers + pop edx + pop ecx + + // Test to see if we found a target + test eax, eax + jnz TargetFound + + // If no target, jump to the slow worker + jmp InContextTPDispatchAsmStub + + TargetFound: + // We got a target, so pop off the token and jump to it + add esp,4 + jmp eax + } +} +#endif // FEATURE_REMOTING + +/* Call the callsite back patcher. The fail stub piece of the resolver is being +call too often, i.e. dispatch stubs are failing the expect MT test too often. 
+In this stub wraps the call to the BackPatchWorker to take care of any stack magic +needed. +*/ +__declspec (naked) void BackPatchWorkerAsmStub() +{ + CANNOT_HAVE_CONTRACT; + + __asm { + push EBP + mov ebp,esp + push EAX // it may contain siteAddrForRegisterIndirect + push ECX + push EDX + push EAX // push any indirect call address as the second arg to BackPatchWorker + push [EBP+8] // and push return address as the first arg to BackPatchWorker + call VirtualCallStubManager::BackPatchWorkerStatic + pop EDX + pop ECX + pop EAX + mov esp,ebp + pop ebp + ret + } +} + +#endif // _MSC_VER + +#ifdef _DEBUG +// +// This function verifies that a pointer to an indirection cell lives inside a delegate object. +// In the delegate case the indirection cell is held by the delegate itself in _methodPtrAux, when the delegate Invoke is +// called the shuffle thunk is first invoked and that will call into the virtual dispatch stub. +// Before control is given to the virtual dispatch stub a pointer to the indirection cell (thus an interior pointer to the delegate) +// is pushed in EAX +// +BOOL isDelegateCall(BYTE *interiorPtr) +{ + LIMITED_METHOD_CONTRACT; + + if (GCHeap::GetGCHeap()->IsHeapPointer((void*)interiorPtr)) + { + Object *delegate = (Object*)(interiorPtr - DelegateObject::GetOffsetOfMethodPtrAux()); + VALIDATEOBJECTREF(ObjectToOBJECTREF(delegate)); + _ASSERTE(delegate->GetMethodTable()->IsDelegate()); + + return TRUE; + } + return FALSE; +} +#endif + +StubCallSite::StubCallSite(TADDR siteAddrForRegisterIndirect, PCODE returnAddr) +{ + LIMITED_METHOD_CONTRACT; + + // Not used + // if (isCallRelative(returnAddr)) + // { + // m_siteAddr = returnAddr - sizeof(DISPL); + // } + // else + if (isCallRelativeIndirect((BYTE *)returnAddr)) + { + m_siteAddr = *dac_cast(returnAddr - sizeof(PCODE)); + } + else + { + _ASSERTE(isCallRegisterIndirect((BYTE *)returnAddr) || isDelegateCall((BYTE *)siteAddrForRegisterIndirect)); + m_siteAddr = dac_cast(siteAddrForRegisterIndirect); + } +} + +// the special return address for VSD tailcalls +extern "C" void STDCALL JIT_TailCallReturnFromVSD(); + +PCODE StubCallSite::GetCallerAddress() +{ + LIMITED_METHOD_CONTRACT; + if (m_returnAddr != (PCODE)JIT_TailCallReturnFromVSD) + return m_returnAddr; + + // Find the tailcallframe in the frame chain and get the actual caller from the first TailCallFrame + return TailCallFrame::FindTailCallFrame(GetThread()->GetFrame())->GetCallerAddress(); +} + +#ifdef STUB_LOGGING +extern size_t g_lookup_inline_counter; +extern size_t g_mono_call_counter; +extern size_t g_mono_miss_counter; +extern size_t g_poly_call_counter; +extern size_t g_poly_miss_counter; +#endif + +/* Template used to generate the stub. We generate a stub by allocating a block of + memory and copy the template over it and just update the specific fields that need + to be changed. 
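+
+Note that the branch displacements in these templates are pc-relative: each Initialize
+routine computes them as (target - (address of the DISPL field + sizeof(DISPL))), i.e.
+relative to the end of the displacement itself. For example, a jmp whose 4-byte
+displacement sits at 0x1000 and whose target is 0x2000 stores 0x2000 - (0x1000 + 4) =
+0x0FFC. This also means the fields can only be filled in once the holder occupies its
+final address.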
+*/ +LookupStub lookupInit; + +void LookupHolder::InitializeStatic() +{ + static_assert_no_msg(((offsetof(LookupStub, _token)+offsetof(LookupHolder, _stub)) % sizeof(void*)) == 0); + static_assert_no_msg((sizeof(LookupHolder) % sizeof(void*)) == 0); + + lookupInit._entryPoint [0] = 0x50; + lookupInit._entryPoint [1] = 0x68; + static_assert_no_msg(sizeof(lookupInit._entryPoint) == 2); + lookupInit._token = 0xcccccccc; +#ifdef STUB_LOGGING + lookupInit.cntr2 [0] = 0xff; + lookupInit.cntr2 [1] = 0x05; + static_assert_no_msg(sizeof(lookupInit.cntr2) == 2); + lookupInit.c_lookup = &g_call_lookup_counter; +#endif //STUB_LOGGING + lookupInit.part2 [0] = 0xe9; + static_assert_no_msg(sizeof(lookupInit.part2) == 1); + lookupInit._resolveWorkerDispl = 0xcccccccc; +} + +void LookupHolder::Initialize(PCODE resolveWorkerTarget, size_t dispatchToken) +{ + _stub = lookupInit; + + //fill in the stub specific fields + //@TODO: Get rid of this duplication of data. + _stub._token = dispatchToken; + _stub._resolveWorkerDispl = resolveWorkerTarget - ((PCODE) &_stub._resolveWorkerDispl + sizeof(DISPL)); +} + +LookupHolder* LookupHolder::FromLookupEntry(PCODE lookupEntry) +{ + LIMITED_METHOD_CONTRACT; + LookupHolder* lookupHolder = (LookupHolder*) ( lookupEntry - offsetof(LookupHolder, _stub) - offsetof(LookupStub, _entryPoint) ); + // _ASSERTE(lookupHolder->_stub._entryPoint[0] == lookupInit._entryPoint[0]); + return lookupHolder; +} + + +/* Template used to generate the stub. We generate a stub by allocating a block of + memory and copy the template over it and just update the specific fields that need + to be changed. +*/ +DispatchStub dispatchInit; + +void DispatchHolder::InitializeStatic() +{ + // Check that _expectedMT is aligned in the DispatchHolder + static_assert_no_msg(((offsetof(DispatchHolder, _stub) + offsetof(DispatchStub,_expectedMT)) % sizeof(void*)) == 0); + static_assert_no_msg((sizeof(DispatchHolder) % sizeof(void*)) == 0); + +#ifndef STUB_LOGGING + dispatchInit._entryPoint [0] = 0x81; + dispatchInit._entryPoint [1] = 0x39; + static_assert_no_msg(sizeof(dispatchInit._entryPoint) == 2); + + dispatchInit._expectedMT = 0xcccccccc; + dispatchInit.jmpOp1 [0] = 0x0f; + dispatchInit.jmpOp1 [1] = 0x85; + static_assert_no_msg(sizeof(dispatchInit.jmpOp1) == 2); + + dispatchInit._failDispl = 0xcccccccc; + dispatchInit.jmpOp2 = 0xe9; + dispatchInit._implDispl = 0xcccccccc; +#else //STUB_LOGGING + dispatchInit._entryPoint [0] = 0xff; + dispatchInit._entryPoint [1] = 0x05; + static_assert_no_msg(sizeof(dispatchInit._entryPoint) == 2); + + dispatchInit.d_call = &g_mono_call_counter; + dispatchInit.cmpOp [0] = 0x81; + dispatchInit.cmpOp [1] = 0x39; + static_assert_no_msg(sizeof(dispatchInit.cmpOp) == 2); + + dispatchInit._expectedMT = 0xcccccccc; + dispatchInit.jmpOp1 [0] = 0x0f; + dispatchInit.jmpOp1 [1] = 0x84; + static_assert_no_msg(sizeof(dispatchInit.jmpOp1) == 2); + + dispatchInit._implDispl = 0xcccccccc; + dispatchInit.fail [0] = 0xff; + dispatchInit.fail [1] = 0x05; + static_assert_no_msg(sizeof(dispatchInit.fail) == 2); + + dispatchInit.d_miss = &g_mono_miss_counter; + dispatchInit.jmpFail = 0xe9; + dispatchInit._failDispl = 0xcccccccc; +#endif //STUB_LOGGING +}; + +void DispatchHolder::Initialize(PCODE implTarget, PCODE failTarget, size_t expectedMT) +{ + _stub = dispatchInit; + + //fill in the stub specific fields + _stub._expectedMT = (size_t) expectedMT; + _stub._failDispl = failTarget - ((PCODE) &_stub._failDispl + sizeof(DISPL)); + _stub._implDispl = implTarget - ((PCODE) &_stub._implDispl + 
sizeof(DISPL)); +} + +DispatchHolder* DispatchHolder::FromDispatchEntry(PCODE dispatchEntry) +{ + LIMITED_METHOD_CONTRACT; + DispatchHolder* dispatchHolder = (DispatchHolder*) ( dispatchEntry - offsetof(DispatchHolder, _stub) - offsetof(DispatchStub, _entryPoint) ); + // _ASSERTE(dispatchHolder->_stub._entryPoint[0] == dispatchInit._entryPoint[0]); + return dispatchHolder; +} + + +/* Template used to generate the stub. We generate a stub by allocating a block of + memory and copy the template over it and just update the specific fields that need + to be changed. +*/ + +ResolveStub resolveInit; + +void ResolveHolder::InitializeStatic() +{ + //Check that _token is aligned in ResolveHolder + static_assert_no_msg(((offsetof(ResolveHolder, _stub) + offsetof(ResolveStub, _token)) % sizeof(void*)) == 0); + static_assert_no_msg((sizeof(ResolveHolder) % sizeof(void*)) == 0); + + resolveInit._failEntryPoint [0] = 0x83; + resolveInit._failEntryPoint [1] = 0x2d; + static_assert_no_msg(sizeof(resolveInit._failEntryPoint) == 2); + + resolveInit._pCounter = (INT32 *) (size_t) 0xcccccccc; + resolveInit.part0 [0] = 0x01; + resolveInit.part0 [1] = 0x7c; + static_assert_no_msg(sizeof(resolveInit.part0) == 2); + + resolveInit.toPatcher = (offsetof(ResolveStub, patch) - (offsetof(ResolveStub, toPatcher) + 1)) & 0xFF; + + resolveInit._resolveEntryPoint [0] = 0x50; + resolveInit._resolveEntryPoint [1] = 0x8b; + resolveInit._resolveEntryPoint [2] = 0x01; + resolveInit._resolveEntryPoint [3] = 0x52; + resolveInit._resolveEntryPoint [4] = 0x8b; + resolveInit._resolveEntryPoint [5] = 0xd0; + static_assert_no_msg(sizeof(resolveInit._resolveEntryPoint) == 6); + + resolveInit.part1 [0] = 0xc1; + resolveInit.part1 [1] = 0xe8; + resolveInit.part1 [2] = CALL_STUB_CACHE_NUM_BITS; + resolveInit.part1 [3] = 0x03; + resolveInit.part1 [4] = 0xc2; + resolveInit.part1 [5] = 0x35; + static_assert_no_msg(sizeof(resolveInit.part1) == 6); + + resolveInit._hashedToken = 0xcccccccc; + resolveInit.part2 [0] = 0x25; + static_assert_no_msg(sizeof(resolveInit.part2) == 1); + + resolveInit.mask = (CALL_STUB_CACHE_MASK << LOG2_PTRSIZE); + resolveInit.part3 [0] = 0x8b; + resolveInit.part3 [1] = 0x80;; + static_assert_no_msg(sizeof(resolveInit.part3) == 2); + + resolveInit._cacheAddress = 0xcccccccc; +#ifdef STUB_LOGGING + resolveInit.cntr1 [0] = 0xff; + resolveInit.cntr1 [1] = 0x05; + static_assert_no_msg(sizeof(resolveInit.cntr1) == 2); + + resolveInit.c_call = &g_poly_call_counter; +#endif //STUB_LOGGING + resolveInit.part4 [0] = 0x3b; + resolveInit.part4 [1] = 0x10; + static_assert_no_msg(sizeof(resolveInit.part4) == 2); + + // resolveInit.mtOffset = offsetof(ResolveCacheElem,pMT) & 0xFF; + static_assert_no_msg(offsetof(ResolveCacheElem,pMT) == 0); + + resolveInit.part5 [0] = 0x75; + static_assert_no_msg(sizeof(resolveInit.part5) == 1); + + resolveInit.toMiss1 = offsetof(ResolveStub,miss)-(offsetof(ResolveStub,toMiss1)+1); + + resolveInit.part6 [0] = 0x81; + resolveInit.part6 [1] = 0x78; + static_assert_no_msg(sizeof(resolveInit.part6) == 2); + + resolveInit.tokenOffset = offsetof(ResolveCacheElem,token) & 0xFF; + + resolveInit._token = 0xcccccccc; + + resolveInit.part7 [0] = 0x75; + static_assert_no_msg(sizeof(resolveInit.part7) == 1); + + resolveInit.part8 [0] = 0x8b; + resolveInit.part8 [1] = 0x40; + static_assert_no_msg(sizeof(resolveInit.part8) == 2); + + resolveInit.targetOffset = offsetof(ResolveCacheElem,target) & 0xFF; + + resolveInit.toMiss2 = offsetof(ResolveStub,miss)-(offsetof(ResolveStub,toMiss2)+1); + + resolveInit.part9 
[0] = 0x5a; + resolveInit.part9 [1] = 0x83; + resolveInit.part9 [2] = 0xc4; + resolveInit.part9 [3] = 0x04; + resolveInit.part9 [4] = 0xff; + resolveInit.part9 [5] = 0xe0; + static_assert_no_msg(sizeof(resolveInit.part9) == 6); + + resolveInit.miss [0] = 0x5a; +// resolveInit.miss [1] = 0xb8; +// resolveInit._hashedTokenMov = 0xcccccccc; + resolveInit._slowEntryPoint [0] = 0x68; + resolveInit._tokenPush = 0xcccccccc; +#ifdef STUB_LOGGING + resolveInit.cntr2 [0] = 0xff; + resolveInit.cntr2 [1] = 0x05; + resolveInit.c_miss = &g_poly_miss_counter; +#endif //STUB_LOGGING + resolveInit.part10 [0] = 0xe9; + resolveInit._resolveWorkerDispl = 0xcccccccc; + + resolveInit.patch [0] = 0xe8; + resolveInit._backpatcherDispl = 0xcccccccc; + resolveInit.part11 [0] = 0xeb; + resolveInit.toResolveStub = (offsetof(ResolveStub, _resolveEntryPoint) - (offsetof(ResolveStub, toResolveStub) + 1)) & 0xFF; +}; + +void ResolveHolder::Initialize(PCODE resolveWorkerTarget, PCODE patcherTarget, + size_t dispatchToken, UINT32 hashedToken, + void * cacheAddr, INT32 * counterAddr) +{ + _stub = resolveInit; + + //fill in the stub specific fields + _stub._pCounter = counterAddr; + _stub._hashedToken = hashedToken << LOG2_PTRSIZE; + _stub._cacheAddress = (size_t) cacheAddr; + _stub._token = dispatchToken; +// _stub._hashedTokenMov = hashedToken; + _stub._tokenPush = dispatchToken; + _stub._resolveWorkerDispl = resolveWorkerTarget - ((PCODE) &_stub._resolveWorkerDispl + sizeof(DISPL)); + _stub._backpatcherDispl = patcherTarget - ((PCODE) &_stub._backpatcherDispl + sizeof(DISPL)); +} + +ResolveHolder* ResolveHolder::FromFailEntry(PCODE failEntry) +{ + LIMITED_METHOD_CONTRACT; + ResolveHolder* resolveHolder = (ResolveHolder*) ( failEntry - offsetof(ResolveHolder, _stub) - offsetof(ResolveStub, _failEntryPoint) ); + // _ASSERTE(resolveHolder->_stub._resolveEntryPoint[0] == resolveInit._resolveEntryPoint[0]); + return resolveHolder; +} + +ResolveHolder* ResolveHolder::FromResolveEntry(PCODE resolveEntry) +{ + LIMITED_METHOD_CONTRACT; + ResolveHolder* resolveHolder = (ResolveHolder*) ( resolveEntry - offsetof(ResolveHolder, _stub) - offsetof(ResolveStub, _resolveEntryPoint) ); + // _ASSERTE(resolveHolder->_stub._resolveEntryPoint[0] == resolveInit._resolveEntryPoint[0]); + return resolveHolder; +} + +#endif // DACCESS_COMPILE + +VirtualCallStubManager::StubKind VirtualCallStubManager::predictStubKind(PCODE stubStartAddress) +{ + SUPPORTS_DAC; +#ifdef DACCESS_COMPILE + + return SK_BREAKPOINT; // Dac always uses the slower lookup + +#else + + StubKind stubKind = SK_UNKNOWN; + + EX_TRY + { + // If stubStartAddress is completely bogus, then this might AV, + // so we protect it with SEH. An AV here is OK. + AVInRuntimeImplOkayHolder AVOkay; + + WORD firstWord = *((WORD*) stubStartAddress); + +#ifndef STUB_LOGGING + if (firstWord == 0x3981) +#else //STUB_LOGGING + if (firstWord == 0x05ff) +#endif + { + stubKind = SK_DISPATCH; + } + else if (firstWord == 0x6850) + { + stubKind = SK_LOOKUP; + } + else if (firstWord == 0x8b50) + { + stubKind = SK_RESOLVE; + } + else + { + BYTE firstByte = ((BYTE*) stubStartAddress)[0]; + BYTE secondByte = ((BYTE*) stubStartAddress)[1]; + + if ((firstByte == X86_INSTR_INT3) || + (secondByte == X86_INSTR_INT3)) + { + stubKind = SK_BREAKPOINT; + } + } + } + EX_CATCH + { + stubKind = SK_UNKNOWN; + } + EX_END_CATCH(SwallowAllExceptions); + + return stubKind; + +#endif // DACCESS_COMPILE +} + +#endif //DECLARE_DATA + +#endif // _VIRTUAL_CALL_STUB_X86_H -- cgit v1.2.3
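
A minimal standalone sketch of the pc-relative "rel32" arithmetic that rel32Decode and the
various *Holder::Initialize routines above rely on. The function names, the fixed-width
integer types standing in for TADDR/DISPL, and the example addresses are invented for
illustration only; this is not part of the runtime sources.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Decode an x86 rel32: the branch target is the address just past the 4-byte
// displacement plus the stored (signed) displacement, mirroring rel32Decode above.
static uint32_t Rel32Decode(uint32_t displAddr, int32_t stored)
{
    return (uint32_t)(displAddr + sizeof(int32_t) + (uint32_t)stored);
}

// Encode a displacement the same way the Initialize routines do:
// target minus the address of the byte that follows the displacement.
static int32_t Rel32Encode(uint32_t displAddr, uint32_t target)
{
    return (int32_t)(target - (displAddr + sizeof(int32_t)));
}

int main()
{
    const uint32_t displAddr = 0x00401000u; // hypothetical address of the DISPL field
    const uint32_t target    = 0x00402000u; // hypothetical branch target

    int32_t displ = Rel32Encode(displAddr, target);
    std::printf("stored displacement = 0x%08x\n", (uint32_t)displ); // prints 0x00000ffc
    assert(Rel32Decode(displAddr, displ) == target);                // round-trips
    return 0;
}

Running the sketch simply shows the encode/decode round trip; negative displacements
(backward branches) work the same way through two's-complement wraparound.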