Diffstat (limited to 'src/vm/i386/stublinkerx86.cpp')
-rw-r--r--  src/vm/i386/stublinkerx86.cpp  6806
1 file changed, 6806 insertions, 0 deletions
diff --git a/src/vm/i386/stublinkerx86.cpp b/src/vm/i386/stublinkerx86.cpp
new file mode 100644
index 0000000000..0037a7d3e6
--- /dev/null
+++ b/src/vm/i386/stublinkerx86.cpp
@@ -0,0 +1,6806 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+
+// NOTE on Frame Size C_ASSERT usage in this file
+// If the frame size changes, the stubs in this file have to be revisited for
+// correctness. Revisit the logic and then update the constants so that the
+// C_ASSERT fires again the next time the frame size changes. Keep these
+// hard-coded constants up to date so that a change in the frame size triggers
+// a compile-time error whenever the stub code is not altered to match.
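+//
+// A typical guard looks like the following sketch (hypothetical names):
+//
+//     C_ASSERT(SOME_STUB_FRAME_SIZE == sizeof(SomeFrame));
+//
+// so that growing a frame without revisiting the stub code breaks the build.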
+
+// Precompiled Header
+
+#include "common.h"
+
+#include "field.h"
+#include "stublink.h"
+
+#include "tls.h"
+#include "frames.h"
+#include "excep.h"
+#include "dllimport.h"
+#include "log.h"
+#include "security.h"
+#include "comdelegate.h"
+#include "array.h"
+#include "jitinterface.h"
+#include "codeman.h"
+#ifdef FEATURE_REMOTING
+#include "remoting.h"
+#endif
+#include "dbginterface.h"
+#include "eeprofinterfaces.h"
+#include "eeconfig.h"
+#include "securitydeclarative.h"
+#ifdef _TARGET_X86_
+#include "asmconstants.h"
+#endif // _TARGET_X86_
+#include "class.h"
+#include "stublink.inl"
+
+#ifdef FEATURE_COMINTEROP
+#include "comtoclrcall.h"
+#include "runtimecallablewrapper.h"
+#include "comcache.h"
+#include "olevariant.h"
+#include "notifyexternals.h"
+#endif // FEATURE_COMINTEROP
+
+#ifdef FEATURE_PREJIT
+#include "compile.h"
+#endif
+
+#if defined(_DEBUG) && defined(STUBLINKER_GENERATES_UNWIND_INFO)
+#include <psapi.h>
+#endif
+
+
+#ifndef DACCESS_COMPILE
+
+extern "C" VOID __cdecl StubRareEnable(Thread *pThread);
+#ifdef FEATURE_COMINTEROP
+extern "C" HRESULT __cdecl StubRareDisableHR(Thread *pThread);
+#endif // FEATURE_COMINTEROP
+extern "C" VOID __cdecl StubRareDisableTHROW(Thread *pThread, Frame *pFrame);
+
+extern "C" VOID __cdecl ArrayOpStubNullException(void);
+extern "C" VOID __cdecl ArrayOpStubRangeException(void);
+extern "C" VOID __cdecl ArrayOpStubTypeMismatchException(void);
+
+#if defined(_TARGET_AMD64_)
+#define EXCEPTION_HELPERS(base) \
+ extern "C" VOID __cdecl base##_RSIRDI_ScratchArea(void); \
+ extern "C" VOID __cdecl base##_ScratchArea(void); \
+ extern "C" VOID __cdecl base##_RSIRDI(void); \
+ extern "C" VOID __cdecl base(void)
+EXCEPTION_HELPERS(ArrayOpStubNullException);
+EXCEPTION_HELPERS(ArrayOpStubRangeException);
+EXCEPTION_HELPERS(ArrayOpStubTypeMismatchException);
+#undef EXCEPTION_HELPERS
+
+#if defined(_DEBUG)
+extern "C" VOID __cdecl DebugCheckStubUnwindInfo();
+#endif
+#endif // _TARGET_AMD64_
+
+// Presumably this code knows what it is doing with TLS. If we are hiding these
+// services from normal code, reveal them here.
+#ifdef TlsGetValue
+#undef TlsGetValue
+#endif
+
+#ifdef FEATURE_COMINTEROP
+Thread* __stdcall CreateThreadBlockReturnHr(ComMethodFrame *pFrame);
+#endif
+
+
+
+#ifdef _TARGET_AMD64_
+
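+// The mask below is exactly the set of callee-saved (non-volatile) integer
+// registers of the Windows x64 calling convention, RSP aside.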
+BOOL IsPreservedReg (X86Reg reg)
+{
+ UINT16 PreservedRegMask =
+ (1 << kRBX)
+ | (1 << kRBP)
+ | (1 << kRSI)
+ | (1 << kRDI)
+ | (1 << kR12)
+ | (1 << kR13)
+ | (1 << kR14)
+ | (1 << kR15);
+ return PreservedRegMask & (1 << reg);
+}
+
+#endif // _TARGET_AMD64_
+
+#ifdef _TARGET_AMD64_
+//-----------------------------------------------------------------------
+// InstructionFormat for near Jump and short Jump
+//-----------------------------------------------------------------------
+
+// Used by X86EmitTailcallWithESPAdjust and X86EmitTailcallWithSinglePop: this
+// setup part materializes the target in RAX when the jump cannot be encoded
+// directly, and X64NearJumpExecute (below) performs the actual jump.
+class X64NearJumpSetup : public InstructionFormat
+{
+ public:
+ X64NearJumpSetup() : InstructionFormat( InstructionFormat::k8|InstructionFormat::k32
+ | InstructionFormat::k64Small | InstructionFormat::k64
+ )
+ {
+ LIMITED_METHOD_CONTRACT;
+ }
+
+ virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
+ {
+ LIMITED_METHOD_CONTRACT
+ switch (refsize)
+ {
+ case k8:
+ return 0;
+
+ case k32:
+ return 0;
+
+ case k64Small:
+ return 5;
+
+ case k64:
+ return 10;
+
+ default:
+ _ASSERTE(!"unexpected refsize");
+ return 0;
+
+ }
+ }
+
+ virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
+ {
+ LIMITED_METHOD_CONTRACT
+ if (k8 == refsize)
+ {
+            // do nothing, X64NearJumpExecute will take care of this
+ }
+ else if (k32 == refsize)
+ {
+            // do nothing, X64NearJumpExecute will take care of this
+ }
+ else if (k64Small == refsize)
+ {
+ UINT64 TargetAddress = (INT64)pOutBuffer + fixedUpReference + GetSizeOfInstruction(refsize, variationCode);
+ _ASSERTE(FitsInU4(TargetAddress));
+
+ // mov eax, imm32 ; zero-extended
+ pOutBuffer[0] = 0xB8;
+ *((UINT32*)&pOutBuffer[1]) = (UINT32)TargetAddress;
+ }
+ else if (k64 == refsize)
+ {
+ // mov rax, imm64
+ pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+ pOutBuffer[1] = 0xB8;
+ *((UINT64*)&pOutBuffer[2]) = (UINT64)(((INT64)pOutBuffer) + fixedUpReference + GetSizeOfInstruction(refsize, variationCode));
+ }
+ else
+ {
+ _ASSERTE(!"unreached");
+ }
+ }
+
+ virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset)
+ {
+ STATIC_CONTRACT_NOTHROW;
+ STATIC_CONTRACT_GC_NOTRIGGER;
+ STATIC_CONTRACT_FORBID_FAULT;
+
+
+ if (fExternal)
+ {
+ switch (refsize)
+ {
+ case InstructionFormat::k8:
+ // For external, we don't have enough info to predict
+ // the offset.
+ return FALSE;
+
+ case InstructionFormat::k32:
+ return sizeof(PVOID) <= sizeof(UINT32);
+
+ case InstructionFormat::k64Small:
+ return FitsInI4(offset);
+
+ case InstructionFormat::k64:
+ // intentional fallthru
+ case InstructionFormat::kAllowAlways:
+ return TRUE;
+
+ default:
+ _ASSERTE(0);
+ return FALSE;
+ }
+ }
+ else
+ {
+ switch (refsize)
+ {
+ case InstructionFormat::k8:
+ return FitsInI1(offset);
+
+ case InstructionFormat::k32:
+ return FitsInI4(offset);
+
+ case InstructionFormat::k64Small:
+ // EmitInstruction emits a non-relative jmp for
+ // k64Small. We don't have enough info to predict the
+ // target address. (Even if we did, this would only
+ // handle the set of unsigned offsets with bit 31 set
+ // and no higher bits set, too uncommon/hard to test.)
+ return FALSE;
+
+ case InstructionFormat::k64:
+ // intentional fallthru
+ case InstructionFormat::kAllowAlways:
+ return TRUE;
+ default:
+ _ASSERTE(0);
+ return FALSE;
+ }
+ }
+ }
+};
+
+class X64NearJumpExecute : public InstructionFormat
+{
+ public:
+ X64NearJumpExecute() : InstructionFormat( InstructionFormat::k8|InstructionFormat::k32
+ | InstructionFormat::k64Small | InstructionFormat::k64
+ )
+ {
+ LIMITED_METHOD_CONTRACT;
+ }
+
+ virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
+ {
+ LIMITED_METHOD_CONTRACT
+ switch (refsize)
+ {
+ case k8:
+ return 2;
+
+ case k32:
+ return 5;
+
+ case k64Small:
+ return 3;
+
+ case k64:
+ return 3;
+
+ default:
+ _ASSERTE(!"unexpected refsize");
+ return 0;
+
+ }
+ }
+
+ virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
+ {
+ LIMITED_METHOD_CONTRACT
+ if (k8 == refsize)
+ {
+ pOutBuffer[0] = 0xeb;
+ *((__int8*)(pOutBuffer+1)) = (__int8)fixedUpReference;
+ }
+ else if (k32 == refsize)
+ {
+ pOutBuffer[0] = 0xe9;
+ *((__int32*)(pOutBuffer+1)) = (__int32)fixedUpReference;
+ }
+ else if (k64Small == refsize)
+ {
+ // REX.W jmp rax
+ pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+ pOutBuffer[1] = 0xFF;
+ pOutBuffer[2] = 0xE0;
+ }
+ else if (k64 == refsize)
+ {
+ // REX.W jmp rax
+ pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+ pOutBuffer[1] = 0xFF;
+ pOutBuffer[2] = 0xE0;
+ }
+ else
+ {
+ _ASSERTE(!"unreached");
+ }
+ }
+
+ virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset)
+ {
+ STATIC_CONTRACT_NOTHROW;
+ STATIC_CONTRACT_GC_NOTRIGGER;
+ STATIC_CONTRACT_FORBID_FAULT;
+
+
+ if (fExternal)
+ {
+ switch (refsize)
+ {
+ case InstructionFormat::k8:
+ // For external, we don't have enough info to predict
+ // the offset.
+ return FALSE;
+
+ case InstructionFormat::k32:
+ return sizeof(PVOID) <= sizeof(UINT32);
+
+ case InstructionFormat::k64Small:
+ return FitsInI4(offset);
+
+ case InstructionFormat::k64:
+ // intentional fallthru
+ case InstructionFormat::kAllowAlways:
+ return TRUE;
+
+ default:
+ _ASSERTE(0);
+ return FALSE;
+ }
+ }
+ else
+ {
+ switch (refsize)
+ {
+ case InstructionFormat::k8:
+ return FitsInI1(offset);
+
+ case InstructionFormat::k32:
+ return FitsInI4(offset);
+
+ case InstructionFormat::k64Small:
+ // EmitInstruction emits a non-relative jmp for
+ // k64Small. We don't have enough info to predict the
+ // target address. (Even if we did, this would only
+ // handle the set of unsigned offsets with bit 31 set
+ // and no higher bits set, too uncommon/hard to test.)
+ return FALSE;
+
+ case InstructionFormat::k64:
+ // intentional fallthru
+ case InstructionFormat::kAllowAlways:
+ return TRUE;
+ default:
+ _ASSERTE(0);
+ return FALSE;
+ }
+ }
+ }
+};
+
+#endif
+
+//-----------------------------------------------------------------------
+// InstructionFormat for near Jump and short Jump
+//-----------------------------------------------------------------------
+class X86NearJump : public InstructionFormat
+{
+ public:
+ X86NearJump() : InstructionFormat( InstructionFormat::k8|InstructionFormat::k32
+#ifdef _TARGET_AMD64_
+ | InstructionFormat::k64Small | InstructionFormat::k64
+#endif // _TARGET_AMD64_
+ )
+ {
+ LIMITED_METHOD_CONTRACT;
+ }
+
+ virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
+ {
+ LIMITED_METHOD_CONTRACT
+ switch (refsize)
+ {
+ case k8:
+ return 2;
+
+ case k32:
+ return 5;
+#ifdef _TARGET_AMD64_
+ case k64Small:
+ return 5 + 2;
+
+ case k64:
+ return 12;
+#endif // _TARGET_AMD64_
+ default:
+ _ASSERTE(!"unexpected refsize");
+ return 0;
+
+ }
+ }
+
+ virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
+ {
+ LIMITED_METHOD_CONTRACT
+ if (k8 == refsize)
+ {
+ pOutBuffer[0] = 0xeb;
+ *((__int8*)(pOutBuffer+1)) = (__int8)fixedUpReference;
+ }
+ else if (k32 == refsize)
+ {
+ pOutBuffer[0] = 0xe9;
+ *((__int32*)(pOutBuffer+1)) = (__int32)fixedUpReference;
+ }
+#ifdef _TARGET_AMD64_
+ else if (k64Small == refsize)
+ {
+ UINT64 TargetAddress = (INT64)pOutBuffer + fixedUpReference + GetSizeOfInstruction(refsize, variationCode);
+ _ASSERTE(FitsInU4(TargetAddress));
+
+ // mov eax, imm32 ; zero-extended
+ pOutBuffer[0] = 0xB8;
+ *((UINT32*)&pOutBuffer[1]) = (UINT32)TargetAddress;
+
+ // jmp rax
+ pOutBuffer[5] = 0xFF;
+ pOutBuffer[6] = 0xE0;
+ }
+ else if (k64 == refsize)
+ {
+ // mov rax, imm64
+ pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+ pOutBuffer[1] = 0xB8;
+ *((UINT64*)&pOutBuffer[2]) = (UINT64)(((INT64)pOutBuffer) + fixedUpReference + GetSizeOfInstruction(refsize, variationCode));
+
+ // jmp rax
+ pOutBuffer[10] = 0xFF;
+ pOutBuffer[11] = 0xE0;
+ }
+#endif // _TARGET_AMD64_
+ else
+ {
+ _ASSERTE(!"unreached");
+ }
+ }
+
+ virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset)
+ {
+ STATIC_CONTRACT_NOTHROW;
+ STATIC_CONTRACT_GC_NOTRIGGER;
+ STATIC_CONTRACT_FORBID_FAULT;
+
+
+ if (fExternal)
+ {
+ switch (refsize)
+ {
+ case InstructionFormat::k8:
+ // For external, we don't have enough info to predict
+ // the offset.
+ return FALSE;
+
+ case InstructionFormat::k32:
+ return sizeof(PVOID) <= sizeof(UINT32);
+
+#ifdef _TARGET_AMD64_
+ case InstructionFormat::k64Small:
+ return FitsInI4(offset);
+
+ case InstructionFormat::k64:
+ // intentional fallthru
+#endif
+ case InstructionFormat::kAllowAlways:
+ return TRUE;
+
+ default:
+ _ASSERTE(0);
+ return FALSE;
+ }
+ }
+ else
+ {
+ switch (refsize)
+ {
+ case InstructionFormat::k8:
+ return FitsInI1(offset);
+
+ case InstructionFormat::k32:
+#ifdef _TARGET_AMD64_
+ return FitsInI4(offset);
+#else
+ return TRUE;
+#endif
+
+#ifdef _TARGET_AMD64_
+ case InstructionFormat::k64Small:
+ // EmitInstruction emits a non-relative jmp for
+ // k64Small. We don't have enough info to predict the
+ // target address. (Even if we did, this would only
+ // handle the set of unsigned offsets with bit 31 set
+ // and no higher bits set, too uncommon/hard to test.)
+ return FALSE;
+
+ case InstructionFormat::k64:
+ // intentional fallthru
+#endif
+ case InstructionFormat::kAllowAlways:
+ return TRUE;
+ default:
+ _ASSERTE(0);
+ return FALSE;
+ }
+ }
+ }
+};
+
+
+//-----------------------------------------------------------------------
+// InstructionFormat for conditional jump. Set the variationCode
+// to members of X86CondCode.
+//-----------------------------------------------------------------------
+class X86CondJump : public InstructionFormat
+{
+ public:
+ X86CondJump(UINT allowedSizes) : InstructionFormat(allowedSizes)
+ {
+ LIMITED_METHOD_CONTRACT;
+ }
+
+ virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
+ {
+ LIMITED_METHOD_CONTRACT
+ return (refsize == k8 ? 2 : 6);
+ }
+
+ virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
+ {
+ LIMITED_METHOD_CONTRACT
+ if (refsize == k8)
+ {
+ pOutBuffer[0] = static_cast<BYTE>(0x70 | variationCode);
+ *((__int8*)(pOutBuffer+1)) = (__int8)fixedUpReference;
+ }
+ else
+ {
+ pOutBuffer[0] = 0x0f;
+ pOutBuffer[1] = static_cast<BYTE>(0x80 | variationCode);
+ *((__int32*)(pOutBuffer+2)) = (__int32)fixedUpReference;
+ }
+ }
+};
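+
+// For example, with variationCode == X86CondCode::kJZ (condition code 0x4),
+// this emits:
+//   jz rel8    ->  74 cb
+//   jz rel32   ->  0F 84 cd cd cd cd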
+
+
+//-----------------------------------------------------------------------
+// InstructionFormat for near call.
+//-----------------------------------------------------------------------
+class X86Call : public InstructionFormat
+{
+ public:
+ X86Call ()
+ : InstructionFormat( InstructionFormat::k32
+#ifdef _TARGET_AMD64_
+ | InstructionFormat::k64Small | InstructionFormat::k64
+#endif // _TARGET_AMD64_
+ )
+ {
+ LIMITED_METHOD_CONTRACT;
+ }
+
+ virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ switch (refsize)
+ {
+ case k32:
+ return 5;
+
+#ifdef _TARGET_AMD64_
+ case k64Small:
+ return 5 + 2;
+
+ case k64:
+ return 10 + 2;
+#endif // _TARGET_AMD64_
+
+ default:
+ _ASSERTE(!"unexpected refsize");
+ return 0;
+ }
+ }
+
+ virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
+ {
+ LIMITED_METHOD_CONTRACT
+
+ switch (refsize)
+ {
+ case k32:
+ pOutBuffer[0] = 0xE8;
+ *((__int32*)(1+pOutBuffer)) = (__int32)fixedUpReference;
+ break;
+
+#ifdef _TARGET_AMD64_
+ case k64Small:
+ UINT64 TargetAddress;
+
+ TargetAddress = (INT64)pOutBuffer + fixedUpReference + GetSizeOfInstruction(refsize, variationCode);
+ _ASSERTE(FitsInU4(TargetAddress));
+
+ // mov eax,<fixedUpReference> ; zero-extends
+ pOutBuffer[0] = 0xB8;
+ *((UINT32*)&pOutBuffer[1]) = (UINT32)TargetAddress;
+
+ // call rax
+ pOutBuffer[5] = 0xff;
+ pOutBuffer[6] = 0xd0;
+ break;
+
+ case k64:
+ // mov rax,<fixedUpReference>
+ pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+ pOutBuffer[1] = 0xB8;
+ *((UINT64*)&pOutBuffer[2]) = (UINT64)(((INT64)pOutBuffer) + fixedUpReference + GetSizeOfInstruction(refsize, variationCode));
+
+ // call rax
+ pOutBuffer[10] = 0xff;
+ pOutBuffer[11] = 0xd0;
+ break;
+#endif // _TARGET_AMD64_
+
+ default:
+ _ASSERTE(!"unreached");
+ break;
+ }
+ }
+
+// For x86, the default CanReach implementation will suffice. It only needs
+// to handle k32.
+#ifdef _TARGET_AMD64_
+ virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset)
+ {
+ if (fExternal)
+ {
+ switch (refsize)
+ {
+ case InstructionFormat::k32:
+ // For external, we don't have enough info to predict
+ // the offset.
+ return FALSE;
+
+ case InstructionFormat::k64Small:
+ return FitsInI4(offset);
+
+ case InstructionFormat::k64:
+ // intentional fallthru
+ case InstructionFormat::kAllowAlways:
+ return TRUE;
+
+ default:
+ _ASSERTE(0);
+ return FALSE;
+ }
+ }
+ else
+ {
+ switch (refsize)
+ {
+ case InstructionFormat::k32:
+ return FitsInI4(offset);
+
+ case InstructionFormat::k64Small:
+ // EmitInstruction emits a non-relative jmp for
+ // k64Small. We don't have enough info to predict the
+ // target address. (Even if we did, this would only
+ // handle the set of unsigned offsets with bit 31 set
+ // and no higher bits set, too uncommon/hard to test.)
+ return FALSE;
+
+ case InstructionFormat::k64:
+ // intentional fallthru
+ case InstructionFormat::kAllowAlways:
+ return TRUE;
+ default:
+ _ASSERTE(0);
+ return FALSE;
+ }
+ }
+ }
+#endif // _TARGET_AMD64_
+};
+
+
+//-----------------------------------------------------------------------
+// InstructionFormat for push imm32.
+//-----------------------------------------------------------------------
+class X86PushImm32 : public InstructionFormat
+{
+ public:
+ X86PushImm32(UINT allowedSizes) : InstructionFormat(allowedSizes)
+ {
+ LIMITED_METHOD_CONTRACT;
+ }
+
+ virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ return 5;
+ }
+
+ virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ pOutBuffer[0] = 0x68;
+        // Only an absolute push of the label address is supported. The
+        // fixedUpReference is the offset from the current point to the label;
+        // add it to the current address to obtain the label address.
+ *((__int32*)(1+pOutBuffer)) = (__int32)(fixedUpReference);
+ }
+};
+
+#if defined(_TARGET_AMD64_)
+//-----------------------------------------------------------------------
+// InstructionFormat for lea reg, [RIP relative].
+//-----------------------------------------------------------------------
+class X64LeaRIP : public InstructionFormat
+{
+ public:
+ X64LeaRIP() : InstructionFormat(InstructionFormat::k64Small)
+ {
+ LIMITED_METHOD_CONTRACT;
+ }
+
+ virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ return 7;
+ }
+
+ virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset)
+ {
+ if (fExternal)
+ {
+ switch (refsize)
+ {
+ case InstructionFormat::k64Small:
+ // For external, we don't have enough info to predict
+ // the offset.
+ return FALSE;
+
+ case InstructionFormat::k64:
+ // intentional fallthru
+ case InstructionFormat::kAllowAlways:
+ return TRUE;
+
+ default:
+ _ASSERTE(0);
+ return FALSE;
+ }
+ }
+ else
+ {
+ switch (refsize)
+ {
+ case InstructionFormat::k64Small:
+ return FitsInI4(offset);
+
+ case InstructionFormat::k64:
+ // intentional fallthru
+ case InstructionFormat::kAllowAlways:
+ return TRUE;
+
+ default:
+ _ASSERTE(0);
+ return FALSE;
+ }
+ }
+ }
+
+ virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ X86Reg reg = (X86Reg)variationCode;
+ BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+
+ if (reg >= kR8)
+ {
+ rex |= REX_MODRM_REG_EXT;
+ reg = X86RegFromAMD64Reg(reg);
+ }
+
+ pOutBuffer[0] = rex;
+ pOutBuffer[1] = 0x8D;
+ pOutBuffer[2] = 0x05 | (reg << 3);
+        // The fixedUpReference is the RIP-relative offset from the end of this
+        // instruction to the label; it goes directly into the disp32 field.
+ *((__int32*)(3+pOutBuffer)) = (__int32)(fixedUpReference);
+ }
+};
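+
+// For example, with variationCode == kRAX this emits
+//   lea rax, [rip + disp32]   ->  48 8D 05 cd cd cd cd
+// and with kR10 (REX.R set, register field reduced modulo 8)
+//   lea r10, [rip + disp32]   ->  4C 8D 15 cd cd cd cd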
+
+#endif // _TARGET_AMD64_
+
+#if defined(_TARGET_AMD64_)
+static BYTE gX64NearJumpSetup[sizeof(X64NearJumpSetup)];
+static BYTE gX64NearJumpExecute[sizeof(X64NearJumpExecute)];
+static BYTE gX64LeaRIP[sizeof(X64LeaRIP)];
+#endif
+
+static BYTE gX86NearJump[sizeof(X86NearJump)];
+static BYTE gX86CondJump[sizeof(X86CondJump)];
+static BYTE gX86Call[sizeof(X86Call)];
+static BYTE gX86PushImm32[sizeof(X86PushImm32)];
+
+/* static */ void StubLinkerCPU::Init()
+{
+ CONTRACTL
+ {
+ THROWS;
+ GC_NOTRIGGER;
+ INJECT_FAULT(COMPlusThrowOM(););
+ }
+ CONTRACTL_END;
+ new (gX86NearJump) X86NearJump();
+ new (gX86CondJump) X86CondJump( InstructionFormat::k8|InstructionFormat::k32);
+ new (gX86Call) X86Call();
+ new (gX86PushImm32) X86PushImm32(InstructionFormat::k32);
+
+#if defined(_TARGET_AMD64_)
+ new (gX64NearJumpSetup) X64NearJumpSetup();
+ new (gX64NearJumpExecute) X64NearJumpExecute();
+ new (gX64LeaRIP) X64LeaRIP();
+#endif
+}
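+
+// The formats are constructed with placement new into the static byte buffers
+// above, presumably so that no static constructors have to run at startup and
+// the objects are never destructed.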
+
+//---------------------------------------------------------------
+// Emits:
+// mov destReg, srcReg
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitMovRegReg(X86Reg destReg, X86Reg srcReg)
+{
+ STANDARD_VM_CONTRACT;
+
+#ifdef _TARGET_AMD64_
+ BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+
+ if (destReg >= kR8)
+ {
+ rex |= REX_MODRM_RM_EXT;
+ destReg = X86RegFromAMD64Reg(destReg);
+ }
+ if (srcReg >= kR8)
+ {
+ rex |= REX_MODRM_REG_EXT;
+ srcReg = X86RegFromAMD64Reg(srcReg);
+ }
+ Emit8(rex);
+#endif
+
+ Emit8(0x89);
+ Emit8(static_cast<UINT8>(0xC0 | (srcReg << 3) | destReg));
+}
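+
+// For example:
+//   mov ecx, eax   ->  89 C1      (x86)
+//   mov rcx, rax   ->  48 89 C1   (AMD64; the REX.W prefix is always emitted)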
+
+//---------------------------------------------------------------
+
+VOID StubLinkerCPU::X86EmitMovSPReg(X86Reg srcReg)
+{
+ STANDARD_VM_CONTRACT;
+ const X86Reg kESP = (X86Reg)4;
+ X86EmitMovRegReg(kESP, srcReg);
+}
+
+VOID StubLinkerCPU::X86EmitMovRegSP(X86Reg destReg)
+{
+ STANDARD_VM_CONTRACT;
+ const X86Reg kESP = (X86Reg)4;
+ X86EmitMovRegReg(destReg, kESP);
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// PUSH <reg32>
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitPushReg(X86Reg reg)
+{
+ STANDARD_VM_CONTRACT;
+
+#ifdef STUBLINKER_GENERATES_UNWIND_INFO
+ X86Reg origReg = reg;
+#endif
+
+#ifdef _TARGET_AMD64_
+ if (reg >= kR8)
+ {
+ Emit8(REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT | REX_OPCODE_REG_EXT);
+ reg = X86RegFromAMD64Reg(reg);
+ }
+#endif
+ Emit8(static_cast<UINT8>(0x50 + reg));
+
+#ifdef STUBLINKER_GENERATES_UNWIND_INFO
+ if (IsPreservedReg(origReg))
+ {
+ UnwindPushedReg(origReg);
+ }
+ else
+#endif
+ {
+ Push(sizeof(void*));
+ }
+}
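+
+// For example:
+//   push ebx / push rbx   ->  53
+//   push r12 (AMD64)      ->  49 54   (REX prefix selects the extended register)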
+
+
+//---------------------------------------------------------------
+// Emits:
+// POP <reg32>
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitPopReg(X86Reg reg)
+{
+ STANDARD_VM_CONTRACT;
+
+#ifdef _TARGET_AMD64_
+ if (reg >= kR8)
+ {
+ Emit8(REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT | REX_OPCODE_REG_EXT);
+ reg = X86RegFromAMD64Reg(reg);
+ }
+#endif // _TARGET_AMD64_
+
+ Emit8(static_cast<UINT8>(0x58 + reg));
+ Pop(sizeof(void*));
+}
+
+//---------------------------------------------------------------
+// Emits:
+// PUSH <imm32>
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitPushImm32(UINT32 value)
+{
+ STANDARD_VM_CONTRACT;
+
+ Emit8(0x68);
+ Emit32(value);
+ Push(sizeof(void*));
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// PUSH <imm32>
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitPushImm32(CodeLabel &target)
+{
+ STANDARD_VM_CONTRACT;
+
+ EmitLabelRef(&target, reinterpret_cast<X86PushImm32&>(gX86PushImm32), 0);
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// PUSH <imm8>
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitPushImm8(BYTE value)
+{
+ STANDARD_VM_CONTRACT;
+
+ Emit8(0x6a);
+ Emit8(value);
+ Push(sizeof(void*));
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// PUSH <ptr>
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitPushImmPtr(LPVOID value WIN64_ARG(X86Reg tmpReg /*=kR10*/))
+{
+ STANDARD_VM_CONTRACT;
+
+#ifdef _TARGET_AMD64_
+ X86EmitRegLoad(tmpReg, (UINT_PTR) value);
+ X86EmitPushReg(tmpReg);
+#else
+ X86EmitPushImm32((UINT_PTR) value);
+#endif
+}
+
+//---------------------------------------------------------------
+// Emits:
+// XOR <reg32>,<reg32>
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitZeroOutReg(X86Reg reg)
+{
+ STANDARD_VM_CONTRACT;
+
+#ifdef _TARGET_AMD64_
+ // 32-bit results are zero-extended, so we only need the REX byte if
+ // it's an extended register.
+ if (reg >= kR8)
+ {
+ Emit8(REX_PREFIX_BASE | REX_MODRM_REG_EXT | REX_MODRM_RM_EXT);
+ reg = X86RegFromAMD64Reg(reg);
+ }
+#endif
+ Emit8(0x33);
+ Emit8(static_cast<UINT8>(0xc0 | (reg << 3) | reg));
+}
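+
+// For example:
+//   xor eax, eax   ->  33 C0
+//   xor r8d, r8d   ->  45 33 C0   (also clears the upper 32 bits of r8)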
+
+//---------------------------------------------------------------
+// Emits:
+// jmp [reg]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitJumpReg(X86Reg reg)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+ }
+ CONTRACTL_END;
+
+ Emit8(0xff);
+ Emit8(static_cast<BYTE>(0xe0) | static_cast<BYTE>(reg));
+}
+
+//---------------------------------------------------------------
+// Emits:
+// CMP <reg32>,imm32
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitCmpRegImm32(X86Reg reg, INT32 imm32)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+ PRECONDITION((int) reg < NumX86Regs);
+ }
+ CONTRACTL_END;
+
+#ifdef _TARGET_AMD64_
+ BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+
+ if (reg >= kR8)
+ {
+ rex |= REX_OPCODE_REG_EXT;
+ reg = X86RegFromAMD64Reg(reg);
+ }
+ Emit8(rex);
+#endif
+
+ if (FitsInI1(imm32)) {
+ Emit8(0x83);
+ Emit8(static_cast<UINT8>(0xF8 | reg));
+ Emit8((INT8)imm32);
+ } else {
+ Emit8(0x81);
+ Emit8(static_cast<UINT8>(0xF8 | reg));
+ Emit32(imm32);
+ }
+}
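+
+// For example:
+//   cmp ecx, 4      ->  83 F9 04               (x86, sign-extended imm8 form)
+//   cmp rcx, 4096   ->  48 81 F9 00 10 00 00   (AMD64, imm32 form)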
+
+#ifdef _TARGET_AMD64_
+//---------------------------------------------------------------
+// Emits:
+// CMP [reg+offs], imm32
+// CMP [reg], imm32
+//---------------------------------------------------------------
+VOID StubLinkerCPU:: X86EmitCmpRegIndexImm32(X86Reg reg, INT32 offs, INT32 imm32)
+{
+ STANDARD_VM_CONTRACT;
+
+ BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+
+ if (reg >= kR8)
+ {
+ rex |= REX_OPCODE_REG_EXT;
+ reg = X86RegFromAMD64Reg(reg);
+ }
+ Emit8(rex);
+
+ X64EmitCmp32RegIndexImm32(reg, offs, imm32);
+}
+
+VOID StubLinkerCPU:: X64EmitCmp32RegIndexImm32(X86Reg reg, INT32 offs, INT32 imm32)
+#else // _TARGET_AMD64_
+VOID StubLinkerCPU:: X86EmitCmpRegIndexImm32(X86Reg reg, INT32 offs, INT32 imm32)
+#endif // _TARGET_AMD64_
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+ PRECONDITION((int) reg < NumX86Regs);
+ }
+ CONTRACTL_END;
+
+ //
+    // The binary representation of "cmp [mem], imm" is:
+    //   1000-00sw  mod-111-r/m
+ //
+
+ unsigned wBit = (FitsInI1(imm32) ? 0 : 1);
+ Emit8(static_cast<UINT8>(0x80 | wBit));
+
+ unsigned modBits;
+ if (offs == 0)
+ modBits = 0;
+ else if (FitsInI1(offs))
+ modBits = 1;
+ else
+ modBits = 2;
+
+ Emit8(static_cast<UINT8>((modBits << 6) | 0x38 | reg));
+
+ if (offs)
+ {
+ if (FitsInI1(offs))
+ Emit8((INT8)offs);
+ else
+ Emit32(offs);
+ }
+
+ if (FitsInI1(imm32))
+ Emit8((INT8)imm32);
+ else
+ Emit32(imm32);
+}
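+
+// For example, the imm32 form on x86:
+//   cmp dword ptr [ecx+8], 4096   ->  81 79 08 00 10 00 00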
+
+//---------------------------------------------------------------
+// Emits:
+#if defined(_TARGET_AMD64_)
+// mov rax, <target>
+// add rsp, imm32
+// jmp rax
+#else
+// add rsp, imm32
+// jmp <target>
+#endif
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitTailcallWithESPAdjust(CodeLabel *pTarget, INT32 imm32)
+{
+ STANDARD_VM_CONTRACT;
+
+#if defined(_TARGET_AMD64_)
+ EmitLabelRef(pTarget, reinterpret_cast<X64NearJumpSetup&>(gX64NearJumpSetup), 0);
+ X86EmitAddEsp(imm32);
+ EmitLabelRef(pTarget, reinterpret_cast<X64NearJumpExecute&>(gX64NearJumpExecute), 0);
+#else
+ X86EmitAddEsp(imm32);
+ X86EmitNearJump(pTarget);
+#endif
+}
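+
+// On AMD64 the target is materialized in rax before the stack adjustment, so
+// no scratch register or stack slot is needed once rsp has moved; for targets
+// reachable with a rel8/rel32 jump, the setup part emits nothing at all.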
+
+//---------------------------------------------------------------
+// Emits:
+#if defined(_TARGET_AMD64_)
+// mov rax, <target>
+// pop reg
+// jmp rax
+#else
+// pop reg
+// jmp <target>
+#endif
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitTailcallWithSinglePop(CodeLabel *pTarget, X86Reg reg)
+{
+ STANDARD_VM_CONTRACT;
+
+#if defined(_TARGET_AMD64_)
+ EmitLabelRef(pTarget, reinterpret_cast<X64NearJumpSetup&>(gX64NearJumpSetup), 0);
+ X86EmitPopReg(reg);
+ EmitLabelRef(pTarget, reinterpret_cast<X64NearJumpExecute&>(gX64NearJumpExecute), 0);
+#else
+ X86EmitPopReg(reg);
+ X86EmitNearJump(pTarget);
+#endif
+}
+
+//---------------------------------------------------------------
+// Emits:
+// JMP <ofs8> or
+//      JMP <ofs32>
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitNearJump(CodeLabel *target)
+{
+ STANDARD_VM_CONTRACT;
+ EmitLabelRef(target, reinterpret_cast<X86NearJump&>(gX86NearJump), 0);
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// Jcc <ofs8> or
+// Jcc <ofs32>
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitCondJump(CodeLabel *target, X86CondCode::cc condcode)
+{
+ STANDARD_VM_CONTRACT;
+ EmitLabelRef(target, reinterpret_cast<X86CondJump&>(gX86CondJump), condcode);
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// call <ofs32>
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitCall(CodeLabel *target, int iArgBytes)
+{
+ STANDARD_VM_CONTRACT;
+
+ EmitLabelRef(target, reinterpret_cast<X86Call&>(gX86Call), 0);
+
+ INDEBUG(Emit8(0x90)); // Emit a nop after the call in debug so that
+ // we know that this is a call that can directly call
+ // managed code
+#ifndef _TARGET_AMD64_
+ Pop(iArgBytes);
+#endif // !_TARGET_AMD64_
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// ret n
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitReturn(WORD wArgBytes)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+#ifdef _TARGET_AMD64_
+ PRECONDITION(wArgBytes == 0);
+#endif
+
+ }
+ CONTRACTL_END;
+
+ if (wArgBytes == 0)
+ Emit8(0xc3);
+ else
+ {
+ Emit8(0xc2);
+ Emit16(wArgBytes);
+ }
+
+ Pop(wArgBytes);
+}
+
+#ifdef _TARGET_AMD64_
+//---------------------------------------------------------------
+// Emits:
+//      lea <reg>, [rip + <label32>]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitLeaRIP(CodeLabel *target, X86Reg reg)
+{
+ STANDARD_VM_CONTRACT;
+ EmitLabelRef(target, reinterpret_cast<X64LeaRIP&>(gX64LeaRIP), reg);
+}
+#endif // _TARGET_AMD64_
+
+
+
+VOID StubLinkerCPU::X86EmitPushRegs(unsigned regSet)
+{
+ STANDARD_VM_CONTRACT;
+
+ for (X86Reg r = kEAX; r <= NumX86Regs; r = (X86Reg)(r+1))
+ if (regSet & (1U<<r))
+ {
+ X86EmitPushReg(r);
+ }
+}
+
+
+VOID StubLinkerCPU::X86EmitPopRegs(unsigned regSet)
+{
+ STANDARD_VM_CONTRACT;
+
+ for (X86Reg r = NumX86Regs; r >= kEAX; r = (X86Reg)(r-1))
+ if (regSet & (1U<<r))
+ X86EmitPopReg(r);
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// mov <dstreg>, [<srcreg> + <ofs>]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitIndexRegLoad(X86Reg dstreg,
+ X86Reg srcreg,
+ __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+ X86EmitOffsetModRM(0x8b, dstreg, srcreg, ofs);
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// mov [<dstreg> + <ofs>],<srcreg>
+//
+// Note: If you intend to use this to perform 64-bit moves to an
+//       RSP-based offset, then this method may not work. Consider
+//       using X86EmitIndexRegStoreRSP instead.
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitIndexRegStore(X86Reg dstreg,
+ __int32 ofs,
+ X86Reg srcreg)
+{
+ STANDARD_VM_CONTRACT;
+
+ if (dstreg != kESP_Unsafe)
+ X86EmitOffsetModRM(0x89, srcreg, dstreg, ofs);
+ else
+ X86EmitOp(0x89, srcreg, (X86Reg)kESP_Unsafe, ofs);
+}
+
+#if defined(_TARGET_AMD64_)
+//---------------------------------------------------------------
+// Emits:
+// mov [RSP + <ofs>],<srcreg>
+//
+// The instruction is marked as 64-bit so that the processor
+// performs an 8-byte data move to the RSP-based stack location.
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitIndexRegStoreRSP(__int32 ofs,
+ X86Reg srcreg)
+{
+ STANDARD_VM_CONTRACT;
+
+ X86EmitOp(0x89, srcreg, (X86Reg)kESP_Unsafe, ofs, (X86Reg)0, 0, k64BitOp);
+}
+
+//---------------------------------------------------------------
+// Emits:
+// mov [R12 + <ofs>],<srcreg>
+//
+// The instruction is marked as 64-bit so that the processor
+// performs an 8-byte data move to the R12-based stack location.
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitIndexRegStoreR12(__int32 ofs,
+ X86Reg srcreg)
+{
+ STANDARD_VM_CONTRACT;
+
+ X86EmitOp(0x89, srcreg, (X86Reg)kR12, ofs, (X86Reg)0, 0, k64BitOp);
+}
+#endif // defined(_TARGET_AMD64_)
+
+//---------------------------------------------------------------
+// Emits:
+// push dword ptr [<srcreg> + <ofs>]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitIndexPush(X86Reg srcreg, __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+
+ if(srcreg != kESP_Unsafe)
+ X86EmitOffsetModRM(0xff, (X86Reg)0x6, srcreg, ofs);
+ else
+ X86EmitOp(0xff,(X86Reg)0x6, srcreg, ofs);
+
+ Push(sizeof(void*));
+}
+
+//---------------------------------------------------------------
+// Emits:
+// push dword ptr [<baseReg> + <indexReg>*<scale> + <ofs>]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitBaseIndexPush(
+ X86Reg baseReg,
+ X86Reg indexReg,
+ __int32 scale,
+ __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+
+ X86EmitOffsetModRmSIB(0xff, (X86Reg)0x6, baseReg, indexReg, scale, ofs);
+ Push(sizeof(void*));
+}
+
+//---------------------------------------------------------------
+// Emits:
+// push dword ptr [ESP + <ofs>]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitSPIndexPush(__int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+
+ __int8 ofs8 = (__int8) ofs;
+ if (ofs == (__int32) ofs8)
+ {
+ // The offset can be expressed in a byte (can use the byte
+ // form of the push esp instruction)
+
+ BYTE code[] = {0xff, 0x74, 0x24, ofs8};
+ EmitBytes(code, sizeof(code));
+ }
+ else
+ {
+ // The offset requires 4 bytes (need to use the long form
+ // of the push esp instruction)
+
+ BYTE code[] = {0xff, 0xb4, 0x24, 0x0, 0x0, 0x0, 0x0};
+ *(__int32 *)(&code[3]) = ofs;
+ EmitBytes(code, sizeof(code));
+ }
+
+ Push(sizeof(void*));
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// pop dword ptr [<srcreg> + <ofs>]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitIndexPop(X86Reg srcreg, __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+
+ if(srcreg != kESP_Unsafe)
+ X86EmitOffsetModRM(0x8f, (X86Reg)0x0, srcreg, ofs);
+ else
+ X86EmitOp(0x8f,(X86Reg)0x0, srcreg, ofs);
+
+ Pop(sizeof(void*));
+}
+
+//---------------------------------------------------------------
+// Emits:
+//      lea <dstreg>, [<srcreg> + <ofs>]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitIndexLea(X86Reg dstreg, X86Reg srcreg, __int32 ofs)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+ PRECONDITION((int) dstreg < NumX86Regs);
+ PRECONDITION((int) srcreg < NumX86Regs);
+ }
+ CONTRACTL_END;
+
+ X86EmitOffsetModRM(0x8d, dstreg, srcreg, ofs);
+}
+
+#if defined(_TARGET_AMD64_)
+VOID StubLinkerCPU::X86EmitIndexLeaRSP(X86Reg dstreg, X86Reg srcreg, __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+
+ X86EmitOp(0x8d, dstreg, (X86Reg)kESP_Unsafe, ofs, (X86Reg)0, 0, k64BitOp);
+}
+#endif // defined(_TARGET_AMD64_)
+
+//---------------------------------------------------------------
+// Emits:
+// sub esp, IMM
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitSubEsp(INT32 imm32)
+{
+ STANDARD_VM_CONTRACT;
+
+ if (imm32 < 0x1000-100)
+ {
+        // As long as the adjustment is less than a page (minus a small
+        // safety fudge factor), we can just bump esp.
+ X86EmitSubEspWorker(imm32);
+ }
+ else
+ {
+ // Otherwise, must touch at least one byte for each page.
+ while (imm32 >= 0x1000)
+ {
+
+ X86EmitSubEspWorker(0x1000-4);
+ X86EmitPushReg(kEAX);
+
+ imm32 -= 0x1000;
+ }
+ if (imm32 < 500)
+ {
+ X86EmitSubEspWorker(imm32);
+ }
+ else
+ {
+ // If the remainder is large, touch the last byte - again,
+ // as a fudge factor.
+ X86EmitSubEspWorker(imm32-4);
+ X86EmitPushReg(kEAX);
+ }
+ }
+}
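+
+// The "push eax" above touches one byte per page: Windows commits stack pages
+// through a single guard page, so moving esp more than a page below the
+// committed region without touching the intervening pages would fault.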
+
+
+//---------------------------------------------------------------
+// Emits:
+// sub esp, IMM
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitSubEspWorker(INT32 imm32)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+
+ // On Win32, stacks must be faulted in one page at a time.
+ PRECONDITION(imm32 < 0x1000);
+ }
+ CONTRACTL_END;
+
+ if (!imm32)
+ {
+ // nop
+ }
+ else
+ {
+ X86_64BitOperands();
+
+ if (FitsInI1(imm32))
+ {
+ Emit16(0xec83);
+ Emit8((INT8)imm32);
+ }
+ else
+ {
+ Emit16(0xec81);
+ Emit32(imm32);
+ }
+
+ Push(imm32);
+ }
+}
+
+
+//---------------------------------------------------------------
+// Emits:
+// add esp, IMM
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitAddEsp(INT32 imm32)
+{
+ STANDARD_VM_CONTRACT;
+
+ if (!imm32)
+ {
+ // nop
+ }
+ else
+ {
+ X86_64BitOperands();
+
+ if (FitsInI1(imm32))
+ {
+ Emit16(0xc483);
+ Emit8((INT8)imm32);
+ }
+ else
+ {
+ Emit16(0xc481);
+ Emit32(imm32);
+ }
+ }
+ Pop(imm32);
+}
+
+VOID StubLinkerCPU::X86EmitAddReg(X86Reg reg, INT32 imm32)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+ PRECONDITION((int) reg < NumX86Regs);
+ }
+ CONTRACTL_END;
+
+ if (imm32 == 0)
+ return;
+
+#ifdef _TARGET_AMD64_
+ BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+
+ if (reg >= kR8)
+ {
+ rex |= REX_OPCODE_REG_EXT;
+ reg = X86RegFromAMD64Reg(reg);
+ }
+ Emit8(rex);
+#endif
+
+ if (FitsInI1(imm32)) {
+ Emit8(0x83);
+ Emit8(static_cast<UINT8>(0xC0 | reg));
+ Emit8(static_cast<UINT8>(imm32));
+ } else {
+ Emit8(0x81);
+ Emit8(static_cast<UINT8>(0xC0 | reg));
+ Emit32(imm32);
+ }
+}
+
+//---------------------------------------------------------------
+// Emits: add destReg, srcReg
+//---------------------------------------------------------------
+
+VOID StubLinkerCPU::X86EmitAddRegReg(X86Reg destReg, X86Reg srcReg)
+{
+ STANDARD_VM_CONTRACT;
+
+ X86EmitR2ROp(0x01, srcReg, destReg);
+}
+
+
+
+
+VOID StubLinkerCPU::X86EmitSubReg(X86Reg reg, INT32 imm32)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+ PRECONDITION((int) reg < NumX86Regs);
+ }
+ CONTRACTL_END;
+
+#ifdef _TARGET_AMD64_
+ BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+
+ if (reg >= kR8)
+ {
+ rex |= REX_OPCODE_REG_EXT;
+ reg = X86RegFromAMD64Reg(reg);
+ }
+ Emit8(rex);
+#endif
+
+ if (FitsInI1(imm32)) {
+ Emit8(0x83);
+ Emit8(static_cast<UINT8>(0xE8 | reg));
+ Emit8(static_cast<UINT8>(imm32));
+ } else {
+ Emit8(0x81);
+ Emit8(static_cast<UINT8>(0xE8 | reg));
+ Emit32(imm32);
+ }
+}
+
+//---------------------------------------------------------------
+// Emits: sub destReg, srcReg
+//---------------------------------------------------------------
+
+VOID StubLinkerCPU::X86EmitSubRegReg(X86Reg destReg, X86Reg srcReg)
+{
+ STANDARD_VM_CONTRACT;
+
+ X86EmitR2ROp(0x29, srcReg, destReg);
+}
+
+#if defined(_TARGET_AMD64_)
+
+//---------------------------------------------------------------
+// movdqa destXmmreg, srcXmmReg
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X64EmitMovXmmXmm(X86Reg destXmmreg, X86Reg srcXmmReg)
+{
+ STANDARD_VM_CONTRACT;
+    // There are several instructions that could be used to move between xmm
+    // registers. movaps is what the C++ compiler uses, so let's use it here too.
+ X86EmitR2ROp(X86_INSTR_MOVAPS_R_RM, destXmmreg, srcXmmReg, k32BitOp);
+}
+
+//---------------------------------------------------------------
+// movdqa XmmN, [baseReg + offset]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X64EmitMovdqaFromMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+ X64EmitMovXmmWorker(0x66, 0x6F, Xmmreg, baseReg, ofs);
+}
+
+//---------------------------------------------------------------
+// movdqa [baseReg + offset], XmmN
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X64EmitMovdqaToMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+ X64EmitMovXmmWorker(0x66, 0x7F, Xmmreg, baseReg, ofs);
+}
+
+//---------------------------------------------------------------
+// movsd XmmN, [baseReg + offset]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X64EmitMovSDFromMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+ X64EmitMovXmmWorker(0xF2, 0x10, Xmmreg, baseReg, ofs);
+}
+
+//---------------------------------------------------------------
+// movsd [baseReg + offset], XmmN
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X64EmitMovSDToMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+ X64EmitMovXmmWorker(0xF2, 0x11, Xmmreg, baseReg, ofs);
+}
+
+//---------------------------------------------------------------
+// movss XmmN, [baseReg + offset]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X64EmitMovSSFromMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+ X64EmitMovXmmWorker(0xF3, 0x10, Xmmreg, baseReg, ofs);
+}
+
+//---------------------------------------------------------------
+// movss [baseReg + offset], XmmN
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X64EmitMovSSToMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+ X64EmitMovXmmWorker(0xF3, 0x11, Xmmreg, baseReg, ofs);
+}
+
+//---------------------------------------------------------------
+// Helper method for emitting XMM from/to memory moves
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X64EmitMovXmmWorker(BYTE prefix, BYTE opcode, X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+
+ BYTE codeBuffer[10];
+ unsigned int nBytes = 0;
+
+    // Set up the legacy prefix byte (0x66 for movdqa, 0xF2 for movsd, 0xF3 for movss)
+ codeBuffer[nBytes++] = prefix;
+
+    // By default, assume we don't have to emit the REX byte.
+ bool fEmitRex = false;
+
+ BYTE rex = REX_PREFIX_BASE;
+
+ if (baseReg >= kR8)
+ {
+ rex |= REX_MODRM_RM_EXT;
+ baseReg = X86RegFromAMD64Reg(baseReg);
+ fEmitRex = true;
+ }
+ if (Xmmreg >= kXMM8)
+ {
+ rex |= REX_MODRM_REG_EXT;
+ Xmmreg = X86RegFromAMD64Reg(Xmmreg);
+ fEmitRex = true;
+ }
+
+ if (fEmitRex == true)
+ {
+ codeBuffer[nBytes++] = rex;
+ }
+
+ // Next, specify the two byte opcode - first byte is always 0x0F.
+ codeBuffer[nBytes++] = 0x0F;
+ codeBuffer[nBytes++] = opcode;
+
+ BYTE modrm = static_cast<BYTE>((Xmmreg << 3) | baseReg);
+ bool fOffsetFitsInSignedByte = FitsInI1(ofs)?true:false;
+
+ if (fOffsetFitsInSignedByte)
+ codeBuffer[nBytes++] = 0x40|modrm;
+ else
+ codeBuffer[nBytes++] = 0x80|modrm;
+
+ // If we are dealing with RSP or R12 as the baseReg, we need to emit the SIB byte.
+ if ((baseReg == (X86Reg)4 /*kRSP*/) || (baseReg == kR12))
+ {
+ codeBuffer[nBytes++] = 0x24;
+ }
+
+ // Finally, specify the offset
+ if (fOffsetFitsInSignedByte)
+ {
+ codeBuffer[nBytes++] = (BYTE)ofs;
+ }
+ else
+ {
+ *((__int32*)(codeBuffer+nBytes)) = ofs;
+ nBytes += 4;
+ }
+
+ _ASSERTE(nBytes <= _countof(codeBuffer));
+
+ // Lastly, emit the encoded bytes
+ EmitBytes(codeBuffer, nBytes);
+}
+
+#endif // defined(_TARGET_AMD64_)
+
+//---------------------------------------------------------------
+// Emits a MOD/RM for accessing a dword at [<indexreg> + ofs32]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitOffsetModRM(BYTE opcode, X86Reg opcodereg, X86Reg indexreg, __int32 ofs)
+{
+ STANDARD_VM_CONTRACT;
+
+ BYTE codeBuffer[7];
+ BYTE* code = codeBuffer;
+ int nBytes = 0;
+#ifdef _TARGET_AMD64_
+ code++;
+ //
+ // code points to base X86 instruction,
+ // codeBuffer points to full AMD64 instruction
+ //
+ BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+
+ if (indexreg >= kR8)
+ {
+ rex |= REX_MODRM_RM_EXT;
+ indexreg = X86RegFromAMD64Reg(indexreg);
+ }
+ if (opcodereg >= kR8)
+ {
+ rex |= REX_MODRM_REG_EXT;
+ opcodereg = X86RegFromAMD64Reg(opcodereg);
+ }
+
+ nBytes++;
+ code[-1] = rex;
+#endif
+ code[0] = opcode;
+ nBytes++;
+ BYTE modrm = static_cast<BYTE>((opcodereg << 3) | indexreg);
+ if (ofs == 0 && indexreg != kEBP)
+ {
+ code[1] = modrm;
+ nBytes++;
+ EmitBytes(codeBuffer, nBytes);
+ }
+ else if (FitsInI1(ofs))
+ {
+ code[1] = 0x40|modrm;
+ code[2] = (BYTE)ofs;
+ nBytes += 2;
+ EmitBytes(codeBuffer, nBytes);
+ }
+ else
+ {
+ code[1] = 0x80|modrm;
+ *((__int32*)(2+code)) = ofs;
+ nBytes += 5;
+ EmitBytes(codeBuffer, nBytes);
+ }
+}
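+
+// For example, X86EmitIndexRegLoad(kEAX, kESI, 8) produces:
+//   mov eax, [esi+8]   ->  8B 46 08      (x86)
+//   mov rax, [rsi+8]   ->  48 8B 46 08   (AMD64; REX.W is always emitted here)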
+
+//---------------------------------------------------------------
+// Emits a MOD/RM for accessing a dword at [<baseReg> + <indexReg>*<scale> + ofs32]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitOffsetModRmSIB(BYTE opcode, X86Reg opcodeOrReg, X86Reg baseReg, X86Reg indexReg, __int32 scale, __int32 ofs)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+ PRECONDITION(scale == 1 || scale == 2 || scale == 4 || scale == 8);
+ PRECONDITION(indexReg != kESP_Unsafe);
+ }
+ CONTRACTL_END;
+
+ BYTE codeBuffer[8];
+ BYTE* code = codeBuffer;
+ int nBytes = 0;
+
+#ifdef _TARGET_AMD64_
+ _ASSERTE(!"NYI");
+#endif
+ code[0] = opcode;
+ nBytes++;
+
+ BYTE scaleEnc = 0;
+ switch(scale)
+ {
+ case 1: scaleEnc = 0; break;
+ case 2: scaleEnc = 1; break;
+ case 4: scaleEnc = 2; break;
+ case 8: scaleEnc = 3; break;
+ default: _ASSERTE(!"Unexpected");
+ }
+
+ BYTE sib = static_cast<BYTE>((scaleEnc << 6) | (indexReg << 3) | baseReg);
+
+ if (FitsInI1(ofs))
+ {
+ code[1] = static_cast<BYTE>(0x44 | (opcodeOrReg << 3));
+ code[2] = sib;
+ code[3] = (BYTE)ofs;
+ nBytes += 3;
+ EmitBytes(codeBuffer, nBytes);
+ }
+ else
+ {
+ code[1] = static_cast<BYTE>(0x84 | (opcodeOrReg << 3));
+ code[2] = sib;
+ *(__int32*)(&code[3]) = ofs;
+ nBytes += 6;
+ EmitBytes(codeBuffer, nBytes);
+ }
+}
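+
+// For example, X86EmitBaseIndexPush(kEAX, kECX, 4, 8) produces:
+//   push dword ptr [eax+ecx*4+8]   ->  FF 74 88 08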
+
+
+
+VOID StubLinkerCPU::X86EmitRegLoad(X86Reg reg, UINT_PTR imm)
+{
+ STANDARD_VM_CONTRACT;
+
+ if (!imm)
+ {
+ X86EmitZeroOutReg(reg);
+ return;
+ }
+
+ UINT cbimm = sizeof(void*);
+
+#ifdef _TARGET_AMD64_
+ // amd64 zero-extends all 32-bit operations. If the immediate will fit in
+ // 32 bits, use the smaller encoding.
+
+ if (reg >= kR8 || !FitsInU4(imm))
+ {
+ BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+ if (reg >= kR8)
+ {
+ rex |= REX_MODRM_RM_EXT;
+ reg = X86RegFromAMD64Reg(reg);
+ }
+ Emit8(rex);
+ }
+ else
+ {
+ // amd64 is little endian, so the &imm below will correctly read off
+ // the low 4 bytes.
+ cbimm = sizeof(UINT32);
+ }
+#endif // _TARGET_AMD64_
+ Emit8(0xB8 | (BYTE)reg);
+ EmitBytes((BYTE*)&imm, cbimm);
+}
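+
+// For example:
+//   X86EmitRegLoad(kEAX, 1)   ->  B8 01 00 00 00   (zero-extended on AMD64)
+//   X86EmitRegLoad(kEAX, 0x123456789) on AMD64
+//                             ->  48 B8 89 67 45 23 01 00 00 00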
+
+
+//---------------------------------------------------------------
+// Emits the most efficient form of the operation:
+//
+// opcode altreg, [basereg + scaledreg*scale + ofs]
+//
+// or
+//
+// opcode [basereg + scaledreg*scale + ofs], altreg
+//
+// (the opcode determines which comes first.)
+//
+//
+// Limitations:
+//
+// scale must be 0,1,2,4 or 8.
+// if scale == 0, scaledreg is ignored.
+// basereg and altreg may be equal to 4 (ESP) but scaledreg cannot
+// for some opcodes, "altreg" may actually select an operation
+// rather than a second register argument.
+// if basereg is EBP, scale must be 0.
+//
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitOp(WORD opcode,
+ X86Reg altreg,
+ X86Reg basereg,
+ __int32 ofs /*=0*/,
+ X86Reg scaledreg /*=0*/,
+ BYTE scale /*=0*/
+ AMD64_ARG(X86OperandSize OperandSize /*= k32BitOp*/))
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+
+ // All 2-byte opcodes start with 0x0f.
+ PRECONDITION(!(opcode >> 8) || (opcode & 0xff) == 0x0f);
+
+ PRECONDITION(scale == 0 || scale == 1 || scale == 2 || scale == 4 || scale == 8);
+ PRECONDITION(scaledreg != (X86Reg)4);
+ PRECONDITION(!(basereg == kEBP && scale != 0));
+
+ PRECONDITION( ((UINT)basereg) < NumX86Regs );
+ PRECONDITION( ((UINT)scaledreg) < NumX86Regs );
+ PRECONDITION( ((UINT)altreg) < NumX86Regs );
+ }
+ CONTRACTL_END;
+
+#ifdef _TARGET_AMD64_
+ if ( k64BitOp == OperandSize
+ || altreg >= kR8
+ || basereg >= kR8
+ || scaledreg >= kR8)
+ {
+ BYTE rex = REX_PREFIX_BASE;
+
+ if (k64BitOp == OperandSize)
+ rex |= REX_OPERAND_SIZE_64BIT;
+
+ if (altreg >= kR8)
+ {
+ rex |= REX_MODRM_REG_EXT;
+ altreg = X86RegFromAMD64Reg(altreg);
+ }
+
+ if (basereg >= kR8)
+ {
+ // basereg might be in the modrm or sib fields. This will be
+ // decided below, but the encodings are the same either way.
+ _ASSERTE(REX_SIB_BASE_EXT == REX_MODRM_RM_EXT);
+ rex |= REX_SIB_BASE_EXT;
+ basereg = X86RegFromAMD64Reg(basereg);
+ }
+
+ if (scaledreg >= kR8)
+ {
+ rex |= REX_SIB_INDEX_EXT;
+ scaledreg = X86RegFromAMD64Reg(scaledreg);
+ }
+
+ Emit8(rex);
+ }
+#endif // _TARGET_AMD64_
+
+ BYTE modrmbyte = static_cast<BYTE>(altreg << 3);
+ BOOL fNeedSIB = FALSE;
+ BYTE SIBbyte = 0;
+ BYTE ofssize;
+ BYTE scaleselect= 0;
+
+ if (ofs == 0 && basereg != kEBP)
+ {
+ ofssize = 0; // Don't change this constant!
+ }
+ else if (FitsInI1(ofs))
+ {
+ ofssize = 1; // Don't change this constant!
+ }
+ else
+ {
+ ofssize = 2; // Don't change this constant!
+ }
+
+ switch (scale)
+ {
+ case 1: scaleselect = 0; break;
+ case 2: scaleselect = 1; break;
+ case 4: scaleselect = 2; break;
+ case 8: scaleselect = 3; break;
+ }
+
+ if (scale == 0 && basereg != (X86Reg)4 /*ESP*/)
+ {
+ // [basereg + ofs]
+ modrmbyte |= basereg | (ofssize << 6);
+ }
+ else if (scale == 0)
+ {
+ // [esp + ofs]
+ _ASSERTE(basereg == (X86Reg)4);
+ fNeedSIB = TRUE;
+ SIBbyte = 0044;
+
+ modrmbyte |= 4 | (ofssize << 6);
+ }
+ else
+ {
+
+ //[basereg + scaledreg*scale + ofs]
+
+ modrmbyte |= 0004 | (ofssize << 6);
+ fNeedSIB = TRUE;
+ SIBbyte = static_cast<BYTE>((scaleselect << 6) | (scaledreg << 3) | basereg);
+
+ }
+
+ //Some sanity checks:
+ _ASSERTE(!(fNeedSIB && basereg == kEBP)); // EBP not valid as a SIB base register.
+ _ASSERTE(!( (!fNeedSIB) && basereg == (X86Reg)4 )) ; // ESP addressing requires SIB byte
+
+ Emit8((BYTE)opcode);
+
+ if (opcode >> 8)
+ Emit8(opcode >> 8);
+
+ Emit8(modrmbyte);
+ if (fNeedSIB)
+ {
+ Emit8(SIBbyte);
+ }
+ switch (ofssize)
+ {
+ case 0: break;
+ case 1: Emit8( (__int8)ofs ); break;
+ case 2: Emit32( ofs ); break;
+ default: _ASSERTE(!"Can't get here.");
+ }
+}
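+
+// For example (x86):
+//   X86EmitOp(0x8b, kEAX, kEBP)              ->  8B 45 00   (EBP base forces a zero disp8)
+//   X86EmitOp(0x8b, kEAX, (X86Reg)4 /*ESP*/) ->  8B 04 24   (ESP base forces a SIB byte)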
+
+
+// Emits
+//
+// opcode altreg, modrmreg
+//
+// or
+//
+// opcode modrmreg, altreg
+//
+// (the opcode determines which one comes first)
+//
+// For single-operand opcodes, "altreg" actually selects
+// an operation rather than a register.
+
+VOID StubLinkerCPU::X86EmitR2ROp (WORD opcode,
+ X86Reg altreg,
+ X86Reg modrmreg
+ AMD64_ARG(X86OperandSize OperandSize /*= k64BitOp*/)
+ )
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+
+ // All 2-byte opcodes start with 0x0f.
+ PRECONDITION(!(opcode >> 8) || (opcode & 0xff) == 0x0f);
+
+ PRECONDITION( ((UINT)altreg) < NumX86Regs );
+ PRECONDITION( ((UINT)modrmreg) < NumX86Regs );
+ }
+ CONTRACTL_END;
+
+#ifdef _TARGET_AMD64_
+ BYTE rex = 0;
+
+ if (modrmreg >= kR8)
+ {
+ rex |= REX_MODRM_RM_EXT;
+ modrmreg = X86RegFromAMD64Reg(modrmreg);
+ }
+
+ if (altreg >= kR8)
+ {
+ rex |= REX_MODRM_REG_EXT;
+ altreg = X86RegFromAMD64Reg(altreg);
+ }
+
+ if (k64BitOp == OperandSize)
+ rex |= REX_OPERAND_SIZE_64BIT;
+
+ if (rex)
+ Emit8(REX_PREFIX_BASE | rex);
+#endif // _TARGET_AMD64_
+
+ Emit8((BYTE)opcode);
+
+ if (opcode >> 8)
+ Emit8(opcode >> 8);
+
+ Emit8(static_cast<UINT8>(0300 | (altreg << 3) | modrmreg));
+}
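+
+// For example, X86EmitAddRegReg(kECX, kEAX) produces:
+//   add ecx, eax   ->  01 C1      (x86)
+//   add rcx, rax   ->  48 01 C1   (AMD64; OperandSize defaults to k64BitOp)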
+
+
+//---------------------------------------------------------------
+// Emits:
+// op altreg, [esp+ofs]
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitEspOffset(BYTE opcode,
+ X86Reg altreg,
+ __int32 ofs
+ AMD64_ARG(X86OperandSize OperandSize /*= k64BitOp*/)
+ )
+{
+ STANDARD_VM_CONTRACT;
+
+ BYTE codeBuffer[8];
+ BYTE *code = codeBuffer;
+ int nBytes;
+
+#ifdef _TARGET_AMD64_
+ BYTE rex = 0;
+
+ if (k64BitOp == OperandSize)
+ rex |= REX_OPERAND_SIZE_64BIT;
+
+ if (altreg >= kR8)
+ {
+ rex |= REX_MODRM_REG_EXT;
+ altreg = X86RegFromAMD64Reg(altreg);
+ }
+
+ if (rex)
+ {
+ *code = (REX_PREFIX_BASE | rex);
+ code++;
+ nBytes = 1;
+ }
+ else
+#endif // _TARGET_AMD64_
+ {
+ nBytes = 0;
+ }
+
+ code[0] = opcode;
+ BYTE modrm = static_cast<BYTE>((altreg << 3) | 004);
+ if (ofs == 0)
+ {
+ code[1] = modrm;
+ code[2] = 0044;
+ EmitBytes(codeBuffer, 3 + nBytes);
+ }
+ else if (FitsInI1(ofs))
+ {
+ code[1] = 0x40|modrm;
+ code[2] = 0044;
+ code[3] = (BYTE)ofs;
+ EmitBytes(codeBuffer, 4 + nBytes);
+ }
+ else
+ {
+ code[1] = 0x80|modrm;
+ code[2] = 0044;
+ *((__int32*)(3+code)) = ofs;
+ EmitBytes(codeBuffer, 7 + nBytes);
+ }
+
+}
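+
+// For example, on AMD64 with the default 64-bit operand size:
+//   X86EmitEspOffset(0x8b, kEAX, 0x20)   ->  48 8B 44 24 20   (mov rax, [rsp+0x20])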
+
+//---------------------------------------------------------------
+
+VOID StubLinkerCPU::X86EmitPushEBPframe()
+{
+ STANDARD_VM_CONTRACT;
+
+ // push ebp
+ X86EmitPushReg(kEBP);
+ // mov ebp,esp
+ X86EmitMovRegSP(kEBP);
+}
+
+#ifdef _DEBUG
+//---------------------------------------------------------------
+// Emits:
+// mov <reg32>,0xcccccccc
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitDebugTrashReg(X86Reg reg)
+{
+ STANDARD_VM_CONTRACT;
+
+#ifdef _TARGET_AMD64_
+ BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+
+ if (reg >= kR8)
+ {
+ rex |= REX_OPCODE_REG_EXT;
+ reg = X86RegFromAMD64Reg(reg);
+ }
+ Emit8(rex);
+ Emit8(0xb8|reg);
+ Emit64(0xcccccccccccccccc);
+#else
+ Emit8(static_cast<UINT8>(0xb8 | reg));
+ Emit32(0xcccccccc);
+#endif
+}
+#endif //_DEBUG
+
+
+// Get the X86Reg index of an argument register based on its offset into ArgumentRegisters
+X86Reg GetX86ArgumentRegisterFromOffset(size_t ofs)
+{
+ CONTRACT(X86Reg)
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+
+ }
+ CONTRACT_END;
+
+ #define ARGUMENT_REGISTER(reg) if (ofs == offsetof(ArgumentRegisters, reg)) RETURN k##reg ;
+ ENUM_ARGUMENT_REGISTERS();
+ #undef ARGUMENT_REGISTER
+
+ _ASSERTE(0);//Can't get here.
+ RETURN kEBP;
+}
+
+
+#ifdef _TARGET_AMD64_
+static const X86Reg c_argRegs[] = {
+ #define ARGUMENT_REGISTER(regname) k##regname,
+ ENUM_ARGUMENT_REGISTERS()
+ #undef ARGUMENT_REGISTER
+};
+#endif
+
+
+#ifndef CROSSGEN_COMPILE
+
+#if defined(_DEBUG) && (defined(_TARGET_AMD64_) || defined(_TARGET_X86_)) && !defined(FEATURE_PAL)
+void StubLinkerCPU::EmitJITHelperLoggingThunk(PCODE pJitHelper, LPVOID helperFuncCount)
+{
+ STANDARD_VM_CONTRACT;
+
+ VMHELPCOUNTDEF* pHelperFuncCount = (VMHELPCOUNTDEF*)helperFuncCount;
+/*
+ push rcx
+ mov rcx, &(pHelperFuncCount->count)
+ lock inc [rcx]
+ pop rcx
+#ifdef _TARGET_AMD64_
+ mov rax, <pJitHelper>
+ jmp rax
+#else
+ jmp <pJitHelper>
+#endif
+*/
+
+ // push rcx
+ // mov rcx, &(pHelperFuncCount->count)
+ X86EmitPushReg(kECX);
+ X86EmitRegLoad(kECX, (UINT_PTR)(&(pHelperFuncCount->count)));
+
+ // lock inc [rcx]
+ BYTE lock_inc_RCX[] = { 0xf0, 0xff, 0x01 };
+ EmitBytes(lock_inc_RCX, sizeof(lock_inc_RCX));
+
+#if defined(_TARGET_AMD64_)
+ // mov rax, <pJitHelper>
+ // pop rcx
+ // jmp rax
+#else
+ // pop rcx
+ // jmp <pJitHelper>
+#endif
+ X86EmitTailcallWithSinglePop(NewExternalCodeLabel(pJitHelper), kECX);
+}
+#endif // _DEBUG && (_TARGET_AMD64_ || _TARGET_X86_) && !FEATURE_PAL
+
+#ifndef FEATURE_IMPLICIT_TLS
+//---------------------------------------------------------------
+// Emit code to store the current Thread structure in dstreg
+// preservedRegSet is a set of registers to be preserved
+// TRASHES EAX, EDX, ECX unless they are in preservedRegSet.
+// RESULTS dstreg = current Thread
+//---------------------------------------------------------------
+VOID StubLinkerCPU::X86EmitTLSFetch(DWORD idx, X86Reg dstreg, unsigned preservedRegSet)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+
+ // It doesn't make sense to have the destination register be preserved
+ PRECONDITION((preservedRegSet & (1<<dstreg)) == 0);
+ AMD64_ONLY(PRECONDITION(dstreg < 8)); // code below doesn't support high registers
+ }
+ CONTRACTL_END;
+
+ TLSACCESSMODE mode = GetTLSAccessMode(idx);
+
+#ifdef _DEBUG
+ {
+ static BOOL f = TRUE;
+ f = !f;
+ if (f)
+ {
+ mode = TLSACCESS_GENERIC;
+ }
+ }
+#endif
+
+ switch (mode)
+ {
+ case TLSACCESS_WNT:
+ {
+ unsigned __int32 tlsofs = offsetof(TEB, TlsSlots) + (idx * sizeof(void*));
+#ifdef _TARGET_AMD64_
+ BYTE code[] = {0x65,0x48,0x8b,0x04,0x25}; // mov dstreg, qword ptr gs:[IMM32]
+ static const int regByteIndex = 3;
+#elif defined(_TARGET_X86_)
+ BYTE code[] = {0x64,0x8b,0x05}; // mov dstreg, dword ptr fs:[IMM32]
+ static const int regByteIndex = 2;
+#endif
+ code[regByteIndex] |= (dstreg << 3);
+
+ EmitBytes(code, sizeof(code));
+ Emit32(tlsofs);
+ }
+ break;
+
+ case TLSACCESS_GENERIC:
+
+ X86EmitPushRegs(preservedRegSet & ((1<<kEAX)|(1<<kEDX)|(1<<kECX)));
+
+ X86EmitPushImm32(idx);
+#ifdef _TARGET_AMD64_
+ X86EmitPopReg (kECX); // arg in reg
+#endif
+
+        // call TlsGetValue
+ X86EmitCall(NewExternalCodeLabel((LPVOID) TlsGetValue), sizeof(void*));
+
+ // mov dstreg, eax
+ X86EmitMovRegReg(dstreg, kEAX);
+
+ X86EmitPopRegs(preservedRegSet & ((1<<kEAX)|(1<<kEDX)|(1<<kECX)));
+
+ break;
+
+ default:
+ _ASSERTE(0);
+ }
+
+#ifdef _DEBUG
+ // Trash caller saved regs that we were not told to preserve, and that aren't the dstreg.
+ preservedRegSet |= 1<<dstreg;
+ if (!(preservedRegSet & (1<<kEAX)))
+ X86EmitDebugTrashReg(kEAX);
+ if (!(preservedRegSet & (1<<kEDX)))
+ X86EmitDebugTrashReg(kEDX);
+ if (!(preservedRegSet & (1<<kECX)))
+ X86EmitDebugTrashReg(kECX);
+#endif
+
+}
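+
+// A sketch of the TLSACCESS_WNT fast path emitted above, assuming
+// dstreg == kEAX and an index within the inline TEB slot range:
+//
+//   x86:   64 8B 05 <imm32>         mov eax, dword ptr fs:[TlsSlots + idx*4]
+//   amd64: 65 48 8B 04 25 <imm32>   mov rax, qword ptr gs:[TlsSlots + idx*8]
+//
+// The TLSACCESS_GENERIC path instead calls TlsGetValue and copies the
+// result into dstreg, saving/restoring the requested caller-saved registers.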
+#endif // FEATURE_IMPLICIT_TLS
+
+VOID StubLinkerCPU::X86EmitCurrentThreadFetch(X86Reg dstreg, unsigned preservedRegSet)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+
+ // It doesn't make sense to have the destination register be preserved
+ PRECONDITION((preservedRegSet & (1<<dstreg)) == 0);
+ AMD64_ONLY(PRECONDITION(dstreg < 8)); // code below doesn't support high registers
+ }
+ CONTRACTL_END;
+
+#ifdef FEATURE_IMPLICIT_TLS
+
+ X86EmitPushRegs(preservedRegSet & ((1<<kEAX)|(1<<kEDX)|(1<<kECX)));
+
+ //TODO: Inline the instruction instead of a call
+ // call GetThread
+ X86EmitCall(NewExternalCodeLabel((LPVOID) GetThread), sizeof(void*));
+
+ // mov dstreg, eax
+ X86EmitMovRegReg(dstreg, kEAX);
+
+ X86EmitPopRegs(preservedRegSet & ((1<<kEAX)|(1<<kEDX)|(1<<kECX)));
+
+#ifdef _DEBUG
+ // Trash caller saved regs that we were not told to preserve, and that aren't the dstreg.
+ preservedRegSet |= 1<<dstreg;
+ if (!(preservedRegSet & (1<<kEAX)))
+ X86EmitDebugTrashReg(kEAX);
+ if (!(preservedRegSet & (1<<kEDX)))
+ X86EmitDebugTrashReg(kEDX);
+ if (!(preservedRegSet & (1<<kECX)))
+ X86EmitDebugTrashReg(kECX);
+#endif // _DEBUG
+
+#else // FEATURE_IMPLICIT_TLS
+
+ X86EmitTLSFetch(GetThreadTLSIndex(), dstreg, preservedRegSet);
+
+#endif // FEATURE_IMPLICIT_TLS
+
+}
+
+VOID StubLinkerCPU::X86EmitCurrentAppDomainFetch(X86Reg dstreg, unsigned preservedRegSet)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+
+ // It doesn't make sense to have the destination register be preserved
+ PRECONDITION((preservedRegSet & (1<<dstreg)) == 0);
+ AMD64_ONLY(PRECONDITION(dstreg < 8)); // code below doesn't support high registers
+ }
+ CONTRACTL_END;
+
+#ifdef FEATURE_IMPLICIT_TLS
+ X86EmitPushRegs(preservedRegSet & ((1<<kEAX)|(1<<kEDX)|(1<<kECX)));
+
+ //TODO: Inline the instruction instead of a call
+    // call GetAppDomain
+ X86EmitCall(NewExternalCodeLabel((LPVOID) GetAppDomain), sizeof(void*));
+
+ // mov dstreg, eax
+ X86EmitMovRegReg(dstreg, kEAX);
+
+ X86EmitPopRegs(preservedRegSet & ((1<<kEAX)|(1<<kEDX)|(1<<kECX)));
+
+#ifdef _DEBUG
+ // Trash caller saved regs that we were not told to preserve, and that aren't the dstreg.
+ preservedRegSet |= 1<<dstreg;
+ if (!(preservedRegSet & (1<<kEAX)))
+ X86EmitDebugTrashReg(kEAX);
+ if (!(preservedRegSet & (1<<kEDX)))
+ X86EmitDebugTrashReg(kEDX);
+ if (!(preservedRegSet & (1<<kECX)))
+ X86EmitDebugTrashReg(kECX);
+#endif
+
+#else // FEATURE_IMPLICIT_TLS
+
+ X86EmitTLSFetch(GetAppDomainTLSIndex(), dstreg, preservedRegSet);
+
+#endif // FEATURE_IMPLICIT_TLS
+}
+
+#ifdef _TARGET_X86_
+
+#ifdef PROFILING_SUPPORTED
+VOID StubLinkerCPU::EmitProfilerComCallProlog(TADDR pFrameVptr, X86Reg regFrame)
+{
+ STANDARD_VM_CONTRACT;
+
+ if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr())
+ {
+ // Load the methoddesc into ECX (UMThkCallFrame->m_pvDatum->m_pMD)
+ X86EmitIndexRegLoad(kECX, regFrame, UMThkCallFrame::GetOffsetOfDatum());
+ X86EmitIndexRegLoad(kECX, kECX, UMEntryThunk::GetOffsetOfMethodDesc());
+
+ // Push arguments and notify profiler
+ X86EmitPushImm32(COR_PRF_TRANSITION_CALL); // Reason
+ X86EmitPushReg(kECX); // MethodDesc*
+ X86EmitCall(NewExternalCodeLabel((LPVOID) ProfilerUnmanagedToManagedTransitionMD), 2*sizeof(void*));
+ }
+
+#ifdef FEATURE_COMINTEROP
+ else if (pFrameVptr == ComMethodFrame::GetMethodFrameVPtr())
+ {
+ // Load the methoddesc into ECX (Frame->m_pvDatum->m_pMD)
+ X86EmitIndexRegLoad(kECX, regFrame, ComMethodFrame::GetOffsetOfDatum());
+ X86EmitIndexRegLoad(kECX, kECX, ComCallMethodDesc::GetOffsetOfMethodDesc());
+
+ // Push arguments and notify profiler
+ X86EmitPushImm32(COR_PRF_TRANSITION_CALL); // Reason
+ X86EmitPushReg(kECX); // MethodDesc*
+ X86EmitCall(NewExternalCodeLabel((LPVOID) ProfilerUnmanagedToManagedTransitionMD), 2*sizeof(void*));
+ }
+#endif // FEATURE_COMINTEROP
+
+ // Unrecognized frame vtbl
+ else
+ {
+        _ASSERTE(!"Unrecognized vtable passed to EmitComMethodStubProlog with profiling turned on.");
+ }
+}
+
+
+VOID StubLinkerCPU::EmitProfilerComCallEpilog(TADDR pFrameVptr, X86Reg regFrame)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+#ifdef FEATURE_COMINTEROP
+ PRECONDITION(pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr() || pFrameVptr == ComMethodFrame::GetMethodFrameVPtr());
+#else
+ PRECONDITION(pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr());
+#endif // FEATURE_COMINTEROP
+ }
+ CONTRACTL_END;
+
+ if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr())
+ {
+ // Load the methoddesc into ECX (UMThkCallFrame->m_pvDatum->m_pMD)
+ X86EmitIndexRegLoad(kECX, regFrame, UMThkCallFrame::GetOffsetOfDatum());
+ X86EmitIndexRegLoad(kECX, kECX, UMEntryThunk::GetOffsetOfMethodDesc());
+
+ // Push arguments and notify profiler
+ X86EmitPushImm32(COR_PRF_TRANSITION_RETURN); // Reason
+ X86EmitPushReg(kECX); // MethodDesc*
+ X86EmitCall(NewExternalCodeLabel((LPVOID) ProfilerManagedToUnmanagedTransitionMD), 2*sizeof(void*));
+ }
+
+#ifdef FEATURE_COMINTEROP
+ else if (pFrameVptr == ComMethodFrame::GetMethodFrameVPtr())
+ {
+ // Load the methoddesc into ECX (Frame->m_pvDatum->m_pMD)
+ X86EmitIndexRegLoad(kECX, regFrame, ComMethodFrame::GetOffsetOfDatum());
+ X86EmitIndexRegLoad(kECX, kECX, ComCallMethodDesc::GetOffsetOfMethodDesc());
+
+ // Push arguments and notify profiler
+ X86EmitPushImm32(COR_PRF_TRANSITION_RETURN); // Reason
+ X86EmitPushReg(kECX); // MethodDesc*
+ X86EmitCall(NewExternalCodeLabel((LPVOID) ProfilerManagedToUnmanagedTransitionMD), 2*sizeof(void*));
+ }
+#endif // FEATURE_COMINTEROP
+
+ // Unrecognized frame vtbl
+ else
+ {
+        _ASSERTE(!"Unrecognized vtable passed to EmitComMethodStubEpilog with profiling turned on.");
+ }
+}
+#endif // PROFILING_SUPPORTED
+
+
+//========================================================================
+// Prolog for entering managed code from COM
+// pushes the appropriate frame ptr
+// sets up the Thread; the rare-path and rejoin labels passed in must be emitted by the caller
+// At the end:
+// ESI will hold the pointer to the ComMethodFrame or UMThkCallFrame
+// EBX will hold the result of GetThread()
+// EDI will hold the previous Frame ptr
+
+void StubLinkerCPU::EmitComMethodStubProlog(TADDR pFrameVptr,
+ CodeLabel** rgRareLabels,
+ CodeLabel** rgRejoinLabels,
+ BOOL bShouldProfile)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+
+ PRECONDITION(rgRareLabels != NULL);
+ PRECONDITION(rgRareLabels[0] != NULL && rgRareLabels[1] != NULL && rgRareLabels[2] != NULL);
+ PRECONDITION(rgRejoinLabels != NULL);
+ PRECONDITION(rgRejoinLabels[0] != NULL && rgRejoinLabels[1] != NULL && rgRejoinLabels[2] != NULL);
+ }
+ CONTRACTL_END;
+
+    // push ebp     ;; save callee-saved register
+    // mov ebp,esp
+    // push ebx     ;; save callee-saved register
+    // push esi     ;; save callee-saved register
+    // push edi     ;; save callee-saved register
+ X86EmitPushEBPframe();
+
+ X86EmitPushReg(kEBX);
+ X86EmitPushReg(kESI);
+ X86EmitPushReg(kEDI);
+
+ // push eax ; datum
+ X86EmitPushReg(kEAX);
+
+ // push edx ;leave room for m_next (edx is an arbitrary choice)
+ X86EmitPushReg(kEDX);
+
+ // push IMM32 ; push Frame vptr
+ X86EmitPushImmPtr((LPVOID) pFrameVptr);
+
+ X86EmitPushImmPtr((LPVOID)GetProcessGSCookie());
+
+ // lea esi, [esp+4] ;; set ESI -> new frame
+ X86EmitEspOffset(0x8d, kESI, 4); // lea ESI, [ESP+4]
+
+ if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr())
+ {
+ // Preserve argument registers for thiscall/fastcall
+ X86EmitPushReg(kECX);
+ X86EmitPushReg(kEDX);
+ }
+
+ // Emit Setup thread
+ EmitSetup(rgRareLabels[0]); // rareLabel for rare setup
+ EmitLabel(rgRejoinLabels[0]); // rejoin label for rare setup
+
+#ifdef PROFILING_SUPPORTED
+ // If profiling is active, emit code to notify profiler of transition
+ // Must do this before preemptive GC is disabled, so no problem if the
+ // profiler blocks.
+ if (CORProfilerTrackTransitions() && bShouldProfile)
+ {
+ EmitProfilerComCallProlog(pFrameVptr, /*Frame*/ kESI);
+ }
+#endif // PROFILING_SUPPORTED
+
+ //-----------------------------------------------------------------------
+ // Generate the inline part of disabling preemptive GC. It is critical
+ // that this part happen before we link in the frame. That's because
+ // we won't be able to unlink the frame from preemptive mode. And during
+ // shutdown, we cannot switch to cooperative mode under some circumstances
+ //-----------------------------------------------------------------------
+ EmitDisable(rgRareLabels[1], /*fCallIn=*/TRUE, kEBX); // rare disable gc
+ EmitLabel(rgRejoinLabels[1]); // rejoin for rare disable gc
+
+ // If we take an SO after installing the new frame but before getting the exception
+ // handlers in place, we will have a corrupt frame stack. So probe-by-touch first for
+ // sufficient stack space to erect the handler. Because we know we will be touching
+    // that stack right away when installing the handler, this probe-by-touch will not incur
+ // unnecessary cache misses. And this allows us to do the probe with one instruction.
+
+ // Note that for Win64, the personality routine will handle unlinking the frame, so
+ // we don't need to probe in the Win64 stubs. The exception is ComToCLRWorker
+ // where we don't setup a personality routine. However, we push the frame inside
+ // that function and it is probe-protected with an entry point probe first, so we are
+ // OK there too.
+
+ // We push two registers to setup the EH handler and none to setup the frame
+ // so probe for double that to give ourselves a small margin for error.
+ // mov eax, [esp+n] ;; probe for sufficient stack to setup EH
+ X86EmitEspOffset(0x8B, kEAX, -0x20);
+ // mov edi,[ebx + Thread.GetFrame()] ;; get previous frame
+ X86EmitIndexRegLoad(kEDI, kEBX, Thread::GetOffsetOfCurrentFrame());
+
+ // mov [esi + Frame.m_next], edi
+ X86EmitIndexRegStore(kESI, Frame::GetOffsetOfNextLink(), kEDI);
+
+ // mov [ebx + Thread.GetFrame()], esi
+ X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kESI);
+
+ if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr())
+ {
+        // push the SEH handler (UMThunkPrestubHandler)
+ X86EmitPushImmPtr((LPVOID)UMThunkPrestubHandler);
+
+ // mov eax, fs:[0]
+ static const BYTE codeSEH1[] = { 0x64, 0xA1, 0x0, 0x0, 0x0, 0x0};
+ EmitBytes(codeSEH1, sizeof(codeSEH1));
+
+ // push eax
+ X86EmitPushReg(kEAX);
+
+ // mov dword ptr fs:[0], esp
+ static const BYTE codeSEH2[] = { 0x64, 0x89, 0x25, 0x0, 0x0, 0x0, 0x0};
+ EmitBytes(codeSEH2, sizeof(codeSEH2));
+ }
+
+#if _DEBUG
+ if (Frame::ShouldLogTransitions())
+ {
+ // call LogTransition
+ X86EmitPushReg(kESI);
+ X86EmitCall(NewExternalCodeLabel((LPVOID) Frame::LogTransition), sizeof(void*));
+ }
+#endif
+}
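+
+// For orientation, a sketch of the stack the prolog above builds
+// (x86, stack grows downward):
+//
+//   [return address]
+//   [saved EBP][saved EBX][saved ESI][saved EDI]
+//   [datum (EAX)]
+//   [m_next placeholder (EDX)]
+//   [Frame vptr]                          <- ESI points here
+//   [GSCookie]
+//   [saved ECX][saved EDX]                (UMThkCallFrame only)
+//   [EXCEPTION_REGISTRATION_RECORD]       (UMThkCallFrame only, fs:[0])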
+
+//========================================================================
+// Epilog for stubs that enter managed code from COM
+//
+// At this point of the stub, the state should be as follows:
+// ESI holds the ComMethodFrame or UMThkCallFrame ptr
+// EBX holds the result of GetThread()
+// EDI holds the previous Frame ptr
+//
+void StubLinkerCPU::EmitComMethodStubEpilog(TADDR pFrameVptr,
+ CodeLabel** rgRareLabels,
+ CodeLabel** rgRejoinLabels,
+ BOOL bShouldProfile)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+
+ PRECONDITION(rgRareLabels != NULL);
+ PRECONDITION(rgRareLabels[0] != NULL && rgRareLabels[1] != NULL && rgRareLabels[2] != NULL);
+ PRECONDITION(rgRejoinLabels != NULL);
+ PRECONDITION(rgRejoinLabels[0] != NULL && rgRejoinLabels[1] != NULL && rgRejoinLabels[2] != NULL);
+ }
+ CONTRACTL_END;
+
+ EmitCheckGSCookie(kESI, UnmanagedToManagedFrame::GetOffsetOfGSCookie());
+
+ if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr())
+ {
+ // if we are using exceptions, unlink the SEH
+ // mov ecx,[esp] ;;pointer to the next exception record
+ X86EmitEspOffset(0x8b, kECX, 0);
+
+ // mov dword ptr fs:[0], ecx
+ static const BYTE codeSEH[] = { 0x64, 0x89, 0x0D, 0x0, 0x0, 0x0, 0x0 };
+ EmitBytes(codeSEH, sizeof(codeSEH));
+
+ X86EmitAddEsp(sizeof(EXCEPTION_REGISTRATION_RECORD));
+ }
+
+ // mov [ebx + Thread.GetFrame()], edi ;; restore previous frame
+ X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kEDI);
+
+    //-----------------------------------------------------------------------
+    // Generate the inline part of enabling preemptive GC
+    //-----------------------------------------------------------------------
+    EmitEnable(rgRareLabels[2]);        // rare enable gc
+    EmitLabel(rgRejoinLabels[2]);       // rejoin for rare enable gc
+
+ if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr())
+ {
+ // Restore argument registers for thiscall/fastcall
+ X86EmitPopReg(kEDX);
+ X86EmitPopReg(kECX);
+ }
+
+    // add esp, <popstack> ;; deallocate GSCookie and frame up to the callee-saved registers
+ X86EmitAddEsp(sizeof(GSCookie) + UnmanagedToManagedFrame::GetOffsetOfCalleeSavedRegisters());
+
+ // pop edi ; restore callee-saved registers
+ // pop esi
+ // pop ebx
+ // pop ebp
+ X86EmitPopReg(kEDI);
+ X86EmitPopReg(kESI);
+ X86EmitPopReg(kEBX);
+ X86EmitPopReg(kEBP);
+
+ // jmp eax //reexecute!
+ X86EmitR2ROp(0xff, (X86Reg)4, kEAX);
+
+ // ret
+ // This will never be executed. It is just to help out stack-walking logic
+ // which disassembles the epilog to unwind the stack. A "ret" instruction
+ // indicates that no more code needs to be disassembled, if the stack-walker
+ // keeps on going past the previous "jmp eax".
+ X86EmitReturn(0);
+
+ //-----------------------------------------------------------------------
+ // The out-of-line portion of enabling preemptive GC - rarely executed
+ //-----------------------------------------------------------------------
+ EmitLabel(rgRareLabels[2]); // label for rare enable gc
+ EmitRareEnable(rgRejoinLabels[2]); // emit rare enable gc
+
+ //-----------------------------------------------------------------------
+ // The out-of-line portion of disabling preemptive GC - rarely executed
+ //-----------------------------------------------------------------------
+ EmitLabel(rgRareLabels[1]); // label for rare disable gc
+ EmitRareDisable(rgRejoinLabels[1]); // emit rare disable gc
+
+ //-----------------------------------------------------------------------
+ // The out-of-line portion of setup thread - rarely executed
+ //-----------------------------------------------------------------------
+ EmitLabel(rgRareLabels[0]); // label for rare setup thread
+ EmitRareSetup(rgRejoinLabels[0], /*fThrow*/ TRUE); // emit rare setup thread
+}
+
+//---------------------------------------------------------------
+// Emit code to set up the current Thread structure and store it in ebx.
+// TRASHES eax, ecx and edx.
+// RESULTS ebx = current Thread
+//---------------------------------------------------------------
+VOID StubLinkerCPU::EmitSetup(CodeLabel *pForwardRef)
+{
+ STANDARD_VM_CONTRACT;
+
+#ifdef FEATURE_IMPLICIT_TLS
+ DWORD idx = 0;
+ TLSACCESSMODE mode = TLSACCESS_GENERIC;
+#else
+ DWORD idx = GetThreadTLSIndex();
+ TLSACCESSMODE mode = GetTLSAccessMode(idx);
+#endif
+
+#ifdef _DEBUG
+ {
+ static BOOL f = TRUE;
+ f = !f;
+ if (f)
+ {
+ mode = TLSACCESS_GENERIC;
+ }
+ }
+#endif
+
+ switch (mode)
+ {
+ case TLSACCESS_WNT:
+ {
+ unsigned __int32 tlsofs = offsetof(TEB, TlsSlots) + (idx * sizeof(void*));
+
+ static const BYTE code[] = {0x64,0x8b,0x1d}; // mov ebx, dword ptr fs:[IMM32]
+ EmitBytes(code, sizeof(code));
+ Emit32(tlsofs);
+ }
+ break;
+
+ case TLSACCESS_GENERIC:
+#ifdef FEATURE_IMPLICIT_TLS
+ X86EmitCall(NewExternalCodeLabel((LPVOID) GetThread), sizeof(void*));
+#else
+ X86EmitPushImm32(idx);
+
+ // call TLSGetValue
+ X86EmitCall(NewExternalCodeLabel((LPVOID) TlsGetValue), sizeof(void*));
+#endif
+ // mov ebx,eax
+ Emit16(0xc389);
+ break;
+ default:
+ _ASSERTE(0);
+ }
+
+ // cmp ebx, 0
+ static const BYTE b[] = { 0x83, 0xFB, 0x0};
+
+ EmitBytes(b, sizeof(b));
+
+ // jz RarePath
+ X86EmitCondJump(pForwardRef, X86CondCode::kJZ);
+
+#ifdef _DEBUG
+ X86EmitDebugTrashReg(kECX);
+ X86EmitDebugTrashReg(kEDX);
+#endif
+
+}
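+
+// Putting the pieces together, the code emitted by EmitSetup amounts to
+// (a sketch of the TLSACCESS_WNT case):
+//
+//   mov ebx, dword ptr fs:[TlsSlots + idx*4]   ; fetch the Thread*
+//   cmp ebx, 0
+//   jz  RarePath                               ; no Thread yet -> rare setup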
+
+VOID StubLinkerCPU::EmitRareSetup(CodeLabel *pRejoinPoint, BOOL fThrow)
+{
+ STANDARD_VM_CONTRACT;
+
+#ifndef FEATURE_COMINTEROP
+ _ASSERTE(fThrow);
+#else // !FEATURE_COMINTEROP
+ if (!fThrow)
+ {
+ X86EmitPushReg(kESI);
+ X86EmitCall(NewExternalCodeLabel((LPVOID) CreateThreadBlockReturnHr), sizeof(void*));
+ }
+ else
+#endif // !FEATURE_COMINTEROP
+ {
+ X86EmitCall(NewExternalCodeLabel((LPVOID) CreateThreadBlockThrow), 0);
+ }
+
+ // mov ebx,eax
+ Emit16(0xc389);
+ X86EmitNearJump(pRejoinPoint);
+}
+
+//========================================================================
+#endif // _TARGET_X86_
+//========================================================================
+#if defined(FEATURE_COMINTEROP) && defined(_TARGET_X86_)
+//========================================================================
+// Epilog for stubs that enter managed code from COM
+//
+// On entry, ESI points to the Frame
+// ESP points to below FramedMethodFrame::m_vc5Frame
+// EBX holds GetThread()
+// EDI holds the previous Frame
+
+void StubLinkerCPU::EmitSharedComMethodStubEpilog(TADDR pFrameVptr,
+ CodeLabel** rgRareLabels,
+ CodeLabel** rgRejoinLabels,
+ unsigned offsetRetThunk,
+ BOOL bShouldProfile)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+
+ PRECONDITION(rgRareLabels != NULL);
+ PRECONDITION(rgRareLabels[0] != NULL && rgRareLabels[1] != NULL && rgRareLabels[2] != NULL);
+ PRECONDITION(rgRejoinLabels != NULL);
+ PRECONDITION(rgRejoinLabels[0] != NULL && rgRejoinLabels[1] != NULL && rgRejoinLabels[2] != NULL);
+ }
+ CONTRACTL_END;
+
+ CodeLabel *NoEntryLabel;
+ NoEntryLabel = NewCodeLabel();
+
+ EmitCheckGSCookie(kESI, UnmanagedToManagedFrame::GetOffsetOfGSCookie());
+
+ // mov [ebx + Thread.GetFrame()], edi ;; restore previous frame
+ X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kEDI);
+
+ //-----------------------------------------------------------------------
+ // Generate the inline part of enabling preemptive GC
+ //-----------------------------------------------------------------------
+    EmitLabel(NoEntryLabel);    // need to enable preemptive mode even when the disable fails, as the rare disable path returns in cooperative mode
+
+ EmitEnable(rgRareLabels[2]); // rare enable gc
+ EmitLabel(rgRejoinLabels[2]); // rejoin for rare enable gc
+
+#ifdef PROFILING_SUPPORTED
+ // If profiling is active, emit code to notify profiler of transition
+ if (CORProfilerTrackTransitions() && bShouldProfile)
+ {
+ // Save return value
+ X86EmitPushReg(kEAX);
+ X86EmitPushReg(kEDX);
+
+ EmitProfilerComCallEpilog(pFrameVptr, kESI);
+
+ // Restore return value
+ X86EmitPopReg(kEDX);
+ X86EmitPopReg(kEAX);
+ }
+#endif // PROFILING_SUPPORTED
+
+ X86EmitAddEsp(sizeof(GSCookie) + UnmanagedToManagedFrame::GetOffsetOfDatum());
+
+ // pop ecx
+ X86EmitPopReg(kECX); // pop the MethodDesc*
+
+ // pop edi ; restore callee-saved registers
+ // pop esi
+ // pop ebx
+ // pop ebp
+ X86EmitPopReg(kEDI);
+ X86EmitPopReg(kESI);
+ X86EmitPopReg(kEBX);
+ X86EmitPopReg(kEBP);
+
+ // add ecx, offsetRetThunk
+ X86EmitAddReg(kECX, offsetRetThunk);
+
+ // jmp ecx
+ // This will jump to the "ret cbStackArgs" instruction in COMMETHOD_PREPAD.
+ static const BYTE bjmpecx[] = { 0xff, 0xe1 };
+ EmitBytes(bjmpecx, sizeof(bjmpecx));
+
+ // ret
+ // This will never be executed. It is just to help out stack-walking logic
+ // which disassembles the epilog to unwind the stack. A "ret" instruction
+ // indicates that no more code needs to be disassembled, if the stack-walker
+ // keeps on going past the previous "jmp ecx".
+ X86EmitReturn(0);
+
+ //-----------------------------------------------------------------------
+ // The out-of-line portion of enabling preemptive GC - rarely executed
+ //-----------------------------------------------------------------------
+ EmitLabel(rgRareLabels[2]); // label for rare enable gc
+ EmitRareEnable(rgRejoinLabels[2]); // emit rare enable gc
+
+ //-----------------------------------------------------------------------
+ // The out-of-line portion of disabling preemptive GC - rarely executed
+ //-----------------------------------------------------------------------
+ EmitLabel(rgRareLabels[1]); // label for rare disable gc
+ EmitRareDisableHRESULT(rgRejoinLabels[1], NoEntryLabel);
+
+ //-----------------------------------------------------------------------
+ // The out-of-line portion of setup thread - rarely executed
+ //-----------------------------------------------------------------------
+ EmitLabel(rgRareLabels[0]); // label for rare setup thread
+ EmitRareSetup(rgRejoinLabels[0],/*fThrow*/ FALSE); // emit rare setup thread
+}
+
+//========================================================================
+#endif // defined(FEATURE_COMINTEROP) && defined(_TARGET_X86_)
+
+#ifndef FEATURE_STUBS_AS_IL
+/*==============================================================================
+ Pushes a TransitionFrame on the stack
+ If you make any changes to the prolog instruction sequence, be sure
+ to update UpdateRegdisplay, too!! This service should only be called from
+    within the runtime. It should not be called for any unmanaged -> managed transitions.
+
+ At the end of the generated prolog stub code:
+ pFrame is in ESI/RSI.
+ the previous pFrame is in EDI/RDI
+ The current Thread* is in EBX/RBX.
+ For x86, ESP points to TransitionFrame
+ For amd64, ESP points to the space reserved for the outgoing argument registers
+*/
+
+VOID StubLinkerCPU::EmitMethodStubProlog(TADDR pFrameVptr, int transitionBlockOffset)
+{
+ STANDARD_VM_CONTRACT;
+
+#ifdef _TARGET_AMD64_
+ X86EmitPushReg(kR15); // CalleeSavedRegisters
+ X86EmitPushReg(kR14);
+ X86EmitPushReg(kR13);
+ X86EmitPushReg(kR12);
+ X86EmitPushReg(kRBP);
+ X86EmitPushReg(kRBX);
+ X86EmitPushReg(kRSI);
+ X86EmitPushReg(kRDI);
+
+ // Push m_datum
+ X86EmitPushReg(SCRATCH_REGISTER_X86REG);
+
+ // push edx ;leave room for m_next (edx is an arbitrary choice)
+ X86EmitPushReg(kEDX);
+
+ // push Frame vptr
+ X86EmitPushImmPtr((LPVOID) pFrameVptr);
+
+ // mov rsi, rsp
+ X86EmitR2ROp(0x8b, kRSI, (X86Reg)4 /*kESP*/);
+ UnwindSetFramePointer(kRSI);
+
+ // Save ArgumentRegisters
+ #define ARGUMENT_REGISTER(regname) X86EmitRegSave(k##regname, SecureDelegateFrame::GetOffsetOfTransitionBlock() + \
+ sizeof(TransitionBlock) + offsetof(ArgumentRegisters, regname));
+ ENUM_ARGUMENT_REGISTERS();
+ #undef ARGUMENT_REGISTER
+
+ _ASSERTE(((Frame*)&pFrameVptr)->GetGSCookiePtr() == PTR_GSCookie(PBYTE(&pFrameVptr) - sizeof(GSCookie)));
+ X86EmitPushImmPtr((LPVOID)GetProcessGSCookie());
+
+ // sub rsp, 4*sizeof(void*) ;; allocate callee scratch area and ensure rsp is 16-byte-aligned
+ const INT32 padding = sizeof(ArgumentRegisters) + ((sizeof(FramedMethodFrame) % (2 * sizeof(LPVOID))) ? 0 : sizeof(LPVOID));
+ X86EmitSubEsp(padding);
+#endif // _TARGET_AMD64_
+
+#ifdef _TARGET_X86_
+ // push ebp ;; save callee-saved register
+ // mov ebp,esp
+ // push ebx ;; save callee-saved register
+ // push esi ;; save callee-saved register
+ // push edi ;; save callee-saved register
+ X86EmitPushEBPframe();
+
+ X86EmitPushReg(kEBX);
+ X86EmitPushReg(kESI);
+ X86EmitPushReg(kEDI);
+
+ // Push & initialize ArgumentRegisters
+ #define ARGUMENT_REGISTER(regname) X86EmitPushReg(k##regname);
+ ENUM_ARGUMENT_REGISTERS();
+ #undef ARGUMENT_REGISTER
+
+ // Push m_datum
+ X86EmitPushReg(kEAX);
+
+ // push edx ;leave room for m_next (edx is an arbitrary choice)
+ X86EmitPushReg(kEDX);
+
+ // push Frame vptr
+ X86EmitPushImmPtr((LPVOID) pFrameVptr);
+
+ // mov esi,esp
+ X86EmitMovRegSP(kESI);
+
+ X86EmitPushImmPtr((LPVOID)GetProcessGSCookie());
+#endif // _TARGET_X86_
+
+ // ebx <-- GetThread()
+ // Trashes X86TLSFetch_TRASHABLE_REGS
+ X86EmitCurrentThreadFetch(kEBX, 0);
+
+#if _DEBUG
+
+ // call ObjectRefFlush
+#ifdef _TARGET_AMD64_
+
+ // mov rcx, rbx
+ X86EmitR2ROp(0x8b, kECX, kEBX); // arg in reg
+
+#else // !_TARGET_AMD64_
+ X86EmitPushReg(kEBX); // arg on stack
+#endif // _TARGET_AMD64_
+
+ // Make the call
+ X86EmitCall(NewExternalCodeLabel((LPVOID) Thread::ObjectRefFlush), sizeof(void*));
+
+#endif // _DEBUG
+
+ // mov edi,[ebx + Thread.GetFrame()] ;; get previous frame
+ X86EmitIndexRegLoad(kEDI, kEBX, Thread::GetOffsetOfCurrentFrame());
+
+ // mov [esi + Frame.m_next], edi
+ X86EmitIndexRegStore(kESI, Frame::GetOffsetOfNextLink(), kEDI);
+
+ // mov [ebx + Thread.GetFrame()], esi
+ X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kESI);
+
+#if _DEBUG
+
+ if (Frame::ShouldLogTransitions())
+ {
+ // call LogTransition
+#ifdef _TARGET_AMD64_
+
+ // mov rcx, rsi
+ X86EmitR2ROp(0x8b, kECX, kESI); // arg in reg
+
+#else // !_TARGET_AMD64_
+ X86EmitPushReg(kESI); // arg on stack
+#endif // _TARGET_AMD64_
+
+ X86EmitCall(NewExternalCodeLabel((LPVOID) Frame::LogTransition), sizeof(void*));
+
+#ifdef _TARGET_AMD64_
+ // Reload parameter registers
+ // mov r, [esp+offs]
+ #define ARGUMENT_REGISTER(regname) X86EmitEspOffset(0x8b, k##regname, sizeof(ArgumentRegisters) + \
+ sizeof(TransitionFrame) + offsetof(ArgumentRegisters, regname));
+ ENUM_ARGUMENT_REGISTERS();
+ #undef ARGUMENT_REGISTER
+
+#endif // _TARGET_AMD64_
+ }
+
+#endif // _DEBUG
+
+
+#ifdef _TARGET_AMD64_
+ // OK for the debugger to examine the new frame now
+ // (Note that if it's not OK yet for some stub, another patch label
+ // can be emitted later which will override this one.)
+ EmitPatchLabel();
+#else
+ // For x86, the patch label can be specified only after the GSCookie is pushed
+ // Otherwise the debugger will see a Frame without a valid GSCookie
+#endif
+}
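+
+// A sketch of the x86 frame laid down by EmitMethodStubProlog above
+// (stack grows downward):
+//
+//   [return address]
+//   [saved EBP][saved EBX][saved ESI][saved EDI]
+//   [argument registers]
+//   [datum (EAX)]
+//   [m_next placeholder (EDX)]
+//   [Frame vptr]        <- ESI points here
+//   [GSCookie]
+//
+// On exit EBX holds the current Thread and the new frame is linked into
+// the thread's frame chain.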
+
+/*==============================================================================
+ EmitMethodStubEpilog generates the part of the stub that will pop off the
+ Frame
+
+ restoreArgRegs - indicates whether the argument registers need to be
+ restored from m_argumentRegisters
+
+ At this point of the stub:
+ pFrame is in ESI/RSI.
+ the previous pFrame is in EDI/RDI
+ The current Thread* is in EBX/RBX.
+ For x86, ESP points to the FramedMethodFrame::NegInfo
+*/
+
+VOID StubLinkerCPU::EmitMethodStubEpilog(WORD numArgBytes, int transitionBlockOffset)
+{
+ STANDARD_VM_CONTRACT;
+
+ // mov [ebx + Thread.GetFrame()], edi ;; restore previous frame
+ X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kEDI);
+
+#ifdef _TARGET_X86_
+ // deallocate Frame
+ X86EmitAddEsp(sizeof(GSCookie) + transitionBlockOffset + TransitionBlock::GetOffsetOfCalleeSavedRegisters());
+
+#elif defined(_TARGET_AMD64_)
+ // lea rsp, [rsi + <offset of preserved registers>]
+ X86EmitOffsetModRM(0x8d, (X86Reg)4 /*kRSP*/, kRSI, transitionBlockOffset + TransitionBlock::GetOffsetOfCalleeSavedRegisters());
+#endif // _TARGET_AMD64_
+
+ // pop edi ; restore callee-saved registers
+ // pop esi
+ // pop ebx
+ // pop ebp
+ X86EmitPopReg(kEDI);
+ X86EmitPopReg(kESI);
+ X86EmitPopReg(kEBX);
+ X86EmitPopReg(kEBP);
+
+#ifdef _TARGET_AMD64_
+ X86EmitPopReg(kR12);
+ X86EmitPopReg(kR13);
+ X86EmitPopReg(kR14);
+ X86EmitPopReg(kR15);
+#endif
+
+#ifdef _TARGET_AMD64_
+ // Caller deallocates argument space. (Bypasses ASSERT in
+ // X86EmitReturn.)
+ numArgBytes = 0;
+#endif
+
+ X86EmitReturn(numArgBytes);
+}
+
+
+// On entry, frameReg (typically ESI) should point to the Frame
+
+VOID StubLinkerCPU::EmitCheckGSCookie(X86Reg frameReg, int gsCookieOffset)
+{
+ STANDARD_VM_CONTRACT;
+
+#ifdef _DEBUG
+ // cmp dword ptr[frameReg-gsCookieOffset], gsCookie
+#ifdef _TARGET_X86_
+ X86EmitCmpRegIndexImm32(frameReg, gsCookieOffset, GetProcessGSCookie());
+#else
+ X64EmitCmp32RegIndexImm32(frameReg, gsCookieOffset, (INT32)GetProcessGSCookie());
+#endif
+
+ CodeLabel * pLabel = NewCodeLabel();
+ X86EmitCondJump(pLabel, X86CondCode::kJE);
+
+ X86EmitCall(NewExternalCodeLabel((LPVOID) JIT_FailFast), 0);
+
+ EmitLabel(pLabel);
+#endif
+}
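+
+// The debug-only check emitted above amounts to (a sketch):
+//
+//   cmp dword ptr [frameReg + gsCookieOffset], <process GSCookie>
+//   je  Ok
+//   call JIT_FailFast
+// Ok:
+//
+// i.e. fail fast rather than keep running on a corrupted frame.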
+#endif // !FEATURE_STUBS_AS_IL
+
+
+// This method unboxes the THIS pointer and then calls pRealMD
+// If it's shared code for a method in a generic value class, then also extract the vtable pointer
+// and pass it as an extra argument. Thus this stub generator really covers both
+// - Unboxing, non-instantiating stubs
+// - Unboxing, method-table-instantiating stubs
+VOID StubLinkerCPU::EmitUnboxMethodStub(MethodDesc* pUnboxMD)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+ PRECONDITION(!pUnboxMD->IsStatic());
+ }
+ CONTRACTL_END;
+
+#ifdef FEATURE_STUBS_AS_IL
+ _ASSERTE(!pUnboxMD->RequiresInstMethodTableArg());
+#else
+ if (pUnboxMD->RequiresInstMethodTableArg())
+ {
+ EmitInstantiatingMethodStub(pUnboxMD, NULL);
+ return;
+ }
+#endif
+
+ //
+ // unboxing a value class simply means adding sizeof(void*) to the THIS pointer
+ //
+#ifdef _TARGET_AMD64_
+ X86EmitAddReg(THIS_kREG, sizeof(void*));
+
+ // Use direct call if possible
+ if (pUnboxMD->HasStableEntryPoint())
+ {
+ X86EmitRegLoad(kRAX, pUnboxMD->GetStableEntryPoint());// MOV RAX, DWORD
+ }
+ else
+ {
+ X86EmitRegLoad(kRAX, (UINT_PTR)pUnboxMD->GetAddrOfSlot()); // MOV RAX, DWORD
+
+ X86EmitIndexRegLoad(kRAX, kRAX); // MOV RAX, [RAX]
+ }
+
+ Emit16(X86_INSTR_JMP_EAX); // JMP EAX
+#else // _TARGET_AMD64_
+ X86EmitAddReg(THIS_kREG, sizeof(void*));
+
+ // Use direct call if possible
+ if (pUnboxMD->HasStableEntryPoint())
+ {
+ X86EmitNearJump(NewExternalCodeLabel((LPVOID) pUnboxMD->GetStableEntryPoint()));
+ }
+ else
+ {
+ // jmp [slot]
+ Emit16(0x25ff);
+ Emit32((DWORD)(size_t)pUnboxMD->GetAddrOfSlot());
+ }
+#endif //_TARGET_AMD64_
+}
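+
+// A sketch of the x86 output for the direct-call case above:
+//
+//   add ecx, 4                 ; step "this" past the MethodTable*
+//   jmp <stable entry point>   ; tail call the real method body
+//
+// If no stable entry point exists, the jump goes through the method's
+// slot instead: jmp dword ptr [slot].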
+
+
+#if defined(FEATURE_SHARE_GENERIC_CODE) && !defined(FEATURE_STUBS_AS_IL)
+// The stub generated by this method passes an extra dictionary argument before jumping to
+// shared-instantiation generic code.
+//
+// pMD is either
+// * An InstantiatedMethodDesc for a generic method whose code is shared across instantiations.
+// In this case, the extra argument is the InstantiatedMethodDesc for the instantiation-specific stub itself.
+// or * A MethodDesc for a static method in a generic class whose code is shared across instantiations.
+// In this case, the extra argument is the MethodTable pointer of the instantiated type.
+// or * A MethodDesc for an unboxing stub. In this case, the extra argument is null.
+VOID StubLinkerCPU::EmitInstantiatingMethodStub(MethodDesc* pMD, void* extra)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+ PRECONDITION(pMD->RequiresInstArg());
+ }
+ CONTRACTL_END;
+
+ MetaSig msig(pMD);
+ ArgIterator argit(&msig);
+
+#ifdef _TARGET_AMD64_
+ int paramTypeArgOffset = argit.GetParamTypeArgOffset();
+ int paramTypeArgIndex = TransitionBlock::GetArgumentIndexFromOffset(paramTypeArgOffset);
+
+ CorElementType argTypes[5];
+
+ int firstRealArg = paramTypeArgIndex + 1;
+ int argNum = firstRealArg;
+
+ //
+ // Compute types of the 4 register args and first stack arg
+ //
+
+ CorElementType sigType;
+ while ((sigType = msig.NextArgNormalized()) != ELEMENT_TYPE_END)
+ {
+ argTypes[argNum++] = sigType;
+ if (argNum > 4)
+ break;
+ }
+ msig.Reset();
+
+ BOOL fUseInstantiatingMethodStubWorker = FALSE;
+
+ if (argNum > 4)
+ {
+ //
+ // We will need to go through assembly helper.
+ //
+ fUseInstantiatingMethodStubWorker = TRUE;
+
+ // Allocate space for frame before pushing the arguments for the assembly helper
+ X86EmitSubEsp((INT32)(AlignUp(sizeof(void *) /* extra stack param */ + sizeof(GSCookie) + sizeof(StubHelperFrame), 16) - sizeof(void *) /* return address */));
+
+ //
+ // Store extra arg stack arg param for the helper.
+ //
+ CorElementType argType = argTypes[--argNum];
+ switch (argType)
+ {
+ case ELEMENT_TYPE_R4:
+ // movss dword ptr [rsp], xmm?
+ X64EmitMovSSToMem(kXMM3, (X86Reg)4 /*kRSP*/);
+ break;
+ case ELEMENT_TYPE_R8:
+ // movsd qword ptr [rsp], xmm?
+ X64EmitMovSDToMem(kXMM3, (X86Reg)4 /*kRSP*/);
+ break;
+ default:
+ X86EmitIndexRegStoreRSP(0, kR9);
+ break;
+ }
+ }
+
+ //
+ // Shuffle the register arguments
+ //
+ while (argNum > firstRealArg)
+ {
+ CorElementType argType = argTypes[--argNum];
+
+ switch (argType)
+ {
+ case ELEMENT_TYPE_R4:
+ case ELEMENT_TYPE_R8:
+ // mov xmm#, xmm#-1
+ X64EmitMovXmmXmm((X86Reg)argNum, (X86Reg)(argNum - 1));
+ break;
+ default:
+ //mov reg#, reg#-1
+ X86EmitMovRegReg(c_argRegs[argNum], c_argRegs[argNum-1]);
+ break;
+ }
+ }
+
+ //
+ // Setup the hidden instantiation argument
+ //
+ if (extra != NULL)
+ {
+ X86EmitRegLoad(c_argRegs[paramTypeArgIndex], (UINT_PTR)extra);
+ }
+ else
+ {
+ X86EmitIndexRegLoad(c_argRegs[paramTypeArgIndex], THIS_kREG);
+
+ X86EmitAddReg(THIS_kREG, sizeof(void*));
+ }
+
+ // Use direct call if possible
+ if (pMD->HasStableEntryPoint())
+ {
+ X86EmitRegLoad(kRAX, pMD->GetStableEntryPoint());// MOV RAX, DWORD
+ }
+ else
+ {
+ X86EmitRegLoad(kRAX, (UINT_PTR)pMD->GetAddrOfSlot()); // MOV RAX, DWORD
+
+ X86EmitIndexRegLoad(kRAX, kRAX); // MOV RAX, [RAX]
+ }
+
+ if (fUseInstantiatingMethodStubWorker)
+ {
+ X86EmitPushReg(kRAX);
+
+ UINT cbStack = argit.SizeOfArgStack();
+ _ASSERTE(cbStack > 0);
+
+ X86EmitPushImm32((AlignUp(cbStack, 16) / sizeof(void*)) - 1); // -1 for extra stack arg
+
+ X86EmitRegLoad(kRAX, GetEEFuncEntryPoint(InstantiatingMethodStubWorker));// MOV RAX, DWORD
+ }
+ else
+ {
+ _ASSERTE(argit.SizeOfArgStack() == 0);
+ }
+
+ Emit16(X86_INSTR_JMP_EAX);
+
+#else
+ int paramTypeArgOffset = argit.GetParamTypeArgOffset();
+
+ // It's on the stack
+ if (TransitionBlock::IsStackArgumentOffset(paramTypeArgOffset))
+ {
+ // Pop return address into AX
+ X86EmitPopReg(kEAX);
+
+ if (extra != NULL)
+ {
+ // Push extra dictionary argument
+ X86EmitPushImmPtr(extra);
+ }
+ else
+ {
+ // Push the vtable pointer from "this"
+ X86EmitIndexPush(THIS_kREG, 0);
+ }
+
+ // Put return address back
+ X86EmitPushReg(kEAX);
+ }
+ // It's in a register
+ else
+ {
+ X86Reg paramReg = GetX86ArgumentRegisterFromOffset(paramTypeArgOffset - TransitionBlock::GetOffsetOfArgumentRegisters());
+
+ if (extra != NULL)
+ {
+ X86EmitRegLoad(paramReg, (UINT_PTR)extra);
+ }
+ else
+ {
+ // Just extract the vtable pointer from "this"
+ X86EmitIndexRegLoad(paramReg, THIS_kREG);
+ }
+ }
+
+ if (extra == NULL)
+ {
+ // Unboxing stub case.
+ X86EmitAddReg(THIS_kREG, sizeof(void*));
+ }
+
+ // Use direct call if possible
+ if (pMD->HasStableEntryPoint())
+ {
+ X86EmitNearJump(NewExternalCodeLabel((LPVOID) pMD->GetStableEntryPoint()));
+ }
+ else
+ {
+ // jmp [slot]
+ Emit16(0x25ff);
+ Emit32((DWORD)(size_t)pMD->GetAddrOfSlot());
+ }
+#endif //
+}
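+
+// For the x86 register case above, the emitted stub is roughly (a sketch,
+// assuming the hidden instantiation arg lands in EDX and a stable entry
+// point exists):
+//
+//   mov edx, <extra>           ; or: mov edx, [ecx] for the unboxing case
+//   add ecx, 4                 ; unboxing case only: step past MethodTable*
+//   jmp <stable entry point>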
+#endif // FEATURE_SHARE_GENERIC_CODE && !FEATURE_STUBS_AS_IL
+
+
+#if defined(_DEBUG) && defined(STUBLINKER_GENERATES_UNWIND_INFO)
+
+typedef BOOL GetModuleInformationProc(
+ HANDLE hProcess,
+ HMODULE hModule,
+ LPMODULEINFO lpmodinfo,
+ DWORD cb
+);
+
+GetModuleInformationProc *g_pfnGetModuleInformation = NULL;
+
+extern "C" VOID __cdecl DebugCheckStubUnwindInfoWorker (CONTEXT *pStubContext)
+{
+ BEGIN_ENTRYPOINT_VOIDRET;
+
+ LOG((LF_STUBS, LL_INFO1000000, "checking stub unwind info:\n"));
+
+ //
+ // Make a copy of the CONTEXT. RtlVirtualUnwind will modify this copy.
+ // DebugCheckStubUnwindInfo will need to restore registers from the
+ // original CONTEXT.
+ //
+ CONTEXT ctx = *pStubContext;
+ ctx.ContextFlags = (CONTEXT_CONTROL | CONTEXT_INTEGER);
+
+ //
+ // Find the upper bound of the stack and address range of KERNEL32. This
+ // is where we expect the unwind to stop.
+ //
+ void *pvStackTop = GetThread()->GetCachedStackBase();
+
+ if (!g_pfnGetModuleInformation)
+ {
+ HMODULE hmodPSAPI = WszGetModuleHandle(W("PSAPI.DLL"));
+
+ if (!hmodPSAPI)
+ {
+ hmodPSAPI = WszLoadLibrary(W("PSAPI.DLL"));
+ if (!hmodPSAPI)
+ {
+ _ASSERTE(!"unable to load PSAPI.DLL");
+ goto ErrExit;
+ }
+ }
+
+ g_pfnGetModuleInformation = (GetModuleInformationProc*)GetProcAddress(hmodPSAPI, "GetModuleInformation");
+ if (!g_pfnGetModuleInformation)
+ {
+ _ASSERTE(!"can't find PSAPI!GetModuleInformation");
+ goto ErrExit;
+ }
+
+ // Intentionally leak hmodPSAPI. We don't want to
+ // LoadLibrary/FreeLibrary every time, this is slow + produces lots of
+ // debugger spew. This is just debugging code after all...
+ }
+
+ HMODULE hmodKERNEL32 = WszGetModuleHandle(W("KERNEL32"));
+ _ASSERTE(hmodKERNEL32);
+
+ MODULEINFO modinfoKERNEL32;
+ if (!g_pfnGetModuleInformation(GetCurrentProcess(), hmodKERNEL32, &modinfoKERNEL32, sizeof(modinfoKERNEL32)))
+ {
+ _ASSERTE(!"unable to get bounds of KERNEL32");
+ goto ErrExit;
+ }
+
+ //
+ // Unwind until IP is 0, sp is at the stack top, and callee IP is in kernel32.
+ //
+
+ for (;;)
+ {
+ ULONG64 ControlPc = (ULONG64)GetIP(&ctx);
+
+ LOG((LF_STUBS, LL_INFO1000000, "pc %p, sp %p\n", ControlPc, GetSP(&ctx)));
+
+ ULONG64 ImageBase;
+ T_RUNTIME_FUNCTION *pFunctionEntry = RtlLookupFunctionEntry(
+ ControlPc,
+ &ImageBase,
+ NULL);
+ if (pFunctionEntry)
+ {
+ PVOID HandlerData;
+ ULONG64 EstablisherFrame;
+
+ RtlVirtualUnwind(
+ 0,
+ ImageBase,
+ ControlPc,
+ pFunctionEntry,
+ &ctx,
+ &HandlerData,
+ &EstablisherFrame,
+ NULL);
+
+ ULONG64 NewControlPc = (ULONG64)GetIP(&ctx);
+
+ LOG((LF_STUBS, LL_INFO1000000, "function %p, image %p, new pc %p, new sp %p\n", pFunctionEntry, ImageBase, NewControlPc, GetSP(&ctx)));
+
+ if (!NewControlPc)
+ {
+ if (dac_cast<PTR_BYTE>(GetSP(&ctx)) < (BYTE*)pvStackTop - 0x100)
+ {
+ _ASSERTE(!"SP did not end up at top of stack");
+ goto ErrExit;
+ }
+
+ if (!( ControlPc > (ULONG64)modinfoKERNEL32.lpBaseOfDll
+ && ControlPc < (ULONG64)modinfoKERNEL32.lpBaseOfDll + modinfoKERNEL32.SizeOfImage))
+ {
+ _ASSERTE(!"PC did not end up in KERNEL32");
+ goto ErrExit;
+ }
+
+ break;
+ }
+ }
+ else
+ {
+ // Nested functions that do not use any stack space or nonvolatile
+ // registers are not required to have unwind info (ex.
+ // USER32!ZwUserCreateWindowEx).
+ ctx.Rip = *(ULONG64*)(ctx.Rsp);
+ ctx.Rsp += sizeof(ULONG64);
+ }
+ }
+ErrExit:
+
+ END_ENTRYPOINT_VOIDRET;
+ return;
+}
+
+//virtual
+VOID StubLinkerCPU::EmitUnwindInfoCheckWorker (CodeLabel *pCheckLabel)
+{
+ STANDARD_VM_CONTRACT;
+ X86EmitCall(pCheckLabel, 0);
+}
+
+//virtual
+VOID StubLinkerCPU::EmitUnwindInfoCheckSubfunction()
+{
+ STANDARD_VM_CONTRACT;
+
+#ifdef _TARGET_AMD64_
+ // X86EmitCall will generate "mov rax, target/jmp rax", so we have to save
+ // rax on the stack. DO NOT use X86EmitPushReg. That will induce infinite
+ // recursion, since the push may require more unwind info. This "push rax"
+ // will be accounted for by DebugCheckStubUnwindInfo's unwind info
+ // (considered part of its locals), so there doesn't have to be unwind
+ // info for it.
+ Emit8(0x50);
+#endif
+
+ X86EmitNearJump(NewExternalCodeLabel(DebugCheckStubUnwindInfo));
+}
+
+#endif // defined(_DEBUG) && defined(STUBLINKER_GENERATES_UNWIND_INFO)
+
+
+#ifdef _TARGET_X86_
+
+//-----------------------------------------------------------------------
+// Generates the inline portion of the code to enable preemptive GC. Hopefully,
+// the inline code is all that will execute most of the time. If this code
+// path is entered at certain times, however, it will need to jump out to
+// a separate out-of-line path which is more expensive. The "pForwardRef"
+// label indicates the start of the out-of-line path.
+//
+// Assumptions:
+// ebx = Thread
+// Preserves
+// all registers except ecx.
+//
+//-----------------------------------------------------------------------
+VOID StubLinkerCPU::EmitEnable(CodeLabel *pForwardRef)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+
+ PRECONDITION(4 == sizeof( ((Thread*)0)->m_State ));
+ PRECONDITION(4 == sizeof( ((Thread*)0)->m_fPreemptiveGCDisabled ));
+ }
+ CONTRACTL_END;
+
+ // move byte ptr [ebx + Thread.m_fPreemptiveGCDisabled],0
+ X86EmitOffsetModRM(0xc6, (X86Reg)0, kEBX, Thread::GetOffsetOfGCFlag());
+ Emit8(0);
+
+ _ASSERTE(FitsInI1(Thread::TS_CatchAtSafePoint));
+
+ // test byte ptr [ebx + Thread.m_State], TS_CatchAtSafePoint
+ X86EmitOffsetModRM(0xf6, (X86Reg)0, kEBX, Thread::GetOffsetOfState());
+ Emit8(Thread::TS_CatchAtSafePoint);
+
+ // jnz RarePath
+ X86EmitCondJump(pForwardRef, X86CondCode::kJNZ);
+
+#ifdef _DEBUG
+ X86EmitDebugTrashReg(kECX);
+#endif
+
+}
+
+
+//-----------------------------------------------------------------------
+// Generates the out-of-line portion of the code to enable preemptive GC.
+// After the work is done, the code jumps back to the "pRejoinPoint"
+// which should be emitted right after the inline part is generated.
+//
+// Assumptions:
+// ebx = Thread
+// Preserves
+// all registers except ecx.
+//
+//-----------------------------------------------------------------------
+VOID StubLinkerCPU::EmitRareEnable(CodeLabel *pRejoinPoint)
+{
+ STANDARD_VM_CONTRACT;
+
+ X86EmitCall(NewExternalCodeLabel((LPVOID) StubRareEnable), 0);
+#ifdef _DEBUG
+ X86EmitDebugTrashReg(kECX);
+#endif
+ if (pRejoinPoint)
+ {
+ X86EmitNearJump(pRejoinPoint);
+ }
+
+}
+
+
+//-----------------------------------------------------------------------
+// Generates the inline portion of the code to disable preemptive GC. Hopefully,
+// the inline code is all that will execute most of the time. If this code
+// path is entered at certain times, however, it will need to jump out to
+// a separate out-of-line path which is more expensive. The "pForwardRef"
+// label indicates the start of the out-of-line path.
+//
+// Assumptions:
+// ebx = Thread
+// Preserves
+// all registers except ecx.
+//
+//-----------------------------------------------------------------------
+VOID StubLinkerCPU::EmitDisable(CodeLabel *pForwardRef, BOOL fCallIn, X86Reg ThreadReg)
+{
+ CONTRACTL
+ {
+ STANDARD_VM_CHECK;
+
+ PRECONDITION(4 == sizeof( ((Thread*)0)->m_fPreemptiveGCDisabled ));
+ PRECONDITION(4 == sizeof(g_TrapReturningThreads));
+ }
+ CONTRACTL_END;
+
+#if defined(FEATURE_COMINTEROP) && defined(MDA_SUPPORTED)
+    // If we are checking for illegal reentrancy (i.e. the current thread is
+    // already in cooperative mode), vector such cases to the rare disable
+    // pathway, where we can check again.
+ if (fCallIn && (NULL != MDA_GET_ASSISTANT(Reentrancy)))
+ {
+ CodeLabel *pNotReentrantLabel = NewCodeLabel();
+
+ // test byte ptr [ebx + Thread.m_fPreemptiveGCDisabled],1
+ X86EmitOffsetModRM(0xf6, (X86Reg)0, ThreadReg, Thread::GetOffsetOfGCFlag());
+ Emit8(1);
+
+ // jz NotReentrant
+ X86EmitCondJump(pNotReentrantLabel, X86CondCode::kJZ);
+
+ X86EmitPushReg(kEAX);
+ X86EmitPushReg(kEDX);
+ X86EmitPushReg(kECX);
+
+ X86EmitCall(NewExternalCodeLabel((LPVOID) HasIllegalReentrancy), 0);
+
+        // If the probe fires, we go ahead and allow the call anyway. At this point, there could be
+        // GC heap corruption. So the probe detects the illegal case, but doesn't prevent it.
+
+ X86EmitPopReg(kECX);
+ X86EmitPopReg(kEDX);
+ X86EmitPopReg(kEAX);
+
+ EmitLabel(pNotReentrantLabel);
+ }
+#endif
+
+ // move byte ptr [ebx + Thread.m_fPreemptiveGCDisabled],1
+ X86EmitOffsetModRM(0xc6, (X86Reg)0, ThreadReg, Thread::GetOffsetOfGCFlag());
+ Emit8(1);
+
+ // cmp dword ptr g_TrapReturningThreads, 0
+ Emit16(0x3d83);
+ EmitPtr((void *)&g_TrapReturningThreads);
+ Emit8(0);
+
+ // jnz RarePath
+ X86EmitCondJump(pForwardRef, X86CondCode::kJNZ);
+
+#if defined(FEATURE_COMINTEROP) && !defined(FEATURE_CORESYSTEM)
+ // If we are checking whether the current thread holds the loader lock, vector
+ // such cases to the rare disable pathway, where we can check again.
+ if (fCallIn && ShouldCheckLoaderLock())
+ {
+ X86EmitPushReg(kEAX);
+ X86EmitPushReg(kEDX);
+
+ if (ThreadReg == kECX)
+ X86EmitPushReg(kECX);
+
+ // BOOL AuxUlibIsDLLSynchronizationHeld(BOOL *IsHeld)
+ //
+ // So we need to be sure that both the return value and the passed BOOL are both TRUE.
+ // If either is FALSE, then the call failed or the lock is not held. Either way, the
+ // probe should not fire.
+
+ X86EmitPushReg(kEDX); // BOOL temp
+ Emit8(0x54); // push ESP because arg is &temp
+ X86EmitCall(NewExternalCodeLabel((LPVOID) AuxUlibIsDLLSynchronizationHeld), 0);
+
+ // callee has popped.
+ X86EmitPopReg(kEDX); // recover temp
+
+ CodeLabel *pPopLabel = NewCodeLabel();
+
+ Emit16(0xc085); // test eax, eax
+ X86EmitCondJump(pPopLabel, X86CondCode::kJZ);
+
+ Emit16(0xd285); // test edx, edx
+
+ EmitLabel(pPopLabel); // retain the conditional flags across the pops
+
+ if (ThreadReg == kECX)
+ X86EmitPopReg(kECX);
+
+ X86EmitPopReg(kEDX);
+ X86EmitPopReg(kEAX);
+
+ X86EmitCondJump(pForwardRef, X86CondCode::kJNZ);
+ }
+#endif
+
+#ifdef _DEBUG
+ if (ThreadReg != kECX)
+ X86EmitDebugTrashReg(kECX);
+#endif
+
+}
+
+
+//-----------------------------------------------------------------------
+// Generates the out-of-line portion of the code to disable preemptive GC.
+// After the work is done, the code jumps back to the "pRejoinPoint"
+// which should be emitted right after the inline part is generated. However,
+// if we cannot execute managed code at this time, an exception is thrown
+// which cannot be caught by managed code.
+//
+// Assumptions:
+// ebx = Thread
+// Preserves
+// all registers except ecx, eax.
+//
+//-----------------------------------------------------------------------
+VOID StubLinkerCPU::EmitRareDisable(CodeLabel *pRejoinPoint)
+{
+ STANDARD_VM_CONTRACT;
+
+ X86EmitCall(NewExternalCodeLabel((LPVOID) StubRareDisableTHROW), 0);
+
+#ifdef _DEBUG
+ X86EmitDebugTrashReg(kECX);
+#endif
+ X86EmitNearJump(pRejoinPoint);
+}
+
+#ifdef FEATURE_COMINTEROP
+//-----------------------------------------------------------------------
+// Generates the out-of-line portion of the code to disable preemptive GC.
+// After the work is done, the code normally jumps back to the "pRejoinPoint"
+// which should be emitted right after the inline part is generated. However,
+// if we cannot execute managed code at this time, an HRESULT is returned
+// via the ExitPoint.
+//
+// Assumptions:
+// ebx = Thread
+// Preserves
+// all registers except ecx, eax.
+//
+//-----------------------------------------------------------------------
+VOID StubLinkerCPU::EmitRareDisableHRESULT(CodeLabel *pRejoinPoint, CodeLabel *pExitPoint)
+{
+ STANDARD_VM_CONTRACT;
+
+ X86EmitCall(NewExternalCodeLabel((LPVOID) StubRareDisableHR), 0);
+
+#ifdef _DEBUG
+ X86EmitDebugTrashReg(kECX);
+#endif
+
+ // test eax, eax ;; test the result of StubRareDisableHR
+ Emit16(0xc085);
+
+ // JZ pRejoinPoint
+ X86EmitCondJump(pRejoinPoint, X86CondCode::kJZ);
+
+ X86EmitNearJump(pExitPoint);
+}
+#endif // FEATURE_COMINTEROP
+
+#endif // _TARGET_X86_
+
+#endif // CROSSGEN_COMPILE
+
+
+VOID StubLinkerCPU::EmitShuffleThunk(ShuffleEntry *pShuffleEntryArray)
+{
+ STANDARD_VM_CONTRACT;
+
+#ifdef _TARGET_AMD64_
+
+ // mov SCRATCHREG,rsp
+ X86_64BitOperands();
+ Emit8(0x8b);
+ Emit8(0304 | (SCRATCH_REGISTER_X86REG << 3));
+
+ // save the real target in r11, will jump to it later. r10 is used below.
+ // Windows: mov r11, rcx
+ // Unix: mov r11, rdi
+ X86EmitMovRegReg(kR11, THIS_kREG);
+
+#ifdef UNIX_AMD64_ABI
+ for (ShuffleEntry* pEntry = pShuffleEntryArray; pEntry->srcofs != ShuffleEntry::SENTINEL; pEntry++)
+ {
+ if (pEntry->srcofs & ShuffleEntry::REGMASK)
+ {
+ // If source is present in register then destination must also be a register
+ _ASSERTE(pEntry->dstofs & ShuffleEntry::REGMASK);
+ // Both the srcofs and dstofs must be of the same kind of registers - float or general purpose.
+ _ASSERTE((pEntry->dstofs & ShuffleEntry::FPREGMASK) == (pEntry->srcofs & ShuffleEntry::FPREGMASK));
+
+ int dstRegIndex = pEntry->dstofs & ShuffleEntry::OFSREGMASK;
+ int srcRegIndex = pEntry->srcofs & ShuffleEntry::OFSREGMASK;
+
+ if (pEntry->srcofs & ShuffleEntry::FPREGMASK)
+ {
+ // movdqa dstReg, srcReg
+ X64EmitMovXmmXmm((X86Reg)(kXMM0 + dstRegIndex), (X86Reg)(kXMM0 + srcRegIndex));
+ }
+ else
+ {
+ // mov dstReg, srcReg
+ X86EmitMovRegReg(c_argRegs[dstRegIndex], c_argRegs[srcRegIndex]);
+ }
+ }
+ else if (pEntry->dstofs & ShuffleEntry::REGMASK)
+ {
+ // source must be on the stack
+ _ASSERTE(!(pEntry->srcofs & ShuffleEntry::REGMASK));
+
+ int dstRegIndex = pEntry->dstofs & ShuffleEntry::OFSREGMASK;
+ int srcOffset = (pEntry->srcofs + 1) * sizeof(void*);
+
+ if (pEntry->dstofs & ShuffleEntry::FPREGMASK)
+ {
+ if (pEntry->dstofs & ShuffleEntry::FPSINGLEMASK)
+ {
+ // movss dstReg, [rax + src]
+ X64EmitMovSSFromMem((X86Reg)(kXMM0 + dstRegIndex), SCRATCH_REGISTER_X86REG, srcOffset);
+ }
+ else
+ {
+ // movsd dstReg, [rax + src]
+ X64EmitMovSDFromMem((X86Reg)(kXMM0 + dstRegIndex), SCRATCH_REGISTER_X86REG, srcOffset);
+ }
+ }
+ else
+ {
+ // mov dstreg, [rax + src]
+ X86EmitIndexRegLoad(c_argRegs[dstRegIndex], SCRATCH_REGISTER_X86REG, srcOffset);
+ }
+ }
+ else
+ {
+ // source must be on the stack
+ _ASSERTE(!(pEntry->srcofs & ShuffleEntry::REGMASK));
+
+ // dest must be on the stack
+ _ASSERTE(!(pEntry->dstofs & ShuffleEntry::REGMASK));
+
+ // mov r10, [rax + src]
+ X86EmitIndexRegLoad (kR10, SCRATCH_REGISTER_X86REG, (pEntry->srcofs + 1) * sizeof(void*));
+
+ // mov [rax + dst], r10
+ X86EmitIndexRegStore (SCRATCH_REGISTER_X86REG, (pEntry->dstofs + 1) * sizeof(void*), kR10);
+ }
+ }
+#else // UNIX_AMD64_ABI
+ UINT step = 1;
+
+ if (pShuffleEntryArray->argtype == ELEMENT_TYPE_END)
+ {
+ // Special handling of open instance methods with return buffer. Move "this"
+ // by two slots, and leave the "retbufptr" between the two slots intact.
+
+ // mov rcx, r8
+ X86EmitMovRegReg(kRCX, kR8);
+
+ // Skip this entry
+ pShuffleEntryArray++;
+
+ // Skip this entry and leave retbufptr intact
+ step += 2;
+ }
+
+ // Now shuffle the args by one position:
+ // steps 1-3 : reg args (rcx, rdx, r8)
+ // step 4 : stack->reg arg (r9)
+ // step >4 : stack args
+
+ for(;
+ pShuffleEntryArray->srcofs != ShuffleEntry::SENTINEL;
+ step++, pShuffleEntryArray++)
+ {
+ switch (step)
+ {
+ case 1:
+ case 2:
+ case 3:
+ switch (pShuffleEntryArray->argtype)
+ {
+ case ELEMENT_TYPE_R4:
+ case ELEMENT_TYPE_R8:
+                // mov xmm#-1, xmm#
+ X64EmitMovXmmXmm((X86Reg)(step - 1), (X86Reg)(step));
+ break;
+ default:
+ // mov argRegs[step-1], argRegs[step]
+ X86EmitMovRegReg(c_argRegs[step-1], c_argRegs[step]);
+ break;
+ }
+ break;
+
+ case 4:
+ {
+ switch (pShuffleEntryArray->argtype)
+ {
+ case ELEMENT_TYPE_R4:
+ X64EmitMovSSFromMem(kXMM3, kRAX, 0x28);
+ break;
+
+ case ELEMENT_TYPE_R8:
+ X64EmitMovSDFromMem(kXMM3, kRAX, 0x28);
+ break;
+
+ default:
+ // mov r9, [rax + 28h]
+ X86EmitIndexRegLoad (kR9, SCRATCH_REGISTER_X86REG, 5*sizeof(void*));
+ }
+ break;
+ }
+ default:
+
+ // mov r10, [rax + (step+1)*sizeof(void*)]
+ X86EmitIndexRegLoad (kR10, SCRATCH_REGISTER_X86REG, (step+1)*sizeof(void*));
+
+ // mov [rax + step*sizeof(void*)], r10
+ X86EmitIndexRegStore (SCRATCH_REGISTER_X86REG, step*sizeof(void*), kR10);
+ }
+ }
+#endif // UNIX_AMD64_ABI
+
+ // mov r10, [r11 + Delegate._methodptraux]
+ X86EmitIndexRegLoad(kR10, kR11, DelegateObject::GetOffsetOfMethodPtrAux());
+ // add r11, DelegateObject::GetOffsetOfMethodPtrAux() - load the indirection cell into r11
+ X86EmitAddReg(kR11, DelegateObject::GetOffsetOfMethodPtrAux());
+ // Now jump to real target
+ // jmp r10
+ X86EmitR2ROp(0xff, (X86Reg)4, kR10);
+
+#else // _TARGET_AMD64_
+
+ UINT espadjust = 0;
+ BOOL haveMemMemMove = FALSE;
+
+ ShuffleEntry *pWalk = NULL;
+ for (pWalk = pShuffleEntryArray; pWalk->srcofs != ShuffleEntry::SENTINEL; pWalk++)
+ {
+ if (!(pWalk->dstofs & ShuffleEntry::REGMASK) &&
+ !(pWalk->srcofs & ShuffleEntry::REGMASK) &&
+ pWalk->srcofs != pWalk->dstofs)
+ {
+ haveMemMemMove = TRUE;
+ espadjust = sizeof(void*);
+ break;
+ }
+ }
+
+ if (haveMemMemMove)
+ {
+ // push ecx
+ X86EmitPushReg(THIS_kREG);
+ }
+ else
+ {
+ // mov eax, ecx
+ Emit8(0x8b);
+ Emit8(0300 | SCRATCH_REGISTER_X86REG << 3 | THIS_kREG);
+ }
+
+ UINT16 emptySpot = 0x4 | ShuffleEntry::REGMASK;
+
+ while (true)
+ {
+ for (pWalk = pShuffleEntryArray; pWalk->srcofs != ShuffleEntry::SENTINEL; pWalk++)
+ if (pWalk->dstofs == emptySpot)
+ break;
+
+ if (pWalk->srcofs == ShuffleEntry::SENTINEL)
+ break;
+
+ if ((pWalk->dstofs & ShuffleEntry::REGMASK))
+ {
+ if (pWalk->srcofs & ShuffleEntry::REGMASK)
+ {
+ // mov <dstReg>,<srcReg>
+ Emit8(0x8b);
+ Emit8(static_cast<UINT8>(0300 |
+ (GetX86ArgumentRegisterFromOffset( pWalk->dstofs & ShuffleEntry::OFSMASK ) << 3) |
+ (GetX86ArgumentRegisterFromOffset( pWalk->srcofs & ShuffleEntry::OFSMASK ))));
+ }
+ else
+ {
+ X86EmitEspOffset(0x8b, GetX86ArgumentRegisterFromOffset( pWalk->dstofs & ShuffleEntry::OFSMASK ), pWalk->srcofs+espadjust);
+ }
+ }
+ else
+ {
+ // if the destination is not a register, the source shouldn't be either.
+ _ASSERTE(!(pWalk->srcofs & ShuffleEntry::REGMASK));
+ if (pWalk->srcofs != pWalk->dstofs)
+ {
+ X86EmitEspOffset(0x8b, kEAX, pWalk->srcofs+espadjust);
+ X86EmitEspOffset(0x89, kEAX, pWalk->dstofs+espadjust);
+ }
+ }
+ emptySpot = pWalk->srcofs;
+ }
+
+ // Capture the stacksizedelta while we're at the end of the list.
+ _ASSERTE(pWalk->srcofs == ShuffleEntry::SENTINEL);
+
+ if (haveMemMemMove)
+ X86EmitPopReg(SCRATCH_REGISTER_X86REG);
+
+ if (pWalk->stacksizedelta)
+ X86EmitAddEsp(pWalk->stacksizedelta);
+
+ // Now jump to real target
+ // JMP [SCRATCHREG]
+ // we need to jump indirect so that for virtual delegates eax contains a pointer to the indirection cell
+ X86EmitAddReg(SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtrAux());
+ static const BYTE bjmpeax[] = { 0xff, 0x20 };
+ EmitBytes(bjmpeax, sizeof(bjmpeax));
+
+#endif // _TARGET_AMD64_
+}
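+
+// An illustrative x86 expansion of the thunk above for a delegate with a
+// single stack argument and no mem->mem moves (a sketch, not produced
+// verbatim):
+//
+//   mov eax, ecx                     ; eax <- delegate object
+//   mov ecx, edx                     ; slide the register args down by one
+//   mov edx, [esp+<ofs>]             ; first stack arg becomes a reg arg
+//   add esp, <stacksizedelta>
+//   add eax, Delegate._methodPtrAux
+//   jmp dword ptr [eax]              ; eax = ptr to the indirection cell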
+
+
+#if !defined(CROSSGEN_COMPILE) && !defined(FEATURE_STUBS_AS_IL)
+
+//===========================================================================
+// Computes hash code for MulticastDelegate.Invoke()
+UINT_PTR StubLinkerCPU::HashMulticastInvoke(MetaSig* pSig)
+{
+ CONTRACTL
+ {
+ THROWS;
+ GC_TRIGGERS;
+ }
+ CONTRACTL_END;
+
+ ArgIterator argit(pSig);
+
+ UINT numStackBytes = argit.SizeOfArgStack();
+
+ if (numStackBytes > 0x7FFF)
+ COMPlusThrow(kNotSupportedException, W("NotSupported_TooManyArgs"));
+
+#ifdef _TARGET_AMD64_
+ // Generate a hash key as follows:
+ // UINT Arg0Type:2; // R4 (1), R8 (2), other (3)
+ // UINT Arg1Type:2; // R4 (1), R8 (2), other (3)
+ // UINT Arg2Type:2; // R4 (1), R8 (2), other (3)
+ // UINT Arg3Type:2; // R4 (1), R8 (2), other (3)
+ // UINT NumArgs:24; // number of arguments
+ // (This should cover all the prestub variations)
+
+ _ASSERTE(!(numStackBytes & 7));
+ UINT hash = (numStackBytes / sizeof(void*)) << 8;
+
+ UINT argNum = 0;
+
+ // NextArg() doesn't take into account the "this" pointer.
+ // That's why we have to special case it here.
+ if (argit.HasThis())
+ {
+ hash |= 3 << (2*argNum);
+ argNum++;
+ }
+
+ if (argit.HasRetBuffArg())
+ {
+ hash |= 3 << (2*argNum);
+ argNum++;
+ }
+
+ for (; argNum < 4; argNum++)
+ {
+ switch (pSig->NextArgNormalized())
+ {
+ case ELEMENT_TYPE_END:
+ argNum = 4;
+ break;
+ case ELEMENT_TYPE_R4:
+ hash |= 1 << (2*argNum);
+ break;
+ case ELEMENT_TYPE_R8:
+ hash |= 2 << (2*argNum);
+ break;
+ default:
+ hash |= 3 << (2*argNum);
+ break;
+ }
+ }
+
+#else // _TARGET_AMD64_
+
+ // check if the function is returning a float, in which case the stub has to take
+ // care of popping the floating point stack except for the last invocation
+
+ _ASSERTE(!(numStackBytes & 3));
+
+ UINT hash = numStackBytes;
+
+ if (CorTypeInfo::IsFloat(pSig->GetReturnType()))
+ {
+ hash |= 2;
+ }
+#endif // _TARGET_AMD64_
+
+ return hash;
+}
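+
+// Hedged worked examples of the hash: on x86, an Invoke with 8 bytes of
+// stack arguments and a float return hashes to 8 | 2 == 0x0A. On AMD64,
+// an instance delegate taking a single double and no stack args hashes to
+// (0 << 8) | (3 << 0) | (2 << 2) == 0x0B ("this" is "other", arg 1 is R8).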
+
+#ifdef _TARGET_X86_
+//===========================================================================
+// Emits code for MulticastDelegate.Invoke()
+VOID StubLinkerCPU::EmitDelegateInvoke()
+{
+ STANDARD_VM_CONTRACT;
+
+ CodeLabel *pNullLabel = NewCodeLabel();
+
+ // test THISREG, THISREG
+ X86EmitR2ROp(0x85, THIS_kREG, THIS_kREG);
+
+ // jz null
+ X86EmitCondJump(pNullLabel, X86CondCode::kJZ);
+
+ // mov SCRATCHREG, [THISREG + Delegate.FP] ; Save target stub in register
+ X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfMethodPtr());
+
+ // mov THISREG, [THISREG + Delegate.OR] ; replace "this" pointer
+ X86EmitIndexRegLoad(THIS_kREG, THIS_kREG, DelegateObject::GetOffsetOfTarget());
+
+ // jmp SCRATCHREG
+ Emit16(0xe0ff | (SCRATCH_REGISTER_X86REG<<8));
+
+ // Do a null throw
+ EmitLabel(pNullLabel);
+
+ // mov ECX, CORINFO_NullReferenceException
+ Emit8(0xb8+kECX);
+ Emit32(CORINFO_NullReferenceException);
+
+ X86EmitCall(NewExternalCodeLabel(GetEEFuncEntryPoint(JIT_InternalThrowFromHelper)), 0);
+
+ X86EmitReturn(0);
+}
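+
+// The x86 sequence emitted above, end to end (a sketch):
+//
+//   test ecx, ecx                   ; null "this"?
+//   jz   Null
+//   mov  eax, [ecx + Delegate.FP]   ; load the target code pointer
+//   mov  ecx, [ecx + Delegate.OR]   ; replace "this" with the target object
+//   jmp  eax
+// Null:
+//   mov  ecx, CORINFO_NullReferenceException
+//   call JIT_InternalThrowFromHelper
+//   ret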
+#endif // _TARGET_X86_
+
+VOID StubLinkerCPU::EmitMulticastInvoke(UINT_PTR hash)
+{
+ STANDARD_VM_CONTRACT;
+
+ int thisRegOffset = MulticastFrame::GetOffsetOfTransitionBlock() +
+ TransitionBlock::GetOffsetOfArgumentRegisters() + offsetof(ArgumentRegisters, THIS_REG);
+
+ // push the methoddesc on the stack
+    // mov eax, [ecx + Delegate._methodPtrAux]
+ X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfMethodPtrAux());
+
+ // Push a MulticastFrame on the stack.
+ EmitMethodStubProlog(MulticastFrame::GetMethodFrameVPtr(), MulticastFrame::GetOffsetOfTransitionBlock());
+
+#ifdef _TARGET_X86_
+ // Frame is ready to be inspected by debugger for patch location
+ EmitPatchLabel();
+#else // _TARGET_AMD64_
+
+ // Save register arguments in their home locations.
+ // Non-FP registers are already saved by EmitMethodStubProlog.
+ // (Assumes Sig.NextArg() does not enum RetBuffArg or "this".)
+
+ int argNum = 0;
+ __int32 argOfs = MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs();
+ CorElementType argTypes[4];
+ CorElementType argType;
+
+ // 'this'
+ argOfs += sizeof(void*);
+ argTypes[argNum] = ELEMENT_TYPE_I8;
+ argNum++;
+
+ do
+ {
+ argType = ELEMENT_TYPE_END;
+
+ switch ((hash >> (2 * argNum)) & 3)
+ {
+ case 0:
+ argType = ELEMENT_TYPE_END;
+ break;
+ case 1:
+ argType = ELEMENT_TYPE_R4;
+
+ // movss dword ptr [rsp + argOfs], xmm?
+ X64EmitMovSSToMem((X86Reg)argNum, kRSI, argOfs);
+ break;
+ case 2:
+ argType = ELEMENT_TYPE_R8;
+
+ // movsd qword ptr [rsp + argOfs], xmm?
+ X64EmitMovSDToMem((X86Reg)argNum, kRSI, argOfs);
+ break;
+ default:
+ argType = ELEMENT_TYPE_I;
+ break;
+ }
+
+ argOfs += sizeof(void*);
+ argTypes[argNum] = argType;
+ argNum++;
+ }
+ while (argNum < 4 && ELEMENT_TYPE_END != argType);
+
+ _ASSERTE(4 == argNum || ELEMENT_TYPE_END == argTypes[argNum-1]);
+
+#endif // _TARGET_AMD64_
+
+    // TODO: on AMD64, pick different regs for locals so we don't need the pushes
+
+ // push edi ;; Save EDI (want to use it as loop index)
+ X86EmitPushReg(kEDI);
+
+ // xor edi,edi ;; Loop counter: EDI=0,1,2...
+ X86EmitZeroOutReg(kEDI);
+
+ CodeLabel *pLoopLabel = NewCodeLabel();
+ CodeLabel *pEndLoopLabel = NewCodeLabel();
+
+ EmitLabel(pLoopLabel);
+
+ // Entry:
+ // EDI == iteration counter
+
+ // mov ecx, [esi + this] ;; get delegate
+ X86EmitIndexRegLoad(THIS_kREG, kESI, thisRegOffset);
+
+ // cmp edi,[ecx]._invocationCount
+ X86EmitOp(0x3b, kEDI, THIS_kREG, DelegateObject::GetOffsetOfInvocationCount());
+
+ // je ENDLOOP
+ X86EmitCondJump(pEndLoopLabel, X86CondCode::kJZ);
+
+#ifdef _TARGET_AMD64_
+
+ INT32 numStackBytes = (INT32)((hash >> 8) * sizeof(void *));
+
+ INT32 stackUsed, numStackArgs, ofs;
+
+ // Push any stack args, plus an extra location
+ // for rsp alignment if needed
+
+ numStackArgs = numStackBytes / sizeof(void*);
+
+ // 1 push above, so stack is currently misaligned
+ const unsigned STACK_ALIGN_ADJUST = 8;
+
+ if (!numStackArgs)
+ {
+ // sub rsp, 28h ;; 4 reg arg home locs + rsp alignment
+ stackUsed = 0x20 + STACK_ALIGN_ADJUST;
+ X86EmitSubEsp(stackUsed);
+ }
+ else
+ {
+ stackUsed = numStackArgs * sizeof(void*);
+
+ // If the stack is misaligned, then an odd number of arguments
+ // will naturally align the stack.
+ if ( ((numStackArgs & 1) == 0)
+ != (STACK_ALIGN_ADJUST == 0))
+ {
+ X86EmitPushReg(kRAX);
+ stackUsed += sizeof(void*);
+ }
+
+ ofs = MulticastFrame::GetOffsetOfTransitionBlock() +
+ TransitionBlock::GetOffsetOfArgs() + sizeof(ArgumentRegisters) + numStackBytes;
+
+ while (numStackArgs--)
+ {
+ ofs -= sizeof(void*);
+
+ // push [rsi + ofs] ;; Push stack args
+ X86EmitIndexPush(kESI, ofs);
+ }
+
+ // sub rsp, 20h ;; Create 4 reg arg home locations
+ X86EmitSubEsp(0x20);
+
+ stackUsed += 0x20;
+ }
+
+ for(
+ argNum = 0, argOfs = MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs();
+ argNum < 4 && argTypes[argNum] != ELEMENT_TYPE_END;
+ argNum++, argOfs += sizeof(void*)
+ )
+ {
+ switch (argTypes[argNum])
+ {
+ case ELEMENT_TYPE_R4:
+ // movss xmm?, dword ptr [rsi + argOfs]
+ X64EmitMovSSFromMem((X86Reg)argNum, kRSI, argOfs);
+ break;
+ case ELEMENT_TYPE_R8:
+ // movsd xmm?, qword ptr [rsi + argOfs]
+ X64EmitMovSDFromMem((X86Reg)argNum, kRSI, argOfs);
+ break;
+ default:
+ if (c_argRegs[argNum] != THIS_kREG)
+ {
+ // mov r*, [rsi + dstOfs]
+ X86EmitIndexRegLoad(c_argRegs[argNum], kESI,argOfs);
+ }
+ break;
+ } // switch
+ }
+
+ // mov SCRATCHREG, [rcx+Delegate._invocationList] ;;fetch invocation list
+ X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationList());
+
+ // mov SCRATCHREG, [SCRATCHREG+m_Array+rdi*8] ;; index into invocation list
+ X86EmitOp(0x8b, kEAX, SCRATCH_REGISTER_X86REG, static_cast<int>(PtrArray::GetDataOffset()), kEDI, sizeof(void*), k64BitOp);
+
+ // mov THISREG, [SCRATCHREG+Delegate.object] ;;replace "this" pointer
+ X86EmitIndexRegLoad(THIS_kREG, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfTarget());
+
+ // call [SCRATCHREG+Delegate.target] ;; call current subscriber
+ X86EmitOffsetModRM(0xff, (X86Reg)2, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtr());
+
+ // add rsp, stackUsed ;; Clean up stack
+ X86EmitAddEsp(stackUsed);
+
+ // inc edi
+ Emit16(0xC7FF);
+
+#else // _TARGET_AMD64_
+
+ UINT16 numStackBytes = static_cast<UINT16>(hash & ~3);
+
+ // ..repush & reenregister args..
+ INT32 ofs = numStackBytes + MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs();
+ while (ofs != MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs())
+ {
+ ofs -= sizeof(void*);
+ X86EmitIndexPush(kESI, ofs);
+ }
+
+ #define ARGUMENT_REGISTER(regname) if (k##regname != THIS_kREG) { X86EmitIndexRegLoad(k##regname, kESI, \
+ offsetof(ArgumentRegisters, regname) + MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgumentRegisters()); }
+
+ ENUM_ARGUMENT_REGISTERS_BACKWARD();
+
+ #undef ARGUMENT_REGISTER
+
+ // mov SCRATCHREG, [ecx+Delegate._invocationList] ;;fetch invocation list
+ X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationList());
+
+ // mov SCRATCHREG, [SCRATCHREG+m_Array+edi*4] ;; index into invocation list
+ X86EmitOp(0x8b, kEAX, SCRATCH_REGISTER_X86REG, PtrArray::GetDataOffset(), kEDI, sizeof(void*));
+
+ // mov THISREG, [SCRATCHREG+Delegate.object] ;;replace "this" pointer
+ X86EmitIndexRegLoad(THIS_kREG, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfTarget());
+
+ // call [SCRATCHREG+Delegate.target] ;; call current subscriber
+ X86EmitOffsetModRM(0xff, (X86Reg)2, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtr());
+ INDEBUG(Emit8(0x90)); // Emit a nop after the call in debug so that
+ // we know that this is a call that can directly call
+ // managed code
+
+ // inc edi
+ Emit8(0x47);
+
+ if (hash & 2) // CorTypeInfo::IsFloat(pSig->GetReturnType())
+ {
+ // if the return value is a float/double check if we just did the last call - if not,
+ // emit the pop of the float stack
+
+ // mov SCRATCHREG, [esi + this] ;; get delegate
+ X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, kESI, thisRegOffset);
+
+ // cmp edi,[SCRATCHREG]._invocationCount
+ X86EmitOffsetModRM(0x3b, kEDI, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfInvocationCount());
+
+ CodeLabel *pNoFloatStackPopLabel = NewCodeLabel();
+
+ // je NOFLOATSTACKPOP
+ X86EmitCondJump(pNoFloatStackPopLabel, X86CondCode::kJZ);
+
+        // fstp st(0)
+ Emit16(0xd8dd);
+
+ // NoFloatStackPopLabel:
+ EmitLabel(pNoFloatStackPopLabel);
+ }
+
+#endif // _TARGET_AMD64_
+
+ // The debugger may need to stop here, so grab the offset of this code.
+ EmitPatchLabel();
+
+ // jmp LOOP
+ X86EmitNearJump(pLoopLabel);
+
+ //ENDLOOP:
+ EmitLabel(pEndLoopLabel);
+
+ // pop edi ;; Restore edi
+ X86EmitPopReg(kEDI);
+
+ EmitCheckGSCookie(kESI, MulticastFrame::GetOffsetOfGSCookie());
+
+ // Epilog
+ EmitMethodStubEpilog(numStackBytes, MulticastFrame::GetOffsetOfTransitionBlock());
+}
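+
+// Roughly, the stub emitted above behaves like the following pseudo-code
+// (argument re-pushing and x86 FP-stack handling elided):
+//      for (i = 0; i != delegate->_invocationCount; i++)
+//      {
+//          Delegate* cur = delegate->_invocationList[i];
+//          cur->_methodPtr(cur->_target, args...);
+//      }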
+
+VOID StubLinkerCPU::EmitSecureDelegateInvoke(UINT_PTR hash)
+{
+ STANDARD_VM_CONTRACT;
+
+ int thisRegOffset = SecureDelegateFrame::GetOffsetOfTransitionBlock() +
+ TransitionBlock::GetOffsetOfArgumentRegisters() + offsetof(ArgumentRegisters, THIS_REG);
+
+ // push the methoddesc on the stack
+ // mov eax, [ecx + offsetof(_invocationCount)]
+ X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationCount());
+
+ // Push a SecureDelegateFrame on the stack.
+ EmitMethodStubProlog(SecureDelegateFrame::GetMethodFrameVPtr(), SecureDelegateFrame::GetOffsetOfTransitionBlock());
+
+#ifdef _TARGET_X86_
+ // Frame is ready to be inspected by debugger for patch location
+ EmitPatchLabel();
+#else // _TARGET_AMD64_
+
+ // Save register arguments in their home locations.
+ // Non-FP registers are already saved by EmitMethodStubProlog.
+ // (Assumes Sig.NextArg() does not enum RetBuffArg or "this".)
+
+ int argNum = 0;
+ __int32 argOfs = SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs();
+ CorElementType argTypes[4];
+ CorElementType argType;
+
+ // 'this'
+ argOfs += sizeof(void*);
+ argTypes[argNum] = ELEMENT_TYPE_I8;
+ argNum++;
+
+ do
+ {
+ argType = ELEMENT_TYPE_END;
+
+ switch ((hash >> (2 * argNum)) & 3)
+ {
+ case 0:
+ argType = ELEMENT_TYPE_END;
+ break;
+ case 1:
+ argType = ELEMENT_TYPE_R4;
+
+ // movss dword ptr [rsp + argOfs], xmm?
+ X64EmitMovSSToMem((X86Reg)argNum, kRSI, argOfs);
+ break;
+ case 2:
+ argType = ELEMENT_TYPE_R8;
+
+ // movsd qword ptr [rsp + argOfs], xmm?
+                X64EmitMovSDToMem((X86Reg)argNum, kRSI, argOfs);
+ break;
+ default:
+ argType = ELEMENT_TYPE_I;
+ break;
+ }
+
+ argOfs += sizeof(void*);
+ argTypes[argNum] = argType;
+ argNum++;
+ }
+ while (argNum < 4 && ELEMENT_TYPE_END != argType);
+
+ _ASSERTE(4 == argNum || ELEMENT_TYPE_END == argTypes[argNum-1]);
+
+#endif // _TARGET_AMD64_
+
+ // mov ecx, [esi + this] ;; get delegate
+ X86EmitIndexRegLoad(THIS_kREG, kESI, thisRegOffset);
+
+#ifdef _TARGET_AMD64_
+
+ INT32 numStackBytes = (INT32)((hash >> 8) * sizeof(void *));
+
+ INT32 stackUsed, numStackArgs, ofs;
+
+ // Push any stack args, plus an extra location
+ // for rsp alignment if needed
+
+ numStackArgs = numStackBytes / sizeof(void*);
+
+    // No pushes above, so the stack is currently aligned
+ const unsigned STACK_ALIGN_ADJUST = 0;
+
+ if (!numStackArgs)
+ {
+        // sub rsp, 20h               ;; 4 reg arg home locs
+ stackUsed = 0x20 + STACK_ALIGN_ADJUST;
+ X86EmitSubEsp(stackUsed);
+ }
+ else
+ {
+ stackUsed = numStackArgs * sizeof(void*);
+
+ // If the stack is misaligned, then an odd number of arguments
+ // will naturally align the stack.
+ if ( ((numStackArgs & 1) == 0)
+ != (STACK_ALIGN_ADJUST == 0))
+ {
+ X86EmitPushReg(kRAX);
+ stackUsed += sizeof(void*);
+ }
+
+ ofs = SecureDelegateFrame::GetOffsetOfTransitionBlock() +
+ TransitionBlock::GetOffsetOfArgs() + sizeof(ArgumentRegisters) + numStackBytes;
+
+ while (numStackArgs--)
+ {
+ ofs -= sizeof(void*);
+
+ // push [rsi + ofs] ;; Push stack args
+ X86EmitIndexPush(kESI, ofs);
+ }
+
+ // sub rsp, 20h ;; Create 4 reg arg home locations
+ X86EmitSubEsp(0x20);
+
+ stackUsed += 0x20;
+ }
+
+ int thisArgNum = 0;
+
+ for(
+ argNum = 0, argOfs = SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs();
+ argNum < 4 && argTypes[argNum] != ELEMENT_TYPE_END;
+ argNum++, argOfs += sizeof(void*)
+ )
+ {
+ switch (argTypes[argNum])
+ {
+ case ELEMENT_TYPE_R4:
+ // movss xmm?, dword ptr [rsi + argOfs]
+ X64EmitMovSSFromMem((X86Reg)argNum, kRSI, argOfs);
+ break;
+ case ELEMENT_TYPE_R8:
+ // movsd xmm?, qword ptr [rsi + argOfs]
+ X64EmitMovSDFromMem((X86Reg)argNum, kRSI, argOfs);
+ break;
+ default:
+ if (c_argRegs[argNum] != THIS_kREG)
+ {
+ // mov r*, [rsi + dstOfs]
+ X86EmitIndexRegLoad(c_argRegs[argNum], kESI,argOfs);
+ }
+ break;
+ } // switch
+ }
+
+ // mov SCRATCHREG, [rcx+Delegate._invocationList] ;;fetch the inner delegate
+ X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationList());
+
+ // mov THISREG, [SCRATCHREG+Delegate.object] ;;replace "this" pointer
+ X86EmitIndexRegLoad(c_argRegs[thisArgNum], SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfTarget());
+
+ // call [SCRATCHREG+Delegate.target] ;; call current subscriber
+ X86EmitOffsetModRM(0xff, (X86Reg)2, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtr());
+
+ // add rsp, stackUsed ;; Clean up stack
+ X86EmitAddEsp(stackUsed);
+
+#else // _TARGET_AMD64_
+
+ UINT16 numStackBytes = static_cast<UINT16>(hash & ~3);
+
+ // ..repush & reenregister args..
+ INT32 ofs = numStackBytes + SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs();
+ while (ofs != SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs())
+ {
+ ofs -= sizeof(void*);
+ X86EmitIndexPush(kESI, ofs);
+ }
+
+ #define ARGUMENT_REGISTER(regname) if (k##regname != THIS_kREG) { X86EmitIndexRegLoad(k##regname, kESI, \
+ offsetof(ArgumentRegisters, regname) + SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgumentRegisters()); }
+
+ ENUM_ARGUMENT_REGISTERS_BACKWARD();
+
+ #undef ARGUMENT_REGISTER
+
+ // mov SCRATCHREG, [ecx+Delegate._invocationList] ;;fetch the inner delegate
+ X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationList());
+
+ // mov THISREG, [SCRATCHREG+Delegate.object] ;;replace "this" pointer
+ X86EmitIndexRegLoad(THIS_kREG, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfTarget());
+
+ // call [SCRATCHREG+Delegate.target] ;; call current subscriber
+ X86EmitOffsetModRM(0xff, (X86Reg)2, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtr());
+ INDEBUG(Emit8(0x90)); // Emit a nop after the call in debug so that
+ // we know that this is a call that can directly call
+ // managed code
+
+#endif // _TARGET_AMD64_
+
+ // The debugger may need to stop here, so grab the offset of this code.
+ EmitPatchLabel();
+
+ EmitCheckGSCookie(kESI, SecureDelegateFrame::GetOffsetOfGSCookie());
+
+ // Epilog
+ EmitMethodStubEpilog(numStackBytes, SecureDelegateFrame::GetOffsetOfTransitionBlock());
+}
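+
+// Unlike the multicast stub, the secure delegate stub makes a single call:
+// the wrapper stores the inner delegate in _invocationList (and the calling
+// MethodDesc in _invocationCount), so roughly:
+//      inner = delegate->_invocationList;  // the wrapped delegate
+//      inner->_methodPtr(inner->_target, args...);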
+
+#ifndef FEATURE_ARRAYSTUB_AS_IL
+
+// Little helper to generate code to move nbytes bytes of non-ref memory
+
+void generate_noref_copy (unsigned nbytes, StubLinkerCPU* sl)
+{
+ CONTRACTL
+ {
+ THROWS;
+ GC_NOTRIGGER;
+ INJECT_FAULT(COMPlusThrowOM(););
+ }
+ CONTRACTL_END;
+
+    // If the size is pointer-aligned, we'll use movsd (movsq on AMD64)
+ if (IS_ALIGNED(nbytes, sizeof(void*)))
+ {
+        // If there are fewer than 4 pointers to copy, "unroll" the "rep movsd"
+ if (nbytes <= 3*sizeof(void*))
+ {
+ while (nbytes > 0)
+ {
+ // movsd
+ sl->X86_64BitOperands();
+ sl->Emit8(0xa5);
+
+ nbytes -= sizeof(void*);
+ }
+ }
+ else
+ {
+            // mov ECX, size / sizeof(void*)
+ sl->Emit8(0xb8+kECX);
+ sl->Emit32(nbytes / sizeof(void*));
+
+            // rep movsd
+ sl->Emit8(0xf3);
+ sl->X86_64BitOperands();
+ sl->Emit8(0xa5);
+ }
+ }
+ else
+ {
+ // mov ECX, size
+ sl->Emit8(0xb8+kECX);
+ sl->Emit32(nbytes);
+
+        // rep movsb
+ sl->Emit16(0xa4f3);
+ }
+}
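+
+// For illustration, on x86 (sizeof(void*) == 4) the helper emits:
+//      nbytes == 8   ->  movsd; movsd                (unrolled)
+//      nbytes == 40  ->  mov ecx, 10; rep movsd      (10 == 40 / 4)
+//      nbytes == 7   ->  mov ecx, 7; rep movsb       (not pointer-aligned)
+// On AMD64 the aligned cases become movsq/rep movsq via the REX.W prefix.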
+
+
+X86Reg LoadArrayOpArg (
+ UINT32 idxloc,
+ StubLinkerCPU *psl,
+ X86Reg kRegIfFromMem,
+ UINT ofsadjust
+ AMD64_ARG(StubLinkerCPU::X86OperandSize OperandSize = StubLinkerCPU::k64BitOp)
+ )
+{
+ STANDARD_VM_CONTRACT;
+
+ if (!TransitionBlock::IsStackArgumentOffset(idxloc))
+ return GetX86ArgumentRegisterFromOffset(idxloc - TransitionBlock::GetOffsetOfArgumentRegisters());
+
+ psl->X86EmitEspOffset(0x8b, kRegIfFromMem, idxloc + ofsadjust AMD64_ARG(OperandSize));
+ return kRegIfFromMem;
+}
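+
+// Usage sketch: for an argument homed in a register, no code is emitted and
+// that argument register is returned directly; for a stack argument, the
+// equivalent of
+//      mov kRegIfFromMem, [esp + idxloc + ofsadjust]
+// is emitted and kRegIfFromMem is returned.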
+
+VOID StubLinkerCPU::EmitArrayOpStubThrow(unsigned exConst, unsigned cbRetArg)
+{
+ STANDARD_VM_CONTRACT;
+
+ //ArrayOpStub*Exception
+ X86EmitPopReg(kESI);
+ X86EmitPopReg(kEDI);
+
+    // mov ecx, exConst ;; the exception code to raise
+ Emit8(0xb8 | kECX);
+ Emit32(exConst);
+ //InternalExceptionWorker
+
+ X86EmitPopReg(kEDX);
+    // add esp, cbRetArg ;; pop the array-op arguments (pArrayOpScript->m_cbretpop)
+ Emit8(0x81);
+ Emit8(0xc0 | 0x4);
+ Emit32(cbRetArg);
+ X86EmitPushReg(kEDX);
+ X86EmitNearJump(NewExternalCodeLabel((PVOID)JIT_InternalThrow));
+}
+
+//===========================================================================
+// Emits code to do an array operation.
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable:21000) // Suppress PREFast warning about overly large function
+#endif
+VOID StubLinkerCPU::EmitArrayOpStub(const ArrayOpScript* pArrayOpScript)
+{
+ STANDARD_VM_CONTRACT;
+
+    // This is the offset to the parameters, i.e., to what's already pushed
+    // on the stack: the return address.
+ const INT locsize = sizeof(void*);
+
+ // ArrayOpScript's stack offsets are built using ArgIterator, which
+ // assumes a TransitionBlock has been pushed, which is not the case
+ // here. rsp + ofsadjust should point at the first argument. Any further
+ // stack modifications below need to adjust ofsadjust appropriately.
+ // baseofsadjust needs to be the stack adjustment at the entry point -
+ // this is used further below to compute how much stack space was used.
+
+ INT ofsadjust = locsize - (INT)sizeof(TransitionBlock);
+
+ // Register usage
+ //
+ // x86 AMD64
+ // Inputs:
+ // managed array THIS_kREG (ecx) THIS_kREG (rcx)
+ // index 0 edx rdx
+ // index 1/value <stack> r8
+ // index 2/value <stack> r9
+ // expected element type for LOADADDR eax rax rdx
+ // Working registers:
+ // total (accumulates unscaled offset) edi r10
+ // factor (accumulates the slice factor) esi r11
+ X86Reg kArrayRefReg = THIS_kREG;
+#ifdef _TARGET_AMD64_
+ const X86Reg kArrayMTReg = kR10;
+ const X86Reg kTotalReg = kR10;
+ const X86Reg kFactorReg = kR11;
+#else
+ const X86Reg kArrayMTReg = kESI;
+ const X86Reg kTotalReg = kEDI;
+ const X86Reg kFactorReg = kESI;
+#endif
+
+#ifdef _TARGET_AMD64_
+ // Simplifying assumption for fNeedPrologue.
+ _ASSERTE(!pArrayOpScript->m_gcDesc || (pArrayOpScript->m_flags & ArrayOpScript::NEEDSWRITEBARRIER));
+ // Simplifying assumption for saving rsi and rdi.
+ _ASSERTE(!(pArrayOpScript->m_flags & ArrayOpScript::HASRETVALBUFFER) || ArgIterator::IsArgPassedByRef(pArrayOpScript->m_elemsize));
+
+ // Cases where we need to make calls
+ BOOL fNeedScratchArea = ( (pArrayOpScript->m_flags & (ArrayOpScript::NEEDSTYPECHECK | ArrayOpScript::NEEDSWRITEBARRIER))
+ && ( pArrayOpScript->m_op == ArrayOpScript::STORE
+ || ( pArrayOpScript->m_op == ArrayOpScript::LOAD
+ && (pArrayOpScript->m_flags & ArrayOpScript::HASRETVALBUFFER))));
+
+ // Cases where we need to copy large values
+ BOOL fNeedRSIRDI = ( ArgIterator::IsArgPassedByRef(pArrayOpScript->m_elemsize)
+ && ArrayOpScript::LOADADDR != pArrayOpScript->m_op);
+
+ BOOL fNeedPrologue = ( fNeedScratchArea
+ || fNeedRSIRDI);
+#endif
+
+ X86Reg kValueReg;
+
+ CodeLabel *Epilog = NewCodeLabel();
+ CodeLabel *Inner_nullexception = NewCodeLabel();
+ CodeLabel *Inner_rangeexception = NewCodeLabel();
+ CodeLabel *Inner_typeMismatchexception = NULL;
+
+ //
+ // Set up the stack frame.
+ //
+ //
+ // x86:
+ // value
+ // <index n-1>
+ // ...
+ // <index 1>
+ // return address
+ // saved edi
+ // esp -> saved esi
+ //
+ //
+ // AMD64:
+ // value, if rank > 2
+ // ...
+ // + 0x48 more indices
+ // + 0x40 r9 home
+ // + 0x38 r8 home
+ // + 0x30 rdx home
+ // + 0x28 rcx home
+ // + 0x20 return address
+ // + 0x18 scratch area (callee's r9)
+ // + 0x10 scratch area (callee's r8)
+ // + 8 scratch area (callee's rdx)
+ // rsp -> scratch area (callee's rcx)
+ //
+ // If the element type is a value class w/ object references, then rsi
+ // and rdi will also be saved above the scratch area:
+ //
+ // ...
+ // + 0x28 saved rsi
+ // + 0x20 saved rdi
+ // + 0x18 scratch area (callee's r9)
+ // + 0x10 scratch area (callee's r8)
+ // + 8 scratch area (callee's rdx)
+ // rsp -> scratch area (callee's rcx)
+ //
+ // And if no call or movsb is necessary, then the scratch area sits
+ // directly under the MethodDesc*.
+
+ BOOL fSavedESI = FALSE;
+ BOOL fSavedEDI = FALSE;
+
+#ifdef _TARGET_AMD64_
+ if (fNeedPrologue)
+ {
+ // Save argument registers if we'll be making a call before using
+ // them. Note that in this case the element value will always be an
+ // object type, and never be in an xmm register.
+
+ if ( (pArrayOpScript->m_flags & ArrayOpScript::NEEDSTYPECHECK)
+ && ArrayOpScript::STORE == pArrayOpScript->m_op)
+ {
+ // mov [rsp+0x08], rcx
+ X86EmitEspOffset(0x89, kRCX, 0x08);
+ X86EmitEspOffset(0x89, kRDX, 0x10);
+ X86EmitEspOffset(0x89, kR8, 0x18);
+
+ if (pArrayOpScript->m_rank >= 2)
+ X86EmitEspOffset(0x89, kR9, 0x20);
+ }
+
+ if (fNeedRSIRDI)
+ {
+ X86EmitPushReg(kRSI);
+ X86EmitPushReg(kRDI);
+
+ fSavedESI = fSavedEDI = TRUE;
+
+ ofsadjust += 0x10;
+ }
+
+ if (fNeedScratchArea)
+ {
+ // Callee scratch area (0x8 for aligned esp)
+ X86EmitSubEsp(sizeof(ArgumentRegisters) + 0x8);
+ ofsadjust += sizeof(ArgumentRegisters) + 0x8;
+ }
+ }
+#else
+ // Preserve the callee-saved registers
+ // NOTE: if you change the sequence of these pushes, you must also update:
+ // ArrayOpStubNullException
+ // ArrayOpStubRangeException
+ // ArrayOpStubTypeMismatchException
+ _ASSERTE( kTotalReg == kEDI);
+ X86EmitPushReg(kTotalReg);
+ _ASSERTE( kFactorReg == kESI);
+ X86EmitPushReg(kFactorReg);
+
+ fSavedESI = fSavedEDI = TRUE;
+
+ ofsadjust += 2*sizeof(void*);
+#endif
+
+ // Check for null.
+ X86EmitR2ROp(0x85, kArrayRefReg, kArrayRefReg); // TEST ECX, ECX
+ X86EmitCondJump(Inner_nullexception, X86CondCode::kJZ); // jz Inner_nullexception
+
+ // Do Type Check if needed
+ if (pArrayOpScript->m_flags & ArrayOpScript::NEEDSTYPECHECK)
+ {
+ if (pArrayOpScript->m_op == ArrayOpScript::STORE)
+ {
+ // Get the value to be stored.
+ kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, kEAX, ofsadjust);
+
+ X86EmitR2ROp(0x85, kValueReg, kValueReg); // TEST kValueReg, kValueReg
+ CodeLabel *CheckPassed = NewCodeLabel();
+ X86EmitCondJump(CheckPassed, X86CondCode::kJZ); // storing NULL is OK
+
+ // mov EAX, element type ; possibly trashes kValueReg
+ X86EmitOp(0x8b, kArrayMTReg, kArrayRefReg, 0 AMD64_ARG(k64BitOp)); // mov ESI/R10, [kArrayRefReg]
+
+ X86EmitOp(0x8b, kEAX, kValueReg, 0 AMD64_ARG(k64BitOp)); // mov EAX, [kValueReg] ; possibly trashes kValueReg
+ // cmp EAX, [ESI/R10+m_ElementType]
+
+ X86EmitOp(0x3b, kEAX, kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle() AMD64_ARG(k64BitOp));
+ X86EmitCondJump(CheckPassed, X86CondCode::kJZ); // Exact match is OK
+
+ X86EmitRegLoad(kEAX, (UINT_PTR)g_pObjectClass); // mov EAX, g_pObjectMethodTable
+ // cmp EAX, [ESI/R10+m_ElementType]
+
+ X86EmitOp(0x3b, kEAX, kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle() AMD64_ARG(k64BitOp));
+ X86EmitCondJump(CheckPassed, X86CondCode::kJZ); // Assigning to array of object is OK
+
+ // Try to call the fast helper first ( ObjIsInstanceOfNoGC ).
+ // If that fails we will fall back to calling the slow helper ( ArrayStoreCheck ) that erects a frame.
+ // See also JitInterfaceX86::JIT_Stelem_Ref
+
+#ifdef _TARGET_AMD64_
+ // RCX contains pointer to object to check (Object*)
+ // RDX contains array type handle
+
+ // mov RCX, [rsp+offsetToObject] ; RCX = Object*
+ X86EmitEspOffset(0x8b, kRCX, ofsadjust + pArrayOpScript->m_fValLoc);
+
+ // get Array TypeHandle
+ // mov RDX, [RSP+offsetOfTypeHandle]
+
+ X86EmitEspOffset(0x8b, kRDX, ofsadjust
+ + TransitionBlock::GetOffsetOfArgumentRegisters()
+ + FIELD_OFFSET(ArgumentRegisters, THIS_REG));
+
+ // mov RDX, [kArrayMTReg+offsetof(MethodTable, m_ElementType)]
+ X86EmitIndexRegLoad(kRDX, kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle());
+
+#else
+ X86EmitPushReg(kEDX); // Save EDX
+ X86EmitPushReg(kECX); // Pass array object
+
+ X86EmitIndexPush(kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle()); // push [kArrayMTReg + m_ElementType] ; Array element type handle
+
+ // get address of value to store
+ _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc)); // on x86, value will never get a register
+ X86EmitSPIndexPush(pArrayOpScript->m_fValLoc + ofsadjust + 3*sizeof(void*)); // push [ESP+offset] ; the object pointer
+
+#endif // _TARGET_AMD64_
+
+
+            // Emit a call to the fast helper.
+            // A side effect is that we also generate a "jnz Epilog" on the fast
+            // path, where it is not needed; emitting it there is harmless, and it
+            // keeps the cleanup code much simpler (there is only one place to
+            // clean up the stack and restore it to its original state).
+ X86EmitCall(NewExternalCodeLabel((LPVOID)ObjIsInstanceOfNoGC), 0);
+ X86EmitCmpRegImm32( kEAX, TypeHandle::CanCast); // CMP EAX, CanCast ; if ObjIsInstanceOfNoGC returns CanCast, we will go the fast path
+ CodeLabel * Cleanup = NewCodeLabel();
+ X86EmitCondJump(Cleanup, X86CondCode::kJZ);
+
+#ifdef _TARGET_AMD64_
+ // get address of value to store
+ // lea rcx, [rsp+offs]
+ X86EmitEspOffset(0x8d, kRCX, ofsadjust + pArrayOpScript->m_fValLoc);
+
+ // get address of 'this'/rcx
+ // lea rdx, [rsp+offs]
+ X86EmitEspOffset(0x8d, kRDX, ofsadjust
+ + TransitionBlock::GetOffsetOfArgumentRegisters()
+ + FIELD_OFFSET(ArgumentRegisters, THIS_REG));
+
+#else
+ // The stack is already setup correctly for the slow helper.
+ _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc)); // on x86, value will never get a register
+ X86EmitEspOffset(0x8d, kECX, pArrayOpScript->m_fValLoc + ofsadjust + 2*sizeof(void*)); // lea ECX, [ESP+offset]
+
+ // get address of 'this'
+ X86EmitEspOffset(0x8d, kEDX, 0); // lea EDX, [ESP] ; (address of ECX)
+
+
+#endif
+ AMD64_ONLY(_ASSERTE(fNeedScratchArea));
+ X86EmitCall(NewExternalCodeLabel((LPVOID)ArrayStoreCheck), 0);
+
+ EmitLabel(Cleanup);
+#ifdef _TARGET_AMD64_
+ X86EmitEspOffset(0x8b, kRCX, 0x00 + ofsadjust + TransitionBlock::GetOffsetOfArgumentRegisters());
+ X86EmitEspOffset(0x8b, kRDX, 0x08 + ofsadjust + TransitionBlock::GetOffsetOfArgumentRegisters());
+ X86EmitEspOffset(0x8b, kR8, 0x10 + ofsadjust + TransitionBlock::GetOffsetOfArgumentRegisters());
+
+ if (pArrayOpScript->m_rank >= 2)
+ X86EmitEspOffset(0x8b, kR9, 0x18 + ofsadjust + TransitionBlock::GetOffsetOfArgumentRegisters());
+#else
+ X86EmitPopReg(kECX); // restore regs
+ X86EmitPopReg(kEDX);
+
+
+ X86EmitR2ROp(0x3B, kEAX, kEAX); // CMP EAX, EAX
+            X86EmitCondJump(Epilog, X86CondCode::kJNZ);       // This branch is never taken, but the epilog walker uses it
+#endif
+
+ EmitLabel(CheckPassed);
+ }
+ else
+ {
+ _ASSERTE(pArrayOpScript->m_op == ArrayOpScript::LOADADDR);
+
+ // Load up the hidden type parameter into 'typeReg'
+ X86Reg typeReg = LoadArrayOpArg(pArrayOpScript->m_typeParamOffs, this, kEAX, ofsadjust);
+
+ // 'typeReg' holds the typeHandle for the ARRAY. This must be a ArrayTypeDesc*, so
+ // mask off the low two bits to get the TypeDesc*
+ X86EmitR2ROp(0x83, (X86Reg)4, typeReg); // AND typeReg, 0xFFFFFFFC
+ Emit8(0xFC);
+
+ // If 'typeReg' is NULL then we're executing the readonly ::Address and no type check is
+ // needed.
+ CodeLabel *Inner_passedTypeCheck = NewCodeLabel();
+
+ X86EmitCondJump(Inner_passedTypeCheck, X86CondCode::kJZ);
+
+ // Get the parameter of the parameterize type
+ // mov typeReg, [typeReg.m_Arg]
+ X86EmitOp(0x8b, typeReg, typeReg, offsetof(ParamTypeDesc, m_Arg) AMD64_ARG(k64BitOp));
+
+ // Compare this against the element type of the array.
+ // mov ESI/R10, [kArrayRefReg]
+ X86EmitOp(0x8b, kArrayMTReg, kArrayRefReg, 0 AMD64_ARG(k64BitOp));
+ // cmp typeReg, [ESI/R10+m_ElementType];
+ X86EmitOp(0x3b, typeReg, kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle() AMD64_ARG(k64BitOp));
+
+ // Throw error if not equal
+ Inner_typeMismatchexception = NewCodeLabel();
+ X86EmitCondJump(Inner_typeMismatchexception, X86CondCode::kJNZ);
+ EmitLabel(Inner_passedTypeCheck);
+ }
+ }
+
+ CodeLabel* DoneCheckLabel = 0;
+ if (pArrayOpScript->m_rank == 1 && pArrayOpScript->m_fHasLowerBounds)
+ {
+ DoneCheckLabel = NewCodeLabel();
+ CodeLabel* NotSZArrayLabel = NewCodeLabel();
+
+ // for rank1 arrays, we might actually have two different layouts depending on
+ // if we are ELEMENT_TYPE_ARRAY or ELEMENT_TYPE_SZARRAY.
+
+ // mov EAX, [ARRAY] // EAX holds the method table
+ X86_64BitOperands();
+ X86EmitOp(0x8b, kEAX, kArrayRefReg);
+
+ // test [EAX + m_dwFlags], enum_flag_Category_IfArrayThenSzArray
+ X86_64BitOperands();
+ X86EmitOffsetModRM(0xf7, (X86Reg)0, kEAX, MethodTable::GetOffsetOfFlags());
+ Emit32(MethodTable::GetIfArrayThenSzArrayFlag());
+
+ // jz NotSZArrayLabel
+ X86EmitCondJump(NotSZArrayLabel, X86CondCode::kJZ);
+
+ //Load the passed-in index into the scratch register.
+ const ArrayOpIndexSpec *pai = pArrayOpScript->GetArrayOpIndexSpecs();
+ X86Reg idxReg = LoadArrayOpArg(pai->m_idxloc, this, SCRATCH_REGISTER_X86REG, ofsadjust);
+
+ // cmp idxReg, [kArrayRefReg + LENGTH]
+ X86EmitOp(0x3b, idxReg, kArrayRefReg, ArrayBase::GetOffsetOfNumComponents());
+
+ // jae Inner_rangeexception
+ X86EmitCondJump(Inner_rangeexception, X86CondCode::kJAE);
+
+        // <TODO> if we cared about the efficiency of this, this move could be optimized</TODO>
+ X86EmitR2ROp(0x8b, kTotalReg, idxReg AMD64_ARG(k32BitOp));
+
+        // sub ARRAY, 8 // 8 accounts for the lower bound and dim count in the ARRAY
+ X86EmitSubReg(kArrayRefReg, 8); // adjust this pointer so that indexing works out for SZARRAY
+
+ X86EmitNearJump(DoneCheckLabel);
+ EmitLabel(NotSZArrayLabel);
+ }
+
+ // For each index, range-check and mix into accumulated total.
+ UINT idx = pArrayOpScript->m_rank;
+ BOOL firstTime = TRUE;
+ while (idx--)
+ {
+ const ArrayOpIndexSpec *pai = pArrayOpScript->GetArrayOpIndexSpecs() + idx;
+
+ //Load the passed-in index into the scratch register.
+ X86Reg srcreg = LoadArrayOpArg(pai->m_idxloc, this, SCRATCH_REGISTER_X86REG, ofsadjust AMD64_ARG(k32BitOp));
+ if (SCRATCH_REGISTER_X86REG != srcreg)
+ X86EmitR2ROp(0x8b, SCRATCH_REGISTER_X86REG, srcreg AMD64_ARG(k32BitOp));
+
+ // sub SCRATCH, dword ptr [kArrayRefReg + LOWERBOUND]
+ if (pArrayOpScript->m_fHasLowerBounds)
+ {
+ X86EmitOp(0x2b, SCRATCH_REGISTER_X86REG, kArrayRefReg, pai->m_lboundofs);
+ }
+
+ // cmp SCRATCH, dword ptr [kArrayRefReg + LENGTH]
+ X86EmitOp(0x3b, SCRATCH_REGISTER_X86REG, kArrayRefReg, pai->m_lengthofs);
+
+ // jae Inner_rangeexception
+ X86EmitCondJump(Inner_rangeexception, X86CondCode::kJAE);
+
+
+ // SCRATCH == idx - LOWERBOUND
+ //
+ // imul SCRATCH, FACTOR
+ if (!firstTime)
+ {
+ //Can skip the first time since FACTOR==1
+ X86EmitR2ROp(0xaf0f, SCRATCH_REGISTER_X86REG, kFactorReg AMD64_ARG(k32BitOp));
+ }
+
+ // TOTAL += SCRATCH
+ if (firstTime)
+ {
+ // First time, we must zero-init TOTAL. Since
+ // zero-initing and then adding is just equivalent to a
+ // "mov", emit a "mov"
+ // mov TOTAL, SCRATCH
+ X86EmitR2ROp(0x8b, kTotalReg, SCRATCH_REGISTER_X86REG AMD64_ARG(k32BitOp));
+ }
+ else
+ {
+ // add TOTAL, SCRATCH
+ X86EmitR2ROp(0x03, kTotalReg, SCRATCH_REGISTER_X86REG AMD64_ARG(k32BitOp));
+ }
+
+ // FACTOR *= [kArrayRefReg + LENGTH]
+ if (idx != 0)
+ {
+ // No need to update FACTOR on the last iteration
+ // since we won't use it again
+
+ if (firstTime)
+ {
+ // must init FACTOR to 1 first: hence,
+ // the "imul" becomes a "mov"
+ // mov FACTOR, [kArrayRefReg + LENGTH]
+ X86EmitOp(0x8b, kFactorReg, kArrayRefReg, pai->m_lengthofs);
+ }
+ else
+ {
+ // imul FACTOR, [kArrayRefReg + LENGTH]
+ X86EmitOp(0xaf0f, kFactorReg, kArrayRefReg, pai->m_lengthofs);
+ }
+ }
+
+ firstTime = FALSE;
+ }
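+
+    // At this point TOTAL holds the row-major element index:
+    //      TOTAL = sum_k (idx[k] - lowerbound[k]) * prod_{j>k} length[j]
+    // (the loop walks indices last-to-first, so FACTOR starts at 1 and picks
+    // up each dimension's length as it goes). The code below forms the
+    // element address as  array + ofsoffirst + TOTAL * elemsize.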
+
+ if (DoneCheckLabel != 0)
+ EmitLabel(DoneCheckLabel);
+
+ // Pass these values to X86EmitArrayOp() to generate the element address.
+ X86Reg elemBaseReg = kArrayRefReg;
+ X86Reg elemScaledReg = kTotalReg;
+ UINT32 elemSize = pArrayOpScript->m_elemsize;
+ UINT32 elemOfs = pArrayOpScript->m_ofsoffirst;
+
+ if (!(elemSize == 1 || elemSize == 2 || elemSize == 4 || elemSize == 8))
+ {
+ switch (elemSize)
+ {
+ // No way to express this as a SIB byte. Fold the scale
+ // into TOTAL.
+
+ case 16:
+ // shl TOTAL,4
+ X86EmitR2ROp(0xc1, (X86Reg)4, kTotalReg AMD64_ARG(k32BitOp));
+ Emit8(4);
+ break;
+
+ case 32:
+ // shl TOTAL,5
+ X86EmitR2ROp(0xc1, (X86Reg)4, kTotalReg AMD64_ARG(k32BitOp));
+ Emit8(5);
+ break;
+
+ case 64:
+ // shl TOTAL,6
+ X86EmitR2ROp(0xc1, (X86Reg)4, kTotalReg AMD64_ARG(k32BitOp));
+ Emit8(6);
+ break;
+
+ default:
+ // imul TOTAL, elemScale
+ X86EmitR2ROp(0x69, kTotalReg, kTotalReg AMD64_ARG(k32BitOp));
+ Emit32(elemSize);
+ break;
+ }
+ elemSize = 1;
+ }
+
+ _ASSERTE(FitsInU1(elemSize));
+ BYTE elemScale = static_cast<BYTE>(elemSize);
+
+ // Now, do the operation:
+
+ switch (pArrayOpScript->m_op)
+ {
+ case ArrayOpScript::LOADADDR:
+ // lea eax, ELEMADDR
+ X86EmitOp(0x8d, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale AMD64_ARG(k64BitOp));
+ break;
+
+ case ArrayOpScript::LOAD:
+ if (pArrayOpScript->m_flags & ArrayOpScript::HASRETVALBUFFER)
+ {
+ // Ensure that these registers have been saved!
+ _ASSERTE(fSavedESI && fSavedEDI);
+
+ //lea esi, ELEMADDR
+ X86EmitOp(0x8d, kESI, elemBaseReg, elemOfs, elemScaledReg, elemScale AMD64_ARG(k64BitOp));
+
+ _ASSERTE(!TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fRetBufLoc));
+ // mov edi, retbufptr
+ X86EmitR2ROp(0x8b, kEDI, GetX86ArgumentRegisterFromOffset(pArrayOpScript->m_fRetBufLoc - TransitionBlock::GetOffsetOfArgumentRegisters()));
+
+COPY_VALUE_CLASS:
+ {
+ size_t size = pArrayOpScript->m_elemsize;
+ size_t total = 0;
+ if(pArrayOpScript->m_gcDesc)
+ {
+ CGCDescSeries* cur = pArrayOpScript->m_gcDesc->GetHighestSeries();
+ if ((cur->startoffset-elemOfs) > 0)
+ generate_noref_copy ((unsigned) (cur->startoffset - elemOfs), this);
+ total += cur->startoffset - elemOfs;
+
+ SSIZE_T cnt = (SSIZE_T) pArrayOpScript->m_gcDesc->GetNumSeries();
+ // special array encoding
+ _ASSERTE(cnt < 0);
+
+ for (SSIZE_T __i = 0; __i > cnt; __i--)
+ {
+ HALF_SIZE_T skip = cur->val_serie[__i].skip;
+ HALF_SIZE_T nptrs = cur->val_serie[__i].nptrs;
+ total += nptrs*sizeof (DWORD*);
+ do
+ {
+ AMD64_ONLY(_ASSERTE(fNeedScratchArea));
+
+ X86EmitCall(NewExternalCodeLabel((LPVOID) JIT_ByRefWriteBarrier), 0);
+ } while (--nptrs);
+ if (skip > 0)
+ {
+ //check if we are at the end of the series
+ if (__i == (cnt + 1))
+ skip = skip - (HALF_SIZE_T)(cur->startoffset - elemOfs);
+ if (skip > 0)
+ generate_noref_copy (skip, this);
+ }
+ total += skip;
+ }
+
+ _ASSERTE (size == total);
+ }
+ else
+ {
+ // no ref anywhere, just copy the bytes.
+ _ASSERTE (size);
+ generate_noref_copy ((unsigned)size, this);
+ }
+ }
+ }
+ else
+ {
+ switch (pArrayOpScript->m_elemsize)
+ {
+ case 1:
+ // mov[zs]x eax, byte ptr ELEMADDR
+ X86EmitOp(pArrayOpScript->m_signed ? 0xbe0f : 0xb60f, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+ break;
+
+ case 2:
+ // mov[zs]x eax, word ptr ELEMADDR
+ X86EmitOp(pArrayOpScript->m_signed ? 0xbf0f : 0xb70f, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+ break;
+
+ case 4:
+ if (pArrayOpScript->m_flags & ArrayOpScript::ISFPUTYPE)
+ {
+#ifdef _TARGET_AMD64_
+ // movss xmm0, dword ptr ELEMADDR
+ Emit8(0xf3);
+ X86EmitOp(0x100f, (X86Reg)0, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+#else // !_TARGET_AMD64_
+ // fld dword ptr ELEMADDR
+ X86EmitOp(0xd9, (X86Reg)0, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+#endif // !_TARGET_AMD64_
+ }
+ else
+ {
+ // mov eax, ELEMADDR
+ X86EmitOp(0x8b, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+ }
+ break;
+
+ case 8:
+ if (pArrayOpScript->m_flags & ArrayOpScript::ISFPUTYPE)
+ {
+#ifdef _TARGET_AMD64_
+ // movsd xmm0, qword ptr ELEMADDR
+ Emit8(0xf2);
+ X86EmitOp(0x100f, (X86Reg)0, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+#else // !_TARGET_AMD64_
+ // fld qword ptr ELEMADDR
+ X86EmitOp(0xdd, (X86Reg)0, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+#endif // !_TARGET_AMD64_
+ }
+ else
+ {
+ // mov eax, ELEMADDR
+ X86EmitOp(0x8b, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale AMD64_ARG(k64BitOp));
+#ifdef _TARGET_X86_
+ // mov edx, ELEMADDR + 4
+ X86EmitOp(0x8b, kEDX, elemBaseReg, elemOfs + 4, elemScaledReg, elemScale);
+#endif
+ }
+ break;
+
+ default:
+ _ASSERTE(0);
+ }
+ }
+
+ break;
+
+ case ArrayOpScript::STORE:
+
+ switch (pArrayOpScript->m_elemsize)
+ {
+ case 1:
+ // mov SCRATCH, [esp + valoffset]
+ kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust);
+ // mov byte ptr ELEMADDR, SCRATCH.b
+ X86EmitOp(0x88, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+ break;
+ case 2:
+ // mov SCRATCH, [esp + valoffset]
+ kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust);
+ // mov word ptr ELEMADDR, SCRATCH.w
+ Emit8(0x66);
+ X86EmitOp(0x89, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+ break;
+ case 4:
+#ifndef _TARGET_AMD64_
+ if (pArrayOpScript->m_flags & ArrayOpScript::NEEDSWRITEBARRIER)
+ {
+ // mov SCRATCH, [esp + valoffset]
+ kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust);
+
+ _ASSERTE(SCRATCH_REGISTER_X86REG == kEAX); // value to store is already in EAX where we want it.
+ // lea edx, ELEMADDR
+ X86EmitOp(0x8d, kEDX, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+
+ // call JIT_Writeable_Thunks_Buf.WriteBarrierReg[0] (== EAX)
+ X86EmitCall(NewExternalCodeLabel((LPVOID) &JIT_WriteBarrierEAX), 0);
+ }
+ else
+#else // _TARGET_AMD64_
+ if (pArrayOpScript->m_flags & ArrayOpScript::ISFPUTYPE)
+ {
+ if (!TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc))
+ {
+ kValueReg = (X86Reg)TransitionBlock::GetArgumentIndexFromOffset(pArrayOpScript->m_fValLoc);
+ }
+ else
+ {
+ kValueReg = (X86Reg)0; // xmm0
+
+ // movss xmm0, dword ptr [rsp+??]
+ Emit8(0xf3);
+ X86EmitOp(0x100f, kValueReg, (X86Reg)4 /*rsp*/, ofsadjust + pArrayOpScript->m_fValLoc);
+ }
+
+ // movss dword ptr ELEMADDR, xmm?
+ Emit8(0xf3);
+ X86EmitOp(0x110f, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+ }
+ else
+#endif // _TARGET_AMD64_
+ {
+ // mov SCRATCH, [esp + valoffset]
+ kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust AMD64_ARG(k32BitOp));
+
+ // mov ELEMADDR, SCRATCH
+ X86EmitOp(0x89, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+ }
+ break;
+
+ case 8:
+
+ if (!(pArrayOpScript->m_flags & ArrayOpScript::NEEDSWRITEBARRIER))
+ {
+#ifdef _TARGET_AMD64_
+ if (pArrayOpScript->m_flags & ArrayOpScript::ISFPUTYPE)
+ {
+ if (!TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc))
+ {
+ kValueReg = (X86Reg)TransitionBlock::GetArgumentIndexFromOffset(pArrayOpScript->m_fValLoc);
+ }
+ else
+ {
+ kValueReg = (X86Reg)0; // xmm0
+
+ // movsd xmm0, qword ptr [rsp+??]
+ Emit8(0xf2);
+ X86EmitOp(0x100f, kValueReg, (X86Reg)4 /*rsp*/, ofsadjust + pArrayOpScript->m_fValLoc);
+ }
+
+ // movsd qword ptr ELEMADDR, xmm?
+ Emit8(0xf2);
+ X86EmitOp(0x110f, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+ }
+ else
+ {
+ // mov SCRATCH, [esp + valoffset]
+ kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust);
+
+ // mov ELEMADDR, SCRATCH
+ X86EmitOp(0x89, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale, k64BitOp);
+ }
+#else // !_TARGET_AMD64_
+ _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc)); // on x86, value will never get a register: so too lazy to implement that case
+ // mov SCRATCH, [esp + valoffset]
+ X86EmitEspOffset(0x8b, SCRATCH_REGISTER_X86REG, pArrayOpScript->m_fValLoc + ofsadjust);
+ // mov ELEMADDR, SCRATCH
+ X86EmitOp(0x89, SCRATCH_REGISTER_X86REG, elemBaseReg, elemOfs, elemScaledReg, elemScale);
+
+ _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc)); // on x86, value will never get a register: so too lazy to implement that case
+ // mov SCRATCH, [esp + valoffset + 4]
+ X86EmitEspOffset(0x8b, SCRATCH_REGISTER_X86REG, pArrayOpScript->m_fValLoc + ofsadjust + 4);
+ // mov ELEMADDR+4, SCRATCH
+ X86EmitOp(0x89, SCRATCH_REGISTER_X86REG, elemBaseReg, elemOfs+4, elemScaledReg, elemScale);
+#endif // !_TARGET_AMD64_
+ break;
+ }
+#ifdef _TARGET_AMD64_
+ else
+ {
+ _ASSERTE(SCRATCH_REGISTER_X86REG == kEAX); // value to store is already in EAX where we want it.
+ // lea rcx, ELEMADDR
+ X86EmitOp(0x8d, kRCX, elemBaseReg, elemOfs, elemScaledReg, elemScale, k64BitOp);
+
+ // mov rdx, [rsp + valoffset]
+ kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, kRDX, ofsadjust);
+ _ASSERT(kRCX != kValueReg);
+ if (kRDX != kValueReg)
+ X86EmitR2ROp(0x8b, kRDX, kValueReg);
+
+ _ASSERTE(fNeedScratchArea);
+ X86EmitCall(NewExternalCodeLabel((PVOID)JIT_WriteBarrier), 0);
+ break;
+ }
+#endif // _TARGET_AMD64_
+ // FALL THROUGH (on x86)
+ default:
+ // Ensure that these registers have been saved!
+ _ASSERTE(fSavedESI && fSavedEDI);
+
+#ifdef _TARGET_AMD64_
+ // mov rsi, [rsp + valoffset]
+ kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, kRSI, ofsadjust);
+ if (kRSI != kValueReg)
+ X86EmitR2ROp(0x8b, kRSI, kValueReg);
+#else // !_TARGET_AMD64_
+ _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc));
+ // lea esi, [esp + valoffset]
+ X86EmitEspOffset(0x8d, kESI, pArrayOpScript->m_fValLoc + ofsadjust);
+#endif // !_TARGET_AMD64_
+
+ // lea edi, ELEMADDR
+ X86EmitOp(0x8d, kEDI, elemBaseReg, elemOfs, elemScaledReg, elemScale AMD64_ARG(k64BitOp));
+ goto COPY_VALUE_CLASS;
+ }
+ break;
+
+ default:
+ _ASSERTE(0);
+ }
+
+ EmitLabel(Epilog);
+
+#ifdef _TARGET_AMD64_
+ if (fNeedPrologue)
+ {
+ if (fNeedScratchArea)
+ {
+ // Throw away scratch area
+ X86EmitAddEsp(sizeof(ArgumentRegisters) + 0x8);
+ }
+
+ if (fSavedEDI)
+ X86EmitPopReg(kRDI);
+
+ if (fSavedESI)
+ X86EmitPopReg(kRSI);
+ }
+
+ X86EmitReturn(0);
+#else // !_TARGET_AMD64_
+ // Restore the callee-saved registers
+ X86EmitPopReg(kFactorReg);
+ X86EmitPopReg(kTotalReg);
+
+ // ret N
+ X86EmitReturn(pArrayOpScript->m_cbretpop);
+#endif // !_TARGET_AMD64_
+
+ // Exception points must clean up the stack for all those extra args.
+ // kFactorReg and kTotalReg will be popped by the jump targets.
+
+ void *pvExceptionThrowFn;
+
+#if defined(_TARGET_AMD64_)
+#define ARRAYOP_EXCEPTION_HELPERS(base) { (PVOID)base, (PVOID)base##_RSIRDI, (PVOID)base##_ScratchArea, (PVOID)base##_RSIRDI_ScratchArea }
+ static void *rgNullExceptionHelpers[] = ARRAYOP_EXCEPTION_HELPERS(ArrayOpStubNullException);
+ static void *rgRangeExceptionHelpers[] = ARRAYOP_EXCEPTION_HELPERS(ArrayOpStubRangeException);
+ static void *rgTypeMismatchExceptionHelpers[] = ARRAYOP_EXCEPTION_HELPERS(ArrayOpStubTypeMismatchException);
+#undef ARRAYOP_EXCEPTION_HELPERS
+
+ UINT iExceptionHelper = (fNeedRSIRDI ? 1 : 0) + (fNeedScratchArea ? 2 : 0);
+#endif // defined(_TARGET_AMD64_)
+
+ EmitLabel(Inner_nullexception);
+
+#ifndef _TARGET_AMD64_
+ pvExceptionThrowFn = (LPVOID)ArrayOpStubNullException;
+
+ Emit8(0xb8); // mov EAX, <stack cleanup>
+ Emit32(pArrayOpScript->m_cbretpop);
+#else //_TARGET_AMD64_
+ pvExceptionThrowFn = rgNullExceptionHelpers[iExceptionHelper];
+#endif //!_TARGET_AMD64_
+ X86EmitNearJump(NewExternalCodeLabel(pvExceptionThrowFn));
+
+ EmitLabel(Inner_rangeexception);
+#ifndef _TARGET_AMD64_
+ pvExceptionThrowFn = (LPVOID)ArrayOpStubRangeException;
+ Emit8(0xb8); // mov EAX, <stack cleanup>
+ Emit32(pArrayOpScript->m_cbretpop);
+#else //_TARGET_AMD64_
+ pvExceptionThrowFn = rgRangeExceptionHelpers[iExceptionHelper];
+#endif //!_TARGET_AMD64_
+ X86EmitNearJump(NewExternalCodeLabel(pvExceptionThrowFn));
+
+ if (Inner_typeMismatchexception != NULL)
+ {
+ EmitLabel(Inner_typeMismatchexception);
+#ifndef _TARGET_AMD64_
+ pvExceptionThrowFn = (LPVOID)ArrayOpStubTypeMismatchException;
+ Emit8(0xb8); // mov EAX, <stack cleanup>
+ Emit32(pArrayOpScript->m_cbretpop);
+#else //_TARGET_AMD64_
+ pvExceptionThrowFn = rgTypeMismatchExceptionHelpers[iExceptionHelper];
+#endif //!_TARGET_AMD64_
+ X86EmitNearJump(NewExternalCodeLabel(pvExceptionThrowFn));
+ }
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+#endif // FEATURE_ARRAYSTUB_AS_IL
+
+//===========================================================================
+// Emits code to break into debugger
+VOID StubLinkerCPU::EmitDebugBreak()
+{
+ STANDARD_VM_CONTRACT;
+
+ // int3
+ Emit8(0xCC);
+}
+
+#if defined(FEATURE_COMINTEROP) && defined(_TARGET_X86_)
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning (disable : 4740) // There is inline asm code in this function, which disables
+ // global optimizations.
+#pragma warning (disable : 4731)    // Frame pointer register 'ebp' modified by inline assembly
+#endif // _MSC_VER
+Thread* __stdcall CreateThreadBlockReturnHr(ComMethodFrame *pFrame)
+{
+
+ WRAPPER_NO_CONTRACT;
+
+ Thread *pThread = NULL;
+
+ HRESULT hr = S_OK;
+
+ // This means that a thread is FIRST coming in from outside the EE.
+ BEGIN_ENTRYPOINT_THROWS;
+ pThread = SetupThreadNoThrow(&hr);
+ END_ENTRYPOINT_THROWS;
+
+ if (pThread == NULL) {
+ // Unwind stack, and return hr
+ // NOTE: assumes __stdcall
+ // Note that this code does not handle the rare COM signatures that do not return HRESULT
+ // compute the callee pop stack bytes
+ UINT numArgStackBytes = pFrame->GetNumCallerStackBytes();
+ unsigned frameSize = sizeof(Frame) + sizeof(LPVOID);
+ LPBYTE iEsp = ((LPBYTE)pFrame) + ComMethodFrame::GetOffsetOfCalleeSavedRegisters();
+ __asm
+ {
+ mov eax, hr
+ mov edx, numArgStackBytes
+ //*****************************************
+ // reset the stack pointer
+ // none of the locals above can be used in the asm below
+            // if we whack the stack pointer
+ mov esp, iEsp
+ // pop callee saved registers
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ pop ecx ; //return address
+ // pop the callee cleanup stack args
+ add esp, edx ;// callee cleanup of args
+ jmp ecx; // jump to the address to continue execution
+
+ // We will never get here. This "ret" is just so that code-disassembling
+ // profilers know to stop disassembling any further
+ ret
+ }
+ }
+
+ return pThread;
+}
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+#endif // defined(FEATURE_COMINTEROP) && defined(_TARGET_X86_)
+
+#endif // !defined(CROSSGEN_COMPILE) && !defined(FEATURE_STUBS_AS_IL)
+
+#endif // !DACCESS_COMPILE
+
+
+#ifdef _TARGET_AMD64_
+
+//
+// TailCallFrame Object Scanning
+//
+// This handles scanning/promotion of GC objects that were
+// protected by the TailCallHelper routine. Note that the objects
+// being protected is somewhat dynamic and is dependent upon the
+// the callee...
+//
+
+void TailCallFrame::GcScanRoots(promote_func *fn, ScanContext* sc)
+{
+ WRAPPER_NO_CONTRACT;
+
+ if (m_pGCLayout != NULL)
+ {
+ struct FrameOffsetDecoder {
+ private:
+ TADDR prevOffset;
+ TADDR rangeEnd;
+ BOOL maybeInterior;
+ BOOL atEnd;
+ PTR_SBYTE pbOffsets;
+
+ DWORD ReadNumber() {
+ signed char i;
+ DWORD offset = 0;
+ while ((i = *pbOffsets++) >= 0)
+ {
+ offset = (offset << 7) | i;
+ }
+ offset = (offset << 7) | (i & 0x7F);
+ return offset;
+ }
+
+ public:
+ FrameOffsetDecoder(PTR_GSCookie _base, TADDR offsets)
+ : prevOffset(dac_cast<TADDR>(_base)), rangeEnd(~0LL), atEnd(FALSE), pbOffsets(dac_cast<PTR_SBYTE>(offsets)) { maybeInterior = FALSE;}
+
+ bool MoveNext() {
+ LIMITED_METHOD_CONTRACT;
+
+ if (rangeEnd < prevOffset)
+ {
+ prevOffset -= sizeof(void*);
+ return true;
+ }
+ if (atEnd) return false;
+ DWORD offset = ReadNumber();
+ atEnd = (offset & 1);
+ BOOL range = (offset & 2);
+ maybeInterior = (offset & 0x80000000);
+
+ offset &= 0x7FFFFFFC;
+
+#ifdef _WIN64
+ offset <<= 1;
+#endif
+ offset += sizeof(void*);
+ _ASSERTE(prevOffset > offset);
+ prevOffset -= offset;
+
+ if (range)
+ {
+ _ASSERTE(!atEnd);
+ _ASSERTE(!maybeInterior);
+ DWORD offsetEnd = ReadNumber();
+ atEnd = (offsetEnd & 1);
+ offsetEnd = (offsetEnd & ~1) << 1;
+                    // range encoding starts at a run of 3 slots (a run of 2 is
+                    // better encoded as 2 individual offsets), so an encoded 0
+                    // means 2 more slots beyond the first (the last offset in
+                    // the range)
+ offsetEnd += sizeof(void*) * 2;
+ rangeEnd = prevOffset - offsetEnd;
+ }
+
+ return true;
+ }
+
+ BOOL MaybeInterior() const { return maybeInterior; }
+
+ PTR_PTR_Object Current() const { return PTR_PTR_Object(prevOffset); }
+
+ } decoder(GetGSCookiePtr(), m_pGCLayout);
+
+ while (decoder.MoveNext())
+ {
+ PTR_PTR_Object ppRef = decoder.Current();
+
+ LOG((LF_GC, INFO3, "Tail Call Frame Promoting" FMT_ADDR "to",
+ DBG_ADDR(OBJECTREF_TO_UNCHECKED_OBJECTREF(*ppRef)) ));
+ if (decoder.MaybeInterior())
+ PromoteCarefully(fn, ppRef, sc, GC_CALL_INTERIOR|CHECK_APP_DOMAIN);
+ else
+ (*fn)(ppRef, sc, 0);
+ LOG((LF_GC, INFO3, FMT_ADDR "\n", DBG_ADDR(OBJECTREF_TO_UNCHECKED_OBJECTREF(*ppRef)) ));
+ }
+ }
+}
+
+#ifndef DACCESS_COMPILE
+static void EncodeOneGCOffset(CPUSTUBLINKER *pSl, ULONG delta, BOOL maybeInterior, BOOL range, BOOL last)
+{
+ CONTRACTL
+ {
+ THROWS; // From the stublinker
+ MODE_ANY;
+ GC_NOTRIGGER;
+ }
+ CONTRACTL_END;
+
+ // Everything should be pointer aligned
+ // but we use a high bit for interior, and the 0 bit to denote the end of the list
+ // we use the 1 bit to denote a range
+ _ASSERTE((delta % sizeof(void*)) == 0);
+
+#if defined(_WIN64)
+ // For 64-bit, we have 3 bits of alignment, so we allow larger frames
+ // by shifting and gaining a free high-bit.
+ ULONG encodedDelta = delta >> 1;
+#else
+ // For 32-bit, we just limit our frame size to <2GB. (I know, such a bummer!)
+ ULONG encodedDelta = delta;
+#endif
+ _ASSERTE((encodedDelta & 0x80000003) == 0);
+ if (last)
+ {
+ encodedDelta |= 1;
+ }
+
+ if (range)
+ {
+ encodedDelta |= 2;
+ }
+ else if (maybeInterior)
+ {
+ _ASSERTE(!range);
+ encodedDelta |= 0x80000000;
+ }
+
+ BYTE bytes[5];
+ UINT index = 5;
+ bytes[--index] = (BYTE)((encodedDelta & 0x7F) | 0x80);
+ encodedDelta >>= 7;
+ while (encodedDelta > 0)
+ {
+ bytes[--index] = (BYTE)(encodedDelta & 0x7F);
+ encodedDelta >>= 7;
+ }
+ pSl->EmitBytes(&bytes[index], 5 - index);
+}
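+
+// Worked example (assuming _WIN64): a slot 0x30 bytes below the previous one
+// encodes delta == 0x28, so encodedDelta == 0x28 >> 1 == 0x14; marking it as
+// the last entry sets bit 0, giving 0x15, emitted as the single terminator
+// byte 0x95 (0x15 | 0x80). The decoder in TailCallFrame::GcScanRoots reverses
+// this: 0x95 & 0x7F == 0x15 -> atEnd; 0x15 & 0x7FFFFFFC == 0x14; shifted back
+// to 0x28; plus sizeof(void*) gives the 0x30 step.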
+
+static void EncodeGCOffsets(CPUSTUBLINKER *pSl, /* const */ ULONGARRAY & gcOffsets)
+{
+ CONTRACTL
+ {
+ THROWS;
+ MODE_ANY;
+ GC_NOTRIGGER;
+ }
+ CONTRACTL_END;
+
+ _ASSERTE(gcOffsets.Count() > 0);
+
+ ULONG prevOffset = 0;
+ int i = 0;
+ BOOL last = FALSE;
+ do {
+ ULONG offset = gcOffsets[i];
+ // Everything should be pointer aligned
+ // but we use the 0-bit to mean maybeInterior, for byrefs.
+ _ASSERTE(((offset % sizeof(void*)) == 0) || ((offset % sizeof(void*)) == 1));
+ BOOL maybeInterior = (offset & 1);
+ offset &= ~1;
+
+ // Encode just deltas because they're smaller (and the list should be sorted)
+ _ASSERTE(offset >= (prevOffset + sizeof(void*)));
+ ULONG delta = offset - (prevOffset + sizeof(void*));
+ if (!maybeInterior && gcOffsets.Count() > i + 2)
+ {
+ // Check for a potential range.
+ // Only do it if we have 3 or more pointers in a row
+ ULONG rangeOffset = offset;
+ int j = i + 1;
+ do {
+ ULONG nextOffset = gcOffsets[j];
+ // interior pointers can't be in ranges
+ if (nextOffset & 1)
+ break;
+ // ranges must be saturated
+ if (nextOffset != (rangeOffset + sizeof(void*)))
+ break;
+ j++;
+ rangeOffset = nextOffset;
+ } while(j < gcOffsets.Count());
+
+ if (j > (i + 2))
+ {
+ EncodeOneGCOffset(pSl, delta, FALSE, TRUE, last);
+ i = j - 1;
+ _ASSERTE(rangeOffset >= (offset + (sizeof(void*) * 2)));
+ delta = rangeOffset - (offset + (sizeof(void*) * 2));
+ offset = rangeOffset;
+ }
+ }
+ last = (++i == gcOffsets.Count());
+
+
+ EncodeOneGCOffset(pSl, delta, maybeInterior, FALSE, last);
+
+ prevOffset = offset;
+ } while (!last);
+}
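+
+// Example: four contiguous slots at {0x10, 0x18, 0x20, 0x28} encode as just
+// two numbers: the first delta (0x10 - 0x8 == 0x8) with the range bit set,
+// then the range-end delta (0x28 - (0x10 + 0x10) == 0x8) with the last bit.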
+
+static void AppendGCLayout(ULONGARRAY &gcLayout, size_t baseOffset, BOOL fIsTypedRef, TypeHandle VMClsHnd)
+{
+ STANDARD_VM_CONTRACT;
+
+ _ASSERTE((baseOffset % 16) == 0);
+ _ASSERTE(FitsInU4(baseOffset));
+
+ if (fIsTypedRef)
+ {
+ *gcLayout.AppendThrowing() = (ULONG)(baseOffset | 1); // "| 1" to mark it as an interior pointer
+ }
+ else if (!VMClsHnd.IsNativeValueType())
+ {
+ MethodTable* pMT = VMClsHnd.GetMethodTable();
+ _ASSERTE(pMT);
+ _ASSERTE(pMT->IsValueType());
+
+ // walk the GC descriptors, reporting the correct offsets
+ if (pMT->ContainsPointers())
+ {
+ // size of instance when unboxed must be adjusted for the syncblock
+ // index and the VTable pointer.
+ DWORD size = pMT->GetBaseSize();
+
+ // we don't include this term in our 'ppstop' calculation below.
+ _ASSERTE(pMT->GetComponentSize() == 0);
+
+ CGCDesc* map = CGCDesc::GetCGCDescFromMT(pMT);
+ CGCDescSeries* cur = map->GetLowestSeries();
+ CGCDescSeries* last = map->GetHighestSeries();
+
+ _ASSERTE(cur <= last);
+ do
+ {
+ // offset to embedded references in this series must be
+ // adjusted by the VTable pointer, when in the unboxed state.
+ size_t adjustOffset = cur->GetSeriesOffset() - sizeof(void *);
+
+ _ASSERTE(baseOffset >= adjustOffset);
+ size_t start = baseOffset - adjustOffset;
+ size_t stop = start - (cur->GetSeriesSize() + size);
+ for (size_t off = stop + sizeof(void*); off <= start; off += sizeof(void*))
+ {
+ _ASSERTE(gcLayout.Count() == 0 || off > gcLayout[gcLayout.Count() - 1]);
+ _ASSERTE(FitsInU4(off));
+ *gcLayout.AppendThrowing() = (ULONG)off;
+ }
+ cur++;
+
+ } while (cur <= last);
+ }
+ }
+}
+
+Stub * StubLinkerCPU::CreateTailCallCopyArgsThunk(CORINFO_SIG_INFO * pSig,
+ CorInfoHelperTailCallSpecialHandling flags)
+{
+ STANDARD_VM_CONTRACT;
+
+ CPUSTUBLINKER sl;
+ CPUSTUBLINKER* pSl = &sl;
+
+ // Generates a function that looks like this:
+ // size_t CopyArguments(va_list args, (RCX)
+ // CONTEXT *pCtx, (RDX)
+ // DWORD64 *pvStack, (R8)
+ // size_t cbStack) (R9)
+ // {
+ // if (pCtx != NULL) {
+ // foreach (arg in args) {
+ // copy into pCtx or pvStack
+ // }
+ // }
+ // return <size of stack needed>;
+ // }
+ //
+
+ CodeLabel *pNullLabel = pSl->NewCodeLabel();
+
+ // test rdx, rdx
+ pSl->X86EmitR2ROp(0x85, kRDX, kRDX);
+
+ // jz NullLabel
+ pSl->X86EmitCondJump(pNullLabel, X86CondCode::kJZ);
+
+ UINT nArgSlot = 0;
+ UINT totalArgs = pSig->totalILArgs() + ((pSig->isVarArg() || pSig->hasTypeArg()) ? 1 : 0);
+ bool fR10Loaded = false;
+ UINT cbArg;
+ static const UINT rgcbArgRegCtxtOffsets[4] = { offsetof(CONTEXT, Rcx), offsetof(CONTEXT, Rdx),
+ offsetof(CONTEXT, R8), offsetof(CONTEXT, R9) };
+ static const UINT rgcbFpArgRegCtxtOffsets[4] = { offsetof(CONTEXT, Xmm0.Low), offsetof(CONTEXT, Xmm1.Low),
+ offsetof(CONTEXT, Xmm2.Low), offsetof(CONTEXT, Xmm3.Low) };
+
+ ULONGARRAY gcLayout;
+
+ // On input to the function R9 contains the size of the buffer
+ // The first time this macro runs, R10 is loaded with the 'top' of the Frame
+ // and R9 is changed to point to the 'top' of the copy buffer.
+ // Then both R9 and R10 are decremented by the size of the struct we're copying
+ // So R10 is the value to put in the argument slot, and R9 is where the data
+ // should be copied to (or zeroed out in the case of the return buffer).
+#define LOAD_STRUCT_OFFSET_IF_NEEDED(cbSize) \
+ { \
+ _ASSERTE(cbSize > 0); \
+ _ASSERTE(FitsInI4(cbSize)); \
+ __int32 offset = (__int32)cbSize; \
+ if (!fR10Loaded) { \
+ /* mov r10, [rdx + offset of RSP] */ \
+ pSl->X86EmitIndexRegLoad(kR10, kRDX, offsetof(CONTEXT, Rsp)); \
+ /* add an extra 8 because RSP is pointing at the return address */ \
+ offset -= 8; \
+ /* add r10, r9 */ \
+ pSl->X86EmitAddRegReg(kR10, kR9); \
+ /* add r9, r8 */ \
+ pSl->X86EmitAddRegReg(kR9, kR8); \
+ fR10Loaded = true; \
+ } \
+ /* sub r10, offset */ \
+ pSl->X86EmitSubReg(kR10, offset); \
+ /* sub r9, cbSize */ \
+ pSl->X86EmitSubReg(kR9, cbSize); \
+ }
+
+
+ if (flags & CORINFO_TAILCALL_STUB_DISPATCH_ARG) {
+ // This is set for stub dispatch
+ // The JIT placed an extra argument in the list that needs to
+ // get shoved into R11, and not counted.
+ // pCtx->R11 = va_arg(args, DWORD64);
+
+ // mov rax, [rcx]
+ pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0);
+ // add rcx, 8
+ pSl->X86EmitAddReg(kRCX, 8);
+ // mov [rdx + offset of R11], rax
+ pSl->X86EmitIndexRegStore(kRDX, offsetof(CONTEXT, R11), kRAX);
+ }
+
+ ULONG cbStructOffset = 0;
+
+ // First comes the 'this' pointer
+ if (pSig->hasThis()) {
+ // mov rax, [rcx]
+ pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0);
+ // add rcx, 8
+ pSl->X86EmitAddReg(kRCX, 8);
+ // mov [rdx + offset of RCX/RDX], rax
+ pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot++], kRAX);
+ }
+
+ // Next the return buffer
+ cbArg = 0;
+ TypeHandle th(pSig->retTypeClass);
+ if ((pSig->retType == CORINFO_TYPE_REFANY) || (pSig->retType == CORINFO_TYPE_VALUECLASS)) {
+ cbArg = th.GetSize();
+ }
+
+ if (ArgIterator::IsArgPassedByRef(cbArg)) {
+ totalArgs++;
+
+ // We always reserve space for the return buffer, and we always zero it out,
+ // so the GC won't complain, but if it's already pointing above the frame,
+ // then we need to pass it in (so it will get passed out).
+ // Otherwise we assume the caller is returning void, so we just pass in
+ // dummy space to be overwritten.
+ UINT cbUsed = (cbArg + 0xF) & ~0xF;
+ LOAD_STRUCT_OFFSET_IF_NEEDED(cbUsed);
+ // now emit a 'memset(r9, 0, cbUsed)'
+ {
+ // xorps xmm0, xmm0
+ pSl->X86EmitR2ROp(X86_INSTR_XORPS, kXMM0, kXMM0);
+ if (cbUsed <= 4 * 16) {
+ // movaps [r9], xmm0
+ pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 0);
+ if (16 < cbUsed) {
+ // movaps [r9 + 16], xmm0
+ pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 16);
+ if (32 < cbUsed) {
+ // movaps [r9 + 32], xmm0
+ pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 32);
+ if (48 < cbUsed) {
+ // movaps [r9 + 48], xmm0
+ pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 48);
+ }
+ }
+ }
+ }
+ else {
+ // a loop (one double-quadword at a time)
+ pSl->X86EmitZeroOutReg(kR11);
+ // LoopLabel:
+ CodeLabel *pLoopLabel = pSl->NewCodeLabel();
+ pSl->EmitLabel(pLoopLabel);
+ // movaps [r9 + r11], xmm0
+ pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 0, kR11, 1);
+ // add r11, 16
+ pSl->X86EmitAddReg(kR11, 16);
+ // cmp r11, cbUsed
+ pSl->X86EmitCmpRegImm32(kR11, cbUsed);
+ // jl LoopLabel
+ pSl->X86EmitCondJump(pLoopLabel, X86CondCode::kJL);
+ }
+ }
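+        // For illustration (hypothetical cbUsed == 48), the zeroing code
+        // emitted above decodes as:
+        //     xorps   xmm0, xmm0
+        //     movaps  [r9], xmm0
+        //     movaps  [r9 + 16], xmm0
+        //     movaps  [r9 + 32], xmm0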
+ cbStructOffset += cbUsed;
+ AppendGCLayout(gcLayout, cbStructOffset, pSig->retType == CORINFO_TYPE_REFANY, th);
+
+ // mov rax, [rcx]
+ pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0);
+ // add rcx, 8
+ pSl->X86EmitAddReg(kRCX, 8);
+ // cmp rax, [rdx + offset of R12]
+ pSl->X86EmitOffsetModRM(0x3B, kRAX, kRDX, offsetof(CONTEXT, R12));
+
+ CodeLabel *pSkipLabel = pSl->NewCodeLabel();
+ // jnb SkipLabel
+ pSl->X86EmitCondJump(pSkipLabel, X86CondCode::kJNB);
+
+ // Also check the lower bound of the stack in case the return buffer is on the GC heap
+ // and the GC heap is below the stack
+ // cmp rax, rsp
+ pSl->X86EmitR2ROp(0x3B, kRAX, (X86Reg)4 /*kRSP*/);
+        // jb SkipLabel
+        pSl->X86EmitCondJump(pSkipLabel, X86CondCode::kJB);
+ // mov rax, r10
+ pSl->X86EmitMovRegReg(kRAX, kR10);
+ // SkipLabel:
+ pSl->EmitLabel(pSkipLabel);
+ // mov [rdx + offset of RCX], rax
+ pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot++], kRAX);
+ }
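+
+    // Net effect of the two branches above: the caller's return-buffer pointer
+    // (rax) is kept only if it lies outside the region being unwound - at or
+    // above the boundary saved in the context's R12, or strictly below RSP -
+    // otherwise it is redirected to the freshly zeroed slot at r10.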
+
+ // VarArgs Cookie *or* Generics Instantiation Parameter
+ if (pSig->hasTypeArg() || pSig->isVarArg()) {
+ // mov rax, [rcx]
+ pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0);
+ // add rcx, 8
+ pSl->X86EmitAddReg(kRCX, 8);
+ // mov [rdx + offset of RCX/RDX], rax
+ pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot++], kRAX);
+ }
+
+ _ASSERTE(nArgSlot <= 4);
+
+ // Now for *all* the 'real' arguments
+ SigPointer ptr((PCCOR_SIGNATURE)pSig->args);
+ Module * module = GetModule(pSig->scope);
+ Instantiation classInst((TypeHandle*)pSig->sigInst.classInst, pSig->sigInst.classInstCount);
+ Instantiation methodInst((TypeHandle*)pSig->sigInst.methInst, pSig->sigInst.methInstCount);
+ SigTypeContext typeCtxt(classInst, methodInst);
+
+    for (; nArgSlot < totalArgs; ptr.SkipExactlyOne()) {
+ CorElementType et = ptr.PeekElemTypeNormalized(module, &typeCtxt);
+ if (et == ELEMENT_TYPE_SENTINEL)
+ continue;
+
+ // mov rax, [rcx]
+ pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0);
+ // add rcx, 8
+ pSl->X86EmitAddReg(kRCX, 8);
+ switch (et) {
+ case ELEMENT_TYPE_INTERNAL:
+ // TODO
+ _ASSERTE(!"Shouldn't see ELEMENT_TYPE_INTERNAL");
+ break;
+ case ELEMENT_TYPE_TYPEDBYREF:
+ case ELEMENT_TYPE_VALUETYPE:
+ th = ptr.GetTypeHandleThrowing(module, &typeCtxt, ClassLoader::LoadTypes, CLASS_LOAD_UNRESTOREDTYPEKEY);
+ _ASSERTE(!th.IsNull());
+ g_IBCLogger.LogEEClassAndMethodTableAccess(th.GetMethodTable());
+ cbArg = (UINT)th.GetSize();
+ if (ArgIterator::IsArgPassedByRef(cbArg)) {
+ UINT cbUsed = (cbArg + 0xF) & ~0xF;
+ LOAD_STRUCT_OFFSET_IF_NEEDED(cbUsed);
+ // rax has the source pointer
+ // r9 has the intermediate copy location
+ // r10 has the final destination
+ if (nArgSlot < 4) {
+ pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot++], kR10);
+ }
+ else {
+ pSl->X86EmitIndexRegStore(kR8, 8 * nArgSlot++, kR10);
+ }
+                // now emit a 'memcpy(r9, rax, cbUsed)'
+                // These structs are supposed to be 16-byte aligned, but
+                // Reflection can place them on the GC heap, which is only
+                // 8-byte aligned, so we must also be careful not to copy
+                // too much (the extra bytes might cross a page boundary)
+ UINT cbUsed16 = (cbArg + 7) & ~0xF;
+ _ASSERTE((cbUsed16 == cbUsed) || ((cbUsed16 + 16) == cbUsed));
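+                    // Worked example: cbArg == 24 gives cbUsed == 32 but
+                    // cbUsed16 == 16, so one aligned 16-byte chunk is copied
+                    // and the trailing 8 bytes go through the movlps pair.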
+
+ if (cbArg <= 192) {
+ // Unrolled version (6 x 16 bytes in parallel)
+ UINT offset = 0;
+ while (offset < cbUsed16) {
+ // movups xmm0, [rax + offset]
+ pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM0, kRAX, offset);
+ if (offset + 16 < cbUsed16) {
+ // movups xmm1, [rax + offset + 16]
+ pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM1, kRAX, offset + 16);
+ if (offset + 32 < cbUsed16) {
+ // movups xmm2, [rax + offset + 32]
+ pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM2, kRAX, offset + 32);
+ if (offset + 48 < cbUsed16) {
+ // movups xmm3, [rax + offset + 48]
+ pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM3, kRAX, offset + 48);
+ if (offset + 64 < cbUsed16) {
+ // movups xmm4, [rax + offset + 64]
+ pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM4, kRAX, offset + 64);
+ if (offset + 80 < cbUsed16) {
+ // movups xmm5, [rax + offset + 80]
+ pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM5, kRAX, offset + 80);
+ }
+ }
+ }
+ }
+ }
+ // movaps [r9 + offset], xmm0
+ pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, offset);
+ offset += 16;
+ if (offset < cbUsed16) {
+                            // movaps [r9 + offset], xmm1
+ pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM1, kR9, offset);
+ offset += 16;
+ if (offset < cbUsed16) {
+                                // movaps [r9 + offset], xmm2
+ pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM2, kR9, offset);
+ offset += 16;
+ if (offset < cbUsed16) {
+                                    // movaps [r9 + offset], xmm3
+ pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM3, kR9, offset);
+ offset += 16;
+ if (offset < cbUsed16) {
+                                        // movaps [r9 + offset], xmm4
+ pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM4, kR9, offset);
+ offset += 16;
+ if (offset < cbUsed16) {
+                                            // movaps [r9 + offset], xmm5
+ pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM5, kR9, offset);
+ offset += 16;
+ }
+ }
+ }
+ }
+ }
+ }
+ // Copy the last 8 bytes if needed
+ if (cbUsed > cbUsed16) {
+ _ASSERTE(cbUsed16 < cbArg);
+ // movlps xmm0, [rax + offset]
+ pSl->X86EmitOp(X86_INSTR_MOVLPS_R_RM, kXMM0, kRAX, offset);
+ // movlps [r9 + offset], xmm0
+ pSl->X86EmitOp(X86_INSTR_MOVLPS_RM_R, kXMM0, kR9, offset);
+ }
+ }
+ else {
+ // a loop (one double-quadword at a time)
+ pSl->X86EmitZeroOutReg(kR11);
+ // LoopLabel:
+ CodeLabel *pLoopLabel = pSl->NewCodeLabel();
+ pSl->EmitLabel(pLoopLabel);
+ // movups xmm0, [rax + r11]
+ pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM0, kRAX, 0, kR11, 1);
+ // movaps [r9 + r11], xmm0
+ pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 0, kR11, 1);
+ // add r11, 16
+ pSl->X86EmitAddReg(kR11, 16);
+ // cmp r11, cbUsed16
+ pSl->X86EmitCmpRegImm32(kR11, cbUsed16);
+ // jl LoopLabel
+ pSl->X86EmitCondJump(pLoopLabel, X86CondCode::kJL);
+ if (cbArg > cbUsed16) {
+ _ASSERTE(cbUsed16 + 8 >= cbArg);
+ // movlps xmm0, [rax + r11]
+ pSl->X86EmitOp(X86_INSTR_MOVLPS_R_RM, kXMM0, kRAX, 0, kR11, 1);
+ // movlps [r9 + r11], xmm0
+ pSl->X86EmitOp(X86_INSTR_MOVLPS_RM_R, kXMM0, kR9, 0, kR11, 1);
+ }
+ }
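+                // Schematically, the loop branch above emits:
+                //     xor     r11, r11
+                // Loop:
+                //     movups  xmm0, [rax + r11]
+                //     movaps  [r9 + r11], xmm0
+                //     add     r11, 16
+                //     cmp     r11, cbUsed16
+                //     jl      Loop
+                // plus one movlps load/store pair for any trailing 8 bytes.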
+ cbStructOffset += cbUsed;
+ AppendGCLayout(gcLayout, cbStructOffset, et == ELEMENT_TYPE_TYPEDBYREF, th);
+ break;
+ }
+
+ //
+ // Explicit Fall-Through for non-IsArgPassedByRef
+ //
+
+ default:
+ if (nArgSlot < 4) {
+ pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot], kRAX);
+ if ((et == ELEMENT_TYPE_R4) || (et == ELEMENT_TYPE_R8)) {
+ pSl->X86EmitIndexRegStore(kRDX, rgcbFpArgRegCtxtOffsets[nArgSlot], kRAX);
+ }
+ }
+ else {
+ pSl->X86EmitIndexRegStore(kR8, 8 * nArgSlot, kRAX);
+ }
+ nArgSlot++;
+ break;
+ }
+ }
+
+#undef LOAD_STRUCT_OFFSET_IF_NEEDED
+
+    // Keep the 4 shadow slots and round up to an even slot count (to stay 16-byte aligned)
+ if (nArgSlot < 4)
+ nArgSlot = 4;
+ else if (nArgSlot & 1)
+ nArgSlot++;
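+
+    // Example: five argument slots round up to six (48 bytes); together with
+    // the 16-byte-multiple struct area this keeps the stack 16-byte aligned.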
+
+ _ASSERTE((cbStructOffset % 16) == 0);
+
+ // xor eax, eax
+ pSl->X86EmitZeroOutReg(kRAX);
+ // ret
+ pSl->X86EmitReturn(0);
+
+ // NullLabel:
+ pSl->EmitLabel(pNullLabel);
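+
+    // This path, reached via pNullLabel, does no copying: it stores the GC
+    // layout pointer (or NULL when gcLayout is empty) through r9 and returns
+    // the total stack space the tail call will need in rax.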
+
+ CodeLabel *pGCLayoutLabel = NULL;
+ if (gcLayout.Count() == 0) {
+ // xor eax, eax
+ pSl->X86EmitZeroOutReg(kRAX);
+ }
+ else {
+ // lea rax, [rip + offset to gclayout]
+ pGCLayoutLabel = pSl->NewCodeLabel();
+ pSl->X86EmitLeaRIP(pGCLayoutLabel, kRAX);
+ }
+ // mov [r9], rax
+ pSl->X86EmitIndexRegStore(kR9, 0, kRAX);
+ // mov rax, cbStackNeeded
+ pSl->X86EmitRegLoad(kRAX, cbStructOffset + nArgSlot * 8);
+ // ret
+ pSl->X86EmitReturn(0);
+
+ if (gcLayout.Count() > 0) {
+ // GCLayout:
+ pSl->EmitLabel(pGCLayoutLabel);
+ EncodeGCOffsets(pSl, gcLayout);
+ }
+
+ return pSl->Link();
+}
+#endif // DACCESS_COMPILE
+
+#endif // _TARGET_AMD64_
+
+
+#ifdef HAS_FIXUP_PRECODE
+
+#ifdef HAS_FIXUP_PRECODE_CHUNKS
+TADDR FixupPrecode::GetMethodDesc()
+{
+ LIMITED_METHOD_CONTRACT;
+ SUPPORTS_DAC;
+
+ // This lookup is also manually inlined in PrecodeFixupThunk assembly code
+ TADDR base = *PTR_TADDR(GetBase());
+ if (base == NULL)
+ return NULL;
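+
+    // For example, assuming MethodDesc::ALIGNMENT == 8 (as on 64-bit targets)
+    // and m_MethodDescChunkIndex == 3, the computation below yields base + 24.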
+ return base + (m_MethodDescChunkIndex * MethodDesc::ALIGNMENT);
+}
+#endif
+
+#ifdef DACCESS_COMPILE
+void FixupPrecode::EnumMemoryRegions(CLRDataEnumMemoryFlags flags)
+{
+ SUPPORTS_DAC;
+ DacEnumMemoryRegion(dac_cast<TADDR>(this), sizeof(FixupPrecode));
+
+ DacEnumMemoryRegion(GetBase(), sizeof(TADDR));
+}
+#endif // DACCESS_COMPILE
+
+#endif // HAS_FIXUP_PRECODE
+
+#ifndef DACCESS_COMPILE
+
+BOOL rel32SetInterlocked(/*PINT32*/ PVOID pRel32, TADDR target, TADDR expected, MethodDesc* pMD)
+{
+ CONTRACTL
+ {
+ THROWS; // Creating a JumpStub could throw OutOfMemory
+ GC_TRIGGERS;
+ }
+ CONTRACTL_END;
+
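+    // rel32 displacements are relative to the byte immediately following the
+    // 4-byte field, hence the '+ 4' adjustment below.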
+ BYTE* callAddrAdj = (BYTE*)pRel32 + 4;
+ INT32 expectedRel32 = static_cast<INT32>((BYTE*)expected - callAddrAdj);
+
+ INT32 targetRel32 = rel32UsingJumpStub((INT32*)pRel32, target, pMD);
+
+ _ASSERTE(IS_ALIGNED(pRel32, sizeof(INT32)));
+ return FastInterlockCompareExchange((LONG*)pRel32, (LONG)targetRel32, (LONG)expectedRel32) == (LONG)expectedRel32;
+}
+
+void StubPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator /* = NULL */,
+ BYTE type /* = StubPrecode::Type */, TADDR target /* = NULL */)
+{
+ WRAPPER_NO_CONTRACT;
+
+ IN_WIN64(m_movR10 = X86_INSTR_MOV_R10_IMM64); // mov r10, pMethodDesc
+ IN_WIN32(m_movEAX = X86_INSTR_MOV_EAX_IMM32); // mov eax, pMethodDesc
+ m_pMethodDesc = (TADDR)pMD;
+ IN_WIN32(m_mov_rm_r = X86_INSTR_MOV_RM_R); // mov reg,reg
+ m_type = type;
+ m_jmp = X86_INSTR_JMP_REL32; // jmp rel32
+
+ if (pLoaderAllocator != NULL)
+ {
+        // Use pMD == NULL in all precode initialization methods to allocate the initial jump stub in a non-dynamic heap
+        // that has the same lifetime as the precode itself
+ if (target == NULL)
+ target = GetPreStubEntryPoint();
+ m_rel32 = rel32UsingJumpStub(&m_rel32, target, NULL /* pMD */, pLoaderAllocator);
+ }
+}
+
+#ifdef HAS_NDIRECT_IMPORT_PRECODE
+
+void NDirectImportPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator)
+{
+ WRAPPER_NO_CONTRACT;
+ StubPrecode::Init(pMD, pLoaderAllocator, NDirectImportPrecode::Type, GetEEFuncEntryPoint(NDirectImportThunk));
+}
+
+#endif // HAS_NDIRECT_IMPORT_PRECODE
+
+
+#ifdef HAS_REMOTING_PRECODE
+
+void RemotingPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator /* = NULL */)
+{
+ WRAPPER_NO_CONTRACT;
+
+ IN_WIN64(m_movR10 = X86_INSTR_MOV_R10_IMM64); // mov r10, pMethodDesc
+ IN_WIN32(m_movEAX = X86_INSTR_MOV_EAX_IMM32); // mov eax, pMethodDesc
+ m_pMethodDesc = (TADDR)pMD;
+ m_type = PRECODE_REMOTING; // nop
+ m_call = X86_INSTR_CALL_REL32;
+ m_jmp = X86_INSTR_JMP_REL32; // jmp rel32
+
+ if (pLoaderAllocator != NULL)
+ {
+ m_callRel32 = rel32UsingJumpStub(&m_callRel32,
+ GetEEFuncEntryPoint(PrecodeRemotingThunk), NULL /* pMD */, pLoaderAllocator);
+ m_rel32 = rel32UsingJumpStub(&m_rel32,
+ GetPreStubEntryPoint(), NULL /* pMD */, pLoaderAllocator);
+ }
+}
+
+#endif // HAS_REMOTING_PRECODE
+
+
+#ifdef HAS_FIXUP_PRECODE
+void FixupPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator, int iMethodDescChunkIndex /*=0*/, int iPrecodeChunkIndex /*=0*/)
+{
+ WRAPPER_NO_CONTRACT;
+
+ m_op = X86_INSTR_CALL_REL32; // call PrecodeFixupThunk
+ m_type = FixupPrecode::TypePrestub;
+
+ // Initialize chunk indices only if they are not initialized yet. This is necessary to make MethodDesc::Reset work.
+ if (m_PrecodeChunkIndex == 0)
+ {
+ _ASSERTE(FitsInU1(iPrecodeChunkIndex));
+ m_PrecodeChunkIndex = static_cast<BYTE>(iPrecodeChunkIndex);
+ }
+
+ if (iMethodDescChunkIndex != -1)
+ {
+ if (m_MethodDescChunkIndex == 0)
+ {
+ _ASSERTE(FitsInU1(iMethodDescChunkIndex));
+ m_MethodDescChunkIndex = static_cast<BYTE>(iMethodDescChunkIndex);
+ }
+
+ if (*(void**)GetBase() == NULL)
+ *(void**)GetBase() = (BYTE*)pMD - (iMethodDescChunkIndex * MethodDesc::ALIGNMENT);
+ }
+
+ _ASSERTE(GetMethodDesc() == (TADDR)pMD);
+
+ if (pLoaderAllocator != NULL)
+ {
+ m_rel32 = rel32UsingJumpStub(&m_rel32,
+ GetEEFuncEntryPoint(PrecodeFixupThunk), NULL /* pMD */, pLoaderAllocator);
+ }
+}
+
+BOOL FixupPrecode::SetTargetInterlocked(TADDR target, TADDR expected)
+{
+ CONTRACTL
+ {
+ THROWS; // Creating a JumpStub could throw OutOfMemory
+ GC_TRIGGERS;
+ }
+ CONTRACTL_END;
+
+ INT64 oldValue = *(INT64*)this;
+ BYTE* pOldValue = (BYTE*)&oldValue;
+
+ if (pOldValue[OFFSETOF_PRECODE_TYPE_CALL_OR_JMP] != FixupPrecode::TypePrestub)
+ return FALSE;
+
+ MethodDesc * pMD = (MethodDesc*)GetMethodDesc();
+ g_IBCLogger.LogMethodPrecodeWriteAccess(pMD);
+
+ INT64 newValue = oldValue;
+ BYTE* pNewValue = (BYTE*)&newValue;
+
+ pNewValue[OFFSETOF_PRECODE_TYPE_CALL_OR_JMP] = FixupPrecode::Type;
+
+ pOldValue[offsetof(FixupPrecode,m_op)] = X86_INSTR_CALL_REL32;
+ pNewValue[offsetof(FixupPrecode,m_op)] = X86_INSTR_JMP_REL32;
+
+ *(INT32*)(&pNewValue[offsetof(FixupPrecode,m_rel32)]) = rel32UsingJumpStub(&m_rel32, target, pMD);
+
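+    // A single 64-bit compare-exchange atomically flips the opcode from
+    // call rel32 to jmp rel32, updates the type byte, and installs the new
+    // displacement.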
+ _ASSERTE(IS_ALIGNED(this, sizeof(INT64)));
+ EnsureWritableExecutablePages(this, sizeof(INT64));
+ return FastInterlockCompareExchangeLong((INT64*) this, newValue, oldValue) == oldValue;
+}
+
+#ifdef FEATURE_NATIVE_IMAGE_GENERATION
+// Partial initialization. Used to save regrouped chunks.
+void FixupPrecode::InitForSave(int iPrecodeChunkIndex)
+{
+ m_op = X86_INSTR_CALL_REL32; // call PrecodeFixupThunk
+ m_type = FixupPrecode::TypePrestub;
+
+ _ASSERTE(FitsInU1(iPrecodeChunkIndex));
+ m_PrecodeChunkIndex = static_cast<BYTE>(iPrecodeChunkIndex);
+
+ // The rest is initialized in code:FixupPrecode::Fixup
+}
+
+void FixupPrecode::Fixup(DataImage *image, MethodDesc * pMD)
+{
+ STANDARD_VM_CONTRACT;
+
+ // Note that GetMethodDesc() does not return the correct value because of
+ // regrouping of MethodDescs into hot and cold blocks. That's why the caller
+ // has to supply the actual MethodDesc
+
+ SSIZE_T mdChunkOffset;
+ ZapNode * pMDChunkNode = image->GetNodeForStructure(pMD, &mdChunkOffset);
+ ZapNode * pHelperThunk = image->GetHelperThunk(CORINFO_HELP_EE_PRECODE_FIXUP);
+
+ image->FixupFieldToNode(this, offsetof(FixupPrecode, m_rel32),
+ pHelperThunk, 0, IMAGE_REL_BASED_REL32);
+
+ // Set the actual chunk index
+ FixupPrecode * pNewPrecode = (FixupPrecode *)image->GetImagePointer(this);
+
+    // mdChunkOffset includes the MethodDescChunk header; strip it to get the
+    // offset from the first MethodDesc in the chunk, then convert the byte
+    // offset into an ALIGNMENT-unit index
+    size_t mdOffset = mdChunkOffset - sizeof(MethodDescChunk);
+    size_t chunkIndex = mdOffset / MethodDesc::ALIGNMENT;
+ _ASSERTE(FitsInU1(chunkIndex));
+ pNewPrecode->m_MethodDescChunkIndex = (BYTE) chunkIndex;
+
+ // Fixup the base of MethodDescChunk
+ if (m_PrecodeChunkIndex == 0)
+ {
+ image->FixupFieldToNode(this, (BYTE *)GetBase() - (BYTE *)this,
+ pMDChunkNode, sizeof(MethodDescChunk));
+ }
+}
+#endif // FEATURE_NATIVE_IMAGE_GENERATION
+
+#endif // HAS_FIXUP_PRECODE
+
+#endif // !DACCESS_COMPILE
+
+
+#ifdef HAS_THISPTR_RETBUF_PRECODE
+
+// rel32 jmp target that points back to the jump (infinite loop).
+// Used to mark uninitialized ThisPtrRetBufPrecode target
+#define REL32_JMP_SELF (-5)
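+// (A jmp rel32 instruction is 5 bytes and its displacement is relative to the
+// next instruction, so a displacement of -5 branches back to the jmp itself.)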
+
+#ifndef DACCESS_COMPILE
+void ThisPtrRetBufPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator)
+{
+ WRAPPER_NO_CONTRACT;
+
+ IN_WIN64(m_nop1 = X86_INSTR_NOP;) // nop
+#ifdef UNIX_AMD64_ABI
+ m_prefix1 = 0x48;
+ m_movScratchArg0 = 0xC78B; // mov rax,rdi
+ m_prefix2 = 0x48;
+ m_movArg0Arg1 = 0xFE8B; // mov rdi,rsi
+ m_prefix3 = 0x48;
+ m_movArg1Scratch = 0xF08B; // mov rsi,rax
+#else
+ IN_WIN64(m_prefix1 = 0x48;)
+ m_movScratchArg0 = 0xC889; // mov r/eax,r/ecx
+ IN_WIN64(m_prefix2 = 0x48;)
+ m_movArg0Arg1 = 0xD189; // mov r/ecx,r/edx
+ IN_WIN64(m_prefix3 = 0x48;)
+ m_movArg1Scratch = 0xC289; // mov r/edx,r/eax
+#endif
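+
+    // The three moves above exchange the first two argument registers through
+    // the scratch register (scratch = arg0; arg0 = arg1; arg1 = scratch),
+    // swapping the 'this' pointer with the return-buffer argument.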
+ m_nop2 = X86_INSTR_NOP; // nop
+ m_jmp = X86_INSTR_JMP_REL32; // jmp rel32
+ m_pMethodDesc = (TADDR)pMD;
+
+ // This precode is never patched lazily - avoid unnecessary jump stub allocation
+ m_rel32 = REL32_JMP_SELF;
+}
+
+BOOL ThisPtrRetBufPrecode::SetTargetInterlocked(TADDR target, TADDR expected)
+{
+ CONTRACTL
+ {
+ THROWS;
+ GC_TRIGGERS;
+ }
+ CONTRACTL_END;
+
+    // This precode is never patched lazily - interlocked semantics are not required.
+ _ASSERTE(m_rel32 == REL32_JMP_SELF);
+
+    // Use pMD == NULL to allocate the jump stub in a non-dynamic heap that has the same lifetime as the precode itself
+ m_rel32 = rel32UsingJumpStub(&m_rel32, target, NULL /* pMD */, ((MethodDesc *)GetMethodDesc())->GetLoaderAllocatorForCode());
+
+ return TRUE;
+}
+#endif // !DACCESS_COMPILE
+
+PCODE ThisPtrRetBufPrecode::GetTarget()
+{
+ LIMITED_METHOD_DAC_CONTRACT;
+
+    // This precode is never patched lazily - pretend that the uninitialized m_rel32 points to the prestub
+ if (m_rel32 == REL32_JMP_SELF)
+ return GetPreStubEntryPoint();
+
+ return rel32Decode(PTR_HOST_MEMBER_TADDR(ThisPtrRetBufPrecode, this, m_rel32));
+}
+
+#endif // HAS_THISPTR_RETBUF_PRECODE