Diffstat (limited to 'src/vm/amd64/virtualcallstubcpu.hpp')
-rw-r--r--   src/vm/amd64/virtualcallstubcpu.hpp   790
1 file changed, 790 insertions, 0 deletions
diff --git a/src/vm/amd64/virtualcallstubcpu.hpp b/src/vm/amd64/virtualcallstubcpu.hpp
new file mode 100644
index 0000000000..ee2e2ca719
--- /dev/null
+++ b/src/vm/amd64/virtualcallstubcpu.hpp
@@ -0,0 +1,790 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+// File: AMD64/VirtualCallStubCpu.hpp
+//
+
+
+
+//
+
+// See code:VirtualCallStubManager for details
+//
+// ============================================================================
+
+#ifndef _VIRTUAL_CALL_STUB_AMD64_H
+#define _VIRTUAL_CALL_STUB_AMD64_H
+
+#include "dbginterface.h"
+
+//#define STUB_LOGGING
+
+#pragma pack(push, 1)
+// since we are placing code, we want byte packing of the structs
+
+#define USES_LOOKUP_STUBS 1
+
+/*********************************************************************************************
+Stubs that contain code are all part of larger structs called Holders. There is a
+Holder for each kind of stub, i.e. XXXStub is contained within XXXHolder. Holders are
+essentially an implementation trick that allowed rearranging the code sequences more
+easily while trying out different alternatives, and for dealing with any alignment
+issues in a way that was mostly immune to the actual code sequences. These Holders
+should be revisited when the stub code sequences are fixed, since in many cases they
+add extra space to a stub that is not really needed.
+
+Stubs are placed in cache and hash tables. Since unaligned access of data in memory
+is very slow, the keys used in those tables should be aligned. The things used as keys
+typically also occur in the generated code, e.g. a token as an immediate part of an instruction.
+For now, to avoid alignment computations as different code strategies are tried out, the key
+fields are all in the Holders. Eventually, many of these fields should be dropped, and the instruction
+streams aligned so that the immediate fields fall on aligned boundaries.
+*/
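+
+/* As a consequence of this layout, a Holder can be recovered from a stub entry point with
+simple offset arithmetic. A minimal sketch, mirroring the FromXXXEntry helpers defined
+further down in this file:
+
+    LookupHolder * pHolder = (LookupHolder *) ( lookupEntry
+                                                - offsetof(LookupHolder, _stub)
+                                                - offsetof(LookupStub, _entryPoint) );
+*/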
+
+#if USES_LOOKUP_STUBS
+
+struct LookupStub;
+struct LookupHolder;
+
+/*LookupStub**************************************************************************************
+Virtual and interface call sites are initially set up to point at LookupStubs.
+This is because the runtime type of the <this> pointer is not yet known,
+so the target cannot be resolved. Note: if the jit is able to determine the runtime type
+of the <this> pointer, it should be generating a direct call, not a virtual or interface call.
+This stub pushes a lookup token onto the stack to identify the sought-after method, and then
+jumps into the EE (VirtualCallStubManager::ResolveWorkerStub) to effectuate the lookup and
+transfer of control to the appropriate target method implementation, perhaps patching the call site
+along the way to point to a more appropriate stub. Hence callsites that point to LookupStubs
+get quickly changed to point to another kind of stub.
+*/
+struct LookupStub
+{
+ inline PCODE entryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; }
+
+ inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; }
+ inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(LookupStub); }
+
+private:
+ friend struct LookupHolder;
+
+ // The lookup entry point starts with a nop in order to allow us to quickly see
+ // if the stub is a lookup stub or a dispatch stub. We can read the first byte
+ // of a stub to find out what kind of stub we have.
+
+ BYTE _entryPoint [3]; // 90 nop
+ // 48 B8 mov rax,
+ size_t _token; // xx xx xx xx xx xx xx xx 64-bit address
+ BYTE part2 [3]; // 50 push rax
+ // 48 B8 mov rax,
+ size_t _resolveWorkerAddr; // xx xx xx xx xx xx xx xx 64-bit address
+ BYTE part3 [2]; // FF E0 jmp rax
+};
+
+/* LookupHolders are the containers for LookupStubs; they provide for any alignment of
+stubs as necessary. In the case of LookupStubs, alignment is necessary since
+LookupStubs are placed in a hash table keyed by token. */
+struct LookupHolder
+{
+ static void InitializeStatic();
+
+ void Initialize(PCODE resolveWorkerTarget, size_t dispatchToken);
+
+ LookupStub* stub() { LIMITED_METHOD_CONTRACT; return &_stub; }
+
+ static LookupHolder* FromLookupEntry(PCODE lookupEntry);
+
+private:
+ friend struct LookupStub;
+
+ LookupStub _stub;
+};
+
+#endif // USES_LOOKUP_STUBS
+
+struct DispatchStub;
+struct DispatchStubShort;
+struct DispatchStubLong;
+struct DispatchHolder;
+
+/*DispatchStub**************************************************************************************
+The structure of a full dispatch stub in memory is a DispatchStub followed contiguously in memory
+by either a DispatchStubShort or a DispatchStubLong. DispatchStubShort is used when the resolve
+stub (failTarget()) is reachable by a rel32 (DISPL) jump. We make a pretty good effort to make sure
+that the stub heaps are set up so that this is the case. If we allocate enough stubs that the heap
+ends up allocating a new block that is further away than a DISPL jump can reach, then we end up using
+a DispatchStubLong, which is bigger but uses a full 64-bit jump. */
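+
+/* The short-vs-long decision is essentially a signed 32-bit reachability test. A rough sketch
+(the real test is DispatchHolder::CanShortJumpDispatchStubReachFailTarget further down; pFrom
+below stands for the address of the first byte after the rel32 displacement):
+
+    size_t cbRelJump = failTarget - (PCODE)pFrom;
+    BOOL   fUseShort = FitsInI4(cbRelJump);      // rel32 reachable => DispatchStubShort
+*/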
+
+/*DispatchStubShort*********************************************************************************
+This is the logical continuation of DispatchStub for the case when the failure target is within
+a rel32 jump (DISPL). */
+struct DispatchStubShort
+{
+ friend struct DispatchHolder;
+ friend struct DispatchStub;
+
+ static BOOL isShortStub(LPCBYTE pCode);
+ inline PCODE implTarget() const { LIMITED_METHOD_CONTRACT; return (PCODE) _implTarget; }
+ inline PCODE failTarget() const { LIMITED_METHOD_CONTRACT; return (PCODE) &_failDispl + sizeof(DISPL) + _failDispl; }
+
+private:
+ BYTE part1 [2]; // 0f 85 jne
+ DISPL _failDispl; // xx xx xx xx failEntry ;must be forward jmp for perf reasons
+ BYTE part2 [2]; // 48 B8 mov rax,
+ size_t _implTarget; // xx xx xx xx xx xx xx xx 64-bit address
+ BYTE part3 [2]; // FF E0 jmp rax
+
+ // 31 bytes long, need 1 byte of padding to 8-byte align.
+ BYTE alignPad [1]; // cc
+};
+
+inline BOOL DispatchStubShort::isShortStub(LPCBYTE pCode)
+{
+ LIMITED_METHOD_CONTRACT;
+ return reinterpret_cast<DispatchStubShort const *>(pCode)->part1[0] == 0x0f;
+}
+
+
+/*DispatchStubLong**********************************************************************************
+This is the logical continuation of DispatchStub for the case when the failure target is not
+reachable by a rel32 jump (DISPL). */
+struct DispatchStubLong
+{
+ friend struct DispatchHolder;
+ friend struct DispatchStub;
+
+ static inline BOOL isLongStub(LPCBYTE pCode);
+ inline PCODE implTarget() const { LIMITED_METHOD_CONTRACT; return (PCODE) _implTarget; }
+ inline PCODE failTarget() const { LIMITED_METHOD_CONTRACT; return (PCODE) _failTarget; }
+
+private:
+ BYTE part1 [1]; // 75 jne
+ BYTE _failDispl; // xx failLabel
+ BYTE part2 [2]; // 48 B8 mov rax,
+ size_t _implTarget; // xx xx xx xx xx xx xx xx 64-bit address
+ BYTE part3 [2]; // FF E0 jmp rax
+ // failLabel:
+ BYTE part4 [2]; // 48 B8 mov rax,
+ size_t _failTarget; // xx xx xx xx xx xx xx xx 64-bit address
+ BYTE part5 [2]; // FF E0 jmp rax
+
+ // 39 bytes long, need 1 byte of padding to 8-byte align.
+ BYTE alignPad [1]; // cc
+};
+
+inline BOOL DispatchStubLong::isLongStub(LPCBYTE pCode)
+{
+ LIMITED_METHOD_CONTRACT;
+ return reinterpret_cast<DispatchStubLong const *>(pCode)->part1[0] == 0x75;
+}
+
+/*DispatchStub**************************************************************************************
+Monomorphic and mostly monomorphic call sites eventually point to DispatchStubs.
+A dispatch stub has an expected type (expectedMT), target address (target) and fail address (failure).
+If the <this> object in the calling frame is in fact of the expected type, then
+control is transferred to the target address, the method implementation. If not,
+then control is transferred to the fail address, a fail stub (see below) where a polymorphic
+lookup is done to find the correct address to go to.
+
+implementation note: Order, choice of instructions, and branch directions
+should be carefully tuned since they can have an inordinate effect on performance. Particular
+attention needs to be paid to the effects on the BTB and branch prediction, both in the small
+and in the large, i.e. it needs to run well in the face of BTB overflow--using static predictions.
+Note that since this stub is only used for mostly monomorphic callsites (ones that are not mostly
+monomorphic get patched to something else), the conditional jump "jne failure" is mostly not taken,
+and hence it is important that branch prediction statically predicts this, which means it must be a
+forward jump. The alternative is to reverse the order of the jumps and make sure that the resulting
+conditional jump "je implTarget" is statically predicted as taken, i.e. a backward jump. The current
+choice was made since it was easier to control the placement of the stubs than to control the
+placement of the jitted code and the stubs. */
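+
+/* Logically, the dispatch stub described by the structs below reduces to (a sketch; register
+and byte details are in the field comments):
+
+    if (*(size_t*)pThis == _expectedMT)     // cmp [THIS_REG], rax
+        goto implTarget;                    // fall through to the mov/jmp
+    else
+        goto failTarget;                    // forward jne, statically predicted not taken
+*/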
+struct DispatchStub
+{
+ friend struct DispatchHolder;
+
+ enum DispatchStubType
+ {
+ e_TYPE_SHORT,
+ e_TYPE_LONG,
+ };
+
+ inline DispatchStubType const type() const
+ {
+ LIMITED_METHOD_CONTRACT;
+ CONSISTENCY_CHECK(DispatchStubShort::isShortStub(reinterpret_cast<LPCBYTE>(this + 1))
+ || DispatchStubLong::isLongStub(reinterpret_cast<LPCBYTE>(this + 1)));
+ return DispatchStubShort::isShortStub((BYTE *)(this + 1)) ? e_TYPE_SHORT : e_TYPE_LONG;
+ }
+
+ inline static size_t size(DispatchStubType type)
+ {
+ STATIC_CONTRACT_LEAF;
+ return sizeof(DispatchStub) +
+ ((type == e_TYPE_SHORT) ? sizeof(DispatchStubShort) : sizeof(DispatchStubLong));
+ }
+
+ inline PCODE entryPoint() const { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; }
+ inline size_t expectedMT() const { LIMITED_METHOD_CONTRACT; return _expectedMT; }
+ inline size_t size() const { WRAPPER_NO_CONTRACT; return size(type()); }
+
+ inline PCODE implTarget() const
+ {
+ LIMITED_METHOD_CONTRACT;
+ if (type() == e_TYPE_SHORT)
+ return getShortStub()->implTarget();
+ else
+ return getLongStub()->implTarget();
+ }
+
+ inline PCODE failTarget() const
+ {
+ if (type() == e_TYPE_SHORT)
+ return getShortStub()->failTarget();
+ else
+ return getLongStub()->failTarget();
+ }
+
+private:
+ inline DispatchStubShort const *getShortStub() const
+ { LIMITED_METHOD_CONTRACT; return reinterpret_cast<DispatchStubShort const *>(this + 1); }
+
+ inline DispatchStubLong const *getLongStub() const
+ { LIMITED_METHOD_CONTRACT; return reinterpret_cast<DispatchStubLong const *>(this + 1); }
+
+ BYTE _entryPoint [2]; // 48 B8 mov rax,
+ size_t _expectedMT; // xx xx xx xx xx xx xx xx 64-bit address
+ BYTE part1 [3]; // 48 39 XX cmp [THIS_REG], rax
+
+ // Followed by either DispatchStubShort or DispatchStubLong, depending
+ // on whether we were able to make a rel32 or had to make an abs64 jump
+ // to the resolve stub on failure.
+
+};
+
+/* DispatchHolders are the containers for DispatchStubs; they provide for any alignment of
+stubs as necessary. DispatchStubs are placed in a hashtable and in a cache. The keys for both
+are the pair expectedMT and token. Efficiency of the hash table is not a big issue,
+since lookups in it are fairly rare. Efficiency of the cache is paramount since it is accessed frequently
+(see ResolveStub below). Currently we are storing both of these fields in the DispatchHolder to simplify
+alignment issues. If inlineMT in the stub itself was aligned, then it could be the expectedMT field.
+While the token field can be logically gotten by following the failure target to the failEntryPoint
+of the ResolveStub and then to the token over there, for perf reasons of cache access, it is duplicated here.
+This allows us to use DispatchStubs in the cache. The alternative is to provide some other immutable struct
+for the cache composed of the triplet (expectedMT, token, target) and some sort of reclamation scheme when
+they are thrown out of the cache via overwrites (since concurrency will make the obvious approaches invalid).
+*/
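+
+/* For reference, that alternative immutable cache entry would be roughly the triplet described
+above (a sketch only; no such struct is defined in this file):
+
+    struct ImmutableCacheEntry { size_t expectedMT; size_t token; PCODE target; };
+*/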
+
+/* @workaround for ee resolution - Since the EE does not currently have a resolver function that
+does what we want, see notes in implementation of VirtualCallStubManager::Resolver, we are
+using dispatch stubs to simulate what we want. That means that inlineTarget, which should be immutable,
+is in fact written. Hence we have moved target out into the holder and aligned it so we can
+atomically update it. When we get a resolver function that does what we want, we can drop this field,
+and live with just the inlineTarget field in the stub itself, since immutability will hold.*/
+struct DispatchHolder
+{
+ static void InitializeStatic();
+
+ void Initialize(PCODE implTarget, PCODE failTarget, size_t expectedMT,
+ DispatchStub::DispatchStubType type);
+
+ static size_t GetHolderSize(DispatchStub::DispatchStubType type)
+ { STATIC_CONTRACT_WRAPPER; return DispatchStub::size(type); }
+
+ static BOOL CanShortJumpDispatchStubReachFailTarget(PCODE failTarget, LPCBYTE stubMemory)
+ {
+ STATIC_CONTRACT_WRAPPER;
+ LPCBYTE pFrom = stubMemory + sizeof(DispatchStub) + offsetof(DispatchStubShort, part2[0]);
+ size_t cbRelJump = failTarget - (PCODE)pFrom;
+ return FitsInI4(cbRelJump);
+ }
+
+ DispatchStub* stub() { LIMITED_METHOD_CONTRACT; return reinterpret_cast<DispatchStub *>(this); }
+
+ static DispatchHolder* FromDispatchEntry(PCODE dispatchEntry);
+
+private:
+ // DispatchStub follows here. It is dynamically sized on allocation
+ // because it could be a DispatchStubLong or a DispatchStubShort
+};
+
+struct ResolveStub;
+struct ResolveHolder;
+
+/*ResolveStub**************************************************************************************
+Polymorphic call sites and monomorphic calls that fail end up in a ResolveStub. There is only
+one resolver stub built for any given token, even though there may be many call sites that
+use that token and many distinct <this> types that are used in the calling frames. A resolver stub
+actually has two entry points, one for polymorphic call sites and one for dispatch stubs that fail on their
+expectedMT test. There is a third part of the resolver stub that enters the ee when a decision should
+be made about changing the callsite. Therefore, we have defined the resolver stub as three distinct pieces,
+even though they are actually allocated as a single contiguous block of memory.
+
+A ResolveStub has two entry points:
+
+FailEntry - where the dispatch stub goes if the expected MT test fails. This piece of the stub does
+a check to see how often we are actually failing. If failures are frequent, control transfers to the
+patch piece to cause the call site to be changed from a mostly monomorphic callsite
+(calls dispatch stub) to a polymorphic callsite (calls resolve stub). If failures are rare, control
+transfers to the resolve piece (see ResolveStub). The failEntryPoint decrements a counter
+every time it is entered. The ee at various times will add a large chunk to the counter.
+
+ResolveEntry - does a lookup in a cache by hashing the actual type of the calling frame's
+<this> and the token identifying the (contract,method) pair desired. If found, control is transferred
+to the method implementation. If not found in the cache, the token is pushed and the ee is entered via
+the ResolveWorkerStub to do a full lookup and eventual transfer to the correct method implementation. Since
+there is a different resolve stub for every token, the token can be inlined and the token can be pre-hashed.
+The effectiveness of this approach is highly sensitive to the effectiveness of the hashing algorithm used,
+as well as its speed. It turns out it is very important to make the hash function sensitive to all
+of the bits of the method table, as method tables are laid out in memory in a very non-random way. Before
+making any changes to the code sequences here, it is very important to measure and tune them as perf
+can vary greatly, in unexpected ways, with seemingly minor changes.
+
+Implementation note - Order, choice of instructions, and branch directions
+should be carefully tuned since they can have an inordinate effect on performance. Particular
+attention needs to be paid to the effects on the BTB and branch prediction, both in the small
+and in the large, i.e. it needs to run well in the face of BTB overflow--using static predictions.
+Note that this stub is called in highly polymorphic cases, but the cache should have been sized
+and the hash function chosen to maximize the cache hit case. Hence the cmp/jcc instructions should
+mostly be going down the cache hit route, and it is important that this be statically predicted as such.
+Hence the 3 jcc instrs need to be forward jumps. As structured, there is only one jmp/jcc that typically
+gets put in the BTB since all the others typically fall straight thru. Minimizing potential BTB entries
+is important. */
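+
+/* The cache probe encoded in the stub below is, in C-like pseudo-code (ResolveCacheElem field
+names are taken from their use in ResolveHolder::InitializeStatic; this is a sketch, not the
+emitted code):
+
+    size_t hash = (((size_t)pMT + ((size_t)pMT >> CALL_STUB_CACHE_NUM_BITS)) ^ _hashedToken)
+                  & (CALL_STUB_CACHE_MASK * sizeof(void*));
+    ResolveCacheElem * pElem = *(ResolveCacheElem **)((BYTE*)_cacheAddress + hash);
+    if ((size_t)pElem->pMT == (size_t)pMT && pElem->token == _token)
+        goto pElem->target;                  // cache hit: jump to the method implementation
+    else
+        goto miss;                           // push pElem and enter the EE via _resolveWorker
+*/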
+
+struct ResolveStub
+{
+ inline PCODE failEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_failEntryPoint[0]; }
+ inline PCODE resolveEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_resolveEntryPoint[0]; }
+ inline PCODE slowEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_slowEntryPoint[0]; }
+
+ inline INT32* pCounter() { LIMITED_METHOD_CONTRACT; return _pCounter; }
+ inline UINT32 hashedToken() { LIMITED_METHOD_CONTRACT; return _hashedToken >> LOG2_PTRSIZE; }
+ inline size_t cacheAddress() { LIMITED_METHOD_CONTRACT; return _cacheAddress; }
+ inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; }
+ inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(ResolveStub); }
+
+private:
+ friend struct ResolveHolder;
+
+ BYTE _resolveEntryPoint[3];// resolveStub:
+ // 52 push rdx
+ // 49 BA mov r10,
+ size_t _cacheAddress; // xx xx xx xx xx xx xx xx 64-bit address
+ BYTE part1 [15]; // 48 8B XX mov rax, [THIS_REG] ; Compute hash = ((MT + MT>>12) ^ prehash)
+ // 48 8B D0 mov rdx, rax ; rdx <- current MethodTable
+ // 48 C1 E8 0C shr rax, 12
+ // 48 03 C2 add rax, rdx
+ // 48 35 xor rax,
+ UINT32 _hashedToken; // xx xx xx xx hashedtoken ; xor with pre-hashed token
+ BYTE part2 [2]; // 48 25 and rax,
+ UINT32 mask; // xx xx xx xx cache_mask ; and with cache mask
+ BYTE part3 [6]; // 4A 8B 04 10 mov rax, [r10 + rax] ; get cache entry address
+ // 49 BA mov r10,
+ size_t _token; // xx xx xx xx xx xx xx xx 64-bit address
+ BYTE part4 [3]; // 48 3B 50 cmp rdx, [rax+ ; compare our MT vs. cache MT
+ BYTE mtOffset; // xx ResolverCacheElem.pMT]
+ BYTE part5 [1]; // 75 jne
+ BYTE toMiss1; // xx miss ; must be forward jump, for perf reasons
+ BYTE part6 [3]; // 4C 3B 50 cmp r10, [rax+ ; compare our token vs. cache token
+ BYTE tokenOffset; // xx ResolverCacheElem.token]
+ BYTE part7 [1]; // 75 jne
+ BYTE toMiss2; // xx miss ; must be forward jump, for perf reasons
+ BYTE part8 [3]; // 48 8B 40 mov rax, [rax+ ; setup rax with method impl address
+ BYTE targetOffset; // xx ResolverCacheElem.target]
+ BYTE part9 [3]; // 5A pop rdx
+ // FF E0 jmp rax
+ // failStub:
+ BYTE _failEntryPoint [2]; // 48 B8 mov rax,
+ INT32* _pCounter; // xx xx xx xx xx xx xx xx 64-bit address
+ BYTE part11 [4]; // 83 00 FF add dword ptr [rax], -1
+ // 7d jnl
+ BYTE toResolveStub1; // xx resolveStub
+ BYTE part12 [4]; // 49 83 CB 01 or r11, 1
+ BYTE _slowEntryPoint [3]; // 52 slow: push rdx
+ // 49 BA mov r10,
+ size_t _tokenSlow; // xx xx xx xx xx xx xx xx 64-bit address
+// BYTE miss [5]; // 5A miss: pop rdx ; don't pop rdx
+// // 41 52 push r10 ; don't push r10 leave it setup with token
+ BYTE miss [3]; // 50 push rax ; push ptr to cache elem
+ // 48 B8 mov rax,
+ size_t _resolveWorker; // xx xx xx xx xx xx xx xx 64-bit address
+ BYTE part10 [2]; // FF E0 jmp rax
+};
+
+/* ResolveHolders are the containers for ResolveStubs; they provide
+for any alignment of the stubs as necessary. The stubs are placed in a hash table keyed by
+the token for which they are built. Efficiency of access requires that this token be aligned.
+For now, we have copied that field into the ResolveHolder itself. If the resolve stub is arranged such that
+any of its inlined tokens (non-prehashed) is aligned, then the token field in the ResolveHolder
+is not needed. */
+struct ResolveHolder
+{
+ static void InitializeStatic();
+
+ void Initialize(PCODE resolveWorkerTarget, PCODE patcherTarget,
+ size_t dispatchToken, UINT32 hashedToken,
+ void * cacheAddr, INT32* counterAddr);
+
+ ResolveStub* stub() { LIMITED_METHOD_CONTRACT; return &_stub; }
+
+ static ResolveHolder* FromFailEntry(PCODE resolveEntry);
+ static ResolveHolder* FromResolveEntry(PCODE resolveEntry);
+
+private:
+ ResolveStub _stub;
+};
+#pragma pack(pop)
+
+#ifdef DECLARE_DATA
+
+LookupStub lookupInit;
+DispatchStub dispatchInit;
+DispatchStubShort dispatchShortInit;
+DispatchStubLong dispatchLongInit;
+ResolveStub resolveInit;
+
+#define INSTR_INT3 0xcc
+#define INSTR_NOP 0x90
+
+#ifndef DACCESS_COMPILE
+
+#include "asmconstants.h"
+
+#ifdef STUB_LOGGING
+extern size_t g_lookup_inline_counter;
+extern size_t g_call_inline_counter;
+extern size_t g_miss_inline_counter;
+extern size_t g_call_cache_counter;
+extern size_t g_miss_cache_counter;
+#endif
+
+/* Template used to generate the stub. We generate a stub by allocating a block of
+ memory, copying the template over it, and updating just the specific fields that need
+ to be changed.
+*/
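+
+/* A minimal sketch of that flow for the lookup stub (the allocator call below is a stand-in;
+the real stubs come out of the VirtualCallStubManager's stub heaps):
+
+    LookupHolder::InitializeStatic();                          // once: fill in the lookupInit template
+    LookupHolder * pHolder = (LookupHolder *) AllocStubMemory(sizeof(LookupHolder));
+    pHolder->Initialize(resolveWorkerTarget, dispatchToken);   // copy lookupInit, patch the two fields
+    PCODE entry = pHolder->stub()->entryPoint();
+*/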
+
+void LookupHolder::InitializeStatic()
+{
+ static_assert_no_msg((sizeof(LookupHolder) % sizeof(void*)) == 0);
+
+ // The first instruction of a LookupStub is nop
+ // and we use it in order to differentiate the first two bytes
+ // of a LookupStub and a ResolveStub
+ lookupInit._entryPoint [0] = INSTR_NOP;
+ lookupInit._entryPoint [1] = 0x48;
+ lookupInit._entryPoint [2] = 0xB8;
+ lookupInit._token = 0xcccccccccccccccc;
+ lookupInit.part2 [0] = 0x50;
+ lookupInit.part2 [1] = 0x48;
+ lookupInit.part2 [2] = 0xB8;
+ lookupInit._resolveWorkerAddr = 0xcccccccccccccccc;
+ lookupInit.part3 [0] = 0xFF;
+ lookupInit.part3 [1] = 0xE0;
+}
+
+void LookupHolder::Initialize(PCODE resolveWorkerTarget, size_t dispatchToken)
+{
+ _stub = lookupInit;
+
+ //fill in the stub specific fields
+ _stub._token = dispatchToken;
+ _stub._resolveWorkerAddr = (size_t) resolveWorkerTarget;
+}
+
+/* Template used to generate the stub. We generate a stub by allocating a block of
+ memory, copying the template over it, and updating just the specific fields that need
+ to be changed.
+*/
+
+void DispatchHolder::InitializeStatic()
+{
+ // Check that _expectedMT is aligned in the DispatchHolder
+ static_assert_no_msg(((sizeof(DispatchStub)+sizeof(DispatchStubShort)) % sizeof(void*)) == 0);
+ static_assert_no_msg(((sizeof(DispatchStub)+sizeof(DispatchStubLong)) % sizeof(void*)) == 0);
+ CONSISTENCY_CHECK((offsetof(DispatchStubLong, part4[0]) - offsetof(DispatchStubLong, part2[0])) < INT8_MAX);
+
+ // Common dispatch stub initialization
+ dispatchInit._entryPoint [0] = 0x48;
+ dispatchInit._entryPoint [1] = 0xB8;
+ dispatchInit._expectedMT = 0xcccccccccccccccc;
+ dispatchInit.part1 [0] = 0x48;
+ dispatchInit.part1 [1] = 0x39;
+#ifdef UNIX_AMD64_ABI
+ dispatchInit.part1 [2] = 0x07; // RDI
+#else
+ dispatchInit.part1 [2] = 0x01; // RCX
+#endif
+
+ // Short dispatch stub initialization
+ dispatchShortInit.part1 [0] = 0x0F;
+ dispatchShortInit.part1 [1] = 0x85;
+ dispatchShortInit._failDispl = 0xcccccccc;
+ dispatchShortInit.part2 [0] = 0x48;
+ dispatchShortInit.part2 [1] = 0xb8;
+ dispatchShortInit._implTarget = 0xcccccccccccccccc;
+ dispatchShortInit.part3 [0] = 0xFF;
+ dispatchShortInit.part3 [1] = 0xE0;
+ dispatchShortInit.alignPad [0] = INSTR_INT3;
+
+ // Long dispatch stub initialization
+ dispatchLongInit.part1 [0] = 0x75;
+ dispatchLongInit._failDispl = BYTE(&dispatchLongInit.part4[0] - &dispatchLongInit.part2[0]);
+ dispatchLongInit.part2 [0] = 0x48;
+ dispatchLongInit.part2 [1] = 0xb8;
+ dispatchLongInit._implTarget = 0xcccccccccccccccc;
+ dispatchLongInit.part3 [0] = 0xFF;
+ dispatchLongInit.part3 [1] = 0xE0;
+ // failLabel:
+ dispatchLongInit.part4 [0] = 0x48;
+ dispatchLongInit.part4 [1] = 0xb8;
+ dispatchLongInit._failTarget = 0xcccccccccccccccc;
+ dispatchLongInit.part5 [0] = 0xFF;
+ dispatchLongInit.part5 [1] = 0xE0;
+ dispatchLongInit.alignPad [0] = INSTR_INT3;
+}
+
+void DispatchHolder::Initialize(PCODE implTarget, PCODE failTarget, size_t expectedMT,
+ DispatchStub::DispatchStubType type)
+{
+ //
+ // Initialize the common area
+ //
+
+ // initialize the static data
+ *stub() = dispatchInit;
+
+ // fill in the dynamic data
+ stub()->_expectedMT = expectedMT;
+
+ //
+ // Initialize the short/long areas
+ //
+ if (type == DispatchStub::e_TYPE_SHORT)
+ {
+ DispatchStubShort *shortStub = const_cast<DispatchStubShort *>(stub()->getShortStub());
+
+ // initialize the static data
+ *shortStub = dispatchShortInit;
+
+ // fill in the dynamic data
+ size_t displ = (failTarget - ((PCODE) &shortStub->_failDispl + sizeof(DISPL)));
+ CONSISTENCY_CHECK(FitsInI4(displ));
+ shortStub->_failDispl = (DISPL) displ;
+ shortStub->_implTarget = (size_t) implTarget;
+ CONSISTENCY_CHECK((PCODE)&shortStub->_failDispl + sizeof(DISPL) + shortStub->_failDispl == failTarget);
+ }
+ else
+ {
+ CONSISTENCY_CHECK(type == DispatchStub::e_TYPE_LONG);
+ DispatchStubLong *longStub = const_cast<DispatchStubLong *>(stub()->getLongStub());
+
+ // initialize the static data
+ *longStub = dispatchLongInit;
+
+ // fill in the dynamic data
+ longStub->_implTarget = implTarget;
+ longStub->_failTarget = failTarget;
+ }
+}
+
+/* Template used to generate the stub. We generate a stub by allocating a block of
+ memory, copying the template over it, and updating just the specific fields that need
+ to be changed.
+*/
+
+void ResolveHolder::InitializeStatic()
+{
+ static_assert_no_msg((sizeof(ResolveHolder) % sizeof(void*)) == 0);
+
+ resolveInit._resolveEntryPoint [0] = 0x52;
+ resolveInit._resolveEntryPoint [1] = 0x49;
+ resolveInit._resolveEntryPoint [2] = 0xBA;
+ resolveInit._cacheAddress = 0xcccccccccccccccc;
+ resolveInit.part1 [ 0] = 0x48;
+ resolveInit.part1 [ 1] = 0x8B;
+#ifdef UNIX_AMD64_ABI
+ resolveInit.part1 [ 2] = 0x07; // RDI
+#else
+ resolveInit.part1 [ 2] = 0x01; // RCX
+#endif
+ resolveInit.part1 [ 3] = 0x48;
+ resolveInit.part1 [ 4] = 0x8B;
+ resolveInit.part1 [ 5] = 0xD0;
+ resolveInit.part1 [ 6] = 0x48;
+ resolveInit.part1 [ 7] = 0xC1;
+ resolveInit.part1 [ 8] = 0xE8;
+ resolveInit.part1 [ 9] = CALL_STUB_CACHE_NUM_BITS;
+ resolveInit.part1 [10] = 0x48;
+ resolveInit.part1 [11] = 0x03;
+ resolveInit.part1 [12] = 0xC2;
+ resolveInit.part1 [13] = 0x48;
+ resolveInit.part1 [14] = 0x35;
+// Review truncation from unsigned __int64 to UINT32 of a constant value.
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable:4305 4309)
+#endif // defined(_MSC_VER)
+
+ resolveInit._hashedToken = 0xcccccccc;
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif // defined(_MSC_VER)
+
+ resolveInit.part2 [ 0] = 0x48;
+ resolveInit.part2 [ 1] = 0x25;
+ resolveInit.mask = CALL_STUB_CACHE_MASK*sizeof(void *);
+ resolveInit.part3 [0] = 0x4A;
+ resolveInit.part3 [1] = 0x8B;
+ resolveInit.part3 [2] = 0x04;
+ resolveInit.part3 [3] = 0x10;
+ resolveInit.part3 [4] = 0x49;
+ resolveInit.part3 [5] = 0xBA;
+ resolveInit._token = 0xcccccccccccccccc;
+ resolveInit.part4 [0] = 0x48;
+ resolveInit.part4 [1] = 0x3B;
+ resolveInit.part4 [2] = 0x50;
+ resolveInit.mtOffset = offsetof(ResolveCacheElem,pMT) & 0xFF;
+ resolveInit.part5 [0] = 0x75;
+ resolveInit.toMiss1 = offsetof(ResolveStub,miss)-(offsetof(ResolveStub,toMiss1)+1) & 0xFF;
+ resolveInit.part6 [0] = 0x4C;
+ resolveInit.part6 [1] = 0x3B;
+ resolveInit.part6 [2] = 0x50;
+ resolveInit.tokenOffset = offsetof(ResolveCacheElem,token) & 0xFF;
+ resolveInit.part7 [0] = 0x75;
+ resolveInit.toMiss2 = offsetof(ResolveStub,miss)-(offsetof(ResolveStub,toMiss2)+1) & 0xFF;
+ resolveInit.part8 [0] = 0x48;
+ resolveInit.part8 [1] = 0x8B;
+ resolveInit.part8 [2] = 0x40;
+ resolveInit.targetOffset = offsetof(ResolveCacheElem,target) & 0xFF;
+ resolveInit.part9 [0] = 0x5A;
+ resolveInit.part9 [1] = 0xFF;
+ resolveInit.part9 [2] = 0xE0;
+ resolveInit._failEntryPoint [0] = 0x48;
+ resolveInit._failEntryPoint [1] = 0xB8;
+ resolveInit._pCounter = (INT32*) (size_t) 0xcccccccccccccccc;
+ resolveInit.part11 [0] = 0x83;
+ resolveInit.part11 [1] = 0x00;
+ resolveInit.part11 [2] = 0xFF;
+ resolveInit.part11 [3] = 0x7D;
+ resolveInit.toResolveStub1 = (offsetof(ResolveStub, _resolveEntryPoint) - (offsetof(ResolveStub, toResolveStub1)+1)) & 0xFF;
+ resolveInit.part12 [0] = 0x49;
+ resolveInit.part12 [1] = 0x83;
+ resolveInit.part12 [2] = 0xCB;
+ resolveInit.part12 [3] = 0x01;
+ resolveInit._slowEntryPoint [0] = 0x52;
+ resolveInit._slowEntryPoint [1] = 0x49;
+ resolveInit._slowEntryPoint [2] = 0xBA;
+ resolveInit._tokenSlow = 0xcccccccccccccccc;
+ resolveInit.miss [0] = 0x50;
+ resolveInit.miss [1] = 0x48;
+ resolveInit.miss [2] = 0xB8;
+ resolveInit._resolveWorker = 0xcccccccccccccccc;
+ resolveInit.part10 [0] = 0xFF;
+ resolveInit.part10 [1] = 0xE0;
+}
+
+void ResolveHolder::Initialize(PCODE resolveWorkerTarget, PCODE patcherTarget,
+ size_t dispatchToken, UINT32 hashedToken,
+ void * cacheAddr, INT32* counterAddr)
+{
+ _stub = resolveInit;
+
+ //fill in the stub specific fields
+ _stub._cacheAddress = (size_t) cacheAddr;
+ _stub._hashedToken = hashedToken << LOG2_PTRSIZE;
+ _stub._token = dispatchToken;
+ _stub._tokenSlow = dispatchToken;
+ _stub._resolveWorker = (size_t) resolveWorkerTarget;
+ _stub._pCounter = counterAddr;
+}
+
+ResolveHolder* ResolveHolder::FromFailEntry(PCODE failEntry)
+{
+ LIMITED_METHOD_CONTRACT;
+ ResolveHolder* resolveHolder = (ResolveHolder*) ( failEntry - offsetof(ResolveHolder, _stub) - offsetof(ResolveStub, _failEntryPoint) );
+ _ASSERTE(resolveHolder->_stub._resolveEntryPoint[1] == resolveInit._resolveEntryPoint[1]);
+ return resolveHolder;
+}
+
+#endif // DACCESS_COMPILE
+
+LookupHolder* LookupHolder::FromLookupEntry(PCODE lookupEntry)
+{
+ LIMITED_METHOD_CONTRACT;
+ LookupHolder* lookupHolder = (LookupHolder*) ( lookupEntry - offsetof(LookupHolder, _stub) - offsetof(LookupStub, _entryPoint) );
+ _ASSERTE(lookupHolder->_stub._entryPoint[2] == lookupInit._entryPoint[2]);
+ return lookupHolder;
+}
+
+
+DispatchHolder* DispatchHolder::FromDispatchEntry(PCODE dispatchEntry)
+{
+ LIMITED_METHOD_CONTRACT;
+ DispatchHolder* dispatchHolder = (DispatchHolder*) ( dispatchEntry - offsetof(DispatchStub, _entryPoint) );
+ _ASSERTE(dispatchHolder->stub()->_entryPoint[1] == dispatchInit._entryPoint[1]);
+ return dispatchHolder;
+}
+
+
+ResolveHolder* ResolveHolder::FromResolveEntry(PCODE resolveEntry)
+{
+ LIMITED_METHOD_CONTRACT;
+ ResolveHolder* resolveHolder = (ResolveHolder*) ( resolveEntry - offsetof(ResolveHolder, _stub) - offsetof(ResolveStub, _resolveEntryPoint) );
+ _ASSERTE(resolveHolder->_stub._resolveEntryPoint[1] == resolveInit._resolveEntryPoint[1]);
+ return resolveHolder;
+}
+
+VirtualCallStubManager::StubKind VirtualCallStubManager::predictStubKind(PCODE stubStartAddress)
+{
+#ifdef DACCESS_COMPILE
+ return SK_BREAKPOINT; // Dac always uses the slower lookup
+#else
+ StubKind stubKind = SK_UNKNOWN;
+
+ EX_TRY
+ {
+ // If stubStartAddress is completely bogus, then this might AV,
+ // so we protect it with SEH. An AV here is OK.
+ AVInRuntimeImplOkayHolder AVOkay;
+
+ WORD firstWord = *((WORD*) stubStartAddress);
+
+ if (firstWord == 0xB848)
+ {
+ stubKind = SK_DISPATCH;
+ }
+ else if (firstWord == 0x4890)
+ {
+ stubKind = SK_LOOKUP;
+ }
+ else if (firstWord == 0x4952)
+ {
+ stubKind = SK_RESOLVE;
+ }
+ else if (firstWord == 0x48F8)
+ {
+ stubKind = SK_LOOKUP;
+ }
+ else
+ {
+ BYTE firstByte = ((BYTE*) stubStartAddress)[0];
+ BYTE secondByte = ((BYTE*) stubStartAddress)[1];
+
+ if ((firstByte == INSTR_INT3) || (secondByte == INSTR_INT3))
+ {
+ stubKind = SK_BREAKPOINT;
+ }
+ }
+ }
+ EX_CATCH
+ {
+ stubKind = SK_UNKNOWN;
+ }
+ EX_END_CATCH(SwallowAllExceptions);
+
+ return stubKind;
+
+#endif // DACCESS_COMPILE
+}
+
+#endif //DECLARE_DATA
+
+#endif // _VIRTUAL_CALL_STUB_AMD64_H