Diffstat (limited to 'src/vm/i386/virtualcallstubcpu.hpp')
-rw-r--r--  src/vm/i386/virtualcallstubcpu.hpp  1077
1 file changed, 1077 insertions, 0 deletions
diff --git a/src/vm/i386/virtualcallstubcpu.hpp b/src/vm/i386/virtualcallstubcpu.hpp
new file mode 100644
index 0000000000..33ce8199b9
--- /dev/null
+++ b/src/vm/i386/virtualcallstubcpu.hpp
@@ -0,0 +1,1077 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+// File: virtualcallstubcpu.hpp
+//
+
+
+//
+
+//
+// ============================================================================
+
+#ifndef _VIRTUAL_CALL_STUB_X86_H
+#define _VIRTUAL_CALL_STUB_X86_H
+
+#ifdef DECLARE_DATA
+#include "asmconstants.h"
+#ifdef FEATURE_REMOTING
+#include "remoting.h"
+#endif
+#endif
+
+#include <pshpack1.h> // Since we are placing code, we want byte packing of the structs
+
+#define USES_LOOKUP_STUBS 1
+
+/*********************************************************************************************
+Stubs that contain code are all part of larger structs called Holders. There is a
+Holder for each kind of stub, i.e. XXXStub is contained within XXXHolder. Holders are
+essentially an implementation trick that allowed rearranging the code sequences more
+easily while trying out different alternatives, and for dealing with any alignment
+issues in a way that was mostly immune to the actual code sequences. These Holders
+should be revisited when the stub code sequences are fixed, since in many cases they
+add extra space to a stub that is not really needed.
+
+Stubs are placed in cache and hash tables. Since unaligned access of data in memory
+is very slow, the keys used in those tables should be aligned. The things used as keys
+typically also occur in the generated code, e.g. a token as an immediate part of an instruction.
+For now, to avoid alignment computations as different code strategies are tried out, the key
+fields are all in the Holders. Eventually, many of these fields should be dropped, and the instruction
+streams aligned so that the immediate fields fall on aligned boundaries.
+*/
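+
+/* A worked example of the padding arithmetic used by the Holders below (illustrative only,
+assuming the 4-byte pointers of this x86 file): if the key field sits at offset 2 inside a
+stub, the leading align array needs
+
+    (sizeof(void*) - (2 % sizeof(void*))) % sizeof(void*)  ==  (4 - 2) % 4  ==  2
+
+bytes so that the key lands on a pointer-size boundary, and the trailing pad array rounds the
+holder back up to a multiple of sizeof(void*). */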
+
+#if USES_LOOKUP_STUBS
+
+struct LookupStub;
+struct LookupHolder;
+
+/*LookupStub**************************************************************************************
+Virtual and interface call sites are initially setup to point at LookupStubs.
+This is because the runtime type of the <this> pointer is not yet known,
+so the target cannot be resolved. Note: if the jit is able to determine the runtime type
+of the <this> pointer, it should be generating a direct call not a virtual or interface call.
+This stub pushes a lookup token onto the stack to identify the sought after method, and then
+jumps into the EE (VirtualCallStubManager::ResolveWorkerStub) to effectuate the lookup and
+transfer of control to the appropriate target method implementation, perhaps patching the call site
+along the way to point to a more appropriate stub. Hence callsites that point to LookupStubs
+get quickly changed to point to another kind of stub.
+*/
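+
+/* Illustrative pseudo-code only (not a runtime helper): the instruction bytes documented in the
+struct below amount to
+
+    push(siteAddrForRegisterIndirect);   // 50: save eax in case this is a register-indirect call
+    push(_token);                        // 68: identify the method being looked up
+    goto resolveWorker;                  // e9: enter the EE, which resolves and may patch the site
+*/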
+struct LookupStub
+{
+ inline PCODE entryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; }
+ inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; }
+ inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(LookupStub); }
+
+private:
+ friend struct LookupHolder;
+
+ // LookupStub::_entryPoint expects:
+ // ecx: object (the "this" pointer)
+ // eax: siteAddrForRegisterIndirect if this is a RegisterIndirect dispatch call
+ BYTE _entryPoint [2]; // 50 push eax ;save siteAddrForRegisterIndirect - this may be an indirect call
+ // 68 push
+ size_t _token; // xx xx xx xx 32-bit constant
+#ifdef STUB_LOGGING
+ BYTE cntr2[2]; // ff 05 inc
+ size_t* c_lookup; // xx xx xx xx [call_lookup_counter]
+#endif //STUB_LOGGING
+ BYTE part2 [1]; // e9 jmp
+ DISPL _resolveWorkerDispl;// xx xx xx xx pc-rel displ
+};
+
+/* LookupHolders are the containers for LookupStubs; they provide for any alignment of
+stubs as necessary. In the case of LookupStubs, alignment is necessary since
+LookupStubs are placed in a hash table keyed by token. */
+struct LookupHolder
+{
+ static void InitializeStatic();
+
+ void Initialize(PCODE resolveWorkerTarget, size_t dispatchToken);
+
+ LookupStub* stub() { LIMITED_METHOD_CONTRACT; return &_stub; }
+
+ static LookupHolder* FromLookupEntry(PCODE lookupEntry);
+
+private:
+ friend struct LookupStub;
+
+ BYTE align[(sizeof(void*)-(offsetof(LookupStub,_token)%sizeof(void*)))%sizeof(void*)];
+ LookupStub _stub;
+ BYTE pad[sizeof(void*) -
+ ((sizeof(void*)-(offsetof(LookupStub,_token)%sizeof(void*))) +
+ (sizeof(LookupStub))
+ ) % sizeof(void*)]; //complete DWORD
+
+ static_assert_no_msg((sizeof(void*) -
+ ((sizeof(void*)-(offsetof(LookupStub,_token)%sizeof(void*))) +
+ (sizeof(LookupStub))
+ ) % sizeof(void*)) != 0);
+};
+
+#endif // USES_LOOKUP_STUBS
+
+struct DispatchStub;
+struct DispatchHolder;
+
+/*DispatchStub**************************************************************************************
+Monomorphic and mostly monomorphic call sites eventually point to DispatchStubs.
+A dispatch stub has an expected type (expectedMT), target address (target) and fail address (failure).
+If the <this> object in the calling frame is in fact of the expected type, then
+control is transferred to the target address, the method implementation. If not,
+then control is transferred to the fail address, a fail stub (see below) where a polymorphic
+lookup is done to find the correct address to go to.
+
+implementation note: Order, choice of instructions, and branch directions
+should be carefully tuned since it can have an inordinate effect on performance. Particular
+attention needs to be paid to the effects on the BTB and branch prediction, both in the small
+and in the large, i.e. it needs to run well in the face of BTB overflow--using static predictions.
+Note that since this stub is only used for mostly monomorphic callsites (ones that are not get patched
+to something else), the conditional jump "jne failure" is mostly not taken, and hence it is important
+that branch prediction statically predicts this, which means it must be a forward jump. The alternative
+is to reverse the order of the jumps and make sure that the resulting conditional jump "je implTarget"
+is statically predicted as taken, i.e. a backward jump. The current choice was taken since it was easier
+to control the placement of the stubs than control the placement of the jitted code and the stubs. */
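+
+/* Illustrative pseudo-code only (a hypothetical model, not a runtime helper): the non-logging
+form of the stub below behaves like
+
+    if (*(size_t*)thisObj == _expectedMT)   // cmp [ecx], expectedMT -- also the implicit null check
+        goto implTarget;                    // monomorphic hit: jump straight to the method body
+    else
+        goto failTarget;                    // miss: fall over to the ResolveStub's fail entry
+*/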
+struct DispatchStub
+{
+ inline PCODE entryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; }
+
+ inline size_t expectedMT() { LIMITED_METHOD_CONTRACT; return _expectedMT; }
+ inline PCODE implTarget() { LIMITED_METHOD_CONTRACT; return (PCODE) &_implDispl + sizeof(DISPL) + _implDispl; }
+ inline PCODE failTarget() { LIMITED_METHOD_CONTRACT; return (PCODE) &_failDispl + sizeof(DISPL) + _failDispl; }
+ inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(DispatchStub); }
+
+private:
+ friend struct DispatchHolder;
+
+ // DispatchStub:: _entryPoint expects:
+ // ecx: object (the "this" pointer)
+ // eax: siteAddrForRegisterIndirect if this is a RegisterIndirect dispatch call
+#ifndef STUB_LOGGING
+ BYTE _entryPoint [2]; // 81 39 cmp [ecx], ; This is the place where we are going to fault on null this.
+ size_t _expectedMT; // xx xx xx xx expectedMT ; If you change it, change also AdjustContextForVirtualStub in excep.cpp!!!
+ BYTE jmpOp1[2]; // 0f 85 jne
+ DISPL _failDispl; // xx xx xx xx failEntry ;must be forward jmp for perf reasons
+ BYTE jmpOp2; // e9 jmp
+ DISPL _implDispl; // xx xx xx xx implTarget
+#else //STUB_LOGGING
+ BYTE _entryPoint [2]; // ff 05 inc
+ size_t* d_call; // xx xx xx xx [call_mono_counter]
+ BYTE cmpOp [2]; // 81 39 cmp [ecx],
+ size_t _expectedMT; // xx xx xx xx expectedMT
+ BYTE jmpOp1[2]; // 0f 84 je
+ DISPL _implDispl; // xx xx xx xx implTarget ;during logging, perf is not so important
+ BYTE fail [2]; // ff 05 inc
+ size_t* d_miss; // xx xx xx xx [miss_mono_counter]
+ BYTE jmpFail; // e9 jmp
+ DISPL _failDispl; // xx xx xx xx failEntry
+#endif //STUB_LOGGING
+};
+
+/* DispatchHolders are the containers for DispatchStubs; they provide for any alignment of
+stubs as necessary. DispatchStubs are placed in a hashtable and in a cache. The keys for both
+are the pair expectedMT and token. Efficiency of the hash table is not a big issue,
+since lookups in it are fairly rare. Efficiency of the cache is paramount since it is accessed frequently
+(see ResolveStub below). Currently we are storing both of these fields in the DispatchHolder to simplify
+alignment issues. If inlineMT in the stub itself was aligned, then it could be the expectedMT field.
+While the token field can be logically gotten by following the failure target to the failEntryPoint
+of the ResolveStub and then to the token over there, for perf reasons of cache access, it is duplicated here.
+This allows us to use DispatchStubs in the cache. The alternative is to provide some other immutable struct
+for the cache composed of the triplet (expectedMT, token, target) and some sort of reclamation scheme when
+they are thrown out of the cache via overwrites (since concurrency will make the obvious approaches invalid).
+*/
+
+/* @workaround for ee resolution - Since the EE does not currently have a resolver function that
+does what we want, see notes in implementation of VirtualCallStubManager::Resolver, we are
+using dispatch stubs to simulate what we want. That means that inlineTarget, which should be immutable,
+is in fact written. Hence we have moved target out into the holder and aligned it so we can
+atomically update it. When we get a resolver function that does what we want, we can drop this field,
+and live with just the inlineTarget field in the stub itself, since immutability will hold.*/
+struct DispatchHolder
+{
+ static void InitializeStatic();
+
+ void Initialize(PCODE implTarget, PCODE failTarget, size_t expectedMT);
+
+ DispatchStub* stub() { LIMITED_METHOD_CONTRACT; return &_stub; }
+
+ static DispatchHolder* FromDispatchEntry(PCODE dispatchEntry);
+
+private:
+ //force expectedMT to be aligned since used as key in hash tables.
+#ifndef STUB_LOGGING
+ BYTE align[(sizeof(void*)-(offsetof(DispatchStub,_expectedMT)%sizeof(void*)))%sizeof(void*)];
+#endif
+ DispatchStub _stub;
+ BYTE pad[(sizeof(void*)-(sizeof(DispatchStub)%sizeof(void*))+offsetof(DispatchStub,_expectedMT))%sizeof(void*)]; //complete DWORD
+};
+
+struct ResolveStub;
+struct ResolveHolder;
+
+/*ResolveStub**************************************************************************************
+Polymorphic call sites and monomorphic calls that fail end up in a ResolveStub. There is only
+one resolver stub built for any given token, even though there may be many call sites that
+use that token and many distinct <this> types that are used in the calling frames. A resolver stub
+actually has two entry points, one for polymorphic call sites and one for dispatch stubs that fail on their
+expectedMT test. There is a third part of the resolver stub that enters the ee when a decision should
+be made about changing the callsite. Therefore, we have defined the resolver stub as three distinct pieces,
+even though they are actually allocated as a single contiguous block of memory. These pieces are:
+
+A ResolveStub has two entry points:
+
+FailEntry - where the dispatch stub goes if the expected MT test fails. This piece of the stub does
+a check to see how often we are actually failing. If failures are frequent, control transfers to the
+patch piece to cause the call site to be changed from a mostly monomorphic callsite
+(calls dispatch stub) to a polymorphic callsite (calls resolve stub). If failures are rare, control
+transfers to the resolve piece (see ResolveStub). The failEntryPoint decrements a counter
+every time it is entered. The ee at various times will add a large chunk to the counter.
+
+ResolveEntry - does a lookup in a cache by hashing the actual type of the calling frame's
+<this> and the token identifying the (contract,method) pair desired. If found, control is transferred
+to the method implementation. If not found in the cache, the token is pushed and the ee is entered via
+the ResolveWorkerStub to do a full lookup and eventual transfer to the correct method implementation. Since
+there is a different resolve stub for every token, the token can be inlined and the token can be pre-hashed.
+The effectiveness of this approach is highly sensitive to the effectiveness of the hashing algorithm used,
+as well as its speed. It turns out it is very important to make the hash function sensitive to all
+of the bits of the method table, as method tables are laid out in memory in a very non-random way. Before
+making any changes to the code sequences here, it is very important to measure and tune them as perf
+can vary greatly, in unexpected ways, with seeming minor changes.
+
+Implementation note - Order, choice of instructions, and branch directions
+should be carefully tuned since it can have an inordinate effect on performance. Particular
+attention needs to be paid to the effects on the BTB and branch prediction, both in the small
+and in the large, i.e. it needs to run well in the face of BTB overflow--using static predictions.
+Note that this stub is called in highly polymorphic cases, but the cache should have been sized
+and the hash function chosen to maximize the cache hit case. Hence the cmp/jcc instructions should
+mostly be going down the cache hit route, and it is important that this be statically predicted as so.
+Hence the 3 jcc instrs need to be forward jumps. As structured, there is only one jmp/jcc that typically
+gets put in the BTB since all the others typically fall straight thru. Minimizing potential BTB entries
+is important. */
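+
+/* Illustrative pseudo-code only for _resolveEntryPoint below. The pMT/token/target field names
+come from the offsetof() uses in ResolveHolder::InitializeStatic later in this file; the rest of
+the ResolveCacheElem layout is assumed.
+
+    size_t mt   = *(size_t*)thisObj;                                   // mov eax,[ecx]
+    size_t hash = (((mt >> CALL_STUB_CACHE_NUM_BITS) + mt) ^ _hashedToken) & mask;
+    ResolveCacheElem* e = *(ResolveCacheElem**)(_cacheAddress + hash); // hash/mask are pre-scaled by LOG2_PTRSIZE
+    if (e->pMT == mt && e->token == _token)
+        goto e->target;                                                // cache hit
+    else
+        goto _slowEntryPoint;                                          // miss: push token and enter the EE
+*/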
+
+struct ResolveStub
+{
+ inline PCODE failEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_failEntryPoint[0]; }
+ inline PCODE resolveEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_resolveEntryPoint[0]; }
+ inline PCODE slowEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_slowEntryPoint[0]; }
+
+ inline INT32* pCounter() { LIMITED_METHOD_CONTRACT; return _pCounter; }
+ inline UINT32 hashedToken() { LIMITED_METHOD_CONTRACT; return _hashedToken >> LOG2_PTRSIZE; }
+ inline size_t cacheAddress() { LIMITED_METHOD_CONTRACT; return _cacheAddress; }
+ inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; }
+ inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(ResolveStub); }
+
+private:
+ friend struct ResolveHolder;
+
+ // ResolveStub::_failEntryPoint expects:
+ // ecx: object (the "this" pointer)
+ // eax: siteAddrForRegisterIndirect if this is a RegisterIndirect dispatch call
+ BYTE _failEntryPoint [2]; // 83 2d sub
+ INT32* _pCounter; // xx xx xx xx [counter],
+ BYTE part0 [2]; // 01 01
+ // 7c jl
+ BYTE toPatcher; // xx backpatcher ;must be forward jump, for perf reasons
+ // ;fall into the resolver stub
+
+ // ResolveStub::_resolveEntryPoint expects:
+ // ecx: object (the "this" pointer)
+ // eax: siteAddrForRegisterIndirect if this is a RegisterIndirect dispatch call
+ BYTE _resolveEntryPoint[6]; // 50 push eax ;save siteAddrForRegisterIndirect - this may be an indirect call
+ // 8b 01 mov eax,[ecx] ;get the method table from the "this" pointer. This is the place
+ // ; where we are going to fault on null this. If you change it,
+ // ; change also AdjustContextForVirtualStub in excep.cpp!!!
+ // 52 push edx
+ // 8b d0 mov edx, eax
+ BYTE part1 [6]; // c1 e8 0C shr eax,12 ;we are adding upper bits into lower bits of mt
+ // 03 c2 add eax,edx
+ // 35 xor eax,
+ UINT32 _hashedToken; // xx xx xx xx hashedToken ;along with pre-hashed token
+ BYTE part2 [1]; // 25 and eax,
+ size_t mask; // xx xx xx xx cache_mask
+ BYTE part3 [2]; // 8b 80 mov eax, [eax+
+ size_t _cacheAddress; // xx xx xx xx lookupCache]
+#ifdef STUB_LOGGING
+ BYTE cntr1[2]; // ff 05 inc
+ size_t* c_call; // xx xx xx xx [call_cache_counter]
+#endif //STUB_LOGGING
+ BYTE part4 [2]; // 3b 10 cmp edx,[eax+
+ // BYTE mtOffset; // ResolverCacheElem.pMT]
+ BYTE part5 [1]; // 75 jne
+ BYTE toMiss1; // xx miss ;must be forward jump, for perf reasons
+ BYTE part6 [2]; // 81 78 cmp [eax+
+ BYTE tokenOffset; // xx ResolverCacheElem.token],
+ size_t _token; // xx xx xx xx token
+ BYTE part7 [1]; // 75 jne
+ BYTE toMiss2; // xx miss ;must be forward jump, for perf reasons
+ BYTE part8 [2]; // 8B 40 xx mov eax,[eax+
+ BYTE targetOffset; // ResolverCacheElem.target]
+ BYTE part9 [6]; // 5a pop edx
+ // 83 c4 04 add esp,4 ;throw away siteAddrForRegisterIndirect - we don't need it now
+ // ff e0 jmp eax
+ // miss:
+ BYTE miss [1]; // 5a pop edx ; don't pop siteAddrForRegisterIndirect - leave it on the stack for use by ResolveWorkerChainLookupAsmStub and/or ResolveWorkerAsmStub
+ BYTE _slowEntryPoint[1]; // 68 push
+ size_t _tokenPush; // xx xx xx xx token
+#ifdef STUB_LOGGING
+ BYTE cntr2[2]; // ff 05 inc
+ size_t* c_miss; // xx xx xx xx [miss_cache_counter]
+#endif //STUB_LOGGING
+ BYTE part10 [1]; // e9 jmp
+ DISPL _resolveWorkerDispl; // xx xx xx xx resolveWorker == ResolveWorkerChainLookupAsmStub or ResolveWorkerAsmStub
+ BYTE patch[1]; // e8 call
+ DISPL _backpatcherDispl; // xx xx xx xx backpatcherWorker == BackPatchWorkerAsmStub
+ BYTE part11 [1]; // eb jmp
+ BYTE toResolveStub; // xx resolveStub, i.e. go back to _resolveEntryPoint
+};
+
+/* ResolveHolders are the containers for ResolveStubs; they provide
+for any alignment of the stubs as necessary. The stubs are placed in a hash table keyed by
+the token for which they are built. Efficiency of access requires that this token be aligned.
+For now, we have copied that field into the ResolveHolder itself. If the resolve stub were arranged such that
+any of its inlined (non-prehashed) tokens were aligned, then the token field in the ResolveHolder
+would not be needed. */
+struct ResolveHolder
+{
+ static void InitializeStatic();
+
+ void Initialize(PCODE resolveWorkerTarget, PCODE patcherTarget,
+ size_t dispatchToken, UINT32 hashedToken,
+ void * cacheAddr, INT32 * counterAddr);
+
+ ResolveStub* stub() { LIMITED_METHOD_CONTRACT; return &_stub; }
+
+ static ResolveHolder* FromFailEntry(PCODE failEntry);
+ static ResolveHolder* FromResolveEntry(PCODE resolveEntry);
+
+private:
+ //align _token in resolve stub
+
+ BYTE align[(sizeof(void*)-((offsetof(ResolveStub,_token))%sizeof(void*)))%sizeof(void*)
+#ifdef STUB_LOGGING // This turns out to be zero-sized in stub_logging case, and is an error. So round up.
+ +sizeof(void*)
+#endif
+ ];
+
+ ResolveStub _stub;
+
+//#ifdef STUB_LOGGING // This turns out to be zero-sized in non stub_logging case, and is an error. So remove
+ BYTE pad[(sizeof(void*)-((sizeof(ResolveStub))%sizeof(void*))+offsetof(ResolveStub,_token))%sizeof(void*)]; //fill out DWORD
+//#endif
+};
+#include <poppack.h>
+
+
+#ifdef DECLARE_DATA
+
+#ifndef DACCESS_COMPILE
+
+#ifdef _MSC_VER
+
+#ifdef CHAIN_LOOKUP
+/* This will perform a chained lookup of the entry if the initial cache lookup fails
+
+ Entry stack:
+ dispatch token
+ siteAddrForRegisterIndirect (used only if this is a RegisterIndirect dispatch call)
+ return address of caller to stub
+ Also, EAX contains the pointer to the first ResolveCacheElem pointer for the calculated
+ bucket in the cache table.
+*/
+__declspec (naked) void ResolveWorkerChainLookupAsmStub()
+{
+ enum
+ {
+ e_token_size = 4,
+ e_indirect_addr_size = 4,
+ e_caller_ret_addr_size = 4,
+ };
+ enum
+ {
+ // this is the part of the stack that is present as we enter this function:
+ e_token = 0,
+ e_indirect_addr = e_token + e_token_size,
+ e_caller_ret_addr = e_indirect_addr + e_indirect_addr_size,
+ e_ret_esp = e_caller_ret_addr + e_caller_ret_addr_size,
+ };
+ enum
+ {
+ e_spilled_reg_size = 8,
+ };
+
+ // main loop setup
+ __asm {
+#ifdef STUB_LOGGING
+ inc g_chained_lookup_call_counter
+#endif
+ // spill regs
+ push edx
+ push ecx
+ // move the token into edx
+ mov edx,[esp+e_spilled_reg_size+e_token]
+ // move the MT into ecx
+ mov ecx,[ecx]
+ }
+ main_loop:
+ __asm {
+ // get the next entry in the chain (don't bother checking the first entry again)
+ mov eax,[eax+e_resolveCacheElem_offset_next]
+ // test if we hit a terminating NULL
+ test eax,eax
+ jz fail
+ // compare the MT of the ResolveCacheElem
+ cmp ecx,[eax+e_resolveCacheElem_offset_mt]
+ jne main_loop
+ // compare the token of the ResolveCacheElem
+ cmp edx,[eax+e_resolveCacheElem_offset_token]
+ jne main_loop
+ // success
+ // decrement success counter and move entry to start if necessary
+ sub g_dispatch_cache_chain_success_counter,1
+ //@TODO: Perhaps this should be a jl for better branch prediction?
+ jge nopromote
+ // be quick to reset the counter so we don't get a bunch of contending threads
+ add g_dispatch_cache_chain_success_counter,CALL_STUB_CACHE_INITIAL_SUCCESS_COUNT
+ // promote the entry to the beginning of the chain
+ mov ecx,eax
+ call VirtualCallStubManager::PromoteChainEntry
+ }
+ nopromote:
+ __asm {
+ // clean up the stack and jump to the target
+ pop ecx
+ pop edx
+ add esp,(e_caller_ret_addr - e_token)
+ mov eax,[eax+e_resolveCacheElem_offset_target]
+ jmp eax
+ }
+ fail:
+ __asm {
+#ifdef STUB_LOGGING
+ inc g_chained_lookup_miss_counter
+#endif
+ // restore registers
+ pop ecx
+ pop edx
+ jmp ResolveWorkerAsmStub
+ }
+}
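+
+/* Illustrative pseudo-code only for the loop above. On entry EAX holds the bucket's first element
+(already checked by the ResolveStub), shown here as firstElem; the pNext field name is an
+assumption, the stub itself uses the asmconstants offsets:
+
+    for (ResolveCacheElem* e = firstElem->pNext; e != NULL; e = e->pNext)
+    {
+        if (e->pMT == mt && e->token == token)
+        {
+            if (--g_dispatch_cache_chain_success_counter < 0)
+            {
+                g_dispatch_cache_chain_success_counter += CALL_STUB_CACHE_INITIAL_SUCCESS_COUNT;
+                VirtualCallStubManager::PromoteChainEntry(e);   // move the hot entry to the front of the chain
+            }
+            goto e->target;                                     // found it -- tail-jump to the method
+        }
+    }
+    goto ResolveWorkerAsmStub;                                  // chain exhausted -- do the full lookup
+*/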
+#endif
+
+/* Call the resolver, it will return where we are supposed to go.
+ There is a little stack magic here, in that we are entered with one
+ of the arguments for the resolver (the token) on the stack already.
+ We just push the other arguments, <this> in the call frame and the call site pointer,
+ and call the resolver.
+
+ On return we have the stack frame restored to the way it was when the ResolveStub
+ was called, i.e. as it was at the actual call site. The return value from
+ the resolver is the address we need to transfer control to, simulating a direct
+ call from the original call site. If we get passed back NULL, it means that the
+ resolution failed; an unimplemented method is being called.
+
+ Entry stack:
+ dispatch token
+ siteAddrForRegisterIndirect (used only if this is a RegisterIndirect dispatch call)
+ return address of caller to stub
+
+ Call stack:
+ pointer to TransitionBlock
+ call site
+ dispatch token
+ TransitionBlock
+ ArgumentRegisters (ecx, edx)
+ CalleeSavedRegisters (ebp, ebx, esi, edi)
+ return address of caller to stub
+ */
+__declspec (naked) void ResolveWorkerAsmStub()
+{
+ CANNOT_HAVE_CONTRACT;
+
+ __asm {
+ //
+ // The stub arguments are where we want to setup the TransitionBlock. We will
+ // setup the TransitionBlock later once we can trash them
+ //
+ // push ebp-frame
+ // push ebp
+ // mov ebp,esp
+
+ // save CalleeSavedRegisters
+ // push ebx
+
+ push esi
+ push edi
+
+ // push ArgumentRegisters
+ push ecx
+ push edx
+
+ mov esi, esp
+
+ push [esi + 4*4] // dispatch token
+ push [esi + 5*4] // siteAddrForRegisterIndirect
+ push esi // pTransitionBlock
+
+ // Setup up proper EBP frame now that the stub arguments can be trashed
+ mov [esi + 4*4],ebx
+ mov [esi + 5*4],ebp
+ lea ebp, [esi + 5*4]
+
+ // Make the call
+ call VSD_ResolveWorker
+
+ // From here on, mustn't trash eax
+
+ // pop ArgumentRegisters
+ pop edx
+ pop ecx
+
+ // pop CalleeSavedRegisters
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+
+ // Now jump to the target
+ jmp eax // continue on into the method
+ }
+}
+
+#ifdef FEATURE_REMOTING
+/* For an in-context dispatch, we will find the target. This
+ is the slow path, and erects a MachState structure for
+ creating a HelperMethodFrame
+
+ Entry stack:
+ dispatch token
+ return address of caller to stub
+
+ Call stack:
+ pointer to StubDispatchFrame
+ call site
+ dispatch token
+ StubDispatchFrame
+ GSCookie
+ negspace
+ vptr
+ datum
+ ArgumentRegisters (ecx, edx)
+ CalleeSavedRegisters (ebp, ebx, esi, edi)
+ return address of caller to stub
+*/
+__declspec (naked) void InContextTPDispatchAsmStub()
+{
+ CANNOT_HAVE_CONTRACT;
+
+ __asm {
+ // Pop dispatch token
+ pop eax
+
+ // push ebp-frame
+ push ebp
+ mov ebp,esp
+
+ // save CalleeSavedRegisters
+ push ebx
+ push esi
+ push edi
+
+ // push ArgumentRegisters
+ push ecx
+ push edx
+
+ mov esi, esp
+
+ push eax // token
+ push esi // pTransitionContext
+
+ // Make the call
+ call VSD_GetTargetForTPWorker
+
+ // From here on, mustn't trash eax
+
+ // pop ArgumentRegisters
+ pop edx
+ pop ecx
+
+ // pop CalleeSavedRegisters
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+
+ // Now jump to the target
+ jmp eax // continue on into the method
+ }
+}
+
+/* For an in-context dispatch, we will try to find the target in
+ the resolve cache. If this fails, we will jump to the full
+ version of InContextTPDispatchAsmStub
+
+ Entry stack:
+ dispatch slot number of interface MD
+ caller return address
+ ECX: this object
+*/
+__declspec (naked) void InContextTPQuickDispatchAsmStub()
+{
+ CANNOT_HAVE_CONTRACT;
+
+ __asm {
+ // Spill registers
+ push ecx
+ push edx
+
+ // Arg 2 - token
+ mov eax, [esp + 8]
+ push eax
+
+ // Arg 1 - this
+ push ecx
+
+ // Make the call
+ call VSD_GetTargetForTPWorkerQuick
+
+ // Restore registers
+ pop edx
+ pop ecx
+
+ // Test to see if we found a target
+ test eax, eax
+ jnz TargetFound
+
+ // If no target, jump to the slow worker
+ jmp InContextTPDispatchAsmStub
+
+ TargetFound:
+ // We got a target, so pop off the token and jump to it
+ add esp,4
+ jmp eax
+ }
+}
+#endif // FEATURE_REMOTING
+
+/* Call the callsite back patcher. The fail stub piece of the resolver is being
+called too often, i.e. dispatch stubs are failing the expectedMT test too often.
+This stub wraps the call to the BackPatchWorker to take care of any stack magic
+needed.
+*/
+__declspec (naked) void BackPatchWorkerAsmStub()
+{
+ CANNOT_HAVE_CONTRACT;
+
+ __asm {
+ push EBP
+ mov ebp,esp
+ push EAX // it may contain siteAddrForRegisterIndirect
+ push ECX
+ push EDX
+ push EAX // push any indirect call address as the second arg to BackPatchWorker
+ push [EBP+8] // and push return address as the first arg to BackPatchWorker
+ call VirtualCallStubManager::BackPatchWorkerStatic
+ pop EDX
+ pop ECX
+ pop EAX
+ mov esp,ebp
+ pop ebp
+ ret
+ }
+}
+
+#endif // _MSC_VER
+
+#ifdef _DEBUG
+//
+// This function verifies that a pointer to an indirection cell lives inside a delegate object.
+// In the delegate case the indirection cell is held by the delegate itself in _methodPtrAux. When the delegate's Invoke is
+// called, the shuffle thunk is invoked first, and that will call into the virtual dispatch stub.
+// Before control is given to the virtual dispatch stub, a pointer to the indirection cell (thus an interior pointer to the delegate)
+// is passed in EAX.
+//
+BOOL isDelegateCall(BYTE *interiorPtr)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ if (GCHeap::GetGCHeap()->IsHeapPointer((void*)interiorPtr))
+ {
+ Object *delegate = (Object*)(interiorPtr - DelegateObject::GetOffsetOfMethodPtrAux());
+ VALIDATEOBJECTREF(ObjectToOBJECTREF(delegate));
+ _ASSERTE(delegate->GetMethodTable()->IsDelegate());
+
+ return TRUE;
+ }
+ return FALSE;
+}
+#endif
+
+StubCallSite::StubCallSite(TADDR siteAddrForRegisterIndirect, PCODE returnAddr)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ // Not used
+ // if (isCallRelative(returnAddr))
+ // {
+ // m_siteAddr = returnAddr - sizeof(DISPL);
+ // }
+ // else
+ if (isCallRelativeIndirect((BYTE *)returnAddr))
+ {
+ m_siteAddr = *dac_cast<PTR_PTR_PCODE>(returnAddr - sizeof(PCODE));
+ }
+ else
+ {
+ _ASSERTE(isCallRegisterIndirect((BYTE *)returnAddr) || isDelegateCall((BYTE *)siteAddrForRegisterIndirect));
+ m_siteAddr = dac_cast<PTR_PCODE>(siteAddrForRegisterIndirect);
+ }
+}
+
+// the special return address for VSD tailcalls
+extern "C" void STDCALL JIT_TailCallReturnFromVSD();
+
+PCODE StubCallSite::GetCallerAddress()
+{
+ LIMITED_METHOD_CONTRACT;
+ if (m_returnAddr != (PCODE)JIT_TailCallReturnFromVSD)
+ return m_returnAddr;
+
+ // Find the tailcallframe in the frame chain and get the actual caller from the first TailCallFrame
+ return TailCallFrame::FindTailCallFrame(GetThread()->GetFrame())->GetCallerAddress();
+}
+
+#ifdef STUB_LOGGING
+extern size_t g_lookup_inline_counter;
+extern size_t g_mono_call_counter;
+extern size_t g_mono_miss_counter;
+extern size_t g_poly_call_counter;
+extern size_t g_poly_miss_counter;
+#endif
+
+/* Template used to generate the stub. We generate a stub by allocating a block of
+ memory, copying the template over it, and updating the specific fields that need
+ to be changed.
+*/
+LookupStub lookupInit;
+
+void LookupHolder::InitializeStatic()
+{
+ static_assert_no_msg(((offsetof(LookupStub, _token)+offsetof(LookupHolder, _stub)) % sizeof(void*)) == 0);
+ static_assert_no_msg((sizeof(LookupHolder) % sizeof(void*)) == 0);
+
+ lookupInit._entryPoint [0] = 0x50;
+ lookupInit._entryPoint [1] = 0x68;
+ static_assert_no_msg(sizeof(lookupInit._entryPoint) == 2);
+ lookupInit._token = 0xcccccccc;
+#ifdef STUB_LOGGING
+ lookupInit.cntr2 [0] = 0xff;
+ lookupInit.cntr2 [1] = 0x05;
+ static_assert_no_msg(sizeof(lookupInit.cntr2) == 2);
+ lookupInit.c_lookup = &g_call_lookup_counter;
+#endif //STUB_LOGGING
+ lookupInit.part2 [0] = 0xe9;
+ static_assert_no_msg(sizeof(lookupInit.part2) == 1);
+ lookupInit._resolveWorkerDispl = 0xcccccccc;
+}
+
+void LookupHolder::Initialize(PCODE resolveWorkerTarget, size_t dispatchToken)
+{
+ _stub = lookupInit;
+
+ //fill in the stub specific fields
+ //@TODO: Get rid of this duplication of data.
+ _stub._token = dispatchToken;
+ _stub._resolveWorkerDispl = resolveWorkerTarget - ((PCODE) &_stub._resolveWorkerDispl + sizeof(DISPL));
+}
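+
+/* The displacement math above is the standard x86 pc-relative encoding: the 32-bit displacement
+is measured from the end of the displacement field itself, so
+
+    disp   = target - ((PCODE)&disp + sizeof(DISPL))    // what Initialize stores
+    target = (PCODE)&disp + sizeof(DISPL) + disp        // what the stub accessors recover
+
+(illustrative identities only; DispatchHolder::Initialize and ResolveHolder::Initialize below use
+the same pattern). */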
+
+LookupHolder* LookupHolder::FromLookupEntry(PCODE lookupEntry)
+{
+ LIMITED_METHOD_CONTRACT;
+ LookupHolder* lookupHolder = (LookupHolder*) ( lookupEntry - offsetof(LookupHolder, _stub) - offsetof(LookupStub, _entryPoint) );
+ // _ASSERTE(lookupHolder->_stub._entryPoint[0] == lookupInit._entryPoint[0]);
+ return lookupHolder;
+}
+
+
+/* Template used to generate the stub. We generate a stub by allocating a block of
+ memory, copying the template over it, and updating the specific fields that need
+ to be changed.
+*/
+DispatchStub dispatchInit;
+
+void DispatchHolder::InitializeStatic()
+{
+ // Check that _expectedMT is aligned in the DispatchHolder
+ static_assert_no_msg(((offsetof(DispatchHolder, _stub) + offsetof(DispatchStub,_expectedMT)) % sizeof(void*)) == 0);
+ static_assert_no_msg((sizeof(DispatchHolder) % sizeof(void*)) == 0);
+
+#ifndef STUB_LOGGING
+ dispatchInit._entryPoint [0] = 0x81;
+ dispatchInit._entryPoint [1] = 0x39;
+ static_assert_no_msg(sizeof(dispatchInit._entryPoint) == 2);
+
+ dispatchInit._expectedMT = 0xcccccccc;
+ dispatchInit.jmpOp1 [0] = 0x0f;
+ dispatchInit.jmpOp1 [1] = 0x85;
+ static_assert_no_msg(sizeof(dispatchInit.jmpOp1) == 2);
+
+ dispatchInit._failDispl = 0xcccccccc;
+ dispatchInit.jmpOp2 = 0xe9;
+ dispatchInit._implDispl = 0xcccccccc;
+#else //STUB_LOGGING
+ dispatchInit._entryPoint [0] = 0xff;
+ dispatchInit._entryPoint [1] = 0x05;
+ static_assert_no_msg(sizeof(dispatchInit._entryPoint) == 2);
+
+ dispatchInit.d_call = &g_mono_call_counter;
+ dispatchInit.cmpOp [0] = 0x81;
+ dispatchInit.cmpOp [1] = 0x39;
+ static_assert_no_msg(sizeof(dispatchInit.cmpOp) == 2);
+
+ dispatchInit._expectedMT = 0xcccccccc;
+ dispatchInit.jmpOp1 [0] = 0x0f;
+ dispatchInit.jmpOp1 [1] = 0x84;
+ static_assert_no_msg(sizeof(dispatchInit.jmpOp1) == 2);
+
+ dispatchInit._implDispl = 0xcccccccc;
+ dispatchInit.fail [0] = 0xff;
+ dispatchInit.fail [1] = 0x05;
+ static_assert_no_msg(sizeof(dispatchInit.fail) == 2);
+
+ dispatchInit.d_miss = &g_mono_miss_counter;
+ dispatchInit.jmpFail = 0xe9;
+ dispatchInit._failDispl = 0xcccccccc;
+#endif //STUB_LOGGING
+}
+
+void DispatchHolder::Initialize(PCODE implTarget, PCODE failTarget, size_t expectedMT)
+{
+ _stub = dispatchInit;
+
+ //fill in the stub specific fields
+ _stub._expectedMT = (size_t) expectedMT;
+ _stub._failDispl = failTarget - ((PCODE) &_stub._failDispl + sizeof(DISPL));
+ _stub._implDispl = implTarget - ((PCODE) &_stub._implDispl + sizeof(DISPL));
+}
+
+DispatchHolder* DispatchHolder::FromDispatchEntry(PCODE dispatchEntry)
+{
+ LIMITED_METHOD_CONTRACT;
+ DispatchHolder* dispatchHolder = (DispatchHolder*) ( dispatchEntry - offsetof(DispatchHolder, _stub) - offsetof(DispatchStub, _entryPoint) );
+ // _ASSERTE(dispatchHolder->_stub._entryPoint[0] == dispatchInit._entryPoint[0]);
+ return dispatchHolder;
+}
+
+
+/* Template used to generate the stub. We generate a stub by allocating a block of
+ memory, copying the template over it, and updating the specific fields that need
+ to be changed.
+*/
+
+ResolveStub resolveInit;
+
+void ResolveHolder::InitializeStatic()
+{
+ //Check that _token is aligned in ResolveHolder
+ static_assert_no_msg(((offsetof(ResolveHolder, _stub) + offsetof(ResolveStub, _token)) % sizeof(void*)) == 0);
+ static_assert_no_msg((sizeof(ResolveHolder) % sizeof(void*)) == 0);
+
+ resolveInit._failEntryPoint [0] = 0x83;
+ resolveInit._failEntryPoint [1] = 0x2d;
+ static_assert_no_msg(sizeof(resolveInit._failEntryPoint) == 2);
+
+ resolveInit._pCounter = (INT32 *) (size_t) 0xcccccccc;
+ resolveInit.part0 [0] = 0x01;
+ resolveInit.part0 [1] = 0x7c;
+ static_assert_no_msg(sizeof(resolveInit.part0) == 2);
+
+ resolveInit.toPatcher = (offsetof(ResolveStub, patch) - (offsetof(ResolveStub, toPatcher) + 1)) & 0xFF;
+
+ resolveInit._resolveEntryPoint [0] = 0x50;
+ resolveInit._resolveEntryPoint [1] = 0x8b;
+ resolveInit._resolveEntryPoint [2] = 0x01;
+ resolveInit._resolveEntryPoint [3] = 0x52;
+ resolveInit._resolveEntryPoint [4] = 0x8b;
+ resolveInit._resolveEntryPoint [5] = 0xd0;
+ static_assert_no_msg(sizeof(resolveInit._resolveEntryPoint) == 6);
+
+ resolveInit.part1 [0] = 0xc1;
+ resolveInit.part1 [1] = 0xe8;
+ resolveInit.part1 [2] = CALL_STUB_CACHE_NUM_BITS;
+ resolveInit.part1 [3] = 0x03;
+ resolveInit.part1 [4] = 0xc2;
+ resolveInit.part1 [5] = 0x35;
+ static_assert_no_msg(sizeof(resolveInit.part1) == 6);
+
+ resolveInit._hashedToken = 0xcccccccc;
+ resolveInit.part2 [0] = 0x25;
+ static_assert_no_msg(sizeof(resolveInit.part2) == 1);
+
+ resolveInit.mask = (CALL_STUB_CACHE_MASK << LOG2_PTRSIZE);
+ resolveInit.part3 [0] = 0x8b;
+ resolveInit.part3 [1] = 0x80;
+ static_assert_no_msg(sizeof(resolveInit.part3) == 2);
+
+ resolveInit._cacheAddress = 0xcccccccc;
+#ifdef STUB_LOGGING
+ resolveInit.cntr1 [0] = 0xff;
+ resolveInit.cntr1 [1] = 0x05;
+ static_assert_no_msg(sizeof(resolveInit.cntr1) == 2);
+
+ resolveInit.c_call = &g_poly_call_counter;
+#endif //STUB_LOGGING
+ resolveInit.part4 [0] = 0x3b;
+ resolveInit.part4 [1] = 0x10;
+ static_assert_no_msg(sizeof(resolveInit.part4) == 2);
+
+ // resolveInit.mtOffset = offsetof(ResolveCacheElem,pMT) & 0xFF;
+ static_assert_no_msg(offsetof(ResolveCacheElem,pMT) == 0);
+
+ resolveInit.part5 [0] = 0x75;
+ static_assert_no_msg(sizeof(resolveInit.part5) == 1);
+
+ resolveInit.toMiss1 = offsetof(ResolveStub,miss)-(offsetof(ResolveStub,toMiss1)+1);
+
+ resolveInit.part6 [0] = 0x81;
+ resolveInit.part6 [1] = 0x78;
+ static_assert_no_msg(sizeof(resolveInit.part6) == 2);
+
+ resolveInit.tokenOffset = offsetof(ResolveCacheElem,token) & 0xFF;
+
+ resolveInit._token = 0xcccccccc;
+
+ resolveInit.part7 [0] = 0x75;
+ static_assert_no_msg(sizeof(resolveInit.part7) == 1);
+
+ resolveInit.part8 [0] = 0x8b;
+ resolveInit.part8 [1] = 0x40;
+ static_assert_no_msg(sizeof(resolveInit.part8) == 2);
+
+ resolveInit.targetOffset = offsetof(ResolveCacheElem,target) & 0xFF;
+
+ resolveInit.toMiss2 = offsetof(ResolveStub,miss)-(offsetof(ResolveStub,toMiss2)+1);
+
+ resolveInit.part9 [0] = 0x5a;
+ resolveInit.part9 [1] = 0x83;
+ resolveInit.part9 [2] = 0xc4;
+ resolveInit.part9 [3] = 0x04;
+ resolveInit.part9 [4] = 0xff;
+ resolveInit.part9 [5] = 0xe0;
+ static_assert_no_msg(sizeof(resolveInit.part9) == 6);
+
+ resolveInit.miss [0] = 0x5a;
+// resolveInit.miss [1] = 0xb8;
+// resolveInit._hashedTokenMov = 0xcccccccc;
+ resolveInit._slowEntryPoint [0] = 0x68;
+ resolveInit._tokenPush = 0xcccccccc;
+#ifdef STUB_LOGGING
+ resolveInit.cntr2 [0] = 0xff;
+ resolveInit.cntr2 [1] = 0x05;
+ resolveInit.c_miss = &g_poly_miss_counter;
+#endif //STUB_LOGGING
+ resolveInit.part10 [0] = 0xe9;
+ resolveInit._resolveWorkerDispl = 0xcccccccc;
+
+ resolveInit.patch [0] = 0xe8;
+ resolveInit._backpatcherDispl = 0xcccccccc;
+ resolveInit.part11 [0] = 0xeb;
+ resolveInit.toResolveStub = (offsetof(ResolveStub, _resolveEntryPoint) - (offsetof(ResolveStub, toResolveStub) + 1)) & 0xFF;
+}
+
+void ResolveHolder::Initialize(PCODE resolveWorkerTarget, PCODE patcherTarget,
+ size_t dispatchToken, UINT32 hashedToken,
+ void * cacheAddr, INT32 * counterAddr)
+{
+ _stub = resolveInit;
+
+ //fill in the stub specific fields
+ _stub._pCounter = counterAddr;
+ _stub._hashedToken = hashedToken << LOG2_PTRSIZE;
+ _stub._cacheAddress = (size_t) cacheAddr;
+ _stub._token = dispatchToken;
+// _stub._hashedTokenMov = hashedToken;
+ _stub._tokenPush = dispatchToken;
+ _stub._resolveWorkerDispl = resolveWorkerTarget - ((PCODE) &_stub._resolveWorkerDispl + sizeof(DISPL));
+ _stub._backpatcherDispl = patcherTarget - ((PCODE) &_stub._backpatcherDispl + sizeof(DISPL));
+}
+
+ResolveHolder* ResolveHolder::FromFailEntry(PCODE failEntry)
+{
+ LIMITED_METHOD_CONTRACT;
+ ResolveHolder* resolveHolder = (ResolveHolder*) ( failEntry - offsetof(ResolveHolder, _stub) - offsetof(ResolveStub, _failEntryPoint) );
+ // _ASSERTE(resolveHolder->_stub._resolveEntryPoint[0] == resolveInit._resolveEntryPoint[0]);
+ return resolveHolder;
+}
+
+ResolveHolder* ResolveHolder::FromResolveEntry(PCODE resolveEntry)
+{
+ LIMITED_METHOD_CONTRACT;
+ ResolveHolder* resolveHolder = (ResolveHolder*) ( resolveEntry - offsetof(ResolveHolder, _stub) - offsetof(ResolveStub, _resolveEntryPoint) );
+ // _ASSERTE(resolveHolder->_stub._resolveEntryPoint[0] == resolveInit._resolveEntryPoint[0]);
+ return resolveHolder;
+}
+
+#endif // DACCESS_COMPILE
+
+VirtualCallStubManager::StubKind VirtualCallStubManager::predictStubKind(PCODE stubStartAddress)
+{
+ SUPPORTS_DAC;
+#ifdef DACCESS_COMPILE
+
+ return SK_BREAKPOINT; // Dac always uses the slower lookup
+
+#else
+
+ StubKind stubKind = SK_UNKNOWN;
+
+ EX_TRY
+ {
+ // If stubStartAddress is completely bogus, then this might AV,
+ // so we protect it with SEH. An AV here is OK.
+ AVInRuntimeImplOkayHolder AVOkay;
+
+ WORD firstWord = *((WORD*) stubStartAddress);
+
+#ifndef STUB_LOGGING
+ if (firstWord == 0x3981)
+#else //STUB_LOGGING
+ if (firstWord == 0x05ff)
+#endif
+ {
+ stubKind = SK_DISPATCH;
+ }
+ else if (firstWord == 0x6850)
+ {
+ stubKind = SK_LOOKUP;
+ }
+ else if (firstWord == 0x8b50)
+ {
+ stubKind = SK_RESOLVE;
+ }
+ else
+ {
+ BYTE firstByte = ((BYTE*) stubStartAddress)[0];
+ BYTE secondByte = ((BYTE*) stubStartAddress)[1];
+
+ if ((firstByte == X86_INSTR_INT3) ||
+ (secondByte == X86_INSTR_INT3))
+ {
+ stubKind = SK_BREAKPOINT;
+ }
+ }
+ }
+ EX_CATCH
+ {
+ stubKind = SK_UNKNOWN;
+ }
+ EX_END_CATCH(SwallowAllExceptions);
+
+ return stubKind;
+
+#endif // DACCESS_COMPILE
+}
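+
+// For reference (illustrative, non-logging layouts): the WORD compares above are just the first
+// two code bytes of each stub read as a little-endian 16-bit value -- DispatchStub starts
+// 81 39 (cmp [ecx],imm32) so the WORD is 0x3981, LookupStub starts 50 68 (push eax / push imm32)
+// giving 0x6850, and ResolveStub starts 50 8b (push eax / mov eax,[ecx]) giving 0x8b50.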
+
+#endif //DECLARE_DATA
+
+#endif // _VIRTUAL_CALL_STUB_X86_H