Diffstat (limited to 'src/vm/i386/virtualcallstubcpu.hpp')
 src/vm/i386/virtualcallstubcpu.hpp | 1077 +++++++++++++++++++++++++++++++++++
 1 file changed, 1077 insertions(+), 0 deletions(-)
diff --git a/src/vm/i386/virtualcallstubcpu.hpp b/src/vm/i386/virtualcallstubcpu.hpp
new file mode 100644
index 0000000000..33ce8199b9
--- /dev/null
+++ b/src/vm/i386/virtualcallstubcpu.hpp
@@ -0,0 +1,1077 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+// File: virtualcallstubcpu.hpp
+//
+
+
+//
+
+//
+// ============================================================================
+
+#ifndef _VIRTUAL_CALL_STUB_X86_H
+#define _VIRTUAL_CALL_STUB_X86_H
+
+#ifdef DECLARE_DATA
+#include "asmconstants.h"
+#ifdef FEATURE_REMOTING
+#include "remoting.h"
+#endif
+#endif
+
+#include <pshpack1.h>  // Since we are placing code, we want byte packing of the structs
+
+#define USES_LOOKUP_STUBS 1
+
+/*********************************************************************************************
+Stubs that contain code are all part of larger structs called Holders. There is a
+Holder for each kind of stub, i.e. XXXStub is contained within XXXHolder. Holders are
+essentially an implementation trick that allowed rearranging the code sequences more
+easily while trying out different alternatives, and for dealing with any alignment
+issues in a way that was mostly immune to the actual code sequences. These Holders
+should be revisited when the stub code sequences are fixed, since in many cases they
+add extra space to a stub that is not really needed.
+
+Stubs are placed in cache and hash tables. Since unaligned access of data in memory
+is very slow, the keys used in those tables should be aligned. The things used as keys
+typically also occur in the generated code, e.g. a token as an immediate part of an instruction.
+For now, to avoid alignment computations as different code strategies are tried out, the key
+fields are all in the Holders. Eventually, many of these fields should be dropped, and the instruction
+streams aligned so that the immediate fields fall on aligned boundaries.
+*/
+
+#if USES_LOOKUP_STUBS
+
+struct LookupStub;
+struct LookupHolder;
+
+/*LookupStub**************************************************************************************
+Virtual and interface call sites are initially set up to point at LookupStubs.
+This is because the runtime type of the <this> pointer is not yet known,
+so the target cannot be resolved. Note: if the jit is able to determine the runtime type
+of the <this> pointer, it should be generating a direct call, not a virtual or interface call.
+This stub pushes a lookup token onto the stack to identify the sought-after method, and then
+jumps into the EE (VirtualCallStubManager::ResolveWorkerStub) to effectuate the lookup and
+transfer of control to the appropriate target method implementation, perhaps patching of the call site
+along the way to point to a more appropriate stub. Hence callsites that point to LookupStubs
+get quickly changed to point to another kind of stub.
+*/
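All three stub kinds below chain to their targets through E9 jmp rel32 instructions. As a minimal sketch of the displacement arithmetic the Initialize routines in this file rely on (the helper name is illustrative, not part of this file):

    // The CPU resolves a rel32 jmp/call relative to the end of the displacement
    // field, so the writer subtracts that end address from the desired target.
    inline DISPL PcRelDispl(PCODE target, DISPL* pDisplField)
    {
        return (DISPL)(target - ((PCODE)pDisplField + sizeof(DISPL)));
    }

LookupHolder::Initialize, DispatchHolder::Initialize and ResolveHolder::Initialize below all inline exactly this computation when patching their displacement fields.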
+struct LookupStub
+{
+    inline PCODE entryPoint()  { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; }
+    inline size_t token()      { LIMITED_METHOD_CONTRACT; return _token; }
+    inline size_t size()       { LIMITED_METHOD_CONTRACT; return sizeof(LookupStub); }
+
+private:
+    friend struct LookupHolder;
+
+    // LookupStub::_entryPoint expects:
+    //       ecx: object (the "this" pointer)
+    //       eax: siteAddrForRegisterIndirect if this is a RegisterIndirect dispatch call
+    BYTE    _entryPoint [2];    // 50           push  eax       ;save siteAddrForRegisterIndirect - this may be an indirect call
+                                // 68           push
+    size_t  _token;             // xx xx xx xx        32-bit constant
+#ifdef STUB_LOGGING
+    BYTE    cntr2[2];           // ff 05        inc
+    size_t* c_lookup;           // xx xx xx xx        [call_lookup_counter]
+#endif //STUB_LOGGING
+    BYTE    part2 [1];          // e9           jmp
+    DISPL   _resolveWorkerDispl;// xx xx xx xx        pc-rel displ
+};
+
+/* LookupHolders are the containers for LookupStubs, they provide for any alignment of
+stubs as necessary. In the case of LookupStubs, alignment is necessary since
+LookupStubs are placed in a hash table keyed by token. */
+struct LookupHolder
+{
+    static void InitializeStatic();
+
+    void Initialize(PCODE resolveWorkerTarget, size_t dispatchToken);
+
+    LookupStub* stub()  { LIMITED_METHOD_CONTRACT; return &_stub; }
+
+    static LookupHolder* FromLookupEntry(PCODE lookupEntry);
+
+private:
+    friend struct LookupStub;
+
+    BYTE align[(sizeof(void*)-(offsetof(LookupStub,_token)%sizeof(void*)))%sizeof(void*)];
+    LookupStub _stub;
+    BYTE pad[sizeof(void*) -
+             ((sizeof(void*)-(offsetof(LookupStub,_token)%sizeof(void*))) +
+              (sizeof(LookupStub))
+             ) % sizeof(void*)];    //complete DWORD
+
+    static_assert_no_msg((sizeof(void*) -
+                          ((sizeof(void*)-(offsetof(LookupStub,_token)%sizeof(void*))) +
+                           (sizeof(LookupStub))
+                          ) % sizeof(void*)) != 0);
+};
+
+#endif // USES_LOOKUP_STUBS
+
+struct DispatchStub;
+struct DispatchHolder;
+
+/*DispatchStub**************************************************************************************
+Monomorphic and mostly monomorphic call sites eventually point to DispatchStubs.
+A dispatch stub has an expected type (expectedMT), target address (target) and fail address (failure).
+If the calling frame's <this> object is in fact of the expected type, then
+control is transferred to the target address, the method implementation. If not,
+then control is transferred to the fail address, a fail stub (see below) where a polymorphic
+lookup is done to find the correct address to go to.
+
+implementation note: Order, choice of instructions, and branch directions
+should be carefully tuned since it can have an inordinate effect on performance. Particular
+attention needs to be paid to the effects on the BTB and branch prediction, both in the small
+and in the large, i.e. it needs to run well in the face of BTB overflow--using static predictions.
+Note that since this stub is only used for mostly monomorphic callsites (ones that are not get patched
+to something else), the conditional jump "jne failure" is mostly not taken, and hence it is important
+that branch prediction statically predicts this, which means it must be a forward jump. The alternative
+is to reverse the order of the jumps and make sure that the resulting conditional jump "je implTarget"
+is statically predicted as taken, i.e. a backward jump. The current choice was taken since it was easier
+to control the placement of the stubs than control the placement of the jitted code and the stubs.
+*/
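Expressed in C, the non-logging code sequence documented in the struct below amounts to the following (an illustrative sketch, not code from this file):

    // What the 32-bit DispatchStub executes, restated in C form.
    PCODE Dispatch(Object* pThis, size_t expectedMT, PCODE implTarget, PCODE failTarget)
    {
        if (*(size_t*)pThis == expectedMT)  // cmp [ecx], expectedMT; faults here on a null this
            return implTarget;              // "jne failure" not taken (forward, predicted not-taken)
        return failTarget;                  // failure path: on to the ResolveStub
    }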
+struct DispatchStub
+{
+    inline PCODE  entryPoint()  { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; }
+
+    inline size_t expectedMT()  { LIMITED_METHOD_CONTRACT; return _expectedMT; }
+    inline PCODE  implTarget()  { LIMITED_METHOD_CONTRACT; return (PCODE) &_implDispl + sizeof(DISPL) + _implDispl; }
+    inline PCODE  failTarget()  { LIMITED_METHOD_CONTRACT; return (PCODE) &_failDispl + sizeof(DISPL) + _failDispl; }
+    inline size_t size()        { LIMITED_METHOD_CONTRACT; return sizeof(DispatchStub); }
+
+private:
+    friend struct DispatchHolder;
+
+    // DispatchStub::_entryPoint expects:
+    //       ecx: object (the "this" pointer)
+    //       eax: siteAddrForRegisterIndirect if this is a RegisterIndirect dispatch call
+#ifndef STUB_LOGGING
+    BYTE    _entryPoint [2];    // 81 39        cmp  [ecx],     ; This is the place where we are going to fault on null this.
+    size_t  _expectedMT;        // xx xx xx xx  expectedMT      ; If you change it, change also AdjustContextForVirtualStub in excep.cpp!!!
+    BYTE    jmpOp1[2];          // 0f 85        jne
+    DISPL   _failDispl;         // xx xx xx xx  failEntry       ;must be forward jmp for perf reasons
+    BYTE    jmpOp2;             // e9           jmp
+    DISPL   _implDispl;         // xx xx xx xx  implTarget
+#else //STUB_LOGGING
+    BYTE    _entryPoint [2];    // ff 05        inc
+    size_t* d_call;             // xx xx xx xx  [call_mono_counter]
+    BYTE    cmpOp [2];          // 81 39        cmp  [ecx],
+    size_t  _expectedMT;        // xx xx xx xx  expectedMT
+    BYTE    jmpOp1[2];          // 0f 84        je
+    DISPL   _implDispl;         // xx xx xx xx  implTarget      ;during logging, perf is not so important
+    BYTE    fail [2];           // ff 05        inc
+    size_t* d_miss;             // xx xx xx xx  [miss_mono_counter]
+    BYTE    jmpFail;            // e9           jmp
+    DISPL   _failDispl;         // xx xx xx xx  failEntry
+#endif //STUB_LOGGING
+};
+
+/* DispatchHolders are the containers for DispatchStubs, they provide for any alignment of
+stubs as necessary. DispatchStubs are placed in a hashtable and in a cache. The keys for both
+are the pair expectedMT and token. Efficiency of the hash table is not a big issue,
+since lookups in it are fairly rare. Efficiency of the cache is paramount since it is accessed frequently
+(see ResolveStub below). Currently we are storing both of these fields in the DispatchHolder to simplify
+alignment issues. If inlineMT in the stub itself was aligned, then it could be the expectedMT field.
+While the token field can be logically gotten by following the failure target to the failEntryPoint
+of the ResolveStub and then to the token over there, for perf reasons of cache access, it is duplicated here.
+This allows us to use DispatchStubs in the cache. The alternative is to provide some other immutable struct
+for the cache composed of the triplet (expectedMT, token, target) and some sort of reclamation scheme when
+they are thrown out of the cache via overwrites (since concurrency will make the obvious approaches invalid).
+*/
+
+/* @workaround for ee resolution - Since the EE does not currently have a resolver function that
+does what we want, see notes in implementation of VirtualCallStubManager::Resolver, we are
+using dispatch stubs to simulate what we want. That means that inlineTarget, which should be immutable,
+is in fact written. Hence we have moved target out into the holder and aligned it so we can
+atomically update it.
+When we get a resolver function that does what we want, we can drop this field,
+and live with just the inlineTarget field in the stub itself, since immutability will hold.*/
+struct DispatchHolder
+{
+    static void InitializeStatic();
+
+    void Initialize(PCODE implTarget, PCODE failTarget, size_t expectedMT);
+
+    DispatchStub* stub()  { LIMITED_METHOD_CONTRACT; return &_stub; }
+
+    static DispatchHolder* FromDispatchEntry(PCODE dispatchEntry);
+
+private:
+    //force expectedMT to be aligned since used as key in hash tables.
+#ifndef STUB_LOGGING
+    BYTE align[(sizeof(void*)-(offsetof(DispatchStub,_expectedMT)%sizeof(void*)))%sizeof(void*)];
+#endif
+    DispatchStub _stub;
+    BYTE pad[(sizeof(void*)-(sizeof(DispatchStub)%sizeof(void*))+offsetof(DispatchStub,_expectedMT))%sizeof(void*)];    //complete DWORD
+};
+
+struct ResolveStub;
+struct ResolveHolder;
+
+/*ResolveStub**************************************************************************************
+Polymorphic call sites and monomorphic calls that fail end up in a ResolverStub. There is only
+one resolver stub built for any given token, even though there may be many call sites that
+use that token and many distinct <this> types that are used in the calling call frames. A resolver stub
+actually has two entry points, one for polymorphic call sites and one for dispatch stubs that fail on their
+expectedMT test. There is a third part of the resolver stub that enters the ee when a decision should
+be made about changing the callsite. Therefore, we have defined the resolver stub as three distinct pieces,
+even though they are actually allocated as a single contiguous block of memory. These pieces are:
+
+A ResolveStub has two entry points:
+
+FailEntry - where the dispatch stub goes if the expected MT test fails. This piece of the stub does
+a check to see how often we are actually failing. If failures are frequent, control transfers to the
+patch piece to cause the call site to be changed from a mostly monomorphic callsite
+(calls dispatch stub) to a polymorphic callsite (calls resolve stub). If failures are rare, control
+transfers to the resolve piece (see ResolveStub). The failEntryPoint decrements a counter
+every time it is entered. The ee at various times will add a large chunk to the counter.
+
+ResolveEntry - does a lookup in a cache by hashing the actual type of the calling frame's
+<this> and the token identifying the (contract,method) pair desired. If found, control is transferred
+to the method implementation. If not found in the cache, the token is pushed and the ee is entered via
+the ResolveWorkerStub to do a full lookup and eventual transfer to the correct method implementation. Since
+there is a different resolve stub for every token, the token can be inlined and the token can be pre-hashed.
+The effectiveness of this approach is highly sensitive to the effectiveness of the hashing algorithm used,
+as well as its speed. It turns out it is very important to make the hash function sensitive to all
+of the bits of the method table, as method tables are laid out in memory in a very non-random way. Before
+making any changes to the code sequences here, it is very important to measure and tune them as perf
+can vary greatly, in unexpected ways, with seemingly minor changes.
+
+Implementation note - Order, choice of instructions, and branch directions
+should be carefully tuned since it can have an inordinate effect on performance. Particular
+attention needs to be paid to the effects on the BTB and branch prediction, both in the small
+and in the large, i.e. it needs to run well in the face of BTB overflow--using static predictions.
+Note that this stub is called in highly polymorphic cases, but the cache should have been sized
+and the hash function chosen to maximize the cache hit case. Hence the cmp/jcc instructions should
+mostly be going down the cache hit route, and it is important that this be statically predicted as so.
+Hence the 3 jcc instrs need to be forward jumps. As structured, there is only one jmp/jcc that typically
+gets put in the BTB since all the others typically fall straight thru. Minimizing potential BTB entries
+is important. */
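The cache probe performed by the resolve entry point can be sketched in C as follows; the field names mirror the ResolverCacheElem comments in the struct below, and the sketch is an illustration of the hashing scheme, not code from this file:

    // Hash the method table together with the pre-hashed token, mask down to a
    // byte offset into the cache, and compare both keys of the selected element.
    PCODE ProbeResolveCache(Object* pThis, UINT32 hashedToken, size_t token,
                            BYTE* cacheAddress, size_t mask)
    {
        size_t mt   = *(size_t*)pThis;                    // mov eax,[ecx]
        size_t hash = (((mt >> CALL_STUB_CACHE_NUM_BITS)  // shr/add folds upper MT bits into lower
                        + mt) ^ hashedToken) & mask;      // xor in the token hash, mask to the cache
        ResolveCacheElem* e = *(ResolveCacheElem**)(cacheAddress + hash);
        if ((size_t)e->pMT == mt && e->token == token)    // the two forward jne's below
            return (PCODE)e->target;                      // jmp eax
        return NULL;                                      // fall through to the miss path
    }

Note that _hashedToken is stored pre-shifted by LOG2_PTRSIZE (see ResolveHolder::Initialize below), so the masked result is already a pointer-aligned byte offset.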
+
+struct ResolveStub
+{
+    inline PCODE failEntryPoint()     { LIMITED_METHOD_CONTRACT; return (PCODE)&_failEntryPoint[0]; }
+    inline PCODE resolveEntryPoint()  { LIMITED_METHOD_CONTRACT; return (PCODE)&_resolveEntryPoint[0]; }
+    inline PCODE slowEntryPoint()     { LIMITED_METHOD_CONTRACT; return (PCODE)&_slowEntryPoint[0]; }
+
+    inline INT32* pCounter()          { LIMITED_METHOD_CONTRACT; return _pCounter; }
+    inline UINT32 hashedToken()       { LIMITED_METHOD_CONTRACT; return _hashedToken >> LOG2_PTRSIZE; }
+    inline size_t cacheAddress()      { LIMITED_METHOD_CONTRACT; return _cacheAddress; }
+    inline size_t token()             { LIMITED_METHOD_CONTRACT; return _token; }
+    inline size_t size()              { LIMITED_METHOD_CONTRACT; return sizeof(ResolveStub); }
+
+private:
+    friend struct ResolveHolder;
+
+    // ResolveStub::_failEntryPoint expects:
+    //       ecx: object (the "this" pointer)
+    //       eax: siteAddrForRegisterIndirect if this is a RegisterIndirect dispatch call
+    BYTE    _failEntryPoint [2];    // 83 2d        sub
+    INT32*  _pCounter;              // xx xx xx xx  [counter],
+    BYTE    part0 [2];              // 01           01
+                                    // 7c           jl
+    BYTE    toPatcher;              // xx           backpatcher     ;must be forward jump, for perf reasons
+                                    //                              ;fall into the resolver stub
+
+    // ResolveStub::_resolveEntryPoint expects:
+    //       ecx: object (the "this" pointer)
+    //       eax: siteAddrForRegisterIndirect if this is a RegisterIndirect dispatch call
+    BYTE    _resolveEntryPoint[6];  // 50           push  eax       ;save siteAddrForRegisterIndirect - this may be an indirect call
+                                    // 8b 01        mov   eax,[ecx] ;get the method table from the "this" pointer. This is the place
+                                    //                              ; where we are going to fault on null this. If you change it,
+                                    //                              ; change also AdjustContextForVirtualStub in excep.cpp!!!
+                                    // 52           push  edx
+                                    // 8b d0        mov   edx, eax
+    BYTE    part1 [6];              // c1 e8 0C     shr   eax,12    ;we are adding upper bits into lower bits of mt
+                                    // 03 c2        add   eax,edx
+                                    // 35           xor   eax,
+    UINT32  _hashedToken;           // xx xx xx xx  hashedToken     ;along with pre-hashed token
+    BYTE    part2 [1];              // 25           and   eax,
+    size_t  mask;                   // xx xx xx xx  cache_mask
+    BYTE    part3 [2];              // 8b 80        mov   eax, [eax+
+    size_t  _cacheAddress;          // xx xx xx xx  lookupCache]
+#ifdef STUB_LOGGING
+    BYTE    cntr1[2];               // ff 05        inc
+    size_t* c_call;                 // xx xx xx xx  [call_cache_counter]
+#endif //STUB_LOGGING
+    BYTE    part4 [2];              // 3b 10        cmp   edx,[eax+
+    // BYTE mtOffset;               //              ResolverCacheElem.pMT]
+    BYTE    part5 [1];              // 75           jne
+    BYTE    toMiss1;                // xx           miss            ;must be forward jump, for perf reasons
+    BYTE    part6 [2];              // 81 78        cmp   [eax+
+    BYTE    tokenOffset;            // xx           ResolverCacheElem.token],
+    size_t  _token;                 // xx xx xx xx  token
+    BYTE    part7 [1];              // 75           jne
+    BYTE    toMiss2;                // xx           miss            ;must be forward jump, for perf reasons
+    BYTE    part8 [2];              // 8B 40 xx     mov   eax,[eax+
+    BYTE    targetOffset;           //              ResolverCacheElem.target]
+    BYTE    part9 [6];              // 5a           pop   edx
+                                    // 83 c4 04     add   esp,4     ;throw away siteAddrForRegisterIndirect - we don't need it now
+                                    // ff e0        jmp   eax
+                                    //         miss:
+    BYTE    miss [1];               // 5a           pop   edx       ; don't pop siteAddrForRegisterIndirect - leave it on the stack for use by ResolveWorkerChainLookupAsmStub and/or ResolveWorkerAsmStub
+    BYTE    _slowEntryPoint[1];     // 68           push
+    size_t  _tokenPush;             // xx xx xx xx  token
+#ifdef STUB_LOGGING
+    BYTE    cntr2[2];               // ff 05        inc
+    size_t* c_miss;                 // xx xx xx xx  [miss_cache_counter]
+#endif //STUB_LOGGING
+    BYTE    part10 [1];             // e9           jmp
+    DISPL   _resolveWorkerDispl;    // xx xx xx xx  resolveWorker == ResolveWorkerChainLookupAsmStub or ResolveWorkerAsmStub
+    BYTE    patch[1];               // e8           call
+    DISPL   _backpatcherDispl;      // xx xx xx xx  backpatcherWorker == BackPatchWorkerAsmStub
+    BYTE    part11 [1];             // eb           jmp
+    BYTE    toResolveStub;          // xx           resolveStub, i.e. go back to _resolveEntryPoint
+};
+
+/* ResolveHolders are the containers for ResolveStubs; they provide
+for any alignment of the stubs as necessary. The stubs are placed in a hash table keyed by
+the token for which they are built. Efficiency of access requires that this token be aligned.
+For now, we have copied that field into the ResolveHolder itself; if the resolve stub is arranged such that
+any of its inlined tokens (non-prehashed) is aligned, then the token field in the ResolveHolder
+is not needed. */
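To make the align/pad arithmetic used by these holders concrete, here is a worked example, under the assumption of a 32-bit build (sizeof(void*) == 4) and an arbitrary illustrative field offset; it is not literal code from this file:

    // Suppose offsetof(ResolveStub, _token) % 4 == 3. Then:
    //   align[] is (4 - 3) % 4 = 1 byte, sliding the stub so that _token lands
    //   on a 4-byte boundary inside the holder, and
    //   pad[] is (4 - sizeof(ResolveStub) % 4 + offsetof(ResolveStub, _token)) % 4,
    //   rounding the holder back out to a whole number of pointers.
    // InitializeStatic() below asserts the intended invariant directly:
    //   ((offsetof(ResolveHolder, _stub) + offsetof(ResolveStub, _token)) % sizeof(void*)) == 0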
+struct ResolveHolder
+{
+    static void InitializeStatic();
+
+    void Initialize(PCODE resolveWorkerTarget, PCODE patcherTarget,
+                    size_t dispatchToken, UINT32 hashedToken,
+                    void * cacheAddr, INT32 * counterAddr);
+
+    ResolveStub* stub()  { LIMITED_METHOD_CONTRACT; return &_stub; }
+
+    static ResolveHolder* FromFailEntry(PCODE failEntry);
+    static ResolveHolder* FromResolveEntry(PCODE resolveEntry);
+
+private:
+    //align _token in resolve stub
+
+    BYTE align[(sizeof(void*)-((offsetof(ResolveStub,_token))%sizeof(void*)))%sizeof(void*)
+#ifdef STUB_LOGGING // This turns out to be zero-sized in stub_logging case, and is an error. So round up.
+               +sizeof(void*)
+#endif
+              ];
+
+    ResolveStub _stub;
+
+//#ifdef STUB_LOGGING // This turns out to be zero-sized in non stub_logging case, and is an error. So remove
+    BYTE pad[(sizeof(void*)-((sizeof(ResolveStub))%sizeof(void*))+offsetof(ResolveStub,_token))%sizeof(void*)]; //fill out DWORD
+//#endif
+};
+
+#include <poppack.h>
+
+
+#ifdef DECLARE_DATA
+
+#ifndef DACCESS_COMPILE
+
+#ifdef _MSC_VER
+
+#ifdef CHAIN_LOOKUP
+/* This will perform a chained lookup of the entry if the initial cache lookup fails
+
+   Entry stack:
+            dispatch token
+            siteAddrForRegisterIndirect (used only if this is a RegisterIndirect dispatch call)
+            return address of caller to stub
+        Also, EAX contains the pointer to the first ResolveCacheElem pointer for the calculated
+        bucket in the cache table.
+*/
+__declspec (naked) void ResolveWorkerChainLookupAsmStub()
+{
+    enum
+    {
+        e_token_size           = 4,
+        e_indirect_addr_size   = 4,
+        e_caller_ret_addr_size = 4,
+    };
+    enum
+    {
+        // this is the part of the stack that is present as we enter this function:
+        e_token           = 0,
+        e_indirect_addr   = e_token + e_token_size,
+        e_caller_ret_addr = e_indirect_addr + e_indirect_addr_size,
+        e_ret_esp         = e_caller_ret_addr + e_caller_ret_addr_size,
+    };
+    enum
+    {
+        e_spilled_reg_size = 8,
+    };
+
+    // main loop setup
+    __asm {
+#ifdef STUB_LOGGING
+        inc     g_chained_lookup_call_counter
+#endif
+        // spill regs
+        push    edx
+        push    ecx
+        // move the token into edx
+        mov     edx,[esp+e_spilled_reg_size+e_token]
+        // move the MT into ecx
+        mov     ecx,[ecx]
+    }
+    main_loop:
+    __asm {
+        // get the next entry in the chain (don't bother checking the first entry again)
+        mov     eax,[eax+e_resolveCacheElem_offset_next]
+        // test if we hit a terminating NULL
+        test    eax,eax
+        jz      fail
+        // compare the MT of the ResolveCacheElem
+        cmp     ecx,[eax+e_resolveCacheElem_offset_mt]
+        jne     main_loop
+        // compare the token of the ResolveCacheElem
+        cmp     edx,[eax+e_resolveCacheElem_offset_token]
+        jne     main_loop
+        // success
+        // decrement success counter and move entry to start if necessary
+        sub     g_dispatch_cache_chain_success_counter,1
+        //@TODO: Perhaps this should be a jl for better branch prediction?
+        jge     nopromote
+        // be quick to reset the counter so we don't get a bunch of contending threads
+        add     g_dispatch_cache_chain_success_counter,CALL_STUB_CACHE_INITIAL_SUCCESS_COUNT
+        // promote the entry to the beginning of the chain
+        mov     ecx,eax
+        call    VirtualCallStubManager::PromoteChainEntry
+    }
+    nopromote:
+    __asm {
+        // clean up the stack and jump to the target
+        pop     ecx
+        pop     edx
+        add     esp,(e_caller_ret_addr - e_token)
+        mov     eax,[eax+e_resolveCacheElem_offset_target]
+        jmp     eax
+    }
+    fail:
+    __asm {
+#ifdef STUB_LOGGING
+        inc     g_chained_lookup_miss_counter
+#endif
+        // restore registers
+        pop     ecx
+        pop     edx
+        jmp     ResolveWorkerAsmStub
+    }
+}
+#endif
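The chain walk above, restated as C (a sketch only; the pNext/pMT/token field names mirror the e_resolveCacheElem_* offsets the assembly uses, and the counter/promotion bookkeeping is elided):

    ResolveCacheElem* ChainLookup(ResolveCacheElem* pBucket, MethodTable* pMT, size_t token)
    {
        // The stub skips the first element: the inline cache probe already rejected it.
        for (ResolveCacheElem* e = pBucket->pNext; e != NULL; e = e->pNext)
        {
            if (e->pMT == pMT && e->token == token)
                return e;   // hit: the stub then jumps to e->target, occasionally
                            // promoting the entry via PromoteChainEntry first
        }
        return NULL;        // miss: the stub tail-jumps to ResolveWorkerAsmStub
    }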
+
+/* Call the resolver, it will return where we are supposed to go.
+   There is a little stack magic here, in that we are entered with one
+   of the arguments for the resolver (the token) on the stack already.
+   We just push the other arguments, <this> in the call frame and the call site pointer,
+   and call the resolver.
+
+   On return we have the stack frame restored to the way it was when the ResolveStub
+   was called, i.e. as it was at the actual call site. The return value from
+   the resolver is the address we need to transfer control to, simulating a direct
+   call from the original call site. If we get passed back NULL, it means that the
+   resolution failed, an unimplemented method is being called.
+
+   Entry stack:
+            dispatch token
+            siteAddrForRegisterIndirect (used only if this is a RegisterIndirect dispatch call)
+            return address of caller to stub
+
+   Call stack:
+            pointer to TransitionBlock
+            call site
+            dispatch token
+            TransitionBlock
+                ArgumentRegisters (ecx, edx)
+                CalleeSavedRegisters (ebp, ebx, esi, edi)
+            return address of caller to stub
+*/
+__declspec (naked) void ResolveWorkerAsmStub()
+{
+    CANNOT_HAVE_CONTRACT;
+
+    __asm {
+        //
+        // The stub arguments are where we want to setup the TransitionBlock. We will
+        // setup the TransitionBlock later once we can trash them
+        //
+        // push ebp-frame
+        // push ebp
+        // mov ebp,esp
+
+        // save CalleeSavedRegisters
+        // push ebx
+
+        push    esi
+        push    edi
+
+        // push ArgumentRegisters
+        push    ecx
+        push    edx
+
+        mov     esi, esp
+
+        push    [esi + 4*4]     // dispatch token
+        push    [esi + 5*4]     // siteAddrForRegisterIndirect
+        push    esi             // pTransitionBlock
+
+        // Setup up proper EBP frame now that the stub arguments can be trashed
+        mov     [esi + 4*4],ebx
+        mov     [esi + 5*4],ebp
+        lea     ebp, [esi + 5*4]
+
+        // Make the call
+        call    VSD_ResolveWorker
+
+        // From here on, mustn't trash eax
+
+        // pop ArgumentRegisters
+        pop     edx
+        pop     ecx
+
+        // pop CalleeSavedRegisters
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+
+        // Now jump to the target
+        jmp     eax             // continue on into the method
+    }
+}
+
+#ifdef FEATURE_REMOTING
+/* For an in-context dispatch, we will find the target. This
+   is the slow path, and erects a MachState structure for
+   creating a HelperMethodFrame
+
+   Entry stack:
+            dispatch token
+            return address of caller to stub
+
+   Call stack:
+            pointer to StubDispatchFrame
+            call site
+            dispatch token
+            StubDispatchFrame
+                GSCookie
+                negspace
+                vptr
+                datum
+                ArgumentRegisters (ecx, edx)
+                CalleeSavedRegisters (ebp, ebx, esi, edi)
+            return address of caller to stub
+*/
+__declspec (naked) void InContextTPDispatchAsmStub()
+{
+    CANNOT_HAVE_CONTRACT;
+
+    __asm {
+        // Pop dispatch token
+        pop     eax
+
+        // push ebp-frame
+        push    ebp
+        mov     ebp,esp
+
+        // save CalleeSavedRegisters
+        push    ebx
+        push    esi
+        push    edi
+
+        // push ArgumentRegisters
+        push    ecx
+        push    edx
+
+        mov     esi, esp
+
+        push    eax         // token
+        push    esi         // pTransitionContext
+
+        // Make the call
+        call    VSD_GetTargetForTPWorker
+
+        // From here on, mustn't trash eax
+
+        // pop ArgumentRegisters
+        pop     edx
+        pop     ecx
+
+        // pop CalleeSavedRegisters
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+
+        // Now jump to the target
+        jmp     eax         // continue on into the method
+    }
+}
+
+/* For an in-context dispatch, we will try to find the target in
+   the resolve cache. If this fails, we will jump to the full
+   version of InContextTPDispatchAsmStub
+
+   Entry stack:
+            dispatch slot number of interface MD
+            caller return address
+   ECX: this object
+*/
+__declspec (naked) void InContextTPQuickDispatchAsmStub()
+{
+    CANNOT_HAVE_CONTRACT;
+
+    __asm {
+        // Spill registers
+        push    ecx
+        push    edx
+
+        // Arg 2 - token
+        mov     eax, [esp + 8]
+        push    eax
+
+        // Arg 1 - this
+        push    ecx
+
+        // Make the call
+        call    VSD_GetTargetForTPWorkerQuick
+
+        // Restore registers
+        pop     edx
+        pop     ecx
+
+        // Test to see if we found a target
+        test    eax, eax
+        jnz     TargetFound
+
+        // If no target, jump to the slow worker
+        jmp     InContextTPDispatchAsmStub
+
+    TargetFound:
+        // We got a target, so pop off the token and jump to it
+        add     esp,4
+        jmp     eax
+    }
+}
+#endif // FEATURE_REMOTING
+
+/* Call the callsite back patcher. The fail stub piece of the resolver is being
+called too often, i.e. dispatch stubs are failing the expected MT test too often.
+This stub wraps the call to the BackPatchWorker to take care of any stack magic
+needed.
+*/
+__declspec (naked) void BackPatchWorkerAsmStub()
+{
+    CANNOT_HAVE_CONTRACT;
+
+    __asm {
+        push    EBP
+        mov     ebp,esp
+        push    EAX         // it may contain siteAddrForRegisterIndirect
+        push    ECX
+        push    EDX
+        push    EAX         // push any indirect call address as the second arg to BackPatchWorker
+        push    [EBP+8]     // and push return address as the first arg to BackPatchWorker
+        call    VirtualCallStubManager::BackPatchWorkerStatic
+        pop     EDX
+        pop     ECX
+        pop     EAX
+        mov     esp,ebp
+        pop     ebp
+        ret
+    }
+}
+
+#endif // _MSC_VER
+
+#ifdef _DEBUG
+//
+// This function verifies that a pointer to an indirection cell lives inside a delegate object.
+// In the delegate case the indirection cell is held by the delegate itself in _methodPtrAux; when the delegate Invoke is
+// called, the shuffle thunk is first invoked and that will call into the virtual dispatch stub.
+// Before control is given to the virtual dispatch stub, a pointer to the indirection cell (thus an interior pointer to the delegate)
+// is pushed in EAX
+//
+BOOL isDelegateCall(BYTE *interiorPtr)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    if (GCHeap::GetGCHeap()->IsHeapPointer((void*)interiorPtr))
+    {
+        Object *delegate = (Object*)(interiorPtr - DelegateObject::GetOffsetOfMethodPtrAux());
+        VALIDATEOBJECTREF(ObjectToOBJECTREF(delegate));
+        _ASSERTE(delegate->GetMethodTable()->IsDelegate());
+
+        return TRUE;
+    }
+    return FALSE;
+}
+#endif
+
+StubCallSite::StubCallSite(TADDR siteAddrForRegisterIndirect, PCODE returnAddr)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    // Not used
+    // if (isCallRelative(returnAddr))
+    // {
+    //      m_siteAddr = returnAddr - sizeof(DISPL);
+    // }
+    // else
+    if (isCallRelativeIndirect((BYTE *)returnAddr))
+    {
+        m_siteAddr = *dac_cast<PTR_PTR_PCODE>(returnAddr - sizeof(PCODE));
+    }
+    else
+    {
+        _ASSERTE(isCallRegisterIndirect((BYTE *)returnAddr) || isDelegateCall((BYTE *)siteAddrForRegisterIndirect));
+        m_siteAddr = dac_cast<PTR_PCODE>(siteAddrForRegisterIndirect);
+    }
+}
+
+// the special return address for VSD tailcalls
+extern "C" void STDCALL JIT_TailCallReturnFromVSD();
+
+PCODE StubCallSite::GetCallerAddress()
+{
+    LIMITED_METHOD_CONTRACT;
+    if (m_returnAddr != (PCODE)JIT_TailCallReturnFromVSD)
+        return m_returnAddr;
+
+    // Find the tailcallframe in the frame chain and get the actual caller from the first TailCallFrame
+    return TailCallFrame::FindTailCallFrame(GetThread()->GetFrame())->GetCallerAddress();
+}
+
+#ifdef STUB_LOGGING
+extern size_t g_lookup_inline_counter;
+extern size_t g_mono_call_counter;
+extern size_t g_mono_miss_counter;
+extern size_t g_poly_call_counter;
+extern size_t g_poly_miss_counter;
+#endif
+
+/* Template used to generate the stub. We generate a stub by allocating a block of
+   memory, copying the template over it, and just updating the specific fields that need
+   to be changed.
+*/
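The lifecycle implied by this fill-in-the-template pattern, sketched end to end (the allocation call shown is hypothetical; real callers live in VirtualCallStubManager and carve holders out of executable stub heaps):

    // 1. InitializeStatic() fills the file-scope template (opcodes + 0xcccccccc placeholders).
    // 2. A holder is carved out of executable memory (allocator name is hypothetical).
    LookupHolder* pHolder = (LookupHolder*)AllocateFromStubHeap(sizeof(LookupHolder));
    // 3. Initialize() copies the template and patches the per-site fields.
    pHolder->Initialize(resolveWorkerTarget, dispatchToken);
    // 4. The entry point is what gets wired into the call site.
    PCODE entry = pHolder->stub()->entryPoint();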
+LookupStub lookupInit;
+
+void LookupHolder::InitializeStatic()
+{
+    static_assert_no_msg(((offsetof(LookupStub, _token)+offsetof(LookupHolder, _stub)) % sizeof(void*)) == 0);
+    static_assert_no_msg((sizeof(LookupHolder) % sizeof(void*)) == 0);
+
+    lookupInit._entryPoint [0]     = 0x50;
+    lookupInit._entryPoint [1]     = 0x68;
+    static_assert_no_msg(sizeof(lookupInit._entryPoint) == 2);
+    lookupInit._token              = 0xcccccccc;
+#ifdef STUB_LOGGING
+    lookupInit.cntr2 [0]           = 0xff;
+    lookupInit.cntr2 [1]           = 0x05;
+    static_assert_no_msg(sizeof(lookupInit.cntr2) == 2);
+    lookupInit.c_lookup            = &g_call_lookup_counter;
+#endif //STUB_LOGGING
+    lookupInit.part2 [0]           = 0xe9;
+    static_assert_no_msg(sizeof(lookupInit.part2) == 1);
+    lookupInit._resolveWorkerDispl = 0xcccccccc;
+}
+
+void LookupHolder::Initialize(PCODE resolveWorkerTarget, size_t dispatchToken)
+{
+    _stub = lookupInit;
+
+    //fill in the stub specific fields
+    //@TODO: Get rid of this duplication of data.
+    _stub._token              = dispatchToken;
+    _stub._resolveWorkerDispl = resolveWorkerTarget - ((PCODE) &_stub._resolveWorkerDispl + sizeof(DISPL));
+}
+
+LookupHolder* LookupHolder::FromLookupEntry(PCODE lookupEntry)
+{
+    LIMITED_METHOD_CONTRACT;
+    LookupHolder* lookupHolder = (LookupHolder*) ( lookupEntry - offsetof(LookupHolder, _stub) - offsetof(LookupStub, _entryPoint) );
+    //    _ASSERTE(lookupHolder->_stub._entryPoint[0] == lookupInit._entryPoint[0]);
+    return lookupHolder;
+}
+
+
+/* Template used to generate the stub. We generate a stub by allocating a block of
+   memory, copying the template over it, and just updating the specific fields that need
+   to be changed.
+*/
+DispatchStub dispatchInit;
+
+void DispatchHolder::InitializeStatic()
+{
+    // Check that _expectedMT is aligned in the DispatchHolder
+    static_assert_no_msg(((offsetof(DispatchHolder, _stub) + offsetof(DispatchStub,_expectedMT)) % sizeof(void*)) == 0);
+    static_assert_no_msg((sizeof(DispatchHolder) % sizeof(void*)) == 0);
+
+#ifndef STUB_LOGGING
+    dispatchInit._entryPoint [0] = 0x81;
+    dispatchInit._entryPoint [1] = 0x39;
+    static_assert_no_msg(sizeof(dispatchInit._entryPoint) == 2);
+
+    dispatchInit._expectedMT     = 0xcccccccc;
+    dispatchInit.jmpOp1 [0]      = 0x0f;
+    dispatchInit.jmpOp1 [1]      = 0x85;
+    static_assert_no_msg(sizeof(dispatchInit.jmpOp1) == 2);
+
+    dispatchInit._failDispl      = 0xcccccccc;
+    dispatchInit.jmpOp2          = 0xe9;
+    dispatchInit._implDispl      = 0xcccccccc;
+#else //STUB_LOGGING
+    dispatchInit._entryPoint [0] = 0xff;
+    dispatchInit._entryPoint [1] = 0x05;
+    static_assert_no_msg(sizeof(dispatchInit._entryPoint) == 2);
+
+    dispatchInit.d_call          = &g_mono_call_counter;
+    dispatchInit.cmpOp [0]       = 0x81;
+    dispatchInit.cmpOp [1]       = 0x39;
+    static_assert_no_msg(sizeof(dispatchInit.cmpOp) == 2);
+
+    dispatchInit._expectedMT     = 0xcccccccc;
+    dispatchInit.jmpOp1 [0]      = 0x0f;
+    dispatchInit.jmpOp1 [1]      = 0x84;
+    static_assert_no_msg(sizeof(dispatchInit.jmpOp1) == 2);
+
+    dispatchInit._implDispl      = 0xcccccccc;
+    dispatchInit.fail [0]        = 0xff;
+    dispatchInit.fail [1]        = 0x05;
+    static_assert_no_msg(sizeof(dispatchInit.fail) == 2);
+
+    dispatchInit.d_miss          = &g_mono_miss_counter;
+    dispatchInit.jmpFail         = 0xe9;
+    dispatchInit._failDispl      = 0xcccccccc;
+#endif //STUB_LOGGING
+}
+
+void DispatchHolder::Initialize(PCODE implTarget, PCODE failTarget, size_t expectedMT)
+{
+    _stub = dispatchInit;
+
+    //fill in the stub specific fields
+    _stub._expectedMT = (size_t) expectedMT;
+    _stub._failDispl  = failTarget - ((PCODE) &_stub._failDispl + sizeof(DISPL));
+    _stub._implDispl  = implTarget - ((PCODE) &_stub._implDispl + sizeof(DISPL));
+}
+
+DispatchHolder* DispatchHolder::FromDispatchEntry(PCODE dispatchEntry)
+{
+    LIMITED_METHOD_CONTRACT;
+    DispatchHolder* dispatchHolder = (DispatchHolder*) ( dispatchEntry - offsetof(DispatchHolder, _stub) - offsetof(DispatchStub, _entryPoint) );
+    //    _ASSERTE(dispatchHolder->_stub._entryPoint[0] == dispatchInit._entryPoint[0]);
+    return dispatchHolder;
+}
+
+
+/* Template used to generate the stub. We generate a stub by allocating a block of
+   memory, copying the template over it, and just updating the specific fields that need
+   to be changed.
+*/
+
+ResolveStub resolveInit;
+
+void ResolveHolder::InitializeStatic()
+{
+    //Check that _token is aligned in ResolveHolder
+    static_assert_no_msg(((offsetof(ResolveHolder, _stub) + offsetof(ResolveStub, _token)) % sizeof(void*)) == 0);
+    static_assert_no_msg((sizeof(ResolveHolder) % sizeof(void*)) == 0);
+
+    resolveInit._failEntryPoint [0]    = 0x83;
+    resolveInit._failEntryPoint [1]    = 0x2d;
+    static_assert_no_msg(sizeof(resolveInit._failEntryPoint) == 2);
+
+    resolveInit._pCounter              = (INT32 *) (size_t) 0xcccccccc;
+    resolveInit.part0 [0]              = 0x01;
+    resolveInit.part0 [1]              = 0x7c;
+    static_assert_no_msg(sizeof(resolveInit.part0) == 2);
+
+    resolveInit.toPatcher              = (offsetof(ResolveStub, patch) - (offsetof(ResolveStub, toPatcher) + 1)) & 0xFF;
+
+    resolveInit._resolveEntryPoint [0] = 0x50;
+    resolveInit._resolveEntryPoint [1] = 0x8b;
+    resolveInit._resolveEntryPoint [2] = 0x01;
+    resolveInit._resolveEntryPoint [3] = 0x52;
+    resolveInit._resolveEntryPoint [4] = 0x8b;
+    resolveInit._resolveEntryPoint [5] = 0xd0;
+    static_assert_no_msg(sizeof(resolveInit._resolveEntryPoint) == 6);
+
+    resolveInit.part1 [0]              = 0xc1;
+    resolveInit.part1 [1]              = 0xe8;
+    resolveInit.part1 [2]              = CALL_STUB_CACHE_NUM_BITS;
+    resolveInit.part1 [3]              = 0x03;
+    resolveInit.part1 [4]              = 0xc2;
+    resolveInit.part1 [5]              = 0x35;
+    static_assert_no_msg(sizeof(resolveInit.part1) == 6);
+
+    resolveInit._hashedToken           = 0xcccccccc;
+    resolveInit.part2 [0]              = 0x25;
+    static_assert_no_msg(sizeof(resolveInit.part2) == 1);
+
+    resolveInit.mask                   = (CALL_STUB_CACHE_MASK << LOG2_PTRSIZE);
+    resolveInit.part3 [0]              = 0x8b;
+    resolveInit.part3 [1]              = 0x80;
+    static_assert_no_msg(sizeof(resolveInit.part3) == 2);
+
+    resolveInit._cacheAddress          = 0xcccccccc;
+#ifdef STUB_LOGGING
+    resolveInit.cntr1 [0]              = 0xff;
+    resolveInit.cntr1 [1]              = 0x05;
+    static_assert_no_msg(sizeof(resolveInit.cntr1) == 2);
+
+    resolveInit.c_call                 = &g_poly_call_counter;
+#endif //STUB_LOGGING
+    resolveInit.part4 [0]              = 0x3b;
+    resolveInit.part4 [1]              = 0x10;
+    static_assert_no_msg(sizeof(resolveInit.part4) == 2);
+
+    // resolveInit.mtOffset            = offsetof(ResolveCacheElem,pMT) & 0xFF;
+    static_assert_no_msg(offsetof(ResolveCacheElem,pMT) == 0);
+
+    resolveInit.part5 [0]              = 0x75;
+    static_assert_no_msg(sizeof(resolveInit.part5) == 1);
+
+    resolveInit.toMiss1                = offsetof(ResolveStub,miss)-(offsetof(ResolveStub,toMiss1)+1);
+
+    resolveInit.part6 [0]              = 0x81;
+    resolveInit.part6 [1]              = 0x78;
+    static_assert_no_msg(sizeof(resolveInit.part6) == 2);
+
+    resolveInit.tokenOffset            = offsetof(ResolveCacheElem,token) & 0xFF;
+
+    resolveInit._token                 = 0xcccccccc;
+
+    resolveInit.part7 [0]              = 0x75;
+    static_assert_no_msg(sizeof(resolveInit.part7) == 1);
+
+    resolveInit.part8 [0]              = 0x8b;
+    resolveInit.part8 [1]              = 0x40;
+    static_assert_no_msg(sizeof(resolveInit.part8) == 2);
+
+    resolveInit.targetOffset           = offsetof(ResolveCacheElem,target) & 0xFF;
+
+    resolveInit.toMiss2                = offsetof(ResolveStub,miss)-(offsetof(ResolveStub,toMiss2)+1);
+
+    resolveInit.part9 [0]              = 0x5a;
+    resolveInit.part9 [1]              = 0x83;
+    resolveInit.part9 [2]              = 0xc4;
+    resolveInit.part9 [3]              = 0x04;
+    resolveInit.part9 [4]              = 0xff;
+    resolveInit.part9 [5]              = 0xe0;
+    static_assert_no_msg(sizeof(resolveInit.part9) == 6);
+
+    resolveInit.miss [0]               = 0x5a;
+//  resolveInit.miss [1]               = 0xb8;
+//  resolveInit._hashedTokenMov        = 0xcccccccc;
+    resolveInit._slowEntryPoint [0]    = 0x68;
+    resolveInit._tokenPush             = 0xcccccccc;
+#ifdef STUB_LOGGING
+    resolveInit.cntr2 [0]              = 0xff;
+    resolveInit.cntr2 [1]              = 0x05;
+    resolveInit.c_miss                 = &g_poly_miss_counter;
+#endif //STUB_LOGGING
+    resolveInit.part10 [0]             = 0xe9;
+    resolveInit._resolveWorkerDispl    = 0xcccccccc;
+
+    resolveInit.patch [0]              = 0xe8;
+    resolveInit._backpatcherDispl      = 0xcccccccc;
+    resolveInit.part11 [0]             = 0xeb;
+    resolveInit.toResolveStub          = (offsetof(ResolveStub, _resolveEntryPoint) - (offsetof(ResolveStub, toResolveStub) + 1)) & 0xFF;
+}
+
+void ResolveHolder::Initialize(PCODE resolveWorkerTarget, PCODE patcherTarget,
+                               size_t dispatchToken, UINT32 hashedToken,
+                               void * cacheAddr, INT32 * counterAddr)
+{
+    _stub = resolveInit;
+
+    //fill in the stub specific fields
+    _stub._pCounter           = counterAddr;
+    _stub._hashedToken        = hashedToken << LOG2_PTRSIZE;
+    _stub._cacheAddress       = (size_t) cacheAddr;
+    _stub._token              = dispatchToken;
+//  _stub._hashedTokenMov     = hashedToken;
+    _stub._tokenPush          = dispatchToken;
+    _stub._resolveWorkerDispl = resolveWorkerTarget - ((PCODE) &_stub._resolveWorkerDispl + sizeof(DISPL));
+    _stub._backpatcherDispl   = patcherTarget      - ((PCODE) &_stub._backpatcherDispl   + sizeof(DISPL));
+}
+
+ResolveHolder* ResolveHolder::FromFailEntry(PCODE failEntry)
+{
+    LIMITED_METHOD_CONTRACT;
+    ResolveHolder* resolveHolder = (ResolveHolder*) ( failEntry - offsetof(ResolveHolder, _stub) - offsetof(ResolveStub, _failEntryPoint) );
+    //    _ASSERTE(resolveHolder->_stub._resolveEntryPoint[0] == resolveInit._resolveEntryPoint[0]);
+    return resolveHolder;
+}
+
+ResolveHolder* ResolveHolder::FromResolveEntry(PCODE resolveEntry)
+{
+    LIMITED_METHOD_CONTRACT;
+    ResolveHolder* resolveHolder = (ResolveHolder*) ( resolveEntry - offsetof(ResolveHolder, _stub) - offsetof(ResolveStub, _resolveEntryPoint) );
+    //    _ASSERTE(resolveHolder->_stub._resolveEntryPoint[0] == resolveInit._resolveEntryPoint[0]);
+    return resolveHolder;
+}
+
+#endif // DACCESS_COMPILE
+
+VirtualCallStubManager::StubKind VirtualCallStubManager::predictStubKind(PCODE stubStartAddress)
+{
+    SUPPORTS_DAC;
+#ifdef DACCESS_COMPILE
+
+    return SK_BREAKPOINT;  // Dac always uses the slower lookup
+
+#else
+
+    StubKind stubKind = SK_UNKNOWN;
+
+    EX_TRY
+    {
+        // If stubStartAddress is completely bogus, then this might AV,
+        // so we protect it with SEH. An AV here is OK.
+        AVInRuntimeImplOkayHolder AVOkay;
+
+        WORD firstWord = *((WORD*) stubStartAddress);
+
+#ifndef STUB_LOGGING
+        if (firstWord == 0x3981)        // bytes 81 39: "cmp [ecx],imm32", DispatchStub's first instruction
+#else //STUB_LOGGING
+        if (firstWord == 0x05ff)        // bytes ff 05: "inc [counter]", the logging DispatchStub's first instruction
+#endif
+        {
+            stubKind = SK_DISPATCH;
+        }
+        else if (firstWord == 0x6850)   // bytes 50 68: "push eax; push imm32", LookupStub's first instructions
+        {
+            stubKind = SK_LOOKUP;
+        }
+        else if (firstWord == 0x8b50)   // bytes 50 8b: "push eax; mov eax,[ecx]", ResolveStub's resolve entry point
+        {
+            stubKind = SK_RESOLVE;
+        }
+        else
+        {
+            BYTE firstByte  = ((BYTE*) stubStartAddress)[0];
+            BYTE secondByte = ((BYTE*) stubStartAddress)[1];
+
+            if ((firstByte  == X86_INSTR_INT3) ||
+                (secondByte == X86_INSTR_INT3))
+            {
+                stubKind = SK_BREAKPOINT;
+            }
+        }
+    }
+    EX_CATCH
+    {
+        stubKind = SK_UNKNOWN;
+    }
+    EX_END_CATCH(SwallowAllExceptions);
+
+    return stubKind;
+
+#endif // DACCESS_COMPILE
+}
+
+#endif //DECLARE_DATA
+
+#endif // _VIRTUAL_CALL_STUB_X86_H