diff options
Diffstat (limited to 'src/vm/virtualcallstub.h')
-rw-r--r-- | src/vm/virtualcallstub.h | 1625 |
1 files changed, 1625 insertions, 0 deletions
diff --git a/src/vm/virtualcallstub.h b/src/vm/virtualcallstub.h
new file mode 100644
index 0000000000..b3f736fcae
--- /dev/null
+++ b/src/vm/virtualcallstub.h
@@ -0,0 +1,1625 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+// File: VirtualCallStub.h
+//
+
+
+
+//
+// See code:VirtualCallStubManager for details
+//
+// ============================================================================
+
+#ifndef _VIRTUAL_CALL_STUB_H
+#define _VIRTUAL_CALL_STUB_H
+
+#ifndef CROSSGEN_COMPILE
+
+#define CHAIN_LOOKUP
+
+#if defined(_TARGET_X86_)
+// If this is uncommented, leaves a file "StubLog_<pid>.log" with statistics on the behavior
+// of stub-based interface dispatch.
+//#define STUB_LOGGING
+#endif
+
+#include "stubmgr.h"
+
+/////////////////////////////////////////////////////////////////////////////////////
+// Forward class declarations
+class FastTable;
+class BucketTable;
+class Entry;
+class Prober;
+class VirtualCallStubManager;
+class VirtualCallStubManagerManager;
+struct LookupHolder;
+struct DispatchHolder;
+struct ResolveHolder;
+
+/////////////////////////////////////////////////////////////////////////////////////
+// Forward function declarations
+extern "C" void InContextTPQuickDispatchAsmStub();
+
+extern "C" PCODE STDCALL StubDispatchFixupWorker(TransitionBlock * pTransitionBlock,
+                                                 TADDR siteAddrForRegisterIndirect,
+                                                 DWORD sectionIndex,
+                                                 Module * pModule);
+
+extern "C" PCODE STDCALL VSD_ResolveWorker(TransitionBlock * pTransitionBlock,
+                                           TADDR siteAddrForRegisterIndirect,
+                                           size_t token
+#ifndef _TARGET_X86_
+                                           , UINT_PTR flags
+#endif
+                                           );
+
+#ifdef FEATURE_REMOTING
+// This is used by TransparentProxyWorkerStub to take a stub address (token), and
+// MethodTable and return the target. It will look in the cache first, and if not found
+// will call the resolver and then put the result into the cache.
+extern "C" PCODE STDCALL VSD_GetTargetForTPWorkerQuick(TransparentProxyObject * orTP, size_t token);
+extern "C" PCODE STDCALL VSD_GetTargetForTPWorker(TransitionBlock * pTransitionBlock, size_t token);
+#endif
+
+/////////////////////////////////////////////////////////////////////////////////////
+#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+typedef INT32 DISPL;
+#endif
+
+/////////////////////////////////////////////////////////////////////////////////////
+// Represents the struct that is added to the resolve cache
+// NOTE: If you change the layout of this struct, you'll need to update various
+//       ASM helpers in VirtualCallStubCpu that rely on offsets of members.
+//
+struct ResolveCacheElem
+{
+    void *pMT;
+    size_t token;   // DispatchToken
+    void *target;
+
+    // These are used for chaining
+    ResolveCacheElem *pNext;
+    ResolveCacheElem *Next()
+    { LIMITED_METHOD_CONTRACT; return VolatileLoad(&pNext); }
+
+#ifdef _DEBUG
+    UINT16 debug_hash;
+    UINT16 debug_index;
+#endif // _DEBUG
+
+    BOOL Equals(size_t token, void *pMT)
+    { LIMITED_METHOD_CONTRACT; return (this->pMT == pMT && this->token == token); }
+
+    BOOL Equals(ResolveCacheElem *pElem)
+    { WRAPPER_NO_CONTRACT; return Equals(pElem->token, pElem->pMT); }
+
+};
+
+enum
+{
+    e_resolveCacheElem_sizeof_mt     = sizeof(void *),
+    e_resolveCacheElem_sizeof_token  = sizeof(size_t),
+    e_resolveCacheElem_sizeof_target = sizeof(void *),
+    e_resolveCacheElem_sizeof_next   = sizeof(ResolveCacheElem *),
+
+    e_resolveCacheElem_offset_mt     = 0,
+    e_resolveCacheElem_offset_token  = e_resolveCacheElem_offset_mt + e_resolveCacheElem_sizeof_mt,
+    e_resolveCacheElem_offset_target = e_resolveCacheElem_offset_token + e_resolveCacheElem_sizeof_token,
+    e_resolveCacheElem_offset_next   = e_resolveCacheElem_offset_target + e_resolveCacheElem_sizeof_target,
+};
+
+/////////////////////////////////////////////////////////////////////////////////////
+// A utility class to help manipulate a call site
+struct StubCallSite
+{
+    friend class VirtualCallStubManager;
+
+private:
+
+    // On x86 there are four possible kinds of callsites when you take into account all features:
+    //  Relative:                  direct call, e.g. "call addr". Not used currently.
+    //  RelativeIndirect (JmpRel): indirect call through a relative address, e.g. "call [addr]"
+    //  RegisterIndirect:          indirect call through a register, e.g. "call [eax]"
+    //  DelegateCallSite:          anything else, tail called through a register by a shuffle thunk, e.g. "jmp [eax]"
+    //
+    // On all other platforms we always use an indirect call through an indirection cell.
+    // In these cases all calls are made by the platform equivalent of "call [addr]".
+    //
+    // DelegateCallSites are particular in that they can come in a variety of forms:
+    // a direct delegate call has a sequence defined by the jit, but a multicast or secure delegate
+    // call is defined in a stub and has a different shape.
+    //
+    PTR_PCODE       m_siteAddr;     // Stores the address of an indirection cell
+    PCODE           m_returnAddr;
+
+public:
+
+#if defined(_TARGET_X86_)
+    StubCallSite(TADDR siteAddrForRegisterIndirect, PCODE returnAddr);
+
+    PCODE           GetCallerAddress();
+#else // !defined(_TARGET_X86_)
+    // On platforms where we always use an indirection cell things
+    // are much simpler - the siteAddr always stores a pointer to a
+    // value that in turn points to the indirection cell.
+
+    StubCallSite(TADDR siteAddr, PCODE returnAddr)
+        { LIMITED_METHOD_CONTRACT; m_siteAddr = dac_cast<PTR_PCODE>(siteAddr); m_returnAddr = returnAddr; }
+
+    PCODE           GetCallerAddress() { LIMITED_METHOD_CONTRACT; return m_returnAddr; }
+#endif // !defined(_TARGET_X86_)
+
+    PCODE           GetSiteTarget()          { WRAPPER_NO_CONTRACT; return *(GetIndirectCell()); }
+    void            SetSiteTarget(PCODE newTarget);
+    PTR_PCODE       GetIndirectCell()        { LIMITED_METHOD_CONTRACT; return dac_cast<PTR_PCODE>(m_siteAddr); }
+    PTR_PCODE *     GetIndirectCellAddress() { LIMITED_METHOD_CONTRACT; return &m_siteAddr; }
+
+    PCODE           GetReturnAddress() { LIMITED_METHOD_CONTRACT; return m_returnAddr; }
+};
+
+#ifdef FEATURE_PREJIT
+extern "C" void StubDispatchFixupStub();              // for lazy fixup of ngen call sites
+#endif
+
+// These are the assembly language entry points that the stubs use when they want to go into the EE
+
+extern "C" void ResolveWorkerAsmStub();               // resolve a token and transfer control to that method
+extern "C" void ResolveWorkerChainLookupAsmStub();    // for chaining of entries in the cache
+
+#ifdef _TARGET_X86_
+extern "C" void BackPatchWorkerAsmStub();             // backpatch a call site to point to a different stub
+#endif // _TARGET_X86_
+
+
+typedef VPTR(class VirtualCallStubManager) PTR_VirtualCallStubManager;
+
+// VirtualCallStubManager is the heart of the stub dispatch logic. See the Book of the Runtime entry
+//
+// file:../../doc/BookOfTheRuntime/ClassLoader/VirtualStubDispatchDesign.doc
+//
+// The basic idea is that a call to an interface (it could also be used for virtual calls in general, but we
+// do not do this) is simply the code
+//
+//     call [DispatchCell]
+//
+// where we make sure 'DispatchCell' points at stubs that will do the right thing. DispatchCell is writable
+// so we can update the code over time. There are three basic types of stubs that the dispatch cell can point
+// to.
+//     * Lookup: The initial stub, which has no 'fast path'. It simply pushes an ID for the interface being called
+//         and calls into the runtime at code:VirtualCallStubManager.ResolveWorkerStatic.
+//     * Dispatch: Lookup stubs are patched to this stub, which has a fast path that checks for a particular
+//         Method Table and, if that check fails, jumps to code that
+//         * Decrements a 'missCount' (starts out as code:STUB_MISS_COUNT_VALUE). If this count goes to zero,
+//             code:VirtualCallStubManager.BackPatchWorkerStatic is called and morphs the call site into a resolve stub
+//             (however, since this decrementing logic is SHARED among all dispatch stubs, it may take
+//             multiples of code:STUB_MISS_COUNT_VALUE if multiple call sites are actively polymorphic; this
+//             seems unlikely).
+//         * Calls a resolve stub. (Whenever a dispatch stub is created, it always has a corresponding resolve
+//             stub, but the resolve stubs are shared among many dispatch stubs.)
+//     * Resolve: see code:ResolveStub. This looks up the Method Table in a process-wide cache (see
+//         code:ResolveCacheElem) and, if found, jumps to it. This code path is about 17 instructions long (so
+//         pretty fast, but certainly much slower than a normal call). If the method table is not found in
+//         the cache, it calls into the runtime code:VirtualCallStubManager.ResolveWorkerStatic, which
+//         populates it.
+// So the general progression is that a call site's cell
+//     * starts out life pointing to a lookup stub
+//     * On the first call it gets updated to a dispatch stub. When this misses, it calls a resolve stub,
+//         which populates the resolve stub's cache, but does not update the call site's cell (thus it is still
+//         pointing at the dispatch stub).
+//     * After code:STUB_MISS_COUNT_VALUE misses, we update the call site's cell to point directly at the
+//         resolve stub (thus avoiding the overhead of the quick check that always seems to be failing and
+//         the miss count update).
+//
+// QUESTION: What are the lifetimes of the various stubs and hash table entries?
+//
+// QUESTION: There does not seem to be any logic that will change a call site's cell once it becomes a
+// resolve stub. Thus once a particular call site becomes a resolve stub we live with the resolve stub's
+// (in)efficiency forever.
+//
+// see code:#StubDispatchNotes for more
+class VirtualCallStubManager : public StubManager
+{
+    friend class ClrDataAccess;
+    friend class VirtualCallStubManagerManager;
+    friend class VirtualCallStubManagerIterator;
+
+    VPTR_VTABLE_CLASS(VirtualCallStubManager, StubManager)
+
+public:
+#ifdef _DEBUG
+    virtual const char * DbgGetName() { LIMITED_METHOD_CONTRACT; return "VirtualCallStubManager"; }
+#endif
+
+    // The reason for our existence: return a callstub for type id and slot number,
+    // where type id = 0 for the class contract (i.e. a virtual call), and type id > 0 for an
+    // interface invoke where the id indicates which interface it is.
+    //
+    // The function is idempotent, i.e.
+    // you'll get the same callstub twice if you call it with identical inputs.
+    PCODE GetCallStub(TypeHandle ownerType, MethodDesc *pMD);
+    PCODE GetCallStub(TypeHandle ownerType, DWORD slot);
+
+    // Generate a fresh indirection cell.
+    BYTE* GenerateStubIndirection(PCODE stub, BOOL fUseRecycledCell = FALSE);
+
+    // Set up static data structures - called during EEStartup
+    static void InitStatic();
+    static void UninitStatic();
+
+    // Per instance initialization - called during AppDomain::Init and ::Uninit and for collectible loader allocators
+    void Init(BaseDomain* pDomain, LoaderAllocator *pLoaderAllocator);
+    void Uninit();
+
+    //@TODO: the logging should be tied into the VM's normal logging mechanisms,
+    //@TODO: for now we just always write a short log file called "StubLog_<pid>.log"
+    static void StartupLogging();
+    static void LoggingDump();
+    static void FinishLogging();
+
+    static void ResetCache();
+
+    // Reclaim/rearrange any structures that can only be done during a gc sync point.
+    // This is the mechanism we are using to avoid synchronization of a lot of our
+    // cache and hash table accesses. We are requiring that during a gc sync point we are not
+    // executing any stub code at all, hence at this time we are serialized on a single thread (gc)
+    // and no other thread is accessing the data structures.
+ static void ReclaimAll(); + void Reclaim(); + +#ifndef DACCESS_COMPILE + VirtualCallStubManager() + : StubManager(), + lookup_rangeList(), + resolve_rangeList(), + dispatch_rangeList(), + cache_entry_rangeList(), + parentDomain(NULL), + isCollectible(false), + m_initialReservedMemForHeaps(NULL), + m_FreeIndCellList(NULL), + m_RecycledIndCellList(NULL), + indcell_heap(NULL), + cache_entry_heap(NULL), + lookup_heap(NULL), + dispatch_heap(NULL), + resolve_heap(NULL), +#ifdef _TARGET_AMD64_ + m_fShouldAllocateLongJumpDispatchStubs(FALSE), +#endif + lookups(NULL), + cache_entries(NULL), + dispatchers(NULL), + resolvers(NULL), + m_counters(NULL), + m_cur_counter_block(NULL), + m_cur_counter_block_for_reclaim(NULL), + m_cur_counter_block_for_reclaim_index(NULL), + m_pNext(NULL) + { + LIMITED_METHOD_CONTRACT; + ZeroMemory(&stats, sizeof(stats)); + } + + ~VirtualCallStubManager(); +#endif // !DACCESS_COMPILE + + + enum StubKind { + SK_UNKNOWN, + SK_LOOKUP, // Lookup Stubs are SLOW stubs that simply call into the runtime to do all work. + SK_DISPATCH, // Dispatch Stubs have a fast check for one type otherwise jumps to runtime. Works for monomorphic sites + SK_RESOLVE, // Resolve Stubs do a hash lookup before fallling back to the runtime. Works for polymorphic sites. + SK_BREAKPOINT + }; + + // peek at the assembly code and predict which kind of a stub we have + StubKind predictStubKind(PCODE stubStartAddress); + + /* know thine own stubs. It is possible that when multiple + virtualcallstub managers are built that these may need to become + non-static, and the callers modified accordingly */ + StubKind getStubKind(PCODE stubStartAddress) + { + WRAPPER_NO_CONTRACT; + SUPPORTS_DAC; + + // This method can called with stubStartAddress==NULL, e.g. when handling null reference exceptions + // caused by IP=0. Early out for this case to avoid confusing handled access violations inside predictStubKind. + if (PCODEToPINSTR(stubStartAddress) == NULL) + return SK_UNKNOWN; + + // Rather than calling IsInRange(stubStartAddress) for each possible stub kind + // we can peek at the assembly code and predict which kind of a stub we have + StubKind predictedKind = predictStubKind(stubStartAddress); + + if (predictedKind == SK_DISPATCH) + { + if (isDispatchingStub(stubStartAddress)) + return SK_DISPATCH; + } + else if (predictedKind == SK_LOOKUP) + { + if (isLookupStub(stubStartAddress)) + return SK_LOOKUP; + } + else if (predictedKind == SK_RESOLVE) + { + if (isResolvingStub(stubStartAddress)) + return SK_RESOLVE; + } + + // This is the slow case. If the predict returned SK_UNKNOWN, SK_BREAKPOINT, + // or the predict was found to be incorrect when checked against the RangeLists + // (isXXXStub), then we'll check each stub heap in sequence. 
+ if (isDispatchingStub(stubStartAddress)) + return SK_DISPATCH; + else if (isLookupStub(stubStartAddress)) + return SK_LOOKUP; + else if (isResolvingStub(stubStartAddress)) + return SK_RESOLVE; + + return SK_UNKNOWN; + } + + inline BOOL isStub(PCODE stubStartAddress) + { + WRAPPER_NO_CONTRACT; + SUPPORTS_DAC; + + return (getStubKind(stubStartAddress) != SK_UNKNOWN); + } + + BOOL isDispatchingStub(PCODE stubStartAddress) + { + WRAPPER_NO_CONTRACT; + SUPPORTS_DAC; + + return GetDispatchRangeList()->IsInRange(stubStartAddress); + } + + BOOL isResolvingStub(PCODE stubStartAddress) + { + WRAPPER_NO_CONTRACT; + SUPPORTS_DAC; + + return GetResolveRangeList()->IsInRange(stubStartAddress); + } + + BOOL isLookupStub(PCODE stubStartAddress) + { + WRAPPER_NO_CONTRACT; + SUPPORTS_DAC; + + return GetLookupRangeList()->IsInRange(stubStartAddress); + } + + static BOOL isDispatchingStubStatic(PCODE addr) + { + WRAPPER_NO_CONTRACT; + StubKind stubKind; + FindStubManager(addr, &stubKind); + return stubKind == SK_DISPATCH; + } + + static BOOL isResolvingStubStatic(PCODE addr) + { + WRAPPER_NO_CONTRACT; + StubKind stubKind; + FindStubManager(addr, &stubKind); + return stubKind == SK_RESOLVE; + } + + static BOOL isLookupStubStatic(PCODE addr) + { + WRAPPER_NO_CONTRACT; + StubKind stubKind; + FindStubManager(addr, &stubKind); + return stubKind == SK_LOOKUP; + } + + //use range lists to track the chunks of memory that are part of each heap + LockedRangeList lookup_rangeList; + LockedRangeList resolve_rangeList; + LockedRangeList dispatch_rangeList; + LockedRangeList cache_entry_rangeList; + + // Get dac-ized pointers to rangelist. + RangeList* GetLookupRangeList() + { + SUPPORTS_DAC; + + TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, lookup_rangeList); + return PTR_RangeList(addr); + } + RangeList* GetResolveRangeList() + { + SUPPORTS_DAC; + + TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, resolve_rangeList); + return PTR_RangeList(addr); + } + RangeList* GetDispatchRangeList() + { + SUPPORTS_DAC; + + TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, dispatch_rangeList); + return PTR_RangeList(addr); + } + RangeList* GetCacheEntryRangeList() + { + SUPPORTS_DAC; + TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, cache_entry_rangeList); + return PTR_RangeList(addr); + } + +private: + + //allocate and initialize a stub of the desired kind + DispatchHolder *GenerateDispatchStub(PCODE addrOfCode, + PCODE addrOfFail, + void *pMTExpected, + size_t dispatchToken); + +#ifdef _TARGET_AMD64_ + // Used to allocate a long jump dispatch stub. See comment around + // m_fShouldAllocateLongJumpDispatchStubs for explaination. 
+ DispatchHolder *GenerateDispatchStubLong(PCODE addrOfCode, + PCODE addrOfFail, + void *pMTExpected, + size_t dispatchToken); +#endif + + ResolveHolder *GenerateResolveStub(PCODE addrOfResolver, + PCODE addrOfPatcher, + size_t dispatchToken); + + LookupHolder *GenerateLookupStub(PCODE addrOfResolver, + size_t dispatchToken); + + template <typename STUB_HOLDER> + void AddToCollectibleVSDRangeList(STUB_HOLDER *holder) + { + if (isCollectible) + { + parentDomain->GetCollectibleVSDRanges()->AddRange(reinterpret_cast<BYTE *>(holder->stub()), + reinterpret_cast<BYTE *>(holder->stub()) + holder->stub()->size(), + this); + } + } + + // The resolve cache is static across all AppDomains + ResolveCacheElem *GenerateResolveCacheElem(void *addrOfCode, + void *pMTExpected, + size_t token); + + ResolveCacheElem *GetResolveCacheElem(void *pMT, + size_t token, + void *target); + + //Given a dispatch token, an object and a method table, determine the + //target address to go to. The return value (BOOL) states whether this address + //is cacheable or not. + static BOOL Resolver(MethodTable * pMT, + DispatchToken token, + OBJECTREF * protectedObj, + PCODE * ppTarget); + + // This can be used to find a target without needing the ability to throw + static BOOL TraceResolver(Object *pObj, DispatchToken token, TraceDestination *trace); + +public: + // Return the MethodDesc corresponding to this token. + static MethodDesc *GetRepresentativeMethodDescFromToken(DispatchToken token, MethodTable *pMT); + static MethodDesc *GetInterfaceMethodDescFromToken(DispatchToken token); + static MethodTable *GetTypeFromToken(DispatchToken token); + + //This is used to get the token out of a stub + static size_t GetTokenFromStub(PCODE stub); + + //This is used to get the token out of a stub and we know the stub manager and stub kind + static size_t GetTokenFromStubQuick(VirtualCallStubManager * pMgr, PCODE stub, StubKind kind); + + // General utility functions + // Quick lookup in the cache. NOTHROW, GC_NOTRIGGER + static PCODE CacheLookup(size_t token, UINT16 tokenHash, MethodTable *pMT); + + // Full exhaustive lookup. THROWS, GC_TRIGGERS + static PCODE GetTarget(DispatchToken token, MethodTable *pMT); + +private: + // Given a dispatch token, return true if the token represents an interface, false if just a slot. + static BOOL IsInterfaceToken(DispatchToken token); + + // Given a dispatch token, return true if the token represents a slot on the target. + static BOOL IsClassToken(DispatchToken token); + +#ifdef CHAIN_LOOKUP + static ResolveCacheElem* __fastcall PromoteChainEntry(ResolveCacheElem *pElem); +#endif + + // Flags used by the non-x86 versions of VSD_ResolveWorker + +#define SDF_ResolveBackPatch (0x01) +#define SDF_ResolvePromoteChain (0x02) +#define SDF_ResolveFlags (0x03) + + // These method needs to call the instance methods. 
+ friend PCODE VSD_ResolveWorker(TransitionBlock * pTransitionBlock, + TADDR siteAddrForRegisterIndirect, + size_t token +#ifndef _TARGET_X86_ + , UINT_PTR flags +#endif + ); + + //These are the entrypoints that the stubs actually end up calling via the + // xxxAsmStub methods above + static void STDCALL BackPatchWorkerStatic(PCODE returnAddr, TADDR siteAddrForRegisterIndirect); + +public: + PCODE ResolveWorker(StubCallSite* pCallSite, OBJECTREF *protectedObj, DispatchToken token, StubKind stubKind); + void BackPatchWorker(StubCallSite* pCallSite); + + //Change the callsite to point to stub + void BackPatchSite(StubCallSite* pCallSite, PCODE stub); + +public: + /* the following two public functions are to support tracing or stepping thru + stubs via the debugger. */ + virtual BOOL CheckIsStub_Internal(PCODE stubStartAddress); + virtual BOOL TraceManager(Thread *thread, + TraceDestination *trace, + T_CONTEXT *pContext, + BYTE **pRetAddr); + size_t GetSize() + { + LIMITED_METHOD_CONTRACT; + size_t retval=0; + if(indcell_heap) + retval+=indcell_heap->GetSize(); + if(cache_entry_heap) + retval+=cache_entry_heap->GetSize(); + if(lookup_heap) + retval+=lookup_heap->GetSize(); + if(dispatch_heap) + retval+=dispatch_heap->GetSize(); + if(resolve_heap) + retval+=resolve_heap->GetSize(); + return retval; + }; + +private: + /* the following two private functions are to support tracing or stepping thru + stubs via the debugger. */ + virtual BOOL DoTraceStub(PCODE stubStartAddress, + TraceDestination *trace); + +private: + // The parent domain of this manager + PTR_BaseDomain parentDomain; + bool isCollectible; + + BYTE * m_initialReservedMemForHeaps; + + static const UINT32 INDCELLS_PER_BLOCK = 32; // 32 indirection cells per block. + + CrstExplicitInit m_indCellLock; + + // List of free indirection cells. The cells were directly allocated from the loader heap + // (code:VirtualCallStubManager::GenerateStubIndirection) + BYTE * m_FreeIndCellList; + + // List of recycled indirection cells. The cells were recycled from finalized dynamic methods + // (code:LCGMethodResolver::RecycleIndCells). + BYTE * m_RecycledIndCellList; + +#ifndef DACCESS_COMPILE + // This methods returns the a free cell from m_FreeIndCellList. It returns NULL if the list is empty. + BYTE * GetOneFreeIndCell() + { + WRAPPER_NO_CONTRACT; + + return GetOneIndCell(&m_FreeIndCellList); + } + + // This methods returns the a recycled cell from m_RecycledIndCellList. It returns NULL if the list is empty. + BYTE * GetOneRecycledIndCell() + { + WRAPPER_NO_CONTRACT; + + return GetOneIndCell(&m_RecycledIndCellList); + } + + // This methods returns the a cell from ppList. It returns NULL if the list is empty. 
+ BYTE * GetOneIndCell(BYTE ** ppList) + { + CONTRACT (BYTE*) { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + PRECONDITION(CheckPointer(ppList)); + PRECONDITION(m_indCellLock.OwnedByCurrentThread()); + } CONTRACT_END; + + BYTE * temp = *ppList; + + if (temp) + { + BYTE * pNext = *((BYTE **)temp); + *ppList = pNext; + RETURN temp; + } + + RETURN NULL; + } + + // insert a linked list of indirection cells at the beginning of m_FreeIndCellList + void InsertIntoFreeIndCellList(BYTE * head, BYTE * tail) + { + WRAPPER_NO_CONTRACT; + + InsertIntoIndCellList(&m_FreeIndCellList, head, tail); + } + + // insert a linked list of indirection cells at the beginning of ppList + void InsertIntoIndCellList(BYTE ** ppList, BYTE * head, BYTE * tail) + { + CONTRACTL { + NOTHROW; + GC_NOTRIGGER; + PRECONDITION(CheckPointer(ppList)); + PRECONDITION(CheckPointer(head)); + PRECONDITION(CheckPointer(tail)); + PRECONDITION(m_indCellLock.OwnedByCurrentThread()); + } CONTRACTL_END; + + BYTE * temphead = *ppList; + *((BYTE**)tail) = temphead; + *ppList = head; + } +#endif // !DACCESS_COMPILE + + PTR_LoaderHeap indcell_heap; // indirection cells go here + PTR_LoaderHeap cache_entry_heap; // resolve cache elem entries go here + PTR_LoaderHeap lookup_heap; // lookup stubs go here + PTR_LoaderHeap dispatch_heap; // dispatch stubs go here + PTR_LoaderHeap resolve_heap; // resolve stubs go here + +#ifdef _TARGET_AMD64_ + // When we layout the stub heaps, we put them close together in a sequential order + // so that we maximize performance with respect to branch predictions. On AMD64, + // dispatch stubs use a rel32 jump on failure to the resolve stub. This works for + // a while because of the ordering, but as soon as we have to start allocating more + // memory for either the dispatch or resolve heaps we have a chance that we'll be + // further away than a rel32 jump can reach, because we're in a 64-bit address + // space. As such, this flag will indicate when we allocate the first dispatch stub + // that cannot reach a resolve stub, and when this happens we'll switch over to + // allocating the larger version of the dispatch stub which contains an abs64 jump. + //@TODO: This is a bit of a workaround, but the limitations of LoaderHeap require that we + //@TODO: take this approach. Hopefully in Orcas we'll have a chance to rewrite LoaderHeap. + BOOL m_fShouldAllocateLongJumpDispatchStubs; // Defaults to FALSE. +#endif + + BucketTable * lookups; // hash table of lookups keyed by tokens + BucketTable * cache_entries; // hash table of dispatch token/target structs for dispatch cache + BucketTable * dispatchers; // hash table of dispatching stubs keyed by tokens/actualtype + BucketTable * resolvers; // hash table of resolvers keyed by tokens/resolverstub + + // This structure is used to keep track of the fail counters. + // We only need one fail counter per ResolveStub, + // and most programs use less than 250 ResolveStubs + // We allocate these on the main heap using "new counter block" + struct counter_block + { + static const UINT32 MAX_COUNTER_ENTRIES = 256-2; // 254 counters should be enough for most cases. 
+ + counter_block * next; // the next block + UINT32 used; // the index of the next free entry + INT32 block[MAX_COUNTER_ENTRIES]; // the counters + }; + + counter_block *m_counters; // linked list of counter blocks of failure counters + counter_block *m_cur_counter_block; // current block for updating counts + counter_block *m_cur_counter_block_for_reclaim; // current block for updating + UINT32 m_cur_counter_block_for_reclaim_index; // index into the current block for updating + + // Used to keep track of all the VCSManager objects in the system. + PTR_VirtualCallStubManager m_pNext; // Linked list pointer + +public: + // Given a stub address, find the VCSManager that owns it. + static VirtualCallStubManager *FindStubManager(PCODE addr, + StubKind* wbStubKind = NULL); + +#ifndef DACCESS_COMPILE + // insert a linked list of indirection cells at the beginning of m_RecycledIndCellList + void InsertIntoRecycledIndCellList_Locked(BYTE * head, BYTE * tail) + { + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + MODE_ANY; + } CONTRACTL_END; + + CrstHolder lh(&m_indCellLock); + + InsertIntoIndCellList(&m_RecycledIndCellList, head, tail); + } +#endif // !DACCESS_COMPILE + + // These are the counters for keeping statistics + struct + { + UINT32 site_counter; //# of call sites + UINT32 stub_lookup_counter; //# of lookup stubs + UINT32 stub_poly_counter; //# of resolve stubs + UINT32 stub_mono_counter; //# of dispatch stubs + UINT32 site_write; //# of call site backpatch writes + UINT32 site_write_poly; //# of call site backpatch writes to point to resolve stubs + UINT32 site_write_mono; //# of call site backpatch writes to point to dispatch stubs + UINT32 worker_call; //# of calls into ResolveWorker + UINT32 worker_call_no_patch; //# of times call_worker resulted in no patch + UINT32 worker_collide_to_mono; //# of times we converted a poly stub to a mono stub instead of writing the cache entry + UINT32 stub_space; //# of bytes of stubs + UINT32 cache_entry_counter; //# of cache structs + UINT32 cache_entry_space; //# of bytes used by cache lookup structs + } stats; + + void LogStats(); + +#ifdef DACCESS_COMPILE +protected: + virtual void DoEnumMemoryRegions(CLRDataEnumMemoryFlags flags); + virtual LPCWSTR GetStubManagerName(PCODE addr) + { + WRAPPER_NO_CONTRACT; + CONSISTENCY_CHECK(isStub(addr)); + + if (isLookupStub(addr)) + { + return W("VSD_LookupStub"); + } + else if (isDispatchingStub(addr)) + { + return W("VSD_DispatchStub"); + } + else + { + CONSISTENCY_CHECK(isResolvingStub(addr)); + return W("VSD_ResolveStub"); + } + } +#endif +}; + +/******************************************************************************************************** +********************************************************************************************************/ +typedef VPTR(class VirtualCallStubManagerManager) PTR_VirtualCallStubManagerManager; + +class VirtualCallStubManagerIterator; +class VirtualCallStubManagerManager : public StubManager +{ + VPTR_VTABLE_CLASS(VirtualCallStubManagerManager, StubManager) + + friend class StubManager; + friend class VirtualCallStubManager; + friend class VirtualCallStubManagerIterator; + friend class StubManagerIterator; + + public: + virtual BOOL TraceManager(Thread *thread, TraceDestination *trace, + T_CONTEXT *pContext, BYTE **pRetAddr); + + virtual BOOL CheckIsStub_Internal(PCODE stubStartAddress); + + virtual BOOL DoTraceStub(PCODE stubStartAddress, TraceDestination *trace); + + static MethodDesc *Entry2MethodDesc(PCODE stubStartAddress, MethodTable *pMT); + +#ifdef 
DACCESS_COMPILE + virtual void DoEnumMemoryRegions(CLRDataEnumMemoryFlags flags); + virtual LPCWSTR GetStubManagerName(PCODE addr) + { WRAPPER_NO_CONTRACT; return FindVirtualCallStubManager(addr)->GetStubManagerName(addr); } +#endif + + private: + // Used to keep track of all the VCSManager objects in the system. + PTR_VirtualCallStubManager m_pManagers; // Head of the linked list + +#ifndef DACCESS_COMPILE + // Ctor. This is only used by StaticInit. + VirtualCallStubManagerManager(); +#endif + + // A cache element to quickly check the last matched manager. + Volatile<VirtualCallStubManager*> m_pCacheElem; + + // RW lock for reading entries and removing them. + SimpleRWLock m_RWLock; + + // This will look through all the managers in an intelligent fashion to + // find the manager that owns the address. + VirtualCallStubManager *FindVirtualCallStubManager(PCODE stubAddress); + + protected: + // Add a VCSManager to the linked list. + void AddStubManager(VirtualCallStubManager *pMgr); + + // Remove a VCSManager from the linked list. + void RemoveStubManager(VirtualCallStubManager *pMgr); + + VirtualCallStubManager *FirstManager() + { WRAPPER_NO_CONTRACT; return m_pManagers; } + +#ifndef DACCESS_COMPILE + static void InitStatic(); +#endif + + public: + SPTR_DECL(VirtualCallStubManagerManager, g_pManager); + + static VirtualCallStubManagerManager *GlobalManager() + { LIMITED_METHOD_DAC_CONTRACT; CONSISTENCY_CHECK(CheckPointer(g_pManager)); return g_pManager; } + + VirtualCallStubManagerIterator IterateVirtualCallStubManagers(); + +#ifdef _DEBUG + // Debug helper to help identify stub-managers. + virtual const char * DbgGetName() { LIMITED_METHOD_CONTRACT; return "VirtualCallStubManagerManager"; } +#endif +}; + +/******************************************************************************************************** +********************************************************************************************************/ +class VirtualCallStubManagerIterator +{ + friend class VirtualCallStubManagerManager; + + public: + BOOL Next(); + VirtualCallStubManager *Current(); + + // Copy ctor + inline VirtualCallStubManagerIterator(const VirtualCallStubManagerIterator &it); + + protected: + inline VirtualCallStubManagerIterator(VirtualCallStubManagerManager *pMgr); + + BOOL m_fIsStart; + VirtualCallStubManager *m_pCurMgr; +}; + +///////////////////////////////////////////////////////////////////////////////////////////// +// Ctor +inline VirtualCallStubManagerIterator::VirtualCallStubManagerIterator(VirtualCallStubManagerManager *pMgr) + : m_fIsStart(TRUE), m_pCurMgr(pMgr->m_pManagers) +{ + LIMITED_METHOD_DAC_CONTRACT; + CONSISTENCY_CHECK(CheckPointer(pMgr)); +} + +///////////////////////////////////////////////////////////////////////////////////////////// +// Copy ctor +inline VirtualCallStubManagerIterator::VirtualCallStubManagerIterator(const VirtualCallStubManagerIterator &it) + : m_fIsStart(it.m_fIsStart), m_pCurMgr(it.m_pCurMgr) +{ + LIMITED_METHOD_DAC_CONTRACT; +} + +/******************************************************************************************************** +#StubDispatchNotes + +A note on approach. The cache and hash tables used by the stub and lookup mechanism +are designed with an eye to minimizing interlocking and/or syncing and/or locking operations. +They are intended to run in a highly concurrent environment. Since there is no magic, +some tradeoffs and and some implementation constraints are required. 
The basic notion
+is that if all reads and writes are atomic, and if all functions and operations operate
+correctly in the face of commutative reorderings of the visibility of all reads and writes
+across threads, then we don't have to interlock, sync, or serialize. Our approximation of
+this is:
+
+1. All reads and all writes to tables must be atomic. This effectively limits the actual entry
+size in a table to a pointer or a pointer-sized thing.
+
+2. All functions, like comparisons for equality or computation of hash values, must function
+correctly in the face of concurrent updating of the underlying table. This is accomplished
+by making the underlying structures/entries effectively immutable, if concurrency is in any way possible.
+By effectively immutable, we mean that the stub or token structure is either immutable, or that
+if it is ever written, all possibly concurrent writes are attempting to write the same value (atomically),
+or that the competing (atomic) values do not affect correctness, and that the function operates correctly whether
+or not any of the writes have taken place (is visible yet). The constraint we maintain is that all competing
+updates (and their visibility or lack thereof) do not alter the correctness of the program.
+
+3. All tables are inexact. The counts they hold (e.g. number of contained entries) may be inaccurate,
+but that inaccuracy cannot affect their correctness. Table modifications, such as insertion of
+a new entry, may not succeed, but such failures cannot affect correctness. This implies that just
+because a stub/entry is not present in a table, e.g. has been removed, that does not mean that
+it is not in use. It also implies that internal table structures, such as discarded hash table buckets,
+cannot be freely recycled since another concurrent thread may still be walking through them.
+
+4. Occasionally it is necessary to pick up the pieces that have been dropped on the floor,
+so to speak, e.g. actually recycle hash buckets that aren't in use. Since we have a natural
+sync point already in the GC, we use that to provide cleanup points. We need to make sure that code that
+is walking our structures is not at a GC safe point. Hence if the GC calls back into us inside the GC
+sync point, we know that nobody is inside our structures and we can safely rearrange and recycle things.
+********************************************************************************************************/
+
+//initial and increment value for fail stub counters
+#ifdef STUB_LOGGING
+extern UINT32 STUB_MISS_COUNT_VALUE;
+extern UINT32 STUB_COLLIDE_WRITE_PCT;
+extern UINT32 STUB_COLLIDE_MONO_PCT;
+#else // !STUB_LOGGING
+#define STUB_MISS_COUNT_VALUE 100
+#define STUB_COLLIDE_WRITE_PCT 100
+#define STUB_COLLIDE_MONO_PCT 0
+#endif // !STUB_LOGGING
+
+//size and mask of the cache used by resolve stubs
+// CALL_STUB_CACHE_SIZE must be equal to 2^CALL_STUB_CACHE_NUM_BITS
+#define CALL_STUB_CACHE_NUM_BITS 12 //10
+#define CALL_STUB_CACHE_SIZE 4096 //1024
+#define CALL_STUB_CACHE_MASK (CALL_STUB_CACHE_SIZE-1)
+#define CALL_STUB_CACHE_PROBES 5
+//min sizes for BucketTable and buckets and the growth and hashing constants
+#define CALL_STUB_MIN_BUCKETS 32
+#define CALL_STUB_MIN_ENTRIES 4
+//this is so that the very first growth will jump from 4 to 32 entries, then double from there.
+#define CALL_STUB_SECONDARY_ENTRIES 8
+#define CALL_STUB_GROWTH_FACTOR 2
+#define CALL_STUB_LOAD_FACTOR 90
+#define CALL_STUB_HASH_CONST1 1327
+#define CALL_STUB_HASH_CONST2 43627
+#define LARGE_PRIME 7199369
+//internal layout of buckets=size-1,count,entries....
+#define CALL_STUB_MASK_INDEX 0
+#define CALL_STUB_COUNT_INDEX 1
+#define CALL_STUB_DEAD_LINK 2
+#define CALL_STUB_FIRST_INDEX 3
+//marker entries in cache and hash tables
+#define CALL_STUB_EMPTY_ENTRY 0
+// number of successes for a chained element before it gets moved to the front
+#define CALL_STUB_CACHE_INITIAL_SUCCESS_COUNT (0x100)
+
+/*******************************************************************************************************
+Entry is an abstract class. We will make specific subclasses for each kind of
+entry. Entries hold references to stubs or tokens. The principal thing they provide
+is a virtual Equals function that is used by the caching and hashing tables within which
+the stubs and tokens are stored. Entries are typically stack allocated by the routines
+that call into the hash and caching functions, and the functions stuff stubs into the entry
+to do the comparisons. Essentially, specific entry subclasses supply a vtable to a stub
+as and when needed. This means we don't have to have vtables attached to stubs.
+
+Summarizing so far, there is a struct for each kind of stub or token, of the form XXXXStub.
+They provide the actual storage layouts.
+There is a struct in which each stub that has code is contained, of the form XXXXHolder.
+They provide alignment and ancillary storage for the stub code.
+There is a subclass of Entry for each kind of stub or token, of the form XXXXEntry.
+They provide the specific implementations of the virtual functions declared in Entry. */
+class Entry
+{
+public:
+    //access and compare the keys of the entry
+    virtual BOOL Equals(size_t keyA, size_t keyB)=0;
+    virtual size_t KeyA()=0;
+    virtual size_t KeyB()=0;
+
+    //contents is the struct or token that the entry exposes
+    virtual void SetContents(size_t contents)=0;
+};
+
+/* define the platform specific Stubs and stub holders */
+
+#include <virtualcallstubcpu.hpp>
+
+#if USES_LOOKUP_STUBS
+/**********************************************************************************************
+LookupEntry wraps LookupStubs and provides the concrete implementation of the abstract class Entry.
+Virtual and interface call sites, when they are first jitted, point to LookupStubs. The hash table
+that contains lookup stubs is keyed by token, hence the Equals function uses the embedded token in
+the stub for comparison purposes. Since we are willing to allow duplicates in the hash table (as
+long as they are relatively rare) we use direct comparison of the tokens rather than extracting
+the fields from within the tokens, for perf reasons.
*/ +class LookupEntry : public Entry +{ +public: + //Creates an entry that wraps lookup stub s + LookupEntry(size_t s) + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(VirtualCallStubManager::isLookupStubStatic((PCODE)s)); + stub = (LookupStub*) s; + } + + //default contructor to allow stack and inline allocation of lookup entries + LookupEntry() {LIMITED_METHOD_CONTRACT; stub = NULL;} + + //implementations of abstract class Entry + BOOL Equals(size_t keyA, size_t keyB) + { WRAPPER_NO_CONTRACT; return stub && (keyA == KeyA()) && (keyB == KeyB()); } + + size_t KeyA() { WRAPPER_NO_CONTRACT; return Token(); } + size_t KeyB() { WRAPPER_NO_CONTRACT; return (size_t)0; } + + void SetContents(size_t contents) + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(VirtualCallStubManager::isLookupStubStatic((PCODE)contents)); + stub = LookupHolder::FromLookupEntry((PCODE)contents)->stub(); + } + + //extract the token of the underlying lookup stub + + inline size_t Token() { LIMITED_METHOD_CONTRACT; return stub ? stub->token() : 0; } + +private: + LookupStub* stub; //the stub the entry wrapping +}; +#endif // USES_LOOKUP_STUBS + +/********************************************************************************************** +ResolveCacheEntry wraps a ResolveCacheElem and provides lookup functionality for entries that +were created that may be added to the ResolveCache +*/ +class ResolveCacheEntry : public Entry +{ +public: + ResolveCacheEntry(size_t elem) + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(elem != 0); + pElem = (ResolveCacheElem*) elem; + } + + //default contructor to allow stack and inline allocation of lookup entries + ResolveCacheEntry() { LIMITED_METHOD_CONTRACT; pElem = NULL; } + + //access and compare the keys of the entry + virtual BOOL Equals(size_t keyA, size_t keyB) + { WRAPPER_NO_CONTRACT; return pElem && (keyA == KeyA()) && (keyB == KeyB()); } + virtual size_t KeyA() + { LIMITED_METHOD_CONTRACT; return pElem != NULL ? pElem->token : 0; } + virtual size_t KeyB() + { LIMITED_METHOD_CONTRACT; return pElem != NULL ? (size_t) pElem->pMT : 0; } + + //contents is the struct or token that the entry exposes + virtual void SetContents(size_t contents) + { + LIMITED_METHOD_CONTRACT; + pElem = (ResolveCacheElem*) contents; + } + + inline const BYTE *Target() + { + LIMITED_METHOD_CONTRACT; + return pElem != NULL ? (const BYTE *)pElem->target : NULL; + } + +private: + ResolveCacheElem *pElem; +}; + +/********************************************************************************************** +ResolveEntry wraps ResolveStubs and provide the concrete implementation of the abstract class Entry. +Polymorphic call sites and monomorphic calls that fail end up in a ResolveStub. Resolve stubs +are stored in hash tables keyed by token, hence the Equals function uses the embedded token in +the stub for comparison purposes. Since we are willing to allow duplicates in the hash table (as +long as they are relatively rare) we do use direct comparison of the tokens rather than extracting +the fields from within the tokens, for perf reasons. 
*/ +class ResolveEntry : public Entry +{ +public: + //Creates an entry that wraps resolve stub s + ResolveEntry (size_t s) + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(VirtualCallStubManager::isResolvingStubStatic((PCODE)s)); + stub = (ResolveStub*) s; + } + //default contructor to allow stack and inline allocation of resovler entries + ResolveEntry() { LIMITED_METHOD_CONTRACT; stub = CALL_STUB_EMPTY_ENTRY; } + + //implementations of abstract class Entry + inline BOOL Equals(size_t keyA, size_t keyB) + { WRAPPER_NO_CONTRACT; return stub && (keyA == KeyA()) && (keyB == KeyB()); } + inline size_t KeyA() { WRAPPER_NO_CONTRACT; return Token(); } + inline size_t KeyB() { WRAPPER_NO_CONTRACT; return (size_t)0; } + + void SetContents(size_t contents) + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(VirtualCallStubManager::isResolvingStubStatic((PCODE)contents)); + stub = ResolveHolder::FromResolveEntry((PCODE)contents)->stub(); + } + //extract the token of the underlying resolve stub + inline size_t Token() { WRAPPER_NO_CONTRACT; return stub ? (size_t)(stub->token()) : 0; } +private: + ResolveStub* stub; //the stub the entry is wrapping +}; + +/********************************************************************************************** +DispatchEntry wraps DispatchStubs and provide the concrete implementation of the abstract class Entry. +Monomorphic and mostly monomorphic call sites eventually point to DispatchStubs. Dispatch stubs +are placed in hash and cache tables keyed by the expected Method Table and token they are built for. +Since we are willing to allow duplicates in the hash table (as long as they are relatively rare) +we do use direct comparison of the tokens rather than extracting the fields from within the tokens, +for perf reasons.*/ +class DispatchEntry : public Entry +{ +public: + //Creates an entry that wraps dispatch stub s + DispatchEntry (size_t s) + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(VirtualCallStubManager::isDispatchingStubStatic((PCODE)s)); + stub = (DispatchStub*) s; + } + //default contructor to allow stack and inline allocation of resovler entries + DispatchEntry() { LIMITED_METHOD_CONTRACT; stub = CALL_STUB_EMPTY_ENTRY; } + + //implementations of abstract class Entry + inline BOOL Equals(size_t keyA, size_t keyB) + { WRAPPER_NO_CONTRACT; return stub && (keyA == KeyA()) && (keyB == KeyB()); } + inline size_t KeyA() { WRAPPER_NO_CONTRACT; return Token(); } + inline size_t KeyB() { WRAPPER_NO_CONTRACT; return ExpectedMT();} + + void SetContents(size_t contents) + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(VirtualCallStubManager::isDispatchingStubStatic((PCODE)contents)); + stub = DispatchHolder::FromDispatchEntry((PCODE)contents)->stub(); + } + + //extract the fields of the underlying dispatch stub + inline size_t ExpectedMT() + { WRAPPER_NO_CONTRACT; return stub ? (size_t)(stub->expectedMT()) : 0; } + + size_t Token() + { + WRAPPER_NO_CONTRACT; + if (stub) + { + ResolveHolder * resolveHolder = ResolveHolder::FromFailEntry(stub->failTarget()); + size_t token = resolveHolder->stub()->token(); + _ASSERTE(token == VirtualCallStubManager::GetTokenFromStub((PCODE)stub)); + return token; + } + else + { + return 0; + } + } + + inline PCODE Target() + { WRAPPER_NO_CONTRACT; return stub ? stub->implTarget() : 0; } + +private: + DispatchStub* stub; +}; + +/************************************************************************************************* +DispatchCache is the cache table that the resolve stubs use for inline polymorphic resolution +of a call. 
The cache entry is logically a triplet of (method table, token, impl address) where method table +is the type of the calling frame's <this>, token identifies the method being invoked, +i.e. is a (type id,slot #) pair, and impl address is the address of the method implementation. +*/ +class DispatchCache +{ +public: + static const UINT16 INVALID_HASH = (UINT16)(-1); + + DispatchCache(); + + //read and write the cache keyed by (method table,token) pair. + inline ResolveCacheElem* Lookup(size_t token, void* mt) + { WRAPPER_NO_CONTRACT; return Lookup(token, INVALID_HASH, mt);} + + ResolveCacheElem* Lookup(size_t token, UINT16 tokenHash, void* mt); + + enum InsertKind {IK_NONE, IK_DISPATCH, IK_RESOLVE, IK_SHARED, IK_EXTERNAL}; + + BOOL Insert(ResolveCacheElem* elem, InsertKind insertKind); +#ifdef CHAIN_LOOKUP + void PromoteChainEntry(ResolveCacheElem* elem); +#endif + + // This is the heavyweight hashing algorithm. Use sparingly. + static UINT16 HashToken(size_t token); + + inline void GetLoadFactor(size_t *total, size_t *used) + { + LIMITED_METHOD_CONTRACT; + + *total = CALL_STUB_CACHE_SIZE; + size_t count = 0; + for (size_t i = 0; i < CALL_STUB_CACHE_SIZE; i++) + if (cache[i] != empty) + count++; + *used = count; + } + + inline void *GetCacheBaseAddr() + { LIMITED_METHOD_CONTRACT; return &cache[0]; } + inline size_t GetCacheCount() + { LIMITED_METHOD_CONTRACT; return CALL_STUB_CACHE_SIZE; } + inline ResolveCacheElem *GetCacheEntry(size_t idx) + { LIMITED_METHOD_CONTRACT; return VolatileLoad(&cache[idx]); } + inline BOOL IsCacheEntryEmpty(size_t idx) + { LIMITED_METHOD_CONTRACT; return cache[idx] == empty; } + + inline void SetCacheEntry(size_t idx, ResolveCacheElem *elem) + { + LIMITED_METHOD_CONTRACT; +#ifdef STUB_LOGGING + cacheData[idx].numWrites++; +#endif +#ifdef CHAIN_LOOKUP + CONSISTENCY_CHECK(m_writeLock.OwnedByCurrentThread()); +#endif + cache[idx] = elem; + } + + inline void ClearCacheEntry(size_t idx) + { + LIMITED_METHOD_CONTRACT; +#ifdef STUB_LOGGING + cacheData[idx].numClears++; +#endif + cache[idx] = empty; + } + + struct + { + UINT32 insert_cache_external; //# of times Insert was called for IK_EXTERNAL + UINT32 insert_cache_shared; //# of times Insert was called for IK_SHARED + UINT32 insert_cache_dispatch; //# of times Insert was called for IK_DISPATCH + UINT32 insert_cache_resolve; //# of times Insert was called for IK_RESOLVE + UINT32 insert_cache_hit; //# of times Insert found an empty cache entry + UINT32 insert_cache_miss; //# of times Insert already had a matching cache entry + UINT32 insert_cache_collide; //# of times Insert found a used cache entry + UINT32 insert_cache_write; //# of times Insert wrote a cache entry + } stats; + + void LogStats(); + + // Unlocked iterator of entries. Use only when read/write access to the cache + // is safe. This would typically be at GC sync points, currently needed during + // appdomain unloading. + class Iterator + { + public: + Iterator(DispatchCache *pCache); + inline BOOL IsValid() + { WRAPPER_NO_CONTRACT; return (m_curBucket < (INT32)m_pCache->GetCacheCount()); } + void Next(); + // Unlink the current entry. + // **NOTE** Using this method implicitly performs a call to Next to move + // past the unlinked entry. Thus, one could accidentally skip + // entries unless you take this into consideration. 
+ ResolveCacheElem *UnlinkEntry(); + inline ResolveCacheElem *Entry() + { LIMITED_METHOD_CONTRACT; CONSISTENCY_CHECK(IsValid()); return *m_ppCurElem; } + + private: + void NextValidBucket(); + inline void NextBucket() + { LIMITED_METHOD_CONTRACT; m_curBucket++; m_ppCurElem = &m_pCache->cache[m_curBucket]; } + + DispatchCache *m_pCache; + INT32 m_curBucket; + ResolveCacheElem **m_ppCurElem; + }; + +private: +#ifdef CHAIN_LOOKUP + Crst m_writeLock; +#endif + + //the following hash computation is also inlined in the resolve stub in asm (SO NO TOUCHIE) + inline static UINT16 HashMT(UINT16 tokenHash, void* mt) + { + LIMITED_METHOD_CONTRACT; + + UINT16 hash; + + size_t mtHash = (size_t) mt; + mtHash = (((mtHash >> CALL_STUB_CACHE_NUM_BITS) + mtHash) >> LOG2_PTRSIZE) & CALL_STUB_CACHE_MASK; + hash = (UINT16) mtHash; + + hash ^= (tokenHash & CALL_STUB_CACHE_MASK); + + return hash; + } + + ResolveCacheElem* cache[CALL_STUB_CACHE_SIZE]; //must be first + ResolveCacheElem* empty; //empty entry, initialized to fail all comparisons +#ifdef STUB_LOGGING +public: + struct CacheEntryData { + UINT32 numWrites; + UINT16 numClears; + }; + CacheEntryData cacheData[CALL_STUB_CACHE_SIZE]; +#endif // STUB_LOGGING +}; + +/************************************************************************************************** +The hash tables are accessed via instances of the Prober. Prober is a probe into a bucket +of the hash table, and therefore has an index which is the current probe position. +It includes a count of the number of probes done in that bucket so far and a stride +to step thru the bucket with. To do comparisons, it has a reference to an entry with which +it can do comparisons (Equals(...)) of the entries (stubs) inside the hash table. It also has +the key pair (keyA, keyB) that it is looking for. + +Typically, an entry of the appropriate type is created on the stack and then the prober is created passing +in a reference to the entry. The prober is used for a complete operation, such as look for and find an +entry (stub), creating and inserting it as necessary. + +The initial index and the stride are orthogonal hashes of the key pair, i.e. we are doing a varient of +double hashing. When we initialize the prober (see FormHash below) we set the initial probe based on +one hash. The stride (used as a modulo addition of the probe position) is based on a different hash and +is such that it will vist every location in the bucket before repeating. Hence it is imperative that +the bucket size and the stride be relative prime wrt each other. We have chosen to make bucket sizes +a power of 2, so we force stride to be odd. + +Note -- it must be assumed that multiple probers are walking the same tables and buckets at the same time. +Additionally, the counts may not be accurate, and there may be duplicates in the tables. Since the tables +do not allow concurrrent deletion, some of the concurrency issues are ameliorated. +*/ +class Prober +{ + friend class FastTable; + friend class BucketTable; +public: + Prober(Entry* e) {LIMITED_METHOD_CONTRACT; comparer = e;} + //find the requested entry, if not there return CALL_STUB_EMPTY_ENTRY + size_t Find(); + //add the entry into the bucket, if it is not already in the bucket. 
+ //return the entry actually in the bucket (existing or added) + size_t Add(size_t entry); +private: + //return the bucket (FastTable*) that the prober is currently walking + inline size_t* items() {LIMITED_METHOD_CONTRACT; return &base[-CALL_STUB_FIRST_INDEX];} + //are there more probes possible, or have we probed everything in the bucket + inline BOOL NoMore() {LIMITED_METHOD_CONTRACT; return probes>mask;} //both probes and mask are (-1) + //advance the probe to a new place in the bucket + inline BOOL Next() + { + WRAPPER_NO_CONTRACT; + index = (index + stride) & mask; + probes++; + return !NoMore(); + } + inline size_t Read() + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(base); + return VolatileLoad(&base[index]); + } + //initialize a prober across a bucket (table) for the specified keys. + void InitProber(size_t key1, size_t key2, size_t* table); + //set up the initial index and stride and probe count + inline void FormHash() + { + LIMITED_METHOD_CONTRACT; + + probes = 0; + //these two hash functions have not been formally measured for effectiveness + //but they are at least orthogonal + + size_t a = ((keyA>>16) + keyA); + size_t b = ((keyB>>16) ^ keyB); + index = (((a*CALL_STUB_HASH_CONST1)>>4)+((b*CALL_STUB_HASH_CONST2)>>4)+CALL_STUB_HASH_CONST1) & mask; + stride = ((a+(b*CALL_STUB_HASH_CONST1)+CALL_STUB_HASH_CONST2) | 1) & mask; + } + //atomically grab an empty slot so we can insert a new entry into the bucket + BOOL GrabEntry(size_t entryValue); + size_t keyA; //key pair we are looking for + size_t keyB; + size_t* base; //we have our own pointer to the bucket, so races don't matter. + // We won't care if we do the lookup in an + // outdated bucket (has grown out from under us). + // All that will happen is possibly dropping an entry + // on the floor or adding a duplicate. + size_t index; //current probe point in the bucket + size_t stride; //amount to step on each successive probe, must be relatively prime wrt the bucket size + size_t mask; //size of bucket - 1 + size_t probes; //number probes - 1 + Entry* comparer;//used to compare an entry against the sought after key pair +}; + +/******************************************************************************************************** +FastTable is used to implement the buckets of a BucketTable, a bucketized hash table. A FastTable is +an array of entries (contents). The first two slots of contents store the size-1 and count of entries +actually in the FastTable. Note that the count may be inaccurate and there may be duplicates. Careful +attention must be paid to eliminate the need for interlocked or serialized or locked operations in face +of concurrency. +*/ +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4200) // disable zero-sized array warning +#endif // _MSC_VER +class FastTable +{ + friend class BucketTable; +public: +private: + FastTable() { LIMITED_METHOD_CONTRACT; } + ~FastTable() { LIMITED_METHOD_CONTRACT; } + + //initialize a prober for the specified keys. + inline BOOL SetUpProber(size_t keyA, size_t keyB, Prober* probe) + { + CONTRACTL { + NOTHROW; + GC_NOTRIGGER; + FORBID_FAULT; + } CONTRACTL_END; + + _ASSERTE(probe); + _ASSERTE(contents); + probe->InitProber(keyA, keyB, &contents[0]); + return TRUE; + } + //find the requested entry (keys of prober), if not there return CALL_STUB_EMPTY_ENTRY + size_t Find(Prober* probe); + //add the entry, if it is not already there. Probe is used to search. 
+ //Return the entry actually containted (existing or added) + size_t Add(size_t entry, Prober* probe); + void IncrementCount(); + + // Create a FastTable with space for numberOfEntries. Please note that this method + // does not throw on OOM. **YOU MUST CHECK FOR NULL RETURN** + static FastTable* MakeTable(size_t numberOfEntries) + { + CONTRACTL { + THROWS; + GC_TRIGGERS; + INJECT_FAULT(COMPlusThrowOM();); + } CONTRACTL_END; + + size_t size = CALL_STUB_MIN_ENTRIES; + while (size < numberOfEntries) {size = size<<1;} +// if (size == CALL_STUB_MIN_ENTRIES) +// size += 3; + size_t* bucket = new size_t[(sizeof(FastTable)/sizeof(size_t))+size+CALL_STUB_FIRST_INDEX]; + FastTable* table = new (bucket) FastTable(); + table->InitializeContents(size); + return table; + } + //Initialize as empty + void InitializeContents(size_t size) + { + LIMITED_METHOD_CONTRACT; + memset(&contents[0], CALL_STUB_EMPTY_ENTRY, (size+CALL_STUB_FIRST_INDEX)*sizeof(BYTE*)); + contents[CALL_STUB_MASK_INDEX] = size-1; + } + inline size_t tableMask() {LIMITED_METHOD_CONTRACT; return (size_t) (contents[CALL_STUB_MASK_INDEX]);} + inline size_t tableSize() {LIMITED_METHOD_CONTRACT; return tableMask()+1;} + inline size_t tableCount() {LIMITED_METHOD_CONTRACT; return (size_t) (contents[CALL_STUB_COUNT_INDEX]);} + inline BOOL isFull() + { + LIMITED_METHOD_CONTRACT; + return (tableCount()+1) * 100 / CALL_STUB_LOAD_FACTOR >= tableSize(); + } + //we store (size-1) in bucket[CALL_STUB_MASK_INDEX==0], + //we store the used count in bucket[CALL_STUB_COUNT_INDEX==1], + //we have an unused cell to use as a temp at bucket[CALL_STUB_DEAD_LINK==2], + //and the table starts at bucket[CALL_STUB_FIRST_INDEX==3], + size_t contents[]; +}; +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +/****************************************************************************************************** +BucketTable is a bucketized hash table. It uses FastTables for its buckets. The hash tables +used by the VirtualCallStubManager are BucketTables. The number of buckets is fixed at the time +the table is created. The actual buckets are allocated as needed, and grow as necessary. The reason +for using buckets is primarily to reduce the cost of growing, since only a single bucket is actually +grown at any given time. Since the hash tables are accessed infrequently, the load factor that +controls growth is quite high (90%). Since we use hashing to pick the bucket, and we use hashing to +lookup inside the bucket, it is important that the hashing function used here is orthogonal to the ones +used in the buckets themselves (see FastTable::FormHash). +*/ +class BucketTable +{ +public: + BucketTable(size_t numberOfBuckets) + { + WRAPPER_NO_CONTRACT; + size_t size = CALL_STUB_MIN_BUCKETS; + while (size < numberOfBuckets) {size = size<<1;} + buckets = AllocateBuckets(size); + // Initialize statistics counters + memset(&stats, 0, sizeof(stats)); + } + + ~BucketTable() + { + LIMITED_METHOD_CONTRACT; + if(buckets != NULL) + { + size_t size = bucketCount()+CALL_STUB_FIRST_INDEX; + for(size_t ix = CALL_STUB_FIRST_INDEX; ix < size; ix++) delete (FastTable*)(buckets[ix]); + delete buckets; + } + } + + //initialize a prober for the specified keys. + BOOL SetUpProber(size_t keyA, size_t keyB, Prober *prober); + //find the requested entry (keys of prober), if not there return CALL_STUB_EMPTY_ENTRY + inline size_t Find(Prober* probe) {WRAPPER_NO_CONTRACT; return probe->Find();} + //add the entry, if it is not already there. Probe is used to search. 
+ size_t Add(size_t entry, Prober* probe); + //reclaim abandoned buckets. Buckets are abaondoned when they need to grow. + //needs to be called inside a gc sync point. + static void Reclaim(); + + struct + { + UINT32 bucket_space; //# of bytes in caches and tables, not including the stubs themselves + UINT32 bucket_space_dead; //# of bytes of abandoned buckets not yet recycled. + } stats; + + void LogStats(); + +private: + inline size_t bucketMask() {LIMITED_METHOD_CONTRACT; return (size_t) (buckets[CALL_STUB_MASK_INDEX]);} + inline size_t bucketCount() {LIMITED_METHOD_CONTRACT; return bucketMask()+1;} + inline size_t ComputeBucketIndex(size_t keyA, size_t keyB) + { + LIMITED_METHOD_CONTRACT; + size_t a = ((keyA>>16) + keyA); + size_t b = ((keyB>>16) ^ keyB); + return CALL_STUB_FIRST_INDEX+(((((a*CALL_STUB_HASH_CONST2)>>5)^((b*CALL_STUB_HASH_CONST1)>>5))+CALL_STUB_HASH_CONST2) & bucketMask()); + } + //grows the bucket referenced by probe. + BOOL GetMoreSpace(const Prober* probe); + //creates storage in which to store references to the buckets + static size_t* AllocateBuckets(size_t size) + { + LIMITED_METHOD_CONTRACT; + size_t* buckets = new size_t[size+CALL_STUB_FIRST_INDEX]; + if (buckets != NULL) + { + memset(&buckets[0], CALL_STUB_EMPTY_ENTRY, (size+CALL_STUB_FIRST_INDEX)*sizeof(void*)); + buckets[CALL_STUB_MASK_INDEX] = size-1; + } + return buckets; + } + inline size_t Read(size_t index) + { + LIMITED_METHOD_CONTRACT; + CONSISTENCY_CHECK(index <= bucketMask()+CALL_STUB_FIRST_INDEX); + return VolatileLoad(&buckets[index]); + } + +#ifdef _MSC_VER +#pragma warning(disable: 4267) //work-around for the compiler +#endif + inline void Write(size_t index, size_t value) + { + LIMITED_METHOD_CONTRACT; + CONSISTENCY_CHECK(index <= bucketMask()+CALL_STUB_FIRST_INDEX); + VolatileStore(&buckets[index], value); + } +#ifdef _MSC_VER +#pragma warning(default: 4267) +#endif + + // We store (#buckets-1) in bucket[CALL_STUB_MASK_INDEX ==0] + // We have two unused cells at bucket[CALL_STUB_COUNT_INDEX ==1] + // and bucket[CALL_STUB_DEAD_LINK ==2] + // and the table starts at bucket[CALL_STUB_FIRST_INDEX ==3] + // the number of elements is bucket[CALL_STUB_MASK_INDEX]+CALL_STUB_FIRST_INDEX + size_t* buckets; + static FastTable* dead; //linked list head of to be deleted (abandoned) buckets +}; + +#endif // !CROSSGEN_COMPILE + +#endif // !_VIRTUAL_CALL_STUB_H |
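
The resolve cache that the resolve stubs consult is keyed by a (token, MethodTable) pair. The following is a minimal, self-contained sketch (not runtime code) of how such a lookup proceeds, modeled on DispatchCache::HashMT and the ResolveCacheElem chaining shown in this header; the simplified Elem type, the Lookup helper, and the constant names are illustrative only, and LOG2_PTR assumes a 64-bit pointer size.

// Standalone sketch: bucket selection as in DispatchCache::HashMT, then a
// collision-chain walk in the spirit of DispatchCache::Lookup.
#include <cstddef>
#include <cstdint>

constexpr size_t CACHE_NUM_BITS = 12;              // CALL_STUB_CACHE_NUM_BITS
constexpr size_t CACHE_SIZE     = 1 << CACHE_NUM_BITS;
constexpr size_t CACHE_MASK     = CACHE_SIZE - 1;  // CALL_STUB_CACHE_MASK
constexpr size_t LOG2_PTR       = 3;               // LOG2_PTRSIZE on a 64-bit target (assumption)

struct Elem                     // stands in for ResolveCacheElem
{
    void*  pMT;                 // expected MethodTable
    size_t token;               // DispatchToken
    void*  target;              // resolved code address
    Elem*  pNext;               // hash-collision chain
};

// Mix the MethodTable pointer down into the cache index range, then xor in
// the (precomputed) token hash -- mirrors DispatchCache::HashMT above.
uint16_t HashMT(uint16_t tokenHash, void* mt)
{
    size_t mtHash = (size_t)mt;
    mtHash = (((mtHash >> CACHE_NUM_BITS) + mtHash) >> LOG2_PTR) & CACHE_MASK;
    return (uint16_t)(mtHash ^ (tokenHash & CACHE_MASK));
}

// Pick a bucket, then scan its chain for a (token, MethodTable) match,
// the same comparison ResolveCacheElem::Equals performs.
void* Lookup(Elem* const cache[CACHE_SIZE], uint16_t tokenHash, size_t token, void* mt)
{
    for (Elem* e = cache[HashMT(tokenHash, mt)]; e != nullptr; e = e->pNext)
    {
        if (e->pMT == mt && e->token == token)
            return e->target;
    }
    return nullptr;             // miss -> fall back to the resolve worker
}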
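
The Prober's double hashing can be illustrated the same way. Below is a hedged, standalone sketch of the index/stride computation from Prober::FormHash and the probe advance from Prober::Next; the ProbeSketch type is invented for illustration, while the hash constants and the odd-stride trick (which keeps the stride relatively prime to the power-of-two bucket size, so every slot is visited before the sequence repeats) come from the header above.

// Standalone sketch of the bucket probe sequence, not runtime code.
#include <cstddef>

constexpr size_t HASH_CONST1 = 1327;   // CALL_STUB_HASH_CONST1
constexpr size_t HASH_CONST2 = 43627;  // CALL_STUB_HASH_CONST2

struct ProbeSketch
{
    size_t index;   // current probe position within the bucket
    size_t stride;  // step between probes, forced odd
    size_t mask;    // bucket size - 1 (size is a power of two)
    size_t probes;  // number of probes made so far

    ProbeSketch(size_t keyA, size_t keyB, size_t bucketSize)
        : mask(bucketSize - 1), probes(0)
    {
        // Two roughly orthogonal hashes of the key pair, as in Prober::FormHash.
        size_t a = (keyA >> 16) + keyA;
        size_t b = (keyB >> 16) ^ keyB;
        index  = (((a * HASH_CONST1) >> 4) + ((b * HASH_CONST2) >> 4) + HASH_CONST1) & mask;
        stride = ((a + (b * HASH_CONST1) + HASH_CONST2) | 1) & mask;   // odd, so coprime with size
    }

    bool NoMore() const { return probes > mask; }   // the whole bucket has been probed
    bool Next()                                     // advance to the next slot
    {
        index = (index + stride) & mask;
        ++probes;
        return !NoMore();
    }
};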