// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. // // File: VirtualCallStub.h // // // See code:VirtualCallStubManager for details // // ============================================================================ #ifndef _VIRTUAL_CALL_STUB_H #define _VIRTUAL_CALL_STUB_H #ifndef CROSSGEN_COMPILE #define CHAIN_LOOKUP #if defined(_TARGET_X86_) // If this is uncommented, leaves a file "StubLog_.log" with statistics on the behavior // of stub-based interface dispatch. //#define STUB_LOGGING #endif #include "stubmgr.h" ///////////////////////////////////////////////////////////////////////////////////// // Forward class declarations class FastTable; class BucketTable; class Entry; class Prober; class VirtualCallStubManager; class VirtualCallStubManagerManager; struct LookupHolder; struct DispatchHolder; struct ResolveHolder; ///////////////////////////////////////////////////////////////////////////////////// // Forward function declarations extern "C" void InContextTPQuickDispatchAsmStub(); extern "C" PCODE STDCALL StubDispatchFixupWorker(TransitionBlock * pTransitionBlock, TADDR siteAddrForRegisterIndirect, DWORD sectionIndex, Module * pModule); extern "C" PCODE STDCALL VSD_ResolveWorker(TransitionBlock * pTransitionBlock, TADDR siteAddrForRegisterIndirect, size_t token #ifndef _TARGET_X86_ , UINT_PTR flags #endif ); ///////////////////////////////////////////////////////////////////////////////////// #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) typedef INT32 DISPL; #endif ///////////////////////////////////////////////////////////////////////////////////// // Represents the struct that is added to the resolve cache // NOTE: If you change the layout of this struct, you'll need to update various // ASM helpers in VirtualCallStubCpu that rely on offsets of members. // struct ResolveCacheElem { void *pMT; size_t token; // DispatchToken void *target; // These are used for chaining ResolveCacheElem *pNext; ResolveCacheElem *Next() { LIMITED_METHOD_CONTRACT; return VolatileLoad(&pNext); } #ifdef _DEBUG UINT16 debug_hash; UINT16 debug_index; #endif // _DEBUG BOOL Equals(size_t token, void *pMT) { LIMITED_METHOD_CONTRACT; return (this->pMT == pMT && this->token == token); } BOOL Equals(ResolveCacheElem *pElem) { WRAPPER_NO_CONTRACT; return Equals(pElem->token, pElem->pMT); } }; enum { e_resolveCacheElem_sizeof_mt = sizeof(void *), e_resolveCacheElem_sizeof_token = sizeof(size_t), e_resolveCacheElem_sizeof_target = sizeof(void *), e_resolveCacheElem_sizeof_next = sizeof(ResolveCacheElem *), e_resolveCacheElem_offset_mt = 0, e_resolveCacheElem_offset_token = e_resolveCacheElem_offset_mt + e_resolveCacheElem_sizeof_mt, e_resolveCacheElem_offset_target = e_resolveCacheElem_offset_token + e_resolveCacheElem_sizeof_token, e_resolveCacheElem_offset_next = e_resolveCacheElem_offset_target + e_resolveCacheElem_sizeof_target, }; ///////////////////////////////////////////////////////////////////////////////////// // A utility class to help manipulate a call site struct StubCallSite { friend class VirtualCallStubManager; private: // On x86 are four possible kinds of callsites when you take into account all features // Relative: direct call, e.g. "call addr". Not used currently. // RelativeIndirect (JmpRel): indirect call through a relative address, e.g. "call [addr]" // RegisterIndirect: indirect call through a register, e.g. 
"call [eax]" // DelegateCallSite: anything else, tail called through a register by shuffle thunk, e.g. "jmp [eax]" // // On all other platforms we always use an indirect call through an indirection cell // In these cases all calls are made by the platform equivalent of "call [addr]". // // DelegateCallSite are particular in that they can come in a variety of forms: // a direct delegate call has a sequence defined by the jit but a multicast or secure delegate // are defined in a stub and have a different shape // PTR_PCODE m_siteAddr; // Stores the address of an indirection cell PCODE m_returnAddr; public: #if defined(_TARGET_X86_) StubCallSite(TADDR siteAddrForRegisterIndirect, PCODE returnAddr); PCODE GetCallerAddress(); #else // !defined(_TARGET_X86_) // On platforms where we always use an indirection cell things // are much simpler - the siteAddr always stores a pointer to a // value that in turn points to the indirection cell. StubCallSite(TADDR siteAddr, PCODE returnAddr) { LIMITED_METHOD_CONTRACT; m_siteAddr = dac_cast(siteAddr); m_returnAddr = returnAddr; } PCODE GetCallerAddress() { LIMITED_METHOD_CONTRACT; return m_returnAddr; } #endif // !defined(_TARGET_X86_) PCODE GetSiteTarget() { WRAPPER_NO_CONTRACT; return *(GetIndirectCell()); } void SetSiteTarget(PCODE newTarget); PTR_PCODE GetIndirectCell() { LIMITED_METHOD_CONTRACT; return dac_cast(m_siteAddr); } PTR_PCODE * GetIndirectCellAddress() { LIMITED_METHOD_CONTRACT; return &m_siteAddr; } PCODE GetReturnAddress() { LIMITED_METHOD_CONTRACT; return m_returnAddr; } }; #ifdef FEATURE_PREJIT extern "C" void StubDispatchFixupStub(); // for lazy fixup of ngen call sites #endif // These are the assembly language entry points that the stubs use when they want to go into the EE extern "C" void ResolveWorkerAsmStub(); // resolve a token and transfer control to that method extern "C" void ResolveWorkerChainLookupAsmStub(); // for chaining of entries in the cache #ifdef _TARGET_X86_ extern "C" void BackPatchWorkerAsmStub(); // backpatch a call site to point to a different stub #ifdef FEATURE_PAL extern "C" void BackPatchWorkerStaticStub(PCODE returnAddr, TADDR siteAddrForRegisterIndirect); #endif // FEATURE_PAL #endif // _TARGET_X86_ typedef VPTR(class VirtualCallStubManager) PTR_VirtualCallStubManager; // VirtualCallStubManager is the heart of the stub dispatch logic. See the book of the runtime entry // // file:../../doc/BookOfTheRuntime/ClassLoader/VirtualStubDispatchDesign.doc // // The basic idea is that a call to an interface (it could also be used for virtual calls in general, but we // do not do this), is simply the code // // call [DispatchCell] // // Where we make sure 'DispatchCell' points at stubs that will do the right thing. DispatchCell is writable // so we can udpate the code over time. There are three basic types of stubs that the dispatch cell can point // to. // * Lookup: The intial stub that has no 'fast path' and simply pushes a ID for interface being called // and calls into the runtime at code:VirtualCallStubManager.ResolveWorkerStatic. // * Dispatch: Lookup stubs are patched to this stub which has a fast path that checks for a particular // Method Table and if that fails jumps to code that // * Decrements a 'missCount' (starts out as code:STUB_MISS_COUNT_VALUE). 
If this count goes to zero, code:VirtualCallStubManager.BackPatchWorkerStatic is called, which morphs the call site into a resolve stub (however, since this decrementing logic is SHARED among all dispatch stubs, it may take multiples of code:STUB_MISS_COUNT_VALUE if multiple call sites are actively polymorphic; this seems unlikely). // * Calls a resolve stub (whenever a dispatch stub is created, it always has a corresponding resolve stub, but the resolve stubs are shared among many dispatch stubs). // * Resolve: see code:ResolveStub. This looks up the Method table in a process wide cache (see code:ResolveCacheElem), and if found, jumps to it. This code path is about 17 instructions long (so pretty fast, but certainly much slower than a normal call). If the method table is not found in the cache, it calls into the runtime code:VirtualCallStubManager.ResolveWorkerStatic, which populates it. // So the general progression is that call sites' cells // * start out life pointing to a lookup stub // * On first call they get updated into a dispatch stub. When this misses, it calls a resolve stub, which populates the resolve stub's cache, but does not update the call site's cell (thus it is still pointing at the dispatch stub). // * After code:STUB_MISS_COUNT_VALUE misses, we update the call site's cell to point directly at the resolve stub (thus avoiding the overhead of the quick check that always seems to be failing and the miss count update). // QUESTION: What are the lifetimes of the various stubs and hash table entries? // QUESTION: There does not seem to be any logic that will change a call site's cell once it becomes a Resolve stub. Thus once a particular call site becomes a Resolve stub we live with the Resolve stub's (in)efficiency forever. // see code:#StubDispatchNotes for more class VirtualCallStubManager : public StubManager { friend class ClrDataAccess; friend class VirtualCallStubManagerManager; friend class VirtualCallStubManagerIterator; VPTR_VTABLE_CLASS(VirtualCallStubManager, StubManager) public: #ifdef _DEBUG virtual const char * DbgGetName() { LIMITED_METHOD_CONTRACT; return "VirtualCallStubManager"; } #endif // The reason for our existence: return a callstub for type id and slot number // where type id = 0 for the class contract (i.e. a virtual call), and type id > 0 for an // interface invoke where the id indicates which interface it is. // // The function is idempotent, i.e. // you'll get the same callstub twice if you call it with identical inputs. PCODE GetCallStub(TypeHandle ownerType, MethodDesc *pMD); PCODE GetCallStub(TypeHandle ownerType, DWORD slot); // Generate a fresh indirection cell. BYTE* GenerateStubIndirection(PCODE stub, BOOL fUseRecycledCell = FALSE); // Set up static data structures - called during EEStartup static void InitStatic(); static void UninitStatic(); // Per instance initialization - called during AppDomain::Init and ::Uninit and for collectible loader allocators void Init(BaseDomain* pDomain, LoaderAllocator *pLoaderAllocator); void Uninit(); //@TODO: the logging should be tied into the VM's normal logging mechanisms, //@TODO: for now we just always write a short log file called "StubLog_.log" static void StartupLogging(); static void LoggingDump(); static void FinishLogging(); static void ResetCache(); // Reclaim/rearrange any structures; this can only be done during a gc sync point. // This is the mechanism we are using to avoid synchronization of a lot of our // cache and hash table accesses.
We are requiring that during a gc sync point we are not // executing any stub code at all, hence at this time we are serialized on a single thread (gc) // and no other thread is accessing the data structures. static void ReclaimAll(); void Reclaim(); #ifndef DACCESS_COMPILE VirtualCallStubManager() : StubManager(), lookup_rangeList(), resolve_rangeList(), dispatch_rangeList(), cache_entry_rangeList(), parentDomain(NULL), isCollectible(false), m_initialReservedMemForHeaps(NULL), m_FreeIndCellList(NULL), m_RecycledIndCellList(NULL), indcell_heap(NULL), cache_entry_heap(NULL), lookup_heap(NULL), dispatch_heap(NULL), resolve_heap(NULL), #ifdef _TARGET_AMD64_ m_fShouldAllocateLongJumpDispatchStubs(FALSE), #endif lookups(NULL), cache_entries(NULL), dispatchers(NULL), resolvers(NULL), m_counters(NULL), m_cur_counter_block(NULL), m_cur_counter_block_for_reclaim(NULL), m_cur_counter_block_for_reclaim_index(NULL), m_pNext(NULL) { LIMITED_METHOD_CONTRACT; ZeroMemory(&stats, sizeof(stats)); } ~VirtualCallStubManager(); #endif // !DACCESS_COMPILE enum StubKind { SK_UNKNOWN, SK_LOOKUP, // Lookup Stubs are SLOW stubs that simply call into the runtime to do all work. SK_DISPATCH, // Dispatch Stubs have a fast check for one type otherwise jumps to runtime. Works for monomorphic sites SK_RESOLVE, // Resolve Stubs do a hash lookup before fallling back to the runtime. Works for polymorphic sites. SK_BREAKPOINT }; // peek at the assembly code and predict which kind of a stub we have StubKind predictStubKind(PCODE stubStartAddress); /* know thine own stubs. It is possible that when multiple virtualcallstub managers are built that these may need to become non-static, and the callers modified accordingly */ StubKind getStubKind(PCODE stubStartAddress, BOOL usePredictStubKind = TRUE) { WRAPPER_NO_CONTRACT; SUPPORTS_DAC; // This method can called with stubStartAddress==NULL, e.g. when handling null reference exceptions // caused by IP=0. Early out for this case to avoid confusing handled access violations inside predictStubKind. if (PCODEToPINSTR(stubStartAddress) == NULL) return SK_UNKNOWN; // Rather than calling IsInRange(stubStartAddress) for each possible stub kind // we can peek at the assembly code and predict which kind of a stub we have StubKind predictedKind = (usePredictStubKind) ? predictStubKind(stubStartAddress) : SK_UNKNOWN; if (predictedKind == SK_DISPATCH) { if (isDispatchingStub(stubStartAddress)) return SK_DISPATCH; } else if (predictedKind == SK_LOOKUP) { if (isLookupStub(stubStartAddress)) return SK_LOOKUP; } else if (predictedKind == SK_RESOLVE) { if (isResolvingStub(stubStartAddress)) return SK_RESOLVE; } // This is the slow case. If the predict returned SK_UNKNOWN, SK_BREAKPOINT, // or the predict was found to be incorrect when checked against the RangeLists // (isXXXStub), then we'll check each stub heap in sequence. 
if (isDispatchingStub(stubStartAddress)) return SK_DISPATCH; else if (isLookupStub(stubStartAddress)) return SK_LOOKUP; else if (isResolvingStub(stubStartAddress)) return SK_RESOLVE; return SK_UNKNOWN; } inline BOOL isStub(PCODE stubStartAddress) { WRAPPER_NO_CONTRACT; SUPPORTS_DAC; return (getStubKind(stubStartAddress) != SK_UNKNOWN); } BOOL isDispatchingStub(PCODE stubStartAddress) { WRAPPER_NO_CONTRACT; SUPPORTS_DAC; return GetDispatchRangeList()->IsInRange(stubStartAddress); } BOOL isResolvingStub(PCODE stubStartAddress) { WRAPPER_NO_CONTRACT; SUPPORTS_DAC; return GetResolveRangeList()->IsInRange(stubStartAddress); } BOOL isLookupStub(PCODE stubStartAddress) { WRAPPER_NO_CONTRACT; SUPPORTS_DAC; return GetLookupRangeList()->IsInRange(stubStartAddress); } static BOOL isDispatchingStubStatic(PCODE addr) { WRAPPER_NO_CONTRACT; StubKind stubKind; FindStubManager(addr, &stubKind); return stubKind == SK_DISPATCH; } static BOOL isResolvingStubStatic(PCODE addr) { WRAPPER_NO_CONTRACT; StubKind stubKind; FindStubManager(addr, &stubKind); return stubKind == SK_RESOLVE; } static BOOL isLookupStubStatic(PCODE addr) { WRAPPER_NO_CONTRACT; StubKind stubKind; FindStubManager(addr, &stubKind); return stubKind == SK_LOOKUP; } //use range lists to track the chunks of memory that are part of each heap LockedRangeList lookup_rangeList; LockedRangeList resolve_rangeList; LockedRangeList dispatch_rangeList; LockedRangeList cache_entry_rangeList; // Get dac-ized pointers to rangelist. RangeList* GetLookupRangeList() { SUPPORTS_DAC; TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, lookup_rangeList); return PTR_RangeList(addr); } RangeList* GetResolveRangeList() { SUPPORTS_DAC; TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, resolve_rangeList); return PTR_RangeList(addr); } RangeList* GetDispatchRangeList() { SUPPORTS_DAC; TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, dispatch_rangeList); return PTR_RangeList(addr); } RangeList* GetCacheEntryRangeList() { SUPPORTS_DAC; TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, cache_entry_rangeList); return PTR_RangeList(addr); } private: //allocate and initialize a stub of the desired kind DispatchHolder *GenerateDispatchStub(PCODE addrOfCode, PCODE addrOfFail, void *pMTExpected, size_t dispatchToken); #ifdef _TARGET_AMD64_ // Used to allocate a long jump dispatch stub. See comment around // m_fShouldAllocateLongJumpDispatchStubs for explaination. DispatchHolder *GenerateDispatchStubLong(PCODE addrOfCode, PCODE addrOfFail, void *pMTExpected, size_t dispatchToken); #endif ResolveHolder *GenerateResolveStub(PCODE addrOfResolver, PCODE addrOfPatcher, size_t dispatchToken); LookupHolder *GenerateLookupStub(PCODE addrOfResolver, size_t dispatchToken); template void AddToCollectibleVSDRangeList(STUB_HOLDER *holder) { if (isCollectible) { parentDomain->GetCollectibleVSDRanges()->AddRange(reinterpret_cast(holder->stub()), reinterpret_cast(holder->stub()) + holder->stub()->size(), this); } } // The resolve cache is static across all AppDomains ResolveCacheElem *GenerateResolveCacheElem(void *addrOfCode, void *pMTExpected, size_t token); ResolveCacheElem *GetResolveCacheElem(void *pMT, size_t token, void *target); //Given a dispatch token, an object and a method table, determine the //target address to go to. The return value (BOOL) states whether this address //is cacheable or not. 
static BOOL Resolver(MethodTable * pMT, DispatchToken token, OBJECTREF * protectedObj, PCODE * ppTarget); // This can be used to find a target without needing the ability to throw static BOOL TraceResolver(Object *pObj, DispatchToken token, TraceDestination *trace); public: // Return the MethodDesc corresponding to this token. static MethodDesc *GetRepresentativeMethodDescFromToken(DispatchToken token, MethodTable *pMT); static MethodDesc *GetInterfaceMethodDescFromToken(DispatchToken token); static MethodTable *GetTypeFromToken(DispatchToken token); //This is used to get the token out of a stub static size_t GetTokenFromStub(PCODE stub); //This is used to get the token out of a stub and we know the stub manager and stub kind static size_t GetTokenFromStubQuick(VirtualCallStubManager * pMgr, PCODE stub, StubKind kind); // General utility functions // Quick lookup in the cache. NOTHROW, GC_NOTRIGGER static PCODE CacheLookup(size_t token, UINT16 tokenHash, MethodTable *pMT); // Full exhaustive lookup. THROWS, GC_TRIGGERS static PCODE GetTarget(DispatchToken token, MethodTable *pMT); private: // Given a dispatch token, return true if the token represents an interface, false if just a slot. static BOOL IsInterfaceToken(DispatchToken token); // Given a dispatch token, return true if the token represents a slot on the target. static BOOL IsClassToken(DispatchToken token); #ifdef CHAIN_LOOKUP static ResolveCacheElem* __fastcall PromoteChainEntry(ResolveCacheElem *pElem); #endif // Flags used by the non-x86 versions of VSD_ResolveWorker #define SDF_ResolveBackPatch (0x01) #define SDF_ResolvePromoteChain (0x02) #define SDF_ResolveFlags (0x03) // These method needs to call the instance methods. friend PCODE VSD_ResolveWorker(TransitionBlock * pTransitionBlock, TADDR siteAddrForRegisterIndirect, size_t token #ifndef _TARGET_X86_ , UINT_PTR flags #endif ); #if defined(_TARGET_X86_) && defined(FEATURE_PAL) friend void BackPatchWorkerStaticStub(PCODE returnAddr, TADDR siteAddrForRegisterIndirect); #endif //These are the entrypoints that the stubs actually end up calling via the // xxxAsmStub methods above static void STDCALL BackPatchWorkerStatic(PCODE returnAddr, TADDR siteAddrForRegisterIndirect); public: PCODE ResolveWorker(StubCallSite* pCallSite, OBJECTREF *protectedObj, DispatchToken token, StubKind stubKind); void BackPatchWorker(StubCallSite* pCallSite); //Change the callsite to point to stub void BackPatchSite(StubCallSite* pCallSite, PCODE stub); public: /* the following two public functions are to support tracing or stepping thru stubs via the debugger. */ virtual BOOL CheckIsStub_Internal(PCODE stubStartAddress); virtual BOOL TraceManager(Thread *thread, TraceDestination *trace, T_CONTEXT *pContext, BYTE **pRetAddr); size_t GetSize() { LIMITED_METHOD_CONTRACT; size_t retval=0; if(indcell_heap) retval+=indcell_heap->GetSize(); if(cache_entry_heap) retval+=cache_entry_heap->GetSize(); if(lookup_heap) retval+=lookup_heap->GetSize(); if(dispatch_heap) retval+=dispatch_heap->GetSize(); if(resolve_heap) retval+=resolve_heap->GetSize(); return retval; }; private: /* the following two private functions are to support tracing or stepping thru stubs via the debugger. */ virtual BOOL DoTraceStub(PCODE stubStartAddress, TraceDestination *trace); private: // The parent domain of this manager PTR_BaseDomain parentDomain; bool isCollectible; BYTE * m_initialReservedMemForHeaps; static const UINT32 INDCELLS_PER_BLOCK = 32; // 32 indirection cells per block. 
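// The free and recycled indirection cell lists below are intrusive singly linked
// lists: a cell that is on a list stores the address of the next free cell in its
// own first pointer-sized slot (see GetOneIndCell / InsertIntoIndCellList below).
// The following is only an illustrative, standalone sketch of that convention;
// the helper names are hypothetical and the real code also requires m_indCellLock.
//
//     BYTE * PopIndCell(BYTE ** ppList)
//     {
//         BYTE * cell = *ppList;
//         if (cell != NULL)
//             *ppList = *(BYTE **)cell;   // the next pointer lives inside the free cell itself
//         return cell;                    // NULL means the list was empty
//     }
//
//     void PushIndCellChain(BYTE ** ppList, BYTE * head, BYTE * tail)
//     {
//         *(BYTE **)tail = *ppList;       // splice the old list after the chain's tail
//         *ppList = head;                 // the chain's head becomes the new list head
//     }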
CrstExplicitInit m_indCellLock; // List of free indirection cells. The cells were directly allocated from the loader heap // (code:VirtualCallStubManager::GenerateStubIndirection) BYTE * m_FreeIndCellList; // List of recycled indirection cells. The cells were recycled from finalized dynamic methods // (code:LCGMethodResolver::RecycleIndCells). BYTE * m_RecycledIndCellList; #ifndef DACCESS_COMPILE // This methods returns the a free cell from m_FreeIndCellList. It returns NULL if the list is empty. BYTE * GetOneFreeIndCell() { WRAPPER_NO_CONTRACT; return GetOneIndCell(&m_FreeIndCellList); } // This methods returns the a recycled cell from m_RecycledIndCellList. It returns NULL if the list is empty. BYTE * GetOneRecycledIndCell() { WRAPPER_NO_CONTRACT; return GetOneIndCell(&m_RecycledIndCellList); } // This methods returns the a cell from ppList. It returns NULL if the list is empty. BYTE * GetOneIndCell(BYTE ** ppList) { CONTRACT (BYTE*) { NOTHROW; GC_NOTRIGGER; MODE_ANY; PRECONDITION(CheckPointer(ppList)); PRECONDITION(m_indCellLock.OwnedByCurrentThread()); } CONTRACT_END; BYTE * temp = *ppList; if (temp) { BYTE * pNext = *((BYTE **)temp); *ppList = pNext; RETURN temp; } RETURN NULL; } // insert a linked list of indirection cells at the beginning of m_FreeIndCellList void InsertIntoFreeIndCellList(BYTE * head, BYTE * tail) { WRAPPER_NO_CONTRACT; InsertIntoIndCellList(&m_FreeIndCellList, head, tail); } // insert a linked list of indirection cells at the beginning of ppList void InsertIntoIndCellList(BYTE ** ppList, BYTE * head, BYTE * tail) { CONTRACTL { NOTHROW; GC_NOTRIGGER; PRECONDITION(CheckPointer(ppList)); PRECONDITION(CheckPointer(head)); PRECONDITION(CheckPointer(tail)); PRECONDITION(m_indCellLock.OwnedByCurrentThread()); } CONTRACTL_END; BYTE * temphead = *ppList; *((BYTE**)tail) = temphead; *ppList = head; } #endif // !DACCESS_COMPILE PTR_LoaderHeap indcell_heap; // indirection cells go here PTR_LoaderHeap cache_entry_heap; // resolve cache elem entries go here PTR_LoaderHeap lookup_heap; // lookup stubs go here PTR_LoaderHeap dispatch_heap; // dispatch stubs go here PTR_LoaderHeap resolve_heap; // resolve stubs go here #ifdef _TARGET_AMD64_ // When we layout the stub heaps, we put them close together in a sequential order // so that we maximize performance with respect to branch predictions. On AMD64, // dispatch stubs use a rel32 jump on failure to the resolve stub. This works for // a while because of the ordering, but as soon as we have to start allocating more // memory for either the dispatch or resolve heaps we have a chance that we'll be // further away than a rel32 jump can reach, because we're in a 64-bit address // space. As such, this flag will indicate when we allocate the first dispatch stub // that cannot reach a resolve stub, and when this happens we'll switch over to // allocating the larger version of the dispatch stub which contains an abs64 jump. //@TODO: This is a bit of a workaround, but the limitations of LoaderHeap require that we //@TODO: take this approach. Hopefully in Orcas we'll have a chance to rewrite LoaderHeap. BOOL m_fShouldAllocateLongJumpDispatchStubs; // Defaults to FALSE. 
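    // Illustrative sketch only (the helper below is hypothetical, not the actual
    // allocation logic): the reason the flag above exists is that a rel32 jump can
    // only encode a signed 32-bit displacement measured from the end of the jump
    // instruction, so once a newly allocated dispatch stub would land more than
    // ~2GB away from the resolve stub it must jump to, we have to fall back to the
    // abs64 (long jump) variant.
    //
    //     inline bool FitsInRel32(UINT_PTR jumpInstrEnd, UINT_PTR target)
    //     {
    //         INT64 delta = (INT64)target - (INT64)jumpInstrEnd;
    //         return delta >= INT32_MIN && delta <= INT32_MAX;
    //     }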
#endif BucketTable * lookups; // hash table of lookups keyed by tokens BucketTable * cache_entries; // hash table of dispatch token/target structs for dispatch cache BucketTable * dispatchers; // hash table of dispatching stubs keyed by tokens/actualtype BucketTable * resolvers; // hash table of resolvers keyed by tokens/resolverstub // This structure is used to keep track of the fail counters. // We only need one fail counter per ResolveStub, // and most programs use less than 250 ResolveStubs // We allocate these on the main heap using "new counter block" struct counter_block { static const UINT32 MAX_COUNTER_ENTRIES = 256-2; // 254 counters should be enough for most cases. counter_block * next; // the next block UINT32 used; // the index of the next free entry INT32 block[MAX_COUNTER_ENTRIES]; // the counters }; counter_block *m_counters; // linked list of counter blocks of failure counters counter_block *m_cur_counter_block; // current block for updating counts counter_block *m_cur_counter_block_for_reclaim; // current block for updating UINT32 m_cur_counter_block_for_reclaim_index; // index into the current block for updating // Used to keep track of all the VCSManager objects in the system. PTR_VirtualCallStubManager m_pNext; // Linked list pointer public: // Given a stub address, find the VCSManager that owns it. static VirtualCallStubManager *FindStubManager(PCODE addr, StubKind* wbStubKind = NULL, BOOL usePredictStubKind = TRUE); #ifndef DACCESS_COMPILE // insert a linked list of indirection cells at the beginning of m_RecycledIndCellList void InsertIntoRecycledIndCellList_Locked(BYTE * head, BYTE * tail) { CONTRACTL { NOTHROW; GC_TRIGGERS; MODE_ANY; } CONTRACTL_END; CrstHolder lh(&m_indCellLock); InsertIntoIndCellList(&m_RecycledIndCellList, head, tail); } #endif // !DACCESS_COMPILE // These are the counters for keeping statistics struct { UINT32 site_counter; //# of call sites UINT32 stub_lookup_counter; //# of lookup stubs UINT32 stub_poly_counter; //# of resolve stubs UINT32 stub_mono_counter; //# of dispatch stubs UINT32 site_write; //# of call site backpatch writes UINT32 site_write_poly; //# of call site backpatch writes to point to resolve stubs UINT32 site_write_mono; //# of call site backpatch writes to point to dispatch stubs UINT32 worker_call; //# of calls into ResolveWorker UINT32 worker_call_no_patch; //# of times call_worker resulted in no patch UINT32 worker_collide_to_mono; //# of times we converted a poly stub to a mono stub instead of writing the cache entry UINT32 stub_space; //# of bytes of stubs UINT32 cache_entry_counter; //# of cache structs UINT32 cache_entry_space; //# of bytes used by cache lookup structs } stats; void LogStats(); #ifdef DACCESS_COMPILE protected: virtual void DoEnumMemoryRegions(CLRDataEnumMemoryFlags flags); virtual LPCWSTR GetStubManagerName(PCODE addr) { WRAPPER_NO_CONTRACT; CONSISTENCY_CHECK(isStub(addr)); if (isLookupStub(addr)) { return W("VSD_LookupStub"); } else if (isDispatchingStub(addr)) { return W("VSD_DispatchStub"); } else { CONSISTENCY_CHECK(isResolvingStub(addr)); return W("VSD_ResolveStub"); } } #endif }; /******************************************************************************************************** ********************************************************************************************************/ typedef VPTR(class VirtualCallStubManagerManager) PTR_VirtualCallStubManagerManager; class VirtualCallStubManagerIterator; class VirtualCallStubManagerManager : public StubManager { 
VPTR_VTABLE_CLASS(VirtualCallStubManagerManager, StubManager) friend class StubManager; friend class VirtualCallStubManager; friend class VirtualCallStubManagerIterator; friend class StubManagerIterator; public: virtual BOOL TraceManager(Thread *thread, TraceDestination *trace, T_CONTEXT *pContext, BYTE **pRetAddr); virtual BOOL CheckIsStub_Internal(PCODE stubStartAddress); virtual BOOL DoTraceStub(PCODE stubStartAddress, TraceDestination *trace); static MethodDesc *Entry2MethodDesc(PCODE stubStartAddress, MethodTable *pMT); #ifdef DACCESS_COMPILE virtual void DoEnumMemoryRegions(CLRDataEnumMemoryFlags flags); virtual LPCWSTR GetStubManagerName(PCODE addr) { WRAPPER_NO_CONTRACT; return FindVirtualCallStubManager(addr)->GetStubManagerName(addr); } #endif private: // Used to keep track of all the VCSManager objects in the system. PTR_VirtualCallStubManager m_pManagers; // Head of the linked list #ifndef DACCESS_COMPILE // Ctor. This is only used by StaticInit. VirtualCallStubManagerManager(); #endif // A cache element to quickly check the last matched manager. Volatile m_pCacheElem; // RW lock for reading entries and removing them. SimpleRWLock m_RWLock; // This will look through all the managers in an intelligent fashion to // find the manager that owns the address. VirtualCallStubManager *FindVirtualCallStubManager(PCODE stubAddress); protected: // Add a VCSManager to the linked list. void AddStubManager(VirtualCallStubManager *pMgr); // Remove a VCSManager from the linked list. void RemoveStubManager(VirtualCallStubManager *pMgr); VirtualCallStubManager *FirstManager() { WRAPPER_NO_CONTRACT; return m_pManagers; } #ifndef DACCESS_COMPILE static void InitStatic(); #endif public: SPTR_DECL(VirtualCallStubManagerManager, g_pManager); static VirtualCallStubManagerManager *GlobalManager() { LIMITED_METHOD_DAC_CONTRACT; CONSISTENCY_CHECK(CheckPointer(g_pManager)); return g_pManager; } VirtualCallStubManagerIterator IterateVirtualCallStubManagers(); #ifdef _DEBUG // Debug helper to help identify stub-managers. virtual const char * DbgGetName() { LIMITED_METHOD_CONTRACT; return "VirtualCallStubManagerManager"; } #endif }; /******************************************************************************************************** ********************************************************************************************************/ class VirtualCallStubManagerIterator { friend class VirtualCallStubManagerManager; public: BOOL Next(); VirtualCallStubManager *Current(); // Copy ctor inline VirtualCallStubManagerIterator(const VirtualCallStubManagerIterator &it); protected: inline VirtualCallStubManagerIterator(VirtualCallStubManagerManager *pMgr); BOOL m_fIsStart; VirtualCallStubManager *m_pCurMgr; }; ///////////////////////////////////////////////////////////////////////////////////////////// // Ctor inline VirtualCallStubManagerIterator::VirtualCallStubManagerIterator(VirtualCallStubManagerManager *pMgr) : m_fIsStart(TRUE), m_pCurMgr(pMgr->m_pManagers) { LIMITED_METHOD_DAC_CONTRACT; CONSISTENCY_CHECK(CheckPointer(pMgr)); } ///////////////////////////////////////////////////////////////////////////////////////////// // Copy ctor inline VirtualCallStubManagerIterator::VirtualCallStubManagerIterator(const VirtualCallStubManagerIterator &it) : m_fIsStart(it.m_fIsStart), m_pCurMgr(it.m_pCurMgr) { LIMITED_METHOD_DAC_CONTRACT; } /******************************************************************************************************** #StubDispatchNotes A note on approach. 
The cache and hash tables used by the stub and lookup mechanism are designed with an eye to minimizing interlocking and/or syncing and/or locking operations. They are intended to run in a highly concurrent environment. Since there is no magic, some tradeoffs and some implementation constraints are required. The basic notion is that if all reads and writes are atomic and if all functions and operations operate correctly in the face of commutative reorderings of the visibility of all reads and writes across threads, then we don't have to interlock, sync, or serialize. Our approximation of this is: 1. All reads and all writes to tables must be atomic. This effectively limits the actual entry size in a table to be a pointer or a pointer sized thing. 2. All functions, like comparisons for equality or computation of hash values, must function correctly in the face of concurrent updating of the underlying table. This is accomplished by making the underlying structures/entries effectively immutable, if concurrency is in any way possible. By effectively immutable, we mean that the stub or token structure is either immutable or that if it is ever written, all possibly concurrent writes are attempting to write the same value (atomically) or that the competing (atomic) values do not affect correctness, and that the function operates correctly whether or not any of the writes have taken place (is visible yet). The constraint we maintain is that all competing updates (and their visibility or lack thereof) do not alter the correctness of the program. 3. All tables are inexact. The counts they hold (e.g. number of contained entries) may be inaccurate, but that inaccuracy cannot affect their correctness. Table modifications, such as insertion of a new entry, may not succeed, but such failures cannot affect correctness. This implies that just because a stub/entry is not present in a table, e.g. has been removed, that does not mean that it is not in use. It also implies that internal table structures, such as discarded hash table buckets, cannot be freely recycled since another concurrent thread may still be walking through them. 4. Occasionally it is necessary to pick up the pieces that have been dropped on the floor, so to speak, e.g. actually recycle hash buckets that aren't in use. Since we have a natural sync point already in the GC, we use that to provide cleanup points. We need to make sure that code that is walking our structures is not a GC safe point. Hence if the GC calls back into us inside the GC sync point, we know that nobody is inside our structures and we can safely rearrange and recycle things.
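   As an illustration only -- a standalone model written against std::atomic rather
   than the runtime's own interlocked helpers -- the insert discipline implied by
   points 1-3 boils down to: fully construct an (effectively immutable) entry first,
   publish it with a single pointer-sized compare-exchange, and treat losing the
   race as a benign outcome rather than an error:

       #include <atomic>

       struct Slot { std::atomic<void*> value; };   // one pointer-sized table cell

       // Try to publish 'entry' into an empty slot.  If another thread raced us and
       // won, keep its entry; the caller just falls back to the slower path and
       // correctness is unaffected.
       void* TryPublish(Slot& slot, void* entry)
       {
           void* expected = nullptr;                // i.e. CALL_STUB_EMPTY_ENTRY
           if (slot.value.compare_exchange_strong(expected, entry))
               return entry;                        // we inserted it
           return expected;                         // somebody else's entry won
       }

   Readers only ever perform single pointer-sized loads of such slots, which is why
   the entries themselves must be immutable (or at worst rewritten with identical
   values) once they become visible.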
********************************************************************************************************/ //initial and increment value for fail stub counters #ifdef STUB_LOGGING extern UINT32 STUB_MISS_COUNT_VALUE; extern UINT32 STUB_COLLIDE_WRITE_PCT; extern UINT32 STUB_COLLIDE_MONO_PCT; #else // !STUB_LOGGING #define STUB_MISS_COUNT_VALUE 100 #define STUB_COLLIDE_WRITE_PCT 100 #define STUB_COLLIDE_MONO_PCT 0 #endif // !STUB_LOGGING //size and mask of the cache used by resolve stubs // CALL_STUB_CACHE_SIZE must be equal to 2^CALL_STUB_CACHE_NUM_BITS #define CALL_STUB_CACHE_NUM_BITS 12 //10 #define CALL_STUB_CACHE_SIZE 4096 //1024 #define CALL_STUB_CACHE_MASK (CALL_STUB_CACHE_SIZE-1) #define CALL_STUB_CACHE_PROBES 5 //min sizes for BucketTable and buckets and the growth and hashing constants #define CALL_STUB_MIN_BUCKETS 32 #define CALL_STUB_MIN_ENTRIES 4 //this is so that the very first growth will jump from 4 to 32 entries, then double from there. #define CALL_STUB_SECONDARY_ENTRIES 8 #define CALL_STUB_GROWTH_FACTOR 2 #define CALL_STUB_LOAD_FACTOR 90 #define CALL_STUB_HASH_CONST1 1327 #define CALL_STUB_HASH_CONST2 43627 #define LARGE_PRIME 7199369 //internal layout of buckets=size-1,count,entries.... #define CALL_STUB_MASK_INDEX 0 #define CALL_STUB_COUNT_INDEX 1 #define CALL_STUB_DEAD_LINK 2 #define CALL_STUB_FIRST_INDEX 3 //marker entries in cache and hash tables #define CALL_STUB_EMPTY_ENTRY 0 // number of successes for a chained element before it gets moved to the front #define CALL_STUB_CACHE_INITIAL_SUCCESS_COUNT (0x100) /******************************************************************************************************* Entry is an abstract class. We will make specific subclasses for each kind of entry. Entries hold references to stubs or tokens. The principal thing they provide is a virtual Equals function that is used by the caching and hashing tables within which the stubs and tokens are stored. Entries are typically stack allocated by the routines that call into the hash and caching functions, and the functions stuff stubs into the entry to do the comparisons. Essentially specific entry subclasses supply a vtable to a stub as and when needed. This means we don't have to have vtables attached to stubs. Summarizing so far, there is a struct for each kind of stub or token, of the form XXXXStub. They provide the actual storage layouts. There is a struct in which each stub that has code is contained, of the form XXXXHolder. They provide alignment and ancillary storage for the stub code. There is a subclass of Entry for each kind of stub or token, of the form XXXXEntry. They provide the specific implementations of the virtual functions declared in Entry. */ class Entry { public: //access and compare the keys of the entry virtual BOOL Equals(size_t keyA, size_t keyB)=0; virtual size_t KeyA()=0; virtual size_t KeyB()=0; //contents is the struct or token that the entry exposes virtual void SetContents(size_t contents)=0; }; /* define the platform specific Stubs and stub holders */ #include <virtualcallstubcpu.hpp> #if USES_LOOKUP_STUBS /********************************************************************************************** LookupEntry wraps LookupStubs and provides the concrete implementation of the abstract class Entry. Virtual and interface call sites, when they are first jitted, point to LookupStubs. The hash table that contains lookup stubs is keyed by token, hence the Equals function uses the embedded token in the stub for comparison purposes.
Since we are willing to allow duplicates in the hash table (as long as they are relatively rare) we do use direct comparison of the tokens rather than extracting the fields from within the tokens, for perf reasons. */ class LookupEntry : public Entry { public: //Creates an entry that wraps lookup stub s LookupEntry(size_t s) { LIMITED_METHOD_CONTRACT; _ASSERTE(VirtualCallStubManager::isLookupStubStatic((PCODE)s)); stub = (LookupStub*) s; } //default contructor to allow stack and inline allocation of lookup entries LookupEntry() {LIMITED_METHOD_CONTRACT; stub = NULL;} //implementations of abstract class Entry BOOL Equals(size_t keyA, size_t keyB) { WRAPPER_NO_CONTRACT; return stub && (keyA == KeyA()) && (keyB == KeyB()); } size_t KeyA() { WRAPPER_NO_CONTRACT; return Token(); } size_t KeyB() { WRAPPER_NO_CONTRACT; return (size_t)0; } void SetContents(size_t contents) { LIMITED_METHOD_CONTRACT; _ASSERTE(VirtualCallStubManager::isLookupStubStatic((PCODE)contents)); stub = LookupHolder::FromLookupEntry((PCODE)contents)->stub(); } //extract the token of the underlying lookup stub inline size_t Token() { LIMITED_METHOD_CONTRACT; return stub ? stub->token() : 0; } private: LookupStub* stub; //the stub the entry wrapping }; #endif // USES_LOOKUP_STUBS /********************************************************************************************** ResolveCacheEntry wraps a ResolveCacheElem and provides lookup functionality for entries that were created that may be added to the ResolveCache */ class ResolveCacheEntry : public Entry { public: ResolveCacheEntry(size_t elem) { LIMITED_METHOD_CONTRACT; _ASSERTE(elem != 0); pElem = (ResolveCacheElem*) elem; } //default contructor to allow stack and inline allocation of lookup entries ResolveCacheEntry() { LIMITED_METHOD_CONTRACT; pElem = NULL; } //access and compare the keys of the entry virtual BOOL Equals(size_t keyA, size_t keyB) { WRAPPER_NO_CONTRACT; return pElem && (keyA == KeyA()) && (keyB == KeyB()); } virtual size_t KeyA() { LIMITED_METHOD_CONTRACT; return pElem != NULL ? pElem->token : 0; } virtual size_t KeyB() { LIMITED_METHOD_CONTRACT; return pElem != NULL ? (size_t) pElem->pMT : 0; } //contents is the struct or token that the entry exposes virtual void SetContents(size_t contents) { LIMITED_METHOD_CONTRACT; pElem = (ResolveCacheElem*) contents; } inline const BYTE *Target() { LIMITED_METHOD_CONTRACT; return pElem != NULL ? (const BYTE *)pElem->target : NULL; } private: ResolveCacheElem *pElem; }; /********************************************************************************************** ResolveEntry wraps ResolveStubs and provide the concrete implementation of the abstract class Entry. Polymorphic call sites and monomorphic calls that fail end up in a ResolveStub. Resolve stubs are stored in hash tables keyed by token, hence the Equals function uses the embedded token in the stub for comparison purposes. Since we are willing to allow duplicates in the hash table (as long as they are relatively rare) we do use direct comparison of the tokens rather than extracting the fields from within the tokens, for perf reasons. 
*/ class ResolveEntry : public Entry { public: //Creates an entry that wraps resolve stub s ResolveEntry (size_t s) { LIMITED_METHOD_CONTRACT; _ASSERTE(VirtualCallStubManager::isResolvingStubStatic((PCODE)s)); stub = (ResolveStub*) s; } //default contructor to allow stack and inline allocation of resovler entries ResolveEntry() { LIMITED_METHOD_CONTRACT; stub = CALL_STUB_EMPTY_ENTRY; } //implementations of abstract class Entry inline BOOL Equals(size_t keyA, size_t keyB) { WRAPPER_NO_CONTRACT; return stub && (keyA == KeyA()) && (keyB == KeyB()); } inline size_t KeyA() { WRAPPER_NO_CONTRACT; return Token(); } inline size_t KeyB() { WRAPPER_NO_CONTRACT; return (size_t)0; } void SetContents(size_t contents) { LIMITED_METHOD_CONTRACT; _ASSERTE(VirtualCallStubManager::isResolvingStubStatic((PCODE)contents)); stub = ResolveHolder::FromResolveEntry((PCODE)contents)->stub(); } //extract the token of the underlying resolve stub inline size_t Token() { WRAPPER_NO_CONTRACT; return stub ? (size_t)(stub->token()) : 0; } private: ResolveStub* stub; //the stub the entry is wrapping }; /********************************************************************************************** DispatchEntry wraps DispatchStubs and provide the concrete implementation of the abstract class Entry. Monomorphic and mostly monomorphic call sites eventually point to DispatchStubs. Dispatch stubs are placed in hash and cache tables keyed by the expected Method Table and token they are built for. Since we are willing to allow duplicates in the hash table (as long as they are relatively rare) we do use direct comparison of the tokens rather than extracting the fields from within the tokens, for perf reasons.*/ class DispatchEntry : public Entry { public: //Creates an entry that wraps dispatch stub s DispatchEntry (size_t s) { LIMITED_METHOD_CONTRACT; _ASSERTE(VirtualCallStubManager::isDispatchingStubStatic((PCODE)s)); stub = (DispatchStub*) s; } //default contructor to allow stack and inline allocation of resovler entries DispatchEntry() { LIMITED_METHOD_CONTRACT; stub = CALL_STUB_EMPTY_ENTRY; } //implementations of abstract class Entry inline BOOL Equals(size_t keyA, size_t keyB) { WRAPPER_NO_CONTRACT; return stub && (keyA == KeyA()) && (keyB == KeyB()); } inline size_t KeyA() { WRAPPER_NO_CONTRACT; return Token(); } inline size_t KeyB() { WRAPPER_NO_CONTRACT; return ExpectedMT();} void SetContents(size_t contents) { LIMITED_METHOD_CONTRACT; _ASSERTE(VirtualCallStubManager::isDispatchingStubStatic((PCODE)contents)); stub = DispatchHolder::FromDispatchEntry((PCODE)contents)->stub(); } //extract the fields of the underlying dispatch stub inline size_t ExpectedMT() { WRAPPER_NO_CONTRACT; return stub ? (size_t)(stub->expectedMT()) : 0; } size_t Token() { WRAPPER_NO_CONTRACT; if (stub) { ResolveHolder * resolveHolder = ResolveHolder::FromFailEntry(stub->failTarget()); size_t token = resolveHolder->stub()->token(); _ASSERTE(token == VirtualCallStubManager::GetTokenFromStub((PCODE)stub)); return token; } else { return 0; } } inline PCODE Target() { WRAPPER_NO_CONTRACT; return stub ? stub->implTarget() : 0; } private: DispatchStub* stub; }; /************************************************************************************************* DispatchCache is the cache table that the resolve stubs use for inline polymorphic resolution of a call. The cache entry is logically a triplet of (method table, token, impl address) where method table is the type of the calling frame's , token identifies the method being invoked, i.e. 
is a (type id,slot #) pair, and impl address is the address of the method implementation. */ class DispatchCache { public: static const UINT16 INVALID_HASH = (UINT16)(-1); DispatchCache(); //read and write the cache keyed by (method table,token) pair. inline ResolveCacheElem* Lookup(size_t token, void* mt) { WRAPPER_NO_CONTRACT; return Lookup(token, INVALID_HASH, mt);} ResolveCacheElem* Lookup(size_t token, UINT16 tokenHash, void* mt); enum InsertKind {IK_NONE, IK_DISPATCH, IK_RESOLVE, IK_SHARED, IK_EXTERNAL}; BOOL Insert(ResolveCacheElem* elem, InsertKind insertKind); #ifdef CHAIN_LOOKUP void PromoteChainEntry(ResolveCacheElem* elem); #endif // This is the heavyweight hashing algorithm. Use sparingly. static UINT16 HashToken(size_t token); inline void GetLoadFactor(size_t *total, size_t *used) { LIMITED_METHOD_CONTRACT; *total = CALL_STUB_CACHE_SIZE; size_t count = 0; for (size_t i = 0; i < CALL_STUB_CACHE_SIZE; i++) if (cache[i] != empty) count++; *used = count; } inline void *GetCacheBaseAddr() { LIMITED_METHOD_CONTRACT; return &cache[0]; } inline size_t GetCacheCount() { LIMITED_METHOD_CONTRACT; return CALL_STUB_CACHE_SIZE; } inline ResolveCacheElem *GetCacheEntry(size_t idx) { LIMITED_METHOD_CONTRACT; return VolatileLoad(&cache[idx]); } inline BOOL IsCacheEntryEmpty(size_t idx) { LIMITED_METHOD_CONTRACT; return cache[idx] == empty; } inline void SetCacheEntry(size_t idx, ResolveCacheElem *elem) { LIMITED_METHOD_CONTRACT; #ifdef STUB_LOGGING cacheData[idx].numWrites++; #endif #ifdef CHAIN_LOOKUP CONSISTENCY_CHECK(m_writeLock.OwnedByCurrentThread()); #endif cache[idx] = elem; } inline void ClearCacheEntry(size_t idx) { LIMITED_METHOD_CONTRACT; #ifdef STUB_LOGGING cacheData[idx].numClears++; #endif cache[idx] = empty; } struct { UINT32 insert_cache_external; //# of times Insert was called for IK_EXTERNAL UINT32 insert_cache_shared; //# of times Insert was called for IK_SHARED UINT32 insert_cache_dispatch; //# of times Insert was called for IK_DISPATCH UINT32 insert_cache_resolve; //# of times Insert was called for IK_RESOLVE UINT32 insert_cache_hit; //# of times Insert found an empty cache entry UINT32 insert_cache_miss; //# of times Insert already had a matching cache entry UINT32 insert_cache_collide; //# of times Insert found a used cache entry UINT32 insert_cache_write; //# of times Insert wrote a cache entry } stats; void LogStats(); // Unlocked iterator of entries. Use only when read/write access to the cache // is safe. This would typically be at GC sync points, currently needed during // appdomain unloading. class Iterator { public: Iterator(DispatchCache *pCache); inline BOOL IsValid() { WRAPPER_NO_CONTRACT; return (m_curBucket < (INT32)m_pCache->GetCacheCount()); } void Next(); // Unlink the current entry. // **NOTE** Using this method implicitly performs a call to Next to move // past the unlinked entry. Thus, one could accidentally skip // entries unless you take this into consideration. 
ResolveCacheElem *UnlinkEntry(); inline ResolveCacheElem *Entry() { LIMITED_METHOD_CONTRACT; CONSISTENCY_CHECK(IsValid()); return *m_ppCurElem; } private: void NextValidBucket(); inline void NextBucket() { LIMITED_METHOD_CONTRACT; m_curBucket++; m_ppCurElem = &m_pCache->cache[m_curBucket]; } DispatchCache *m_pCache; INT32 m_curBucket; ResolveCacheElem **m_ppCurElem; }; private: #ifdef CHAIN_LOOKUP Crst m_writeLock; #endif //the following hash computation is also inlined in the resolve stub in asm (SO NO TOUCHIE) inline static UINT16 HashMT(UINT16 tokenHash, void* mt) { LIMITED_METHOD_CONTRACT; UINT16 hash; size_t mtHash = (size_t) mt; mtHash = (((mtHash >> CALL_STUB_CACHE_NUM_BITS) + mtHash) >> LOG2_PTRSIZE) & CALL_STUB_CACHE_MASK; hash = (UINT16) mtHash; hash ^= (tokenHash & CALL_STUB_CACHE_MASK); return hash; } ResolveCacheElem* cache[CALL_STUB_CACHE_SIZE]; //must be first ResolveCacheElem* empty; //empty entry, initialized to fail all comparisons #ifdef STUB_LOGGING public: struct CacheEntryData { UINT32 numWrites; UINT16 numClears; }; CacheEntryData cacheData[CALL_STUB_CACHE_SIZE]; #endif // STUB_LOGGING }; /************************************************************************************************** The hash tables are accessed via instances of the Prober. Prober is a probe into a bucket of the hash table, and therefore has an index which is the current probe position. It includes a count of the number of probes done in that bucket so far and a stride to step thru the bucket with. To do comparisons, it has a reference to an entry with which it can do comparisons (Equals(...)) of the entries (stubs) inside the hash table. It also has the key pair (keyA, keyB) that it is looking for. Typically, an entry of the appropriate type is created on the stack and then the prober is created passing in a reference to the entry. The prober is used for a complete operation, such as look for and find an entry (stub), creating and inserting it as necessary. The initial index and the stride are orthogonal hashes of the key pair, i.e. we are doing a varient of double hashing. When we initialize the prober (see FormHash below) we set the initial probe based on one hash. The stride (used as a modulo addition of the probe position) is based on a different hash and is such that it will vist every location in the bucket before repeating. Hence it is imperative that the bucket size and the stride be relative prime wrt each other. We have chosen to make bucket sizes a power of 2, so we force stride to be odd. Note -- it must be assumed that multiple probers are walking the same tables and buckets at the same time. Additionally, the counts may not be accurate, and there may be duplicates in the tables. Since the tables do not allow concurrrent deletion, some of the concurrency issues are ameliorated. */ class Prober { friend class FastTable; friend class BucketTable; public: Prober(Entry* e) {LIMITED_METHOD_CONTRACT; comparer = e;} //find the requested entry, if not there return CALL_STUB_EMPTY_ENTRY size_t Find(); //add the entry into the bucket, if it is not already in the bucket. 
//return the entry actually in the bucket (existing or added) size_t Add(size_t entry); private: //return the bucket (FastTable*) that the prober is currently walking inline size_t* items() {LIMITED_METHOD_CONTRACT; return &base[-CALL_STUB_FIRST_INDEX];} //are there more probes possible, or have we probed everything in the bucket inline BOOL NoMore() {LIMITED_METHOD_CONTRACT; return probes>mask;} //both probes and mask are (-1) //advance the probe to a new place in the bucket inline BOOL Next() { WRAPPER_NO_CONTRACT; index = (index + stride) & mask; probes++; return !NoMore(); } inline size_t Read() { LIMITED_METHOD_CONTRACT; _ASSERTE(base); return VolatileLoad(&base[index]); } //initialize a prober across a bucket (table) for the specified keys. void InitProber(size_t key1, size_t key2, size_t* table); //set up the initial index and stride and probe count inline void FormHash() { LIMITED_METHOD_CONTRACT; probes = 0; //these two hash functions have not been formally measured for effectiveness //but they are at least orthogonal size_t a = ((keyA>>16) + keyA); size_t b = ((keyB>>16) ^ keyB); index = (((a*CALL_STUB_HASH_CONST1)>>4)+((b*CALL_STUB_HASH_CONST2)>>4)+CALL_STUB_HASH_CONST1) & mask; stride = ((a+(b*CALL_STUB_HASH_CONST1)+CALL_STUB_HASH_CONST2) | 1) & mask; } //atomically grab an empty slot so we can insert a new entry into the bucket BOOL GrabEntry(size_t entryValue); size_t keyA; //key pair we are looking for size_t keyB; size_t* base; //we have our own pointer to the bucket, so races don't matter. // We won't care if we do the lookup in an // outdated bucket (has grown out from under us). // All that will happen is possibly dropping an entry // on the floor or adding a duplicate. size_t index; //current probe point in the bucket size_t stride; //amount to step on each successive probe, must be relatively prime wrt the bucket size size_t mask; //size of bucket - 1 size_t probes; //number probes - 1 Entry* comparer;//used to compare an entry against the sought after key pair }; /******************************************************************************************************** FastTable is used to implement the buckets of a BucketTable, a bucketized hash table. A FastTable is an array of entries (contents). The first two slots of contents store the size-1 and count of entries actually in the FastTable. Note that the count may be inaccurate and there may be duplicates. Careful attention must be paid to eliminate the need for interlocked or serialized or locked operations in face of concurrency. */ #ifdef _MSC_VER #pragma warning(push) #pragma warning(disable : 4200) // disable zero-sized array warning #endif // _MSC_VER class FastTable { friend class BucketTable; public: private: FastTable() { LIMITED_METHOD_CONTRACT; } ~FastTable() { LIMITED_METHOD_CONTRACT; } //initialize a prober for the specified keys. inline BOOL SetUpProber(size_t keyA, size_t keyB, Prober* probe) { CONTRACTL { NOTHROW; GC_NOTRIGGER; FORBID_FAULT; } CONTRACTL_END; _ASSERTE(probe); _ASSERTE(contents); probe->InitProber(keyA, keyB, &contents[0]); return TRUE; } //find the requested entry (keys of prober), if not there return CALL_STUB_EMPTY_ENTRY size_t Find(Prober* probe); //add the entry, if it is not already there. Probe is used to search. //Return the entry actually containted (existing or added) size_t Add(size_t entry, Prober* probe); void IncrementCount(); // Create a FastTable with space for numberOfEntries. Please note that this method // does not throw on OOM. 
**YOU MUST CHECK FOR NULL RETURN** static FastTable* MakeTable(size_t numberOfEntries) { CONTRACTL { THROWS; GC_TRIGGERS; INJECT_FAULT(COMPlusThrowOM();); } CONTRACTL_END; size_t size = CALL_STUB_MIN_ENTRIES; while (size < numberOfEntries) {size = size<<1;} // if (size == CALL_STUB_MIN_ENTRIES) // size += 3; size_t* bucket = new size_t[(sizeof(FastTable)/sizeof(size_t))+size+CALL_STUB_FIRST_INDEX]; FastTable* table = new (bucket) FastTable(); table->InitializeContents(size); return table; } //Initialize as empty void InitializeContents(size_t size) { LIMITED_METHOD_CONTRACT; memset(&contents[0], CALL_STUB_EMPTY_ENTRY, (size+CALL_STUB_FIRST_INDEX)*sizeof(BYTE*)); contents[CALL_STUB_MASK_INDEX] = size-1; } inline size_t tableMask() {LIMITED_METHOD_CONTRACT; return (size_t) (contents[CALL_STUB_MASK_INDEX]);} inline size_t tableSize() {LIMITED_METHOD_CONTRACT; return tableMask()+1;} inline size_t tableCount() {LIMITED_METHOD_CONTRACT; return (size_t) (contents[CALL_STUB_COUNT_INDEX]);} inline BOOL isFull() { LIMITED_METHOD_CONTRACT; return (tableCount()+1) * 100 / CALL_STUB_LOAD_FACTOR >= tableSize(); } //we store (size-1) in bucket[CALL_STUB_MASK_INDEX==0], //we store the used count in bucket[CALL_STUB_COUNT_INDEX==1], //we have an unused cell to use as a temp at bucket[CALL_STUB_DEAD_LINK==2], //and the table starts at bucket[CALL_STUB_FIRST_INDEX==3], size_t contents[]; }; #ifdef _MSC_VER #pragma warning(pop) #endif /****************************************************************************************************** BucketTable is a bucketized hash table. It uses FastTables for its buckets. The hash tables used by the VirtualCallStubManager are BucketTables. The number of buckets is fixed at the time the table is created. The actual buckets are allocated as needed, and grow as necessary. The reason for using buckets is primarily to reduce the cost of growing, since only a single bucket is actually grown at any given time. Since the hash tables are accessed infrequently, the load factor that controls growth is quite high (90%). Since we use hashing to pick the bucket, and we use hashing to lookup inside the bucket, it is important that the hashing function used here is orthogonal to the ones used in the buckets themselves (see FastTable::FormHash). */ class BucketTable { public: BucketTable(size_t numberOfBuckets) { WRAPPER_NO_CONTRACT; size_t size = CALL_STUB_MIN_BUCKETS; while (size < numberOfBuckets) {size = size<<1;} buckets = AllocateBuckets(size); // Initialize statistics counters memset(&stats, 0, sizeof(stats)); } ~BucketTable() { LIMITED_METHOD_CONTRACT; if(buckets != NULL) { size_t size = bucketCount()+CALL_STUB_FIRST_INDEX; for(size_t ix = CALL_STUB_FIRST_INDEX; ix < size; ix++) delete (FastTable*)(buckets[ix]); delete buckets; } } //initialize a prober for the specified keys. BOOL SetUpProber(size_t keyA, size_t keyB, Prober *prober); //find the requested entry (keys of prober), if not there return CALL_STUB_EMPTY_ENTRY inline size_t Find(Prober* probe) {WRAPPER_NO_CONTRACT; return probe->Find();} //add the entry, if it is not already there. Probe is used to search. size_t Add(size_t entry, Prober* probe); //reclaim abandoned buckets. Buckets are abaondoned when they need to grow. //needs to be called inside a gc sync point. static void Reclaim(); struct { UINT32 bucket_space; //# of bytes in caches and tables, not including the stubs themselves UINT32 bucket_space_dead; //# of bytes of abandoned buckets not yet recycled. 
} stats; void LogStats(); private: inline size_t bucketMask() {LIMITED_METHOD_CONTRACT; return (size_t) (buckets[CALL_STUB_MASK_INDEX]);} inline size_t bucketCount() {LIMITED_METHOD_CONTRACT; return bucketMask()+1;} inline size_t ComputeBucketIndex(size_t keyA, size_t keyB) { LIMITED_METHOD_CONTRACT; size_t a = ((keyA>>16) + keyA); size_t b = ((keyB>>16) ^ keyB); return CALL_STUB_FIRST_INDEX+(((((a*CALL_STUB_HASH_CONST2)>>5)^((b*CALL_STUB_HASH_CONST1)>>5))+CALL_STUB_HASH_CONST2) & bucketMask()); } //grows the bucket referenced by probe. BOOL GetMoreSpace(const Prober* probe); //creates storage in which to store references to the buckets static size_t* AllocateBuckets(size_t size) { LIMITED_METHOD_CONTRACT; size_t* buckets = new size_t[size+CALL_STUB_FIRST_INDEX]; if (buckets != NULL) { memset(&buckets[0], CALL_STUB_EMPTY_ENTRY, (size+CALL_STUB_FIRST_INDEX)*sizeof(void*)); buckets[CALL_STUB_MASK_INDEX] = size-1; } return buckets; } inline size_t Read(size_t index) { LIMITED_METHOD_CONTRACT; CONSISTENCY_CHECK(index <= bucketMask()+CALL_STUB_FIRST_INDEX); return VolatileLoad(&buckets[index]); } #ifdef _MSC_VER #pragma warning(disable: 4267) //work-around for the compiler #endif inline void Write(size_t index, size_t value) { LIMITED_METHOD_CONTRACT; CONSISTENCY_CHECK(index <= bucketMask()+CALL_STUB_FIRST_INDEX); VolatileStore(&buckets[index], value); } #ifdef _MSC_VER #pragma warning(default: 4267) #endif // We store (#buckets-1) in bucket[CALL_STUB_MASK_INDEX ==0] // We have two unused cells at bucket[CALL_STUB_COUNT_INDEX ==1] // and bucket[CALL_STUB_DEAD_LINK ==2] // and the table starts at bucket[CALL_STUB_FIRST_INDEX ==3] // the number of elements is bucket[CALL_STUB_MASK_INDEX]+CALL_STUB_FIRST_INDEX size_t* buckets; static FastTable* dead; //linked list head of to be deleted (abandoned) buckets }; #endif // !CROSSGEN_COMPILE #endif // !_VIRTUAL_CALL_STUB_H