diff options
Diffstat (limited to 'src/vm/virtualcallstub.h')
-rw-r--r-- | src/vm/virtualcallstub.h | 1625 |
1 files changed, 1625 insertions, 0 deletions
diff --git a/src/vm/virtualcallstub.h b/src/vm/virtualcallstub.h
new file mode 100644
index 0000000000..b3f736fcae
--- /dev/null
+++ b/src/vm/virtualcallstub.h
@@ -0,0 +1,1625 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+// File: VirtualCallStub.h
+//
+
+
+
+//
+// See code:VirtualCallStubManager for details
+//
+// ============================================================================
+
+#ifndef _VIRTUAL_CALL_STUB_H
+#define _VIRTUAL_CALL_STUB_H
+
+#ifndef CROSSGEN_COMPILE
+
+#define CHAIN_LOOKUP
+
+#if defined(_TARGET_X86_)
+// If this is uncommented, leaves a file "StubLog_<pid>.log" with statistics on the behavior
+// of stub-based interface dispatch.
+//#define STUB_LOGGING
+#endif
+
+#include "stubmgr.h"
+
+/////////////////////////////////////////////////////////////////////////////////////
+// Forward class declarations
+class FastTable;
+class BucketTable;
+class Entry;
+class Prober;
+class VirtualCallStubManager;
+class VirtualCallStubManagerManager;
+struct LookupHolder;
+struct DispatchHolder;
+struct ResolveHolder;
+
+/////////////////////////////////////////////////////////////////////////////////////
+// Forward function declarations
+extern "C" void InContextTPQuickDispatchAsmStub();
+
+extern "C" PCODE STDCALL StubDispatchFixupWorker(TransitionBlock * pTransitionBlock,
+                                                 TADDR siteAddrForRegisterIndirect,
+                                                 DWORD sectionIndex,
+                                                 Module * pModule);
+
+extern "C" PCODE STDCALL VSD_ResolveWorker(TransitionBlock * pTransitionBlock,
+                                           TADDR siteAddrForRegisterIndirect,
+                                           size_t token
+#ifndef _TARGET_X86_
+                                           , UINT_PTR flags
+#endif
+                                           );
+
+#ifdef FEATURE_REMOTING
+// This is used by TransparentProxyWorkerStub to take a stub address (token), and
+// MethodTable and return the target. It will look in the cache first, and if not found
+// will call the resolver and then put the result into the cache.
+extern "C" PCODE STDCALL VSD_GetTargetForTPWorkerQuick(TransparentProxyObject * orTP, size_t token);
+extern "C" PCODE STDCALL VSD_GetTargetForTPWorker(TransitionBlock * pTransitionBlock, size_t token);
+#endif
+
+/////////////////////////////////////////////////////////////////////////////////////
+#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+typedef INT32 DISPL;
+#endif
+
+/////////////////////////////////////////////////////////////////////////////////////
+// Represents the struct that is added to the resolve cache
+// NOTE: If you change the layout of this struct, you'll need to update various
+//       ASM helpers in VirtualCallStubCpu that rely on offsets of members.
+//
+struct ResolveCacheElem
+{
+    void *pMT;
+    size_t token;   // DispatchToken
+    void *target;
+
+    // These are used for chaining
+    ResolveCacheElem *pNext;
+    ResolveCacheElem *Next()
+    { LIMITED_METHOD_CONTRACT; return VolatileLoad(&pNext); }
+
+#ifdef _DEBUG
+    UINT16 debug_hash;
+    UINT16 debug_index;
+#endif // _DEBUG
+
+    BOOL Equals(size_t token, void *pMT)
+    { LIMITED_METHOD_CONTRACT; return (this->pMT == pMT && this->token == token); }
+
+    BOOL Equals(ResolveCacheElem *pElem)
+    { WRAPPER_NO_CONTRACT; return Equals(pElem->token, pElem->pMT); }
+
+};
+
+enum
+{
+    e_resolveCacheElem_sizeof_mt     = sizeof(void *),
+    e_resolveCacheElem_sizeof_token  = sizeof(size_t),
+    e_resolveCacheElem_sizeof_target = sizeof(void *),
+    e_resolveCacheElem_sizeof_next   = sizeof(ResolveCacheElem *),
+
+    e_resolveCacheElem_offset_mt     = 0,
+    e_resolveCacheElem_offset_token  = e_resolveCacheElem_offset_mt + e_resolveCacheElem_sizeof_mt,
+    e_resolveCacheElem_offset_target = e_resolveCacheElem_offset_token + e_resolveCacheElem_sizeof_token,
+    e_resolveCacheElem_offset_next   = e_resolveCacheElem_offset_target + e_resolveCacheElem_sizeof_target,
+};
+
+/////////////////////////////////////////////////////////////////////////////////////
+// A utility class to help manipulate a call site
+struct StubCallSite
+{
+    friend class VirtualCallStubManager;
+
+private:
+
+    // On x86 there are four possible kinds of callsites when you take into account all features:
+    //  Relative:                  direct call, e.g. "call addr". Not used currently.
+    //  RelativeIndirect (JmpRel): indirect call through a relative address, e.g. "call [addr]"
+    //  RegisterIndirect:          indirect call through a register, e.g. "call [eax]"
+    //  DelegateCallSite:          anything else, tail called through a register by a shuffle thunk, e.g. "jmp [eax]"
+    //
+    // On all other platforms we always use an indirect call through an indirection cell.
+    // In these cases all calls are made by the platform equivalent of "call [addr]".
+    //
+    // DelegateCallSites are particular in that they can come in a variety of forms:
+    // a direct delegate call has a sequence defined by the jit, but a multicast or secure delegate
+    // call is defined in a stub and has a different shape.
+    //
+    PTR_PCODE       m_siteAddr;     // Stores the address of an indirection cell
+    PCODE           m_returnAddr;
+
+public:
+
+#if defined(_TARGET_X86_)
+    StubCallSite(TADDR siteAddrForRegisterIndirect, PCODE returnAddr);
+
+    PCODE           GetCallerAddress();
+#else // !defined(_TARGET_X86_)
+    // On platforms where we always use an indirection cell things
+    // are much simpler - the siteAddr always stores a pointer to a
+    // value that in turn points to the indirection cell.
+
+    StubCallSite(TADDR siteAddr, PCODE returnAddr)
+        { LIMITED_METHOD_CONTRACT; m_siteAddr = dac_cast<PTR_PCODE>(siteAddr); m_returnAddr = returnAddr; }
+
+    PCODE           GetCallerAddress() { LIMITED_METHOD_CONTRACT; return m_returnAddr; }
+#endif // !defined(_TARGET_X86_)
+
+    PCODE           GetSiteTarget()          { WRAPPER_NO_CONTRACT; return *(GetIndirectCell()); }
+    void            SetSiteTarget(PCODE newTarget);
+    PTR_PCODE       GetIndirectCell()        { LIMITED_METHOD_CONTRACT; return dac_cast<PTR_PCODE>(m_siteAddr); }
+    PTR_PCODE *     GetIndirectCellAddress() { LIMITED_METHOD_CONTRACT; return &m_siteAddr; }
+
+    PCODE           GetReturnAddress() { LIMITED_METHOD_CONTRACT; return m_returnAddr; }
+};
+
+#ifdef FEATURE_PREJIT
+extern "C" void StubDispatchFixupStub();              // for lazy fixup of ngen call sites
+#endif
+
+// These are the assembly language entry points that the stubs use when they want to go into the EE
+
+extern "C" void ResolveWorkerAsmStub();               // resolve a token and transfer control to that method
+extern "C" void ResolveWorkerChainLookupAsmStub();    // for chaining of entries in the cache
+
+#ifdef _TARGET_X86_
+extern "C" void BackPatchWorkerAsmStub();             // backpatch a call site to point to a different stub
+#endif // _TARGET_X86_
+
+
+typedef VPTR(class VirtualCallStubManager) PTR_VirtualCallStubManager;
+
+// VirtualCallStubManager is the heart of the stub dispatch logic. See the Book of the Runtime entry
+//
+// file:../../doc/BookOfTheRuntime/ClassLoader/VirtualStubDispatchDesign.doc
+//
+// The basic idea is that a call to an interface (it could also be used for virtual calls in general, but we
+// do not do this) is simply the code
+//
+//     call [DispatchCell]
+//
+// where we make sure 'DispatchCell' points at stubs that will do the right thing. DispatchCell is writable
+// so we can update the code over time. There are three basic types of stubs that the dispatch cell can point
+// to.
+//     * Lookup: The initial stub, which has no 'fast path'. It simply pushes an ID for the interface being called
+//         and calls into the runtime at code:VirtualCallStubManager.ResolveWorkerStatic.
+//     * Dispatch: Lookup stubs are patched to this stub, which has a fast path that checks for a particular
+//         Method Table and, if that check fails, jumps to code that
+//         * Decrements a 'missCount' (starts out as code:STUB_MISS_COUNT_VALUE). If this count goes to zero,
+//             code:VirtualCallStubManager.BackPatchWorkerStatic is called and morphs the call site into a resolve stub
+//             (however, since this decrementing logic is SHARED among all dispatch stubs, it may take
+//             multiples of code:STUB_MISS_COUNT_VALUE if multiple call sites are actively polymorphic; this
+//             seems unlikely).
+//         * Calls a resolve stub. (Whenever a dispatch stub is created, it always has a corresponding resolve
+//             stub, but the resolve stubs are shared among many dispatch stubs.)
+//     * Resolve: see code:ResolveStub. This looks up the Method Table in a process-wide cache (see
+//         code:ResolveCacheElem) and, if found, jumps to it. This code path is about 17 instructions long (so
+//         pretty fast, but certainly much slower than a normal call). If the method table is not found in
+//         the cache, it calls into the runtime code:VirtualCallStubManager.ResolveWorkerStatic, which
+//         populates it.
+// So the general progression is that a call site's cell
+//     * starts out life pointing to a lookup stub
+//     * On the first call it gets updated to a dispatch stub. When this misses, it calls a resolve stub,
+//         which populates the resolve stub's cache, but does not update the call site's cell (thus it is still
+//         pointing at the dispatch stub).
+//     * After code:STUB_MISS_COUNT_VALUE misses, we update the call site's cell to point directly at the
+//         resolve stub (thus avoiding the overhead of the quick check that always seems to be failing and
+//         the miss count update).
+//
+// QUESTION: What are the lifetimes of the various stubs and hash table entries?
+//
+// QUESTION: There does not seem to be any logic that will change a call site's cell once it becomes a
+// resolve stub. Thus once a particular call site becomes a resolve stub we live with the resolve stub's
+// (in)efficiency forever.
+//
+// see code:#StubDispatchNotes for more
+class VirtualCallStubManager : public StubManager
+{
+    friend class ClrDataAccess;
+    friend class VirtualCallStubManagerManager;
+    friend class VirtualCallStubManagerIterator;
+
+    VPTR_VTABLE_CLASS(VirtualCallStubManager, StubManager)
+
+public:
+#ifdef _DEBUG
+    virtual const char * DbgGetName() { LIMITED_METHOD_CONTRACT; return "VirtualCallStubManager"; }
+#endif
+
+    // The reason for our existence: return a callstub for type id and slot number,
+    // where type id = 0 for the class contract (i.e. a virtual call), and type id > 0 for an
+    // interface invoke where the id indicates which interface it is.
+    //
+    // The function is idempotent, i.e.
+    // you'll get the same callstub twice if you call it with identical inputs.
+    PCODE GetCallStub(TypeHandle ownerType, MethodDesc *pMD);
+    PCODE GetCallStub(TypeHandle ownerType, DWORD slot);
+
+    // Generate a fresh indirection cell.
+    BYTE* GenerateStubIndirection(PCODE stub, BOOL fUseRecycledCell = FALSE);
+
+    // Set up static data structures - called during EEStartup
+    static void InitStatic();
+    static void UninitStatic();
+
+    // Per instance initialization - called during AppDomain::Init and ::Uninit and for collectible loader allocators
+    void Init(BaseDomain* pDomain, LoaderAllocator *pLoaderAllocator);
+    void Uninit();
+
+    //@TODO: the logging should be tied into the VM's normal logging mechanisms,
+    //@TODO: for now we just always write a short log file called "StubLog_<pid>.log"
+    static void StartupLogging();
+    static void LoggingDump();
+    static void FinishLogging();
+
+    static void ResetCache();
+
+    // Reclaim/rearrange any structures that can only be done during a gc sync point.
+    // This is the mechanism we are using to avoid synchronization of a lot of our
+    // cache and hash table accesses. We are requiring that during a gc sync point we are not
+    // executing any stub code at all, hence at this time we are serialized on a single thread (gc)
+    // and no other thread is accessing the data structures.
+ static void ReclaimAll(); + void Reclaim(); + +#ifndef DACCESS_COMPILE + VirtualCallStubManager() + : StubManager(), + lookup_rangeList(), + resolve_rangeList(), + dispatch_rangeList(), + cache_entry_rangeList(), + parentDomain(NULL), + isCollectible(false), + m_initialReservedMemForHeaps(NULL), + m_FreeIndCellList(NULL), + m_RecycledIndCellList(NULL), + indcell_heap(NULL), + cache_entry_heap(NULL), + lookup_heap(NULL), + dispatch_heap(NULL), + resolve_heap(NULL), +#ifdef _TARGET_AMD64_ + m_fShouldAllocateLongJumpDispatchStubs(FALSE), +#endif + lookups(NULL), + cache_entries(NULL), + dispatchers(NULL), + resolvers(NULL), + m_counters(NULL), + m_cur_counter_block(NULL), + m_cur_counter_block_for_reclaim(NULL), + m_cur_counter_block_for_reclaim_index(NULL), + m_pNext(NULL) + { + LIMITED_METHOD_CONTRACT; + ZeroMemory(&stats, sizeof(stats)); + } + + ~VirtualCallStubManager(); +#endif // !DACCESS_COMPILE + + + enum StubKind { + SK_UNKNOWN, + SK_LOOKUP, // Lookup Stubs are SLOW stubs that simply call into the runtime to do all work. + SK_DISPATCH, // Dispatch Stubs have a fast check for one type otherwise jumps to runtime. Works for monomorphic sites + SK_RESOLVE, // Resolve Stubs do a hash lookup before fallling back to the runtime. Works for polymorphic sites. + SK_BREAKPOINT + }; + + // peek at the assembly code and predict which kind of a stub we have + StubKind predictStubKind(PCODE stubStartAddress); + + /* know thine own stubs. It is possible that when multiple + virtualcallstub managers are built that these may need to become + non-static, and the callers modified accordingly */ + StubKind getStubKind(PCODE stubStartAddress) + { + WRAPPER_NO_CONTRACT; + SUPPORTS_DAC; + + // This method can called with stubStartAddress==NULL, e.g. when handling null reference exceptions + // caused by IP=0. Early out for this case to avoid confusing handled access violations inside predictStubKind. + if (PCODEToPINSTR(stubStartAddress) == NULL) + return SK_UNKNOWN; + + // Rather than calling IsInRange(stubStartAddress) for each possible stub kind + // we can peek at the assembly code and predict which kind of a stub we have + StubKind predictedKind = predictStubKind(stubStartAddress); + + if (predictedKind == SK_DISPATCH) + { + if (isDispatchingStub(stubStartAddress)) + return SK_DISPATCH; + } + else if (predictedKind == SK_LOOKUP) + { + if (isLookupStub(stubStartAddress)) + return SK_LOOKUP; + } + else if (predictedKind == SK_RESOLVE) + { + if (isResolvingStub(stubStartAddress)) + return SK_RESOLVE; + } + + // This is the slow case. If the predict returned SK_UNKNOWN, SK_BREAKPOINT, + // or the predict was found to be incorrect when checked against the RangeLists + // (isXXXStub), then we'll check each stub heap in sequence. 
+ if (isDispatchingStub(stubStartAddress)) + return SK_DISPATCH; + else if (isLookupStub(stubStartAddress)) + return SK_LOOKUP; + else if (isResolvingStub(stubStartAddress)) + return SK_RESOLVE; + + return SK_UNKNOWN; + } + + inline BOOL isStub(PCODE stubStartAddress) + { + WRAPPER_NO_CONTRACT; + SUPPORTS_DAC; + + return (getStubKind(stubStartAddress) != SK_UNKNOWN); + } + + BOOL isDispatchingStub(PCODE stubStartAddress) + { + WRAPPER_NO_CONTRACT; + SUPPORTS_DAC; + + return GetDispatchRangeList()->IsInRange(stubStartAddress); + } + + BOOL isResolvingStub(PCODE stubStartAddress) + { + WRAPPER_NO_CONTRACT; + SUPPORTS_DAC; + + return GetResolveRangeList()->IsInRange(stubStartAddress); + } + + BOOL isLookupStub(PCODE stubStartAddress) + { + WRAPPER_NO_CONTRACT; + SUPPORTS_DAC; + + return GetLookupRangeList()->IsInRange(stubStartAddress); + } + + static BOOL isDispatchingStubStatic(PCODE addr) + { + WRAPPER_NO_CONTRACT; + StubKind stubKind; + FindStubManager(addr, &stubKind); + return stubKind == SK_DISPATCH; + } + + static BOOL isResolvingStubStatic(PCODE addr) + { + WRAPPER_NO_CONTRACT; + StubKind stubKind; + FindStubManager(addr, &stubKind); + return stubKind == SK_RESOLVE; + } + + static BOOL isLookupStubStatic(PCODE addr) + { + WRAPPER_NO_CONTRACT; + StubKind stubKind; + FindStubManager(addr, &stubKind); + return stubKind == SK_LOOKUP; + } + + //use range lists to track the chunks of memory that are part of each heap + LockedRangeList lookup_rangeList; + LockedRangeList resolve_rangeList; + LockedRangeList dispatch_rangeList; + LockedRangeList cache_entry_rangeList; + + // Get dac-ized pointers to rangelist. + RangeList* GetLookupRangeList() + { + SUPPORTS_DAC; + + TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, lookup_rangeList); + return PTR_RangeList(addr); + } + RangeList* GetResolveRangeList() + { + SUPPORTS_DAC; + + TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, resolve_rangeList); + return PTR_RangeList(addr); + } + RangeList* GetDispatchRangeList() + { + SUPPORTS_DAC; + + TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, dispatch_rangeList); + return PTR_RangeList(addr); + } + RangeList* GetCacheEntryRangeList() + { + SUPPORTS_DAC; + TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, cache_entry_rangeList); + return PTR_RangeList(addr); + } + +private: + + //allocate and initialize a stub of the desired kind + DispatchHolder *GenerateDispatchStub(PCODE addrOfCode, + PCODE addrOfFail, + void *pMTExpected, + size_t dispatchToken); + +#ifdef _TARGET_AMD64_ + // Used to allocate a long jump dispatch stub. See comment around + // m_fShouldAllocateLongJumpDispatchStubs for explaination. 
+ DispatchHolder *GenerateDispatchStubLong(PCODE addrOfCode, + PCODE addrOfFail, + void *pMTExpected, + size_t dispatchToken); +#endif + + ResolveHolder *GenerateResolveStub(PCODE addrOfResolver, + PCODE addrOfPatcher, + size_t dispatchToken); + + LookupHolder *GenerateLookupStub(PCODE addrOfResolver, + size_t dispatchToken); + + template <typename STUB_HOLDER> + void AddToCollectibleVSDRangeList(STUB_HOLDER *holder) + { + if (isCollectible) + { + parentDomain->GetCollectibleVSDRanges()->AddRange(reinterpret_cast<BYTE *>(holder->stub()), + reinterpret_cast<BYTE *>(holder->stub()) + holder->stub()->size(), + this); + } + } + + // The resolve cache is static across all AppDomains + ResolveCacheElem *GenerateResolveCacheElem(void *addrOfCode, + void *pMTExpected, + size_t token); + + ResolveCacheElem *GetResolveCacheElem(void *pMT, + size_t token, + void *target); + + //Given a dispatch token, an object and a method table, determine the + //target address to go to. The return value (BOOL) states whether this address + //is cacheable or not. + static BOOL Resolver(MethodTable * pMT, + DispatchToken token, + OBJECTREF * protectedObj, + PCODE * ppTarget); + + // This can be used to find a target without needing the ability to throw + static BOOL TraceResolver(Object *pObj, DispatchToken token, TraceDestination *trace); + +public: + // Return the MethodDesc corresponding to this token. + static MethodDesc *GetRepresentativeMethodDescFromToken(DispatchToken token, MethodTable *pMT); + static MethodDesc *GetInterfaceMethodDescFromToken(DispatchToken token); + static MethodTable *GetTypeFromToken(DispatchToken token); + + //This is used to get the token out of a stub + static size_t GetTokenFromStub(PCODE stub); + + //This is used to get the token out of a stub and we know the stub manager and stub kind + static size_t GetTokenFromStubQuick(VirtualCallStubManager * pMgr, PCODE stub, StubKind kind); + + // General utility functions + // Quick lookup in the cache. NOTHROW, GC_NOTRIGGER + static PCODE CacheLookup(size_t token, UINT16 tokenHash, MethodTable *pMT); + + // Full exhaustive lookup. THROWS, GC_TRIGGERS + static PCODE GetTarget(DispatchToken token, MethodTable *pMT); + +private: + // Given a dispatch token, return true if the token represents an interface, false if just a slot. + static BOOL IsInterfaceToken(DispatchToken token); + + // Given a dispatch token, return true if the token represents a slot on the target. + static BOOL IsClassToken(DispatchToken token); + +#ifdef CHAIN_LOOKUP + static ResolveCacheElem* __fastcall PromoteChainEntry(ResolveCacheElem *pElem); +#endif + + // Flags used by the non-x86 versions of VSD_ResolveWorker + +#define SDF_ResolveBackPatch (0x01) +#define SDF_ResolvePromoteChain (0x02) +#define SDF_ResolveFlags (0x03) + + // These method needs to call the instance methods. 
+ friend PCODE VSD_ResolveWorker(TransitionBlock * pTransitionBlock, + TADDR siteAddrForRegisterIndirect, + size_t token +#ifndef _TARGET_X86_ + , UINT_PTR flags +#endif + ); + + //These are the entrypoints that the stubs actually end up calling via the + // xxxAsmStub methods above + static void STDCALL BackPatchWorkerStatic(PCODE returnAddr, TADDR siteAddrForRegisterIndirect); + +public: + PCODE ResolveWorker(StubCallSite* pCallSite, OBJECTREF *protectedObj, DispatchToken token, StubKind stubKind); + void BackPatchWorker(StubCallSite* pCallSite); + + //Change the callsite to point to stub + void BackPatchSite(StubCallSite* pCallSite, PCODE stub); + +public: + /* the following two public functions are to support tracing or stepping thru + stubs via the debugger. */ + virtual BOOL CheckIsStub_Internal(PCODE stubStartAddress); + virtual BOOL TraceManager(Thread *thread, + TraceDestination *trace, + T_CONTEXT *pContext, + BYTE **pRetAddr); + size_t GetSize() + { + LIMITED_METHOD_CONTRACT; + size_t retval=0; + if(indcell_heap) + retval+=indcell_heap->GetSize(); + if(cache_entry_heap) + retval+=cache_entry_heap->GetSize(); + if(lookup_heap) + retval+=lookup_heap->GetSize(); + if(dispatch_heap) + retval+=dispatch_heap->GetSize(); + if(resolve_heap) + retval+=resolve_heap->GetSize(); + return retval; + }; + +private: + /* the following two private functions are to support tracing or stepping thru + stubs via the debugger. */ + virtual BOOL DoTraceStub(PCODE stubStartAddress, + TraceDestination *trace); + +private: + // The parent domain of this manager + PTR_BaseDomain parentDomain; + bool isCollectible; + + BYTE * m_initialReservedMemForHeaps; + + static const UINT32 INDCELLS_PER_BLOCK = 32; // 32 indirection cells per block. + + CrstExplicitInit m_indCellLock; + + // List of free indirection cells. The cells were directly allocated from the loader heap + // (code:VirtualCallStubManager::GenerateStubIndirection) + BYTE * m_FreeIndCellList; + + // List of recycled indirection cells. The cells were recycled from finalized dynamic methods + // (code:LCGMethodResolver::RecycleIndCells). + BYTE * m_RecycledIndCellList; + +#ifndef DACCESS_COMPILE + // This methods returns the a free cell from m_FreeIndCellList. It returns NULL if the list is empty. + BYTE * GetOneFreeIndCell() + { + WRAPPER_NO_CONTRACT; + + return GetOneIndCell(&m_FreeIndCellList); + } + + // This methods returns the a recycled cell from m_RecycledIndCellList. It returns NULL if the list is empty. + BYTE * GetOneRecycledIndCell() + { + WRAPPER_NO_CONTRACT; + + return GetOneIndCell(&m_RecycledIndCellList); + } + + // This methods returns the a cell from ppList. It returns NULL if the list is empty. 
+ BYTE * GetOneIndCell(BYTE ** ppList) + { + CONTRACT (BYTE*) { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + PRECONDITION(CheckPointer(ppList)); + PRECONDITION(m_indCellLock.OwnedByCurrentThread()); + } CONTRACT_END; + + BYTE * temp = *ppList; + + if (temp) + { + BYTE * pNext = *((BYTE **)temp); + *ppList = pNext; + RETURN temp; + } + + RETURN NULL; + } + + // insert a linked list of indirection cells at the beginning of m_FreeIndCellList + void InsertIntoFreeIndCellList(BYTE * head, BYTE * tail) + { + WRAPPER_NO_CONTRACT; + + InsertIntoIndCellList(&m_FreeIndCellList, head, tail); + } + + // insert a linked list of indirection cells at the beginning of ppList + void InsertIntoIndCellList(BYTE ** ppList, BYTE * head, BYTE * tail) + { + CONTRACTL { + NOTHROW; + GC_NOTRIGGER; + PRECONDITION(CheckPointer(ppList)); + PRECONDITION(CheckPointer(head)); + PRECONDITION(CheckPointer(tail)); + PRECONDITION(m_indCellLock.OwnedByCurrentThread()); + } CONTRACTL_END; + + BYTE * temphead = *ppList; + *((BYTE**)tail) = temphead; + *ppList = head; + } +#endif // !DACCESS_COMPILE + + PTR_LoaderHeap indcell_heap; // indirection cells go here + PTR_LoaderHeap cache_entry_heap; // resolve cache elem entries go here + PTR_LoaderHeap lookup_heap; // lookup stubs go here + PTR_LoaderHeap dispatch_heap; // dispatch stubs go here + PTR_LoaderHeap resolve_heap; // resolve stubs go here + +#ifdef _TARGET_AMD64_ + // When we layout the stub heaps, we put them close together in a sequential order + // so that we maximize performance with respect to branch predictions. On AMD64, + // dispatch stubs use a rel32 jump on failure to the resolve stub. This works for + // a while because of the ordering, but as soon as we have to start allocating more + // memory for either the dispatch or resolve heaps we have a chance that we'll be + // further away than a rel32 jump can reach, because we're in a 64-bit address + // space. As such, this flag will indicate when we allocate the first dispatch stub + // that cannot reach a resolve stub, and when this happens we'll switch over to + // allocating the larger version of the dispatch stub which contains an abs64 jump. + //@TODO: This is a bit of a workaround, but the limitations of LoaderHeap require that we + //@TODO: take this approach. Hopefully in Orcas we'll have a chance to rewrite LoaderHeap. + BOOL m_fShouldAllocateLongJumpDispatchStubs; // Defaults to FALSE. +#endif + + BucketTable * lookups; // hash table of lookups keyed by tokens + BucketTable * cache_entries; // hash table of dispatch token/target structs for dispatch cache + BucketTable * dispatchers; // hash table of dispatching stubs keyed by tokens/actualtype + BucketTable * resolvers; // hash table of resolvers keyed by tokens/resolverstub + + // This structure is used to keep track of the fail counters. + // We only need one fail counter per ResolveStub, + // and most programs use less than 250 ResolveStubs + // We allocate these on the main heap using "new counter block" + struct counter_block + { + static const UINT32 MAX_COUNTER_ENTRIES = 256-2; // 254 counters should be enough for most cases. 
+ + counter_block * next; // the next block + UINT32 used; // the index of the next free entry + INT32 block[MAX_COUNTER_ENTRIES]; // the counters + }; + + counter_block *m_counters; // linked list of counter blocks of failure counters + counter_block *m_cur_counter_block; // current block for updating counts + counter_block *m_cur_counter_block_for_reclaim; // current block for updating + UINT32 m_cur_counter_block_for_reclaim_index; // index into the current block for updating + + // Used to keep track of all the VCSManager objects in the system. + PTR_VirtualCallStubManager m_pNext; // Linked list pointer + +public: + // Given a stub address, find the VCSManager that owns it. + static VirtualCallStubManager *FindStubManager(PCODE addr, + StubKind* wbStubKind = NULL); + +#ifndef DACCESS_COMPILE + // insert a linked list of indirection cells at the beginning of m_RecycledIndCellList + void InsertIntoRecycledIndCellList_Locked(BYTE * head, BYTE * tail) + { + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + MODE_ANY; + } CONTRACTL_END; + + CrstHolder lh(&m_indCellLock); + + InsertIntoIndCellList(&m_RecycledIndCellList, head, tail); + } +#endif // !DACCESS_COMPILE + + // These are the counters for keeping statistics + struct + { + UINT32 site_counter; //# of call sites + UINT32 stub_lookup_counter; //# of lookup stubs + UINT32 stub_poly_counter; //# of resolve stubs + UINT32 stub_mono_counter; //# of dispatch stubs + UINT32 site_write; //# of call site backpatch writes + UINT32 site_write_poly; //# of call site backpatch writes to point to resolve stubs + UINT32 site_write_mono; //# of call site backpatch writes to point to dispatch stubs + UINT32 worker_call; //# of calls into ResolveWorker + UINT32 worker_call_no_patch; //# of times call_worker resulted in no patch + UINT32 worker_collide_to_mono; //# of times we converted a poly stub to a mono stub instead of writing the cache entry + UINT32 stub_space; //# of bytes of stubs + UINT32 cache_entry_counter; //# of cache structs + UINT32 cache_entry_space; //# of bytes used by cache lookup structs + } stats; + + void LogStats(); + +#ifdef DACCESS_COMPILE +protected: + virtual void DoEnumMemoryRegions(CLRDataEnumMemoryFlags flags); + virtual LPCWSTR GetStubManagerName(PCODE addr) + { + WRAPPER_NO_CONTRACT; + CONSISTENCY_CHECK(isStub(addr)); + + if (isLookupStub(addr)) + { + return W("VSD_LookupStub"); + } + else if (isDispatchingStub(addr)) + { + return W("VSD_DispatchStub"); + } + else + { + CONSISTENCY_CHECK(isResolvingStub(addr)); + return W("VSD_ResolveStub"); + } + } +#endif +}; + +/******************************************************************************************************** +********************************************************************************************************/ +typedef VPTR(class VirtualCallStubManagerManager) PTR_VirtualCallStubManagerManager; + +class VirtualCallStubManagerIterator; +class VirtualCallStubManagerManager : public StubManager +{ + VPTR_VTABLE_CLASS(VirtualCallStubManagerManager, StubManager) + + friend class StubManager; + friend class VirtualCallStubManager; + friend class VirtualCallStubManagerIterator; + friend class StubManagerIterator; + + public: + virtual BOOL TraceManager(Thread *thread, TraceDestination *trace, + T_CONTEXT *pContext, BYTE **pRetAddr); + + virtual BOOL CheckIsStub_Internal(PCODE stubStartAddress); + + virtual BOOL DoTraceStub(PCODE stubStartAddress, TraceDestination *trace); + + static MethodDesc *Entry2MethodDesc(PCODE stubStartAddress, MethodTable *pMT); + +#ifdef 
DACCESS_COMPILE + virtual void DoEnumMemoryRegions(CLRDataEnumMemoryFlags flags); + virtual LPCWSTR GetStubManagerName(PCODE addr) + { WRAPPER_NO_CONTRACT; return FindVirtualCallStubManager(addr)->GetStubManagerName(addr); } +#endif + + private: + // Used to keep track of all the VCSManager objects in the system. + PTR_VirtualCallStubManager m_pManagers; // Head of the linked list + +#ifndef DACCESS_COMPILE + // Ctor. This is only used by StaticInit. + VirtualCallStubManagerManager(); +#endif + + // A cache element to quickly check the last matched manager. + Volatile<VirtualCallStubManager*> m_pCacheElem; + + // RW lock for reading entries and removing them. + SimpleRWLock m_RWLock; + + // This will look through all the managers in an intelligent fashion to + // find the manager that owns the address. + VirtualCallStubManager *FindVirtualCallStubManager(PCODE stubAddress); + + protected: + // Add a VCSManager to the linked list. + void AddStubManager(VirtualCallStubManager *pMgr); + + // Remove a VCSManager from the linked list. + void RemoveStubManager(VirtualCallStubManager *pMgr); + + VirtualCallStubManager *FirstManager() + { WRAPPER_NO_CONTRACT; return m_pManagers; } + +#ifndef DACCESS_COMPILE + static void InitStatic(); +#endif + + public: + SPTR_DECL(VirtualCallStubManagerManager, g_pManager); + + static VirtualCallStubManagerManager *GlobalManager() + { LIMITED_METHOD_DAC_CONTRACT; CONSISTENCY_CHECK(CheckPointer(g_pManager)); return g_pManager; } + + VirtualCallStubManagerIterator IterateVirtualCallStubManagers(); + +#ifdef _DEBUG + // Debug helper to help identify stub-managers. + virtual const char * DbgGetName() { LIMITED_METHOD_CONTRACT; return "VirtualCallStubManagerManager"; } +#endif +}; + +/******************************************************************************************************** +********************************************************************************************************/ +class VirtualCallStubManagerIterator +{ + friend class VirtualCallStubManagerManager; + + public: + BOOL Next(); + VirtualCallStubManager *Current(); + + // Copy ctor + inline VirtualCallStubManagerIterator(const VirtualCallStubManagerIterator &it); + + protected: + inline VirtualCallStubManagerIterator(VirtualCallStubManagerManager *pMgr); + + BOOL m_fIsStart; + VirtualCallStubManager *m_pCurMgr; +}; + +///////////////////////////////////////////////////////////////////////////////////////////// +// Ctor +inline VirtualCallStubManagerIterator::VirtualCallStubManagerIterator(VirtualCallStubManagerManager *pMgr) + : m_fIsStart(TRUE), m_pCurMgr(pMgr->m_pManagers) +{ + LIMITED_METHOD_DAC_CONTRACT; + CONSISTENCY_CHECK(CheckPointer(pMgr)); +} + +///////////////////////////////////////////////////////////////////////////////////////////// +// Copy ctor +inline VirtualCallStubManagerIterator::VirtualCallStubManagerIterator(const VirtualCallStubManagerIterator &it) + : m_fIsStart(it.m_fIsStart), m_pCurMgr(it.m_pCurMgr) +{ + LIMITED_METHOD_DAC_CONTRACT; +} + +/******************************************************************************************************** +#StubDispatchNotes + +A note on approach. The cache and hash tables used by the stub and lookup mechanism +are designed with an eye to minimizing interlocking and/or syncing and/or locking operations. +They are intended to run in a highly concurrent environment. Since there is no magic, +some tradeoffs and and some implementation constraints are required. 
The basic notion
+is that if all reads and writes are atomic, and if all functions and operations operate
+correctly in the face of commutative reorderings of the visibility of all reads and writes
+across threads, then we don't have to interlock, sync, or serialize. Our approximation of
+this is:
+
+1. All reads and all writes to tables must be atomic. This effectively limits the actual entry
+size in a table to a pointer or a pointer-sized thing.
+
+2. All functions, like comparisons for equality or computation of hash values, must function
+correctly in the face of concurrent updating of the underlying table. This is accomplished
+by making the underlying structures/entries effectively immutable, if concurrency is in any way possible.
+By effectively immutable, we mean that the stub or token structure is either immutable, or that
+if it is ever written, all possibly concurrent writes are attempting to write the same value (atomically),
+or that the competing (atomic) values do not affect correctness, and that the function operates correctly whether
+or not any of the writes have taken place (is visible yet). The constraint we maintain is that all competing
+updates (and their visibility or lack thereof) do not alter the correctness of the program.
+
+3. All tables are inexact. The counts they hold (e.g. number of contained entries) may be inaccurate,
+but that inaccuracy cannot affect their correctness. Table modifications, such as insertion of
+a new entry, may not succeed, but such failures cannot affect correctness. This implies that just
+because a stub/entry is not present in a table, e.g. has been removed, that does not mean that
+it is not in use. It also implies that internal table structures, such as discarded hash table buckets,
+cannot be freely recycled since another concurrent thread may still be walking through them.
+
+4. Occasionally it is necessary to pick up the pieces that have been dropped on the floor,
+so to speak, e.g. actually recycle hash buckets that aren't in use. Since we have a natural
+sync point already in the GC, we use that to provide cleanup points. We need to make sure that code that
+is walking our structures is not at a GC safe point. Hence if the GC calls back into us inside the GC
+sync point, we know that nobody is inside our structures and we can safely rearrange and recycle things.
+********************************************************************************************************/
+
+//initial and increment value for fail stub counters
+#ifdef STUB_LOGGING
+extern UINT32 STUB_MISS_COUNT_VALUE;
+extern UINT32 STUB_COLLIDE_WRITE_PCT;
+extern UINT32 STUB_COLLIDE_MONO_PCT;
+#else // !STUB_LOGGING
+#define STUB_MISS_COUNT_VALUE 100
+#define STUB_COLLIDE_WRITE_PCT 100
+#define STUB_COLLIDE_MONO_PCT 0
+#endif // !STUB_LOGGING
+
+//size and mask of the cache used by resolve stubs
+// CALL_STUB_CACHE_SIZE must be equal to 2^CALL_STUB_CACHE_NUM_BITS
+#define CALL_STUB_CACHE_NUM_BITS 12 //10
+#define CALL_STUB_CACHE_SIZE 4096 //1024
+#define CALL_STUB_CACHE_MASK (CALL_STUB_CACHE_SIZE-1)
+#define CALL_STUB_CACHE_PROBES 5
+//min sizes for BucketTable and buckets and the growth and hashing constants
+#define CALL_STUB_MIN_BUCKETS 32
+#define CALL_STUB_MIN_ENTRIES 4
+//this is so that the very first growth will jump from 4 to 32 entries, then double from there.
+#define CALL_STUB_SECONDARY_ENTRIES 8
+#define CALL_STUB_GROWTH_FACTOR 2
+#define CALL_STUB_LOAD_FACTOR 90
+#define CALL_STUB_HASH_CONST1 1327
+#define CALL_STUB_HASH_CONST2 43627
+#define LARGE_PRIME 7199369
+//internal layout of buckets=size-1,count,entries....
+#define CALL_STUB_MASK_INDEX 0
+#define CALL_STUB_COUNT_INDEX 1
+#define CALL_STUB_DEAD_LINK 2
+#define CALL_STUB_FIRST_INDEX 3
+//marker entries in cache and hash tables
+#define CALL_STUB_EMPTY_ENTRY 0
+// number of successes for a chained element before it gets moved to the front
+#define CALL_STUB_CACHE_INITIAL_SUCCESS_COUNT (0x100)
+
+/*******************************************************************************************************
+Entry is an abstract class. We will make specific subclasses for each kind of
+entry. Entries hold references to stubs or tokens. The principal thing they provide
+is a virtual Equals function that is used by the caching and hashing tables within which
+the stubs and tokens are stored. Entries are typically stack allocated by the routines
+that call into the hash and caching functions, and the functions stuff stubs into the entry
+to do the comparisons. Essentially, specific entry subclasses supply a vtable to a stub
+as and when needed. This means we don't have to have vtables attached to stubs.
+
+Summarizing so far, there is a struct for each kind of stub or token, of the form XXXXStub.
+They provide the actual storage layouts.
+There is a struct in which each stub that has code is contained, of the form XXXXHolder.
+They provide alignment and ancillary storage for the stub code.
+There is a subclass of Entry for each kind of stub or token, of the form XXXXEntry.
+They provide the specific implementations of the virtual functions declared in Entry. */
+class Entry
+{
+public:
+    //access and compare the keys of the entry
+    virtual BOOL Equals(size_t keyA, size_t keyB)=0;
+    virtual size_t KeyA()=0;
+    virtual size_t KeyB()=0;
+
+    //contents is the struct or token that the entry exposes
+    virtual void SetContents(size_t contents)=0;
+};
+
+/* define the platform specific Stubs and stub holders */
+
+#include <virtualcallstubcpu.hpp>
+
+#if USES_LOOKUP_STUBS
+/**********************************************************************************************
+LookupEntry wraps LookupStubs and provides the concrete implementation of the abstract class Entry.
+Virtual and interface call sites, when they are first jitted, point to LookupStubs. The hash table
+that contains lookup stubs is keyed by token, hence the Equals function uses the embedded token in
+the stub for comparison purposes. Since we are willing to allow duplicates in the hash table (as
+long as they are relatively rare) we use direct comparison of the tokens rather than extracting
+the fields from within the tokens, for perf reasons.
*/ +class LookupEntry : public Entry +{ +public: + //Creates an entry that wraps lookup stub s + LookupEntry(size_t s) + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(VirtualCallStubManager::isLookupStubStatic((PCODE)s)); + stub = (LookupStub*) s; + } + + //default contructor to allow stack and inline allocation of lookup entries + LookupEntry() {LIMITED_METHOD_CONTRACT; stub = NULL;} + + //implementations of abstract class Entry + BOOL Equals(size_t keyA, size_t keyB) + { WRAPPER_NO_CONTRACT; return stub && (keyA == KeyA()) && (keyB == KeyB()); } + + size_t KeyA() { WRAPPER_NO_CONTRACT; return Token(); } + size_t KeyB() { WRAPPER_NO_CONTRACT; return (size_t)0; } + + void SetContents(size_t contents) + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(VirtualCallStubManager::isLookupStubStatic((PCODE)contents)); + stub = LookupHolder::FromLookupEntry((PCODE)contents)->stub(); + } + + //extract the token of the underlying lookup stub + + inline size_t Token() { LIMITED_METHOD_CONTRACT; return stub ? stub->token() : 0; } + +private: + LookupStub* stub; //the stub the entry wrapping +}; +#endif // USES_LOOKUP_STUBS + +/********************************************************************************************** +ResolveCacheEntry wraps a ResolveCacheElem and provides lookup functionality for entries that +were created that may be added to the ResolveCache +*/ +class ResolveCacheEntry : public Entry +{ +public: + ResolveCacheEntry(size_t elem) + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(elem != 0); + pElem = (ResolveCacheElem*) elem; + } + + //default contructor to allow stack and inline allocation of lookup entries + ResolveCacheEntry() { LIMITED_METHOD_CONTRACT; pElem = NULL; } + + //access and compare the keys of the entry + virtual BOOL Equals(size_t keyA, size_t keyB) + { WRAPPER_NO_CONTRACT; return pElem && (keyA == KeyA()) && (keyB == KeyB()); } + virtual size_t KeyA() + { LIMITED_METHOD_CONTRACT; return pElem != NULL ? pElem->token : 0; } + virtual size_t KeyB() + { LIMITED_METHOD_CONTRACT; return pElem != NULL ? (size_t) pElem->pMT : 0; } + + //contents is the struct or token that the entry exposes + virtual void SetContents(size_t contents) + { + LIMITED_METHOD_CONTRACT; + pElem = (ResolveCacheElem*) contents; + } + + inline const BYTE *Target() + { + LIMITED_METHOD_CONTRACT; + return pElem != NULL ? (const BYTE *)pElem->target : NULL; + } + +private: + ResolveCacheElem *pElem; +}; + +/********************************************************************************************** +ResolveEntry wraps ResolveStubs and provide the concrete implementation of the abstract class Entry. +Polymorphic call sites and monomorphic calls that fail end up in a ResolveStub. Resolve stubs +are stored in hash tables keyed by token, hence the Equals function uses the embedded token in +the stub for comparison purposes. Since we are willing to allow duplicates in the hash table (as +long as they are relatively rare) we do use direct comparison of the tokens rather than extracting +the fields from within the tokens, for perf reasons. 
*/ +class ResolveEntry : public Entry +{ +public: + //Creates an entry that wraps resolve stub s + ResolveEntry (size_t s) + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(VirtualCallStubManager::isResolvingStubStatic((PCODE)s)); + stub = (ResolveStub*) s; + } + //default contructor to allow stack and inline allocation of resovler entries + ResolveEntry() { LIMITED_METHOD_CONTRACT; stub = CALL_STUB_EMPTY_ENTRY; } + + //implementations of abstract class Entry + inline BOOL Equals(size_t keyA, size_t keyB) + { WRAPPER_NO_CONTRACT; return stub && (keyA == KeyA()) && (keyB == KeyB()); } + inline size_t KeyA() { WRAPPER_NO_CONTRACT; return Token(); } + inline size_t KeyB() { WRAPPER_NO_CONTRACT; return (size_t)0; } + + void SetContents(size_t contents) + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(VirtualCallStubManager::isResolvingStubStatic((PCODE)contents)); + stub = ResolveHolder::FromResolveEntry((PCODE)contents)->stub(); + } + //extract the token of the underlying resolve stub + inline size_t Token() { WRAPPER_NO_CONTRACT; return stub ? (size_t)(stub->token()) : 0; } +private: + ResolveStub* stub; //the stub the entry is wrapping +}; + +/********************************************************************************************** +DispatchEntry wraps DispatchStubs and provide the concrete implementation of the abstract class Entry. +Monomorphic and mostly monomorphic call sites eventually point to DispatchStubs. Dispatch stubs +are placed in hash and cache tables keyed by the expected Method Table and token they are built for. +Since we are willing to allow duplicates in the hash table (as long as they are relatively rare) +we do use direct comparison of the tokens rather than extracting the fields from within the tokens, +for perf reasons.*/ +class DispatchEntry : public Entry +{ +public: + //Creates an entry that wraps dispatch stub s + DispatchEntry (size_t s) + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(VirtualCallStubManager::isDispatchingStubStatic((PCODE)s)); + stub = (DispatchStub*) s; + } + //default contructor to allow stack and inline allocation of resovler entries + DispatchEntry() { LIMITED_METHOD_CONTRACT; stub = CALL_STUB_EMPTY_ENTRY; } + + //implementations of abstract class Entry + inline BOOL Equals(size_t keyA, size_t keyB) + { WRAPPER_NO_CONTRACT; return stub && (keyA == KeyA()) && (keyB == KeyB()); } + inline size_t KeyA() { WRAPPER_NO_CONTRACT; return Token(); } + inline size_t KeyB() { WRAPPER_NO_CONTRACT; return ExpectedMT();} + + void SetContents(size_t contents) + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(VirtualCallStubManager::isDispatchingStubStatic((PCODE)contents)); + stub = DispatchHolder::FromDispatchEntry((PCODE)contents)->stub(); + } + + //extract the fields of the underlying dispatch stub + inline size_t ExpectedMT() + { WRAPPER_NO_CONTRACT; return stub ? (size_t)(stub->expectedMT()) : 0; } + + size_t Token() + { + WRAPPER_NO_CONTRACT; + if (stub) + { + ResolveHolder * resolveHolder = ResolveHolder::FromFailEntry(stub->failTarget()); + size_t token = resolveHolder->stub()->token(); + _ASSERTE(token == VirtualCallStubManager::GetTokenFromStub((PCODE)stub)); + return token; + } + else + { + return 0; + } + } + + inline PCODE Target() + { WRAPPER_NO_CONTRACT; return stub ? stub->implTarget() : 0; } + +private: + DispatchStub* stub; +}; + +/************************************************************************************************* +DispatchCache is the cache table that the resolve stubs use for inline polymorphic resolution +of a call. 
The cache entry is logically a triplet of (method table, token, impl address) where method table +is the type of the calling frame's <this>, token identifies the method being invoked, +i.e. is a (type id,slot #) pair, and impl address is the address of the method implementation. +*/ +class DispatchCache +{ +public: + static const UINT16 INVALID_HASH = (UINT16)(-1); + + DispatchCache(); + + //read and write the cache keyed by (method table,token) pair. + inline ResolveCacheElem* Lookup(size_t token, void* mt) + { WRAPPER_NO_CONTRACT; return Lookup(token, INVALID_HASH, mt);} + + ResolveCacheElem* Lookup(size_t token, UINT16 tokenHash, void* mt); + + enum InsertKind {IK_NONE, IK_DISPATCH, IK_RESOLVE, IK_SHARED, IK_EXTERNAL}; + + BOOL Insert(ResolveCacheElem* elem, InsertKind insertKind); +#ifdef CHAIN_LOOKUP + void PromoteChainEntry(ResolveCacheElem* elem); +#endif + + // This is the heavyweight hashing algorithm. Use sparingly. + static UINT16 HashToken(size_t token); + + inline void GetLoadFactor(size_t *total, size_t *used) + { + LIMITED_METHOD_CONTRACT; + + *total = CALL_STUB_CACHE_SIZE; + size_t count = 0; + for (size_t i = 0; i < CALL_STUB_CACHE_SIZE; i++) + if (cache[i] != empty) + count++; + *used = count; + } + + inline void *GetCacheBaseAddr() + { LIMITED_METHOD_CONTRACT; return &cache[0]; } + inline size_t GetCacheCount() + { LIMITED_METHOD_CONTRACT; return CALL_STUB_CACHE_SIZE; } + inline ResolveCacheElem *GetCacheEntry(size_t idx) + { LIMITED_METHOD_CONTRACT; return VolatileLoad(&cache[idx]); } + inline BOOL IsCacheEntryEmpty(size_t idx) + { LIMITED_METHOD_CONTRACT; return cache[idx] == empty; } + + inline void SetCacheEntry(size_t idx, ResolveCacheElem *elem) + { + LIMITED_METHOD_CONTRACT; +#ifdef STUB_LOGGING + cacheData[idx].numWrites++; +#endif +#ifdef CHAIN_LOOKUP + CONSISTENCY_CHECK(m_writeLock.OwnedByCurrentThread()); +#endif + cache[idx] = elem; + } + + inline void ClearCacheEntry(size_t idx) + { + LIMITED_METHOD_CONTRACT; +#ifdef STUB_LOGGING + cacheData[idx].numClears++; +#endif + cache[idx] = empty; + } + + struct + { + UINT32 insert_cache_external; //# of times Insert was called for IK_EXTERNAL + UINT32 insert_cache_shared; //# of times Insert was called for IK_SHARED + UINT32 insert_cache_dispatch; //# of times Insert was called for IK_DISPATCH + UINT32 insert_cache_resolve; //# of times Insert was called for IK_RESOLVE + UINT32 insert_cache_hit; //# of times Insert found an empty cache entry + UINT32 insert_cache_miss; //# of times Insert already had a matching cache entry + UINT32 insert_cache_collide; //# of times Insert found a used cache entry + UINT32 insert_cache_write; //# of times Insert wrote a cache entry + } stats; + + void LogStats(); + + // Unlocked iterator of entries. Use only when read/write access to the cache + // is safe. This would typically be at GC sync points, currently needed during + // appdomain unloading. + class Iterator + { + public: + Iterator(DispatchCache *pCache); + inline BOOL IsValid() + { WRAPPER_NO_CONTRACT; return (m_curBucket < (INT32)m_pCache->GetCacheCount()); } + void Next(); + // Unlink the current entry. + // **NOTE** Using this method implicitly performs a call to Next to move + // past the unlinked entry. Thus, one could accidentally skip + // entries unless you take this into consideration. 
+ ResolveCacheElem *UnlinkEntry(); + inline ResolveCacheElem *Entry() + { LIMITED_METHOD_CONTRACT; CONSISTENCY_CHECK(IsValid()); return *m_ppCurElem; } + + private: + void NextValidBucket(); + inline void NextBucket() + { LIMITED_METHOD_CONTRACT; m_curBucket++; m_ppCurElem = &m_pCache->cache[m_curBucket]; } + + DispatchCache *m_pCache; + INT32 m_curBucket; + ResolveCacheElem **m_ppCurElem; + }; + +private: +#ifdef CHAIN_LOOKUP + Crst m_writeLock; +#endif + + //the following hash computation is also inlined in the resolve stub in asm (SO NO TOUCHIE) + inline static UINT16 HashMT(UINT16 tokenHash, void* mt) + { + LIMITED_METHOD_CONTRACT; + + UINT16 hash; + + size_t mtHash = (size_t) mt; + mtHash = (((mtHash >> CALL_STUB_CACHE_NUM_BITS) + mtHash) >> LOG2_PTRSIZE) & CALL_STUB_CACHE_MASK; + hash = (UINT16) mtHash; + + hash ^= (tokenHash & CALL_STUB_CACHE_MASK); + + return hash; + } + + ResolveCacheElem* cache[CALL_STUB_CACHE_SIZE]; //must be first + ResolveCacheElem* empty; //empty entry, initialized to fail all comparisons +#ifdef STUB_LOGGING +public: + struct CacheEntryData { + UINT32 numWrites; + UINT16 numClears; + }; + CacheEntryData cacheData[CALL_STUB_CACHE_SIZE]; +#endif // STUB_LOGGING +}; + +/************************************************************************************************** +The hash tables are accessed via instances of the Prober. Prober is a probe into a bucket +of the hash table, and therefore has an index which is the current probe position. +It includes a count of the number of probes done in that bucket so far and a stride +to step thru the bucket with. To do comparisons, it has a reference to an entry with which +it can do comparisons (Equals(...)) of the entries (stubs) inside the hash table. It also has +the key pair (keyA, keyB) that it is looking for. + +Typically, an entry of the appropriate type is created on the stack and then the prober is created passing +in a reference to the entry. The prober is used for a complete operation, such as look for and find an +entry (stub), creating and inserting it as necessary. + +The initial index and the stride are orthogonal hashes of the key pair, i.e. we are doing a varient of +double hashing. When we initialize the prober (see FormHash below) we set the initial probe based on +one hash. The stride (used as a modulo addition of the probe position) is based on a different hash and +is such that it will vist every location in the bucket before repeating. Hence it is imperative that +the bucket size and the stride be relative prime wrt each other. We have chosen to make bucket sizes +a power of 2, so we force stride to be odd. + +Note -- it must be assumed that multiple probers are walking the same tables and buckets at the same time. +Additionally, the counts may not be accurate, and there may be duplicates in the tables. Since the tables +do not allow concurrrent deletion, some of the concurrency issues are ameliorated. +*/ +class Prober +{ + friend class FastTable; + friend class BucketTable; +public: + Prober(Entry* e) {LIMITED_METHOD_CONTRACT; comparer = e;} + //find the requested entry, if not there return CALL_STUB_EMPTY_ENTRY + size_t Find(); + //add the entry into the bucket, if it is not already in the bucket. 
+ //return the entry actually in the bucket (existing or added) + size_t Add(size_t entry); +private: + //return the bucket (FastTable*) that the prober is currently walking + inline size_t* items() {LIMITED_METHOD_CONTRACT; return &base[-CALL_STUB_FIRST_INDEX];} + //are there more probes possible, or have we probed everything in the bucket + inline BOOL NoMore() {LIMITED_METHOD_CONTRACT; return probes>mask;} //both probes and mask are (-1) + //advance the probe to a new place in the bucket + inline BOOL Next() + { + WRAPPER_NO_CONTRACT; + index = (index + stride) & mask; + probes++; + return !NoMore(); + } + inline size_t Read() + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(base); + return VolatileLoad(&base[index]); + } + //initialize a prober across a bucket (table) for the specified keys. + void InitProber(size_t key1, size_t key2, size_t* table); + //set up the initial index and stride and probe count + inline void FormHash() + { + LIMITED_METHOD_CONTRACT; + + probes = 0; + //these two hash functions have not been formally measured for effectiveness + //but they are at least orthogonal + + size_t a = ((keyA>>16) + keyA); + size_t b = ((keyB>>16) ^ keyB); + index = (((a*CALL_STUB_HASH_CONST1)>>4)+((b*CALL_STUB_HASH_CONST2)>>4)+CALL_STUB_HASH_CONST1) & mask; + stride = ((a+(b*CALL_STUB_HASH_CONST1)+CALL_STUB_HASH_CONST2) | 1) & mask; + } + //atomically grab an empty slot so we can insert a new entry into the bucket + BOOL GrabEntry(size_t entryValue); + size_t keyA; //key pair we are looking for + size_t keyB; + size_t* base; //we have our own pointer to the bucket, so races don't matter. + // We won't care if we do the lookup in an + // outdated bucket (has grown out from under us). + // All that will happen is possibly dropping an entry + // on the floor or adding a duplicate. + size_t index; //current probe point in the bucket + size_t stride; //amount to step on each successive probe, must be relatively prime wrt the bucket size + size_t mask; //size of bucket - 1 + size_t probes; //number probes - 1 + Entry* comparer;//used to compare an entry against the sought after key pair +}; + +/******************************************************************************************************** +FastTable is used to implement the buckets of a BucketTable, a bucketized hash table. A FastTable is +an array of entries (contents). The first two slots of contents store the size-1 and count of entries +actually in the FastTable. Note that the count may be inaccurate and there may be duplicates. Careful +attention must be paid to eliminate the need for interlocked or serialized or locked operations in face +of concurrency. +*/ +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4200) // disable zero-sized array warning +#endif // _MSC_VER +class FastTable +{ + friend class BucketTable; +public: +private: + FastTable() { LIMITED_METHOD_CONTRACT; } + ~FastTable() { LIMITED_METHOD_CONTRACT; } + + //initialize a prober for the specified keys. + inline BOOL SetUpProber(size_t keyA, size_t keyB, Prober* probe) + { + CONTRACTL { + NOTHROW; + GC_NOTRIGGER; + FORBID_FAULT; + } CONTRACTL_END; + + _ASSERTE(probe); + _ASSERTE(contents); + probe->InitProber(keyA, keyB, &contents[0]); + return TRUE; + } + //find the requested entry (keys of prober), if not there return CALL_STUB_EMPTY_ENTRY + size_t Find(Prober* probe); + //add the entry, if it is not already there. Probe is used to search. 
+ //Return the entry actually containted (existing or added) + size_t Add(size_t entry, Prober* probe); + void IncrementCount(); + + // Create a FastTable with space for numberOfEntries. Please note that this method + // does not throw on OOM. **YOU MUST CHECK FOR NULL RETURN** + static FastTable* MakeTable(size_t numberOfEntries) + { + CONTRACTL { + THROWS; + GC_TRIGGERS; + INJECT_FAULT(COMPlusThrowOM();); + } CONTRACTL_END; + + size_t size = CALL_STUB_MIN_ENTRIES; + while (size < numberOfEntries) {size = size<<1;} +// if (size == CALL_STUB_MIN_ENTRIES) +// size += 3; + size_t* bucket = new size_t[(sizeof(FastTable)/sizeof(size_t))+size+CALL_STUB_FIRST_INDEX]; + FastTable* table = new (bucket) FastTable(); + table->InitializeContents(size); + return table; + } + //Initialize as empty + void InitializeContents(size_t size) + { + LIMITED_METHOD_CONTRACT; + memset(&contents[0], CALL_STUB_EMPTY_ENTRY, (size+CALL_STUB_FIRST_INDEX)*sizeof(BYTE*)); + contents[CALL_STUB_MASK_INDEX] = size-1; + } + inline size_t tableMask() {LIMITED_METHOD_CONTRACT; return (size_t) (contents[CALL_STUB_MASK_INDEX]);} + inline size_t tableSize() {LIMITED_METHOD_CONTRACT; return tableMask()+1;} + inline size_t tableCount() {LIMITED_METHOD_CONTRACT; return (size_t) (contents[CALL_STUB_COUNT_INDEX]);} + inline BOOL isFull() + { + LIMITED_METHOD_CONTRACT; + return (tableCount()+1) * 100 / CALL_STUB_LOAD_FACTOR >= tableSize(); + } + //we store (size-1) in bucket[CALL_STUB_MASK_INDEX==0], + //we store the used count in bucket[CALL_STUB_COUNT_INDEX==1], + //we have an unused cell to use as a temp at bucket[CALL_STUB_DEAD_LINK==2], + //and the table starts at bucket[CALL_STUB_FIRST_INDEX==3], + size_t contents[]; +}; +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +/****************************************************************************************************** +BucketTable is a bucketized hash table. It uses FastTables for its buckets. The hash tables +used by the VirtualCallStubManager are BucketTables. The number of buckets is fixed at the time +the table is created. The actual buckets are allocated as needed, and grow as necessary. The reason +for using buckets is primarily to reduce the cost of growing, since only a single bucket is actually +grown at any given time. Since the hash tables are accessed infrequently, the load factor that +controls growth is quite high (90%). Since we use hashing to pick the bucket, and we use hashing to +lookup inside the bucket, it is important that the hashing function used here is orthogonal to the ones +used in the buckets themselves (see FastTable::FormHash). +*/ +class BucketTable +{ +public: + BucketTable(size_t numberOfBuckets) + { + WRAPPER_NO_CONTRACT; + size_t size = CALL_STUB_MIN_BUCKETS; + while (size < numberOfBuckets) {size = size<<1;} + buckets = AllocateBuckets(size); + // Initialize statistics counters + memset(&stats, 0, sizeof(stats)); + } + + ~BucketTable() + { + LIMITED_METHOD_CONTRACT; + if(buckets != NULL) + { + size_t size = bucketCount()+CALL_STUB_FIRST_INDEX; + for(size_t ix = CALL_STUB_FIRST_INDEX; ix < size; ix++) delete (FastTable*)(buckets[ix]); + delete buckets; + } + } + + //initialize a prober for the specified keys. + BOOL SetUpProber(size_t keyA, size_t keyB, Prober *prober); + //find the requested entry (keys of prober), if not there return CALL_STUB_EMPTY_ENTRY + inline size_t Find(Prober* probe) {WRAPPER_NO_CONTRACT; return probe->Find();} + //add the entry, if it is not already there. Probe is used to search. 
+ size_t Add(size_t entry, Prober* probe); + //reclaim abandoned buckets. Buckets are abaondoned when they need to grow. + //needs to be called inside a gc sync point. + static void Reclaim(); + + struct + { + UINT32 bucket_space; //# of bytes in caches and tables, not including the stubs themselves + UINT32 bucket_space_dead; //# of bytes of abandoned buckets not yet recycled. + } stats; + + void LogStats(); + +private: + inline size_t bucketMask() {LIMITED_METHOD_CONTRACT; return (size_t) (buckets[CALL_STUB_MASK_INDEX]);} + inline size_t bucketCount() {LIMITED_METHOD_CONTRACT; return bucketMask()+1;} + inline size_t ComputeBucketIndex(size_t keyA, size_t keyB) + { + LIMITED_METHOD_CONTRACT; + size_t a = ((keyA>>16) + keyA); + size_t b = ((keyB>>16) ^ keyB); + return CALL_STUB_FIRST_INDEX+(((((a*CALL_STUB_HASH_CONST2)>>5)^((b*CALL_STUB_HASH_CONST1)>>5))+CALL_STUB_HASH_CONST2) & bucketMask()); + } + //grows the bucket referenced by probe. + BOOL GetMoreSpace(const Prober* probe); + //creates storage in which to store references to the buckets + static size_t* AllocateBuckets(size_t size) + { + LIMITED_METHOD_CONTRACT; + size_t* buckets = new size_t[size+CALL_STUB_FIRST_INDEX]; + if (buckets != NULL) + { + memset(&buckets[0], CALL_STUB_EMPTY_ENTRY, (size+CALL_STUB_FIRST_INDEX)*sizeof(void*)); + buckets[CALL_STUB_MASK_INDEX] = size-1; + } + return buckets; + } + inline size_t Read(size_t index) + { + LIMITED_METHOD_CONTRACT; + CONSISTENCY_CHECK(index <= bucketMask()+CALL_STUB_FIRST_INDEX); + return VolatileLoad(&buckets[index]); + } + +#ifdef _MSC_VER +#pragma warning(disable: 4267) //work-around for the compiler +#endif + inline void Write(size_t index, size_t value) + { + LIMITED_METHOD_CONTRACT; + CONSISTENCY_CHECK(index <= bucketMask()+CALL_STUB_FIRST_INDEX); + VolatileStore(&buckets[index], value); + } +#ifdef _MSC_VER +#pragma warning(default: 4267) +#endif + + // We store (#buckets-1) in bucket[CALL_STUB_MASK_INDEX ==0] + // We have two unused cells at bucket[CALL_STUB_COUNT_INDEX ==1] + // and bucket[CALL_STUB_DEAD_LINK ==2] + // and the table starts at bucket[CALL_STUB_FIRST_INDEX ==3] + // the number of elements is bucket[CALL_STUB_MASK_INDEX]+CALL_STUB_FIRST_INDEX + size_t* buckets; + static FastTable* dead; //linked list head of to be deleted (abandoned) buckets +}; + +#endif // !CROSSGEN_COMPILE + +#endif // !_VIRTUAL_CALL_STUB_H |
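
The resolve cache that the resolve stubs consult is keyed by a (token, MethodTable) pair. The following is a minimal, self-contained sketch (not runtime code) of how such a lookup proceeds, modeled on DispatchCache::HashMT and the ResolveCacheElem chaining shown in this header; the simplified Elem type, the Lookup helper, and the constant names are illustrative only, and LOG2_PTR assumes a 64-bit pointer size.

// Standalone sketch: bucket selection as in DispatchCache::HashMT, then a
// collision-chain walk in the spirit of DispatchCache::Lookup.
#include <cstddef>
#include <cstdint>

constexpr size_t CACHE_NUM_BITS = 12;              // CALL_STUB_CACHE_NUM_BITS
constexpr size_t CACHE_SIZE     = 1 << CACHE_NUM_BITS;
constexpr size_t CACHE_MASK     = CACHE_SIZE - 1;  // CALL_STUB_CACHE_MASK
constexpr size_t LOG2_PTR       = 3;               // LOG2_PTRSIZE on a 64-bit target (assumption)

struct Elem                     // stands in for ResolveCacheElem
{
    void*  pMT;                 // expected MethodTable
    size_t token;               // DispatchToken
    void*  target;              // resolved code address
    Elem*  pNext;               // hash-collision chain
};

// Mix the MethodTable pointer down into the cache index range, then xor in
// the (precomputed) token hash -- mirrors DispatchCache::HashMT above.
uint16_t HashMT(uint16_t tokenHash, void* mt)
{
    size_t mtHash = (size_t)mt;
    mtHash = (((mtHash >> CACHE_NUM_BITS) + mtHash) >> LOG2_PTR) & CACHE_MASK;
    return (uint16_t)(mtHash ^ (tokenHash & CACHE_MASK));
}

// Pick a bucket, then scan its chain for a (token, MethodTable) match,
// the same comparison ResolveCacheElem::Equals performs.
void* Lookup(Elem* const cache[CACHE_SIZE], uint16_t tokenHash, size_t token, void* mt)
{
    for (Elem* e = cache[HashMT(tokenHash, mt)]; e != nullptr; e = e->pNext)
    {
        if (e->pMT == mt && e->token == token)
            return e->target;
    }
    return nullptr;             // miss -> fall back to the resolve worker
}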
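
The Prober's double hashing can be illustrated the same way. Below is a hedged, standalone sketch of the index/stride computation from Prober::FormHash and the probe advance from Prober::Next; the ProbeSketch type is invented for illustration, while the hash constants and the odd-stride trick (which keeps the stride relatively prime to the power-of-two bucket size, so every slot is visited before the sequence repeats) come from the header above.

// Standalone sketch of the bucket probe sequence, not runtime code.
#include <cstddef>

constexpr size_t HASH_CONST1 = 1327;   // CALL_STUB_HASH_CONST1
constexpr size_t HASH_CONST2 = 43627;  // CALL_STUB_HASH_CONST2

struct ProbeSketch
{
    size_t index;   // current probe position within the bucket
    size_t stride;  // step between probes, forced odd
    size_t mask;    // bucket size - 1 (size is a power of two)
    size_t probes;  // number of probes made so far

    ProbeSketch(size_t keyA, size_t keyB, size_t bucketSize)
        : mask(bucketSize - 1), probes(0)
    {
        // Two roughly orthogonal hashes of the key pair, as in Prober::FormHash.
        size_t a = (keyA >> 16) + keyA;
        size_t b = (keyB >> 16) ^ keyB;
        index  = (((a * HASH_CONST1) >> 4) + ((b * HASH_CONST2) >> 4) + HASH_CONST1) & mask;
        stride = ((a + (b * HASH_CONST1) + HASH_CONST2) | 1) & mask;   // odd, so coprime with size
    }

    bool NoMore() const { return probes > mask; }   // the whole bucket has been probed
    bool Next()                                     // advance to the next slot
    {
        index = (index + stride) & mask;
        ++probes;
        return !NoMore();
    }
};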