author     Fadi Hanna <fadim@microsoft.com>   2018-11-13 12:44:49 -0800
committer  GitHub <noreply@github.com>        2018-11-13 12:44:49 -0800
commit     d2cd0df600cfa697d30cf7ef5a7cf1c3c33b9959 (patch)
tree       5780e415551b7aa0d43cc18d04c977c014a4d3a3
parent     d8bfe11f24598e6e909e6e49aea8fba3925c91b7 (diff)
Optimize vtable calls (#20696)
* Implementation of R2R vtable call thunks. These thunks fetch the target code pointer from the vtable of the incoming thisPtr and jump to that address. This is especially helpful with generics, since we can avoid the generic dictionary lookup cost for a simple vtable call. Overall, these thunks cause fewer branch mispredictions and give a small performance boost to vtable calls. The stubs are placed under VirtualCallStubManager so that the managed debugger can handle stepping through them.
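For readers unfamiliar with the VSD code, the double indirection these thunks encode is roughly equivalent to the following C++ sketch (illustrative only, assuming the non-relative vtable indirections the patch requires; the function and parameter names below are not part of the change):

    // Illustrative sketch: the lookup a VTableCallStub performs before tail-jumping
    // to the target. offsetOfIndirection and offsetAfterIndirection are the same
    // values that Initialize() bakes into the stub for a given slot.
    void* LookupVTableTarget(void* thisPtr,
                             unsigned offsetOfIndirection,
                             unsigned offsetAfterIndirection)
    {
        // The object header points at its MethodTable.
        unsigned char* pMT = *(unsigned char**)thisPtr;
        // The MethodTable holds a pointer to the vtable chunk containing the slot.
        unsigned char* pChunk = *(unsigned char**)(pMT + offsetOfIndirection);
        // The target code pointer sits at a fixed offset within that chunk;
        // the real stub jumps to it rather than returning it.
        return *(void**)(pChunk + offsetAfterIndirection);
    }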
-rw-r--r--  src/vm/amd64/virtualcallstubcpu.hpp    113
-rw-r--r--  src/vm/arm/stubs.cpp                    20
-rw-r--r--  src/vm/arm/virtualcallstubcpu.hpp      182
-rw-r--r--  src/vm/arm64/virtualcallstubcpu.hpp    158
-rw-r--r--  src/vm/i386/virtualcallstubcpu.hpp     113
-rw-r--r--  src/vm/jitinterface.cpp                 17
-rw-r--r--  src/vm/loaderallocator.cpp               3
-rw-r--r--  src/vm/prestub.cpp                      22
-rw-r--r--  src/vm/virtualcallstub.cpp             130
-rw-r--r--  src/vm/virtualcallstub.h                79
-rw-r--r--  src/zap/zapinfo.cpp                     35
11 files changed, 802 insertions, 70 deletions
diff --git a/src/vm/amd64/virtualcallstubcpu.hpp b/src/vm/amd64/virtualcallstubcpu.hpp
index 1bfe858d5f..7547559df0 100644
--- a/src/vm/amd64/virtualcallstubcpu.hpp
+++ b/src/vm/amd64/virtualcallstubcpu.hpp
@@ -63,7 +63,7 @@ struct LookupStub
inline PCODE entryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; }
inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; }
- inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(LookupStub); }
+ inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(LookupStub); }
private:
friend struct LookupHolder;
@@ -430,6 +430,65 @@ struct ResolveHolder
private:
ResolveStub _stub;
};
+
+/*VTableCallStub**************************************************************************************
+These are jump stubs that perform a vtable-based virtual call. These stubs assume that an object is placed
+in the first argument register (this pointer). From there, the stub extracts the MethodTable pointer, followed by the
+vtable pointer, and finally jumps to the target method at a given slot in the vtable.
+*/
+struct VTableCallStub
+{
+ friend struct VTableCallHolder;
+
+ inline size_t size()
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ BYTE* pStubCode = (BYTE *)this;
+
+ size_t cbSize = 3; // First mov instruction
+ cbSize += (pStubCode[cbSize + 2] == 0x80 ? 7 : 4); // Either 48 8B 80 or 48 8B 40: mov rax,[rax+offset]
+ cbSize += (pStubCode[cbSize + 1] == 0xa0 ? 6 : 3); // Either FF A0 or FF 60: jmp qword ptr [rax+slot]
+ cbSize += 4; // Slot value (data storage, not a real instruction)
+
+ return cbSize;
+ }
+
+ inline PCODE entryPoint() const { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; }
+
+ inline size_t token()
+ {
+ LIMITED_METHOD_CONTRACT;
+ DWORD slot = *(DWORD*)(reinterpret_cast<BYTE*>(this) + size() - 4);
+ return DispatchToken::CreateDispatchToken(slot).To_SIZE_T();
+ }
+
+private:
+ BYTE _entryPoint[0]; // Dynamically sized stub. See Initialize() for more details.
+};
+
+/* VTableCallHolders are the containers for VTableCallStubs, they provide for any alignment of
+stubs as necessary. */
+struct VTableCallHolder
+{
+ void Initialize(unsigned slot);
+
+ VTableCallStub* stub() { LIMITED_METHOD_CONTRACT; return reinterpret_cast<VTableCallStub *>(this); }
+
+ static size_t GetHolderSize(unsigned slot)
+ {
+ STATIC_CONTRACT_WRAPPER;
+ unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE;
+ unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE;
+ return 3 + (offsetOfIndirection >= 0x80 ? 7 : 4) + (offsetAfterIndirection >= 0x80 ? 6 : 3) + 4;
+ }
+
+ static VTableCallHolder* VTableCallHolder::FromVTableCallEntry(PCODE entry) { LIMITED_METHOD_CONTRACT; return (VTableCallHolder*)entry; }
+
+private:
+ // VTableCallStub follows here. It is dynamically sized on allocation because it could
+ // use short/long instruction sizes for mov/jmp, depending on the slot value.
+};
#pragma pack(pop)
#ifdef DECLARE_DATA
@@ -732,6 +791,54 @@ ResolveHolder* ResolveHolder::FromResolveEntry(PCODE resolveEntry)
return resolveHolder;
}
+void VTableCallHolder::Initialize(unsigned slot)
+{
+ unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE;
+ unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE;
+ _ASSERTE(MethodTable::VTableIndir_t::isRelative == false /* TODO: NYI */);
+
+ VTableCallStub* pStub = stub();
+ BYTE* p = (BYTE*)pStub->entryPoint();
+
+#ifdef UNIX_AMD64_ABI
+ // mov rax,[rdi] : rax = MethodTable pointer
+ *(UINT32 *)p = 0x078b48; p += 3;
+#else
+ // mov rax,[rcx] : rax = MethodTable pointer
+ *(UINT32 *)p = 0x018b48; p += 3;
+#endif
+
+ // mov rax,[rax+vtable offset] : rax = vtable pointer
+ if (offsetOfIndirection >= 0x80)
+ {
+ *(UINT32*)p = 0x00808b48; p += 3;
+ *(UINT32*)p = offsetOfIndirection; p += 4;
+ }
+ else
+ {
+ *(UINT32*)p = 0x00408b48; p += 3;
+ *p++ = (BYTE)offsetOfIndirection;
+ }
+
+ // jmp qword ptr [rax+slot]
+ if (offsetAfterIndirection >= 0x80)
+ {
+ *(UINT32*)p = 0xa0ff; p += 2;
+ *(UINT32*)p = offsetAfterIndirection; p += 4;
+ }
+ else
+ {
+ *(UINT16*)p = 0x60ff; p += 2;
+ *p++ = (BYTE)offsetAfterIndirection;
+ }
+
+ // Store the slot value here for convenience. Not a real instruction (unreachable anyways)
+ *(UINT32*)p = slot; p += 4;
+
+ _ASSERT(p == (BYTE*)stub()->entryPoint() + VTableCallHolder::GetHolderSize(slot));
+ _ASSERT(stub()->size() == VTableCallHolder::GetHolderSize(slot));
+}
+
VirtualCallStubManager::StubKind VirtualCallStubManager::predictStubKind(PCODE stubStartAddress)
{
#ifdef DACCESS_COMPILE
@@ -763,6 +870,10 @@ VirtualCallStubManager::StubKind VirtualCallStubManager::predictStubKind(PCODE s
{
stubKind = SK_LOOKUP;
}
+ else if (firstWord == 0x8B48)
+ {
+ stubKind = SK_VTABLECALL;
+ }
else
{
BYTE firstByte = ((BYTE*) stubStartAddress)[0];
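For reference (not part of the patch), the short-form stub that the AMD64 Initialize above emits decodes as follows; the long forms in parentheses are used when an offset does not fit in a signed 8-bit displacement, and the SysV variant loads through rdi instead of rcx. The x86 (i386) stub further below is the same sequence without the REX.W prefix.

    48 8B 01          mov rax, [rcx]              ; rax = MethodTable
    48 8B 40 imm8     mov rax, [rax+imm8]         ; rax = vtable chunk    (long form: 48 8B 80 imm32)
    FF 60 imm8        jmp qword ptr [rax+imm8]    ; tail-jump to target   (long form: FF A0 imm32)
    <slot, UINT32>    ; data only, read back by VTableCallStub::token()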
diff --git a/src/vm/arm/stubs.cpp b/src/vm/arm/stubs.cpp
index 01d8f319d4..d863900eec 100644
--- a/src/vm/arm/stubs.cpp
+++ b/src/vm/arm/stubs.cpp
@@ -3387,6 +3387,16 @@ void emitCOMStubCall (ComCallMethodDesc *pCOMMethod, PCODE target)
}
#endif // FEATURE_COMINTEROP
+void MovRegImm(BYTE* p, int reg, TADDR imm)
+{
+ LIMITED_METHOD_CONTRACT;
+ *(WORD *)(p + 0) = 0xF240;
+ *(WORD *)(p + 2) = (UINT16)(reg << 8);
+ *(WORD *)(p + 4) = 0xF2C0;
+ *(WORD *)(p + 6) = (UINT16)(reg << 8);
+ PutThumb2Mov32((UINT16 *)p, imm);
+}
+
#ifndef DACCESS_COMPILE
#ifndef CROSSGEN_COMPILE
@@ -3411,16 +3421,6 @@ void emitCOMStubCall (ComCallMethodDesc *pCOMMethod, PCODE target)
ClrFlushInstructionCache(pStart, cbAligned); \
return (PCODE)((TADDR)pStart | THUMB_CODE)
-static void MovRegImm(BYTE* p, int reg, TADDR imm)
-{
- LIMITED_METHOD_CONTRACT;
- *(WORD *)(p + 0) = 0xF240;
- *(WORD *)(p + 2) = (UINT16)(reg << 8);
- *(WORD *)(p + 4) = 0xF2C0;
- *(WORD *)(p + 6) = (UINT16)(reg << 8);
- PutThumb2Mov32((UINT16 *)p, imm);
-}
-
PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, PCODE target)
{
STANDARD_VM_CONTRACT;
diff --git a/src/vm/arm/virtualcallstubcpu.hpp b/src/vm/arm/virtualcallstubcpu.hpp
index a1e15d3661..6dc99e5093 100644
--- a/src/vm/arm/virtualcallstubcpu.hpp
+++ b/src/vm/arm/virtualcallstubcpu.hpp
@@ -55,9 +55,9 @@ get quickly changed to point to another kind of stub.
*/
struct LookupStub
{
- inline PCODE entryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0] + THUMB_CODE; }
- inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; }
- inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(LookupStub); }
+ inline PCODE entryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0] + THUMB_CODE; }
+ inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; }
+ inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(LookupStub); }
private:
friend struct LookupHolder;
@@ -259,6 +259,87 @@ struct ResolveHolder
private:
ResolveStub _stub;
};
+
+/*VTableCallStub**************************************************************************************
+These are jump stubs that perform a vtable-based virtual call. These stubs assume that an object is placed
+in the first argument register (this pointer). From there, the stub extracts the MethodTable pointer, followed by the
+vtable pointer, and finally jumps to the target method at a given slot in the vtable.
+*/
+struct VTableCallStub
+{
+ friend struct VTableCallHolder;
+
+ inline size_t size()
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ BYTE* pStubCode = (BYTE *)this;
+
+ size_t cbSize = 4; // First ldr instruction
+
+ // If we never save r0 to the red zone, we have the short version of the stub
+ if (*(UINT32*)(&pStubCode[cbSize]) != 0x0c04f84d)
+ {
+ return
+ 4 + // ldr r12,[r0]
+ 4 + // ldr r12,[r12+offset]
+ 4 + // ldr r12,[r12+offset]
+ 2 + // bx r12
+ 4; // Slot value (data storage, not a real instruction)
+ }
+
+ cbSize += 4; // Saving r0 into red zone
+ cbSize += (*(WORD*)(&pStubCode[cbSize]) == 0xf8dc ? 4 : 12); // Loading of vtable into r12
+ cbSize += (*(WORD*)(&pStubCode[cbSize]) == 0xf8dc ? 4 : 12); // Loading of target address into r12
+
+ return cbSize + 6 /* Restore r0, bx*/ + 4 /* Slot value */;
+ }
+
+ inline PCODE entryPoint() const { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0] + THUMB_CODE; }
+
+ inline size_t token()
+ {
+ LIMITED_METHOD_CONTRACT;
+ DWORD slot = *(DWORD*)(reinterpret_cast<BYTE*>(this) + size() - 4);
+ return DispatchToken::CreateDispatchToken(slot).To_SIZE_T();
+ }
+
+private:
+ BYTE _entryPoint[0]; // Dynamically sized stub. See Initialize() for more details.
+};
+
+/* VTableCallHolders are the containers for VTableCallStubs, they provide for any alignment of
+stubs as necessary. */
+struct VTableCallHolder
+{
+ void Initialize(unsigned slot);
+
+ VTableCallStub* stub() { LIMITED_METHOD_CONTRACT; return reinterpret_cast<VTableCallStub *>(this); }
+
+ static size_t GetHolderSize(unsigned slot)
+ {
+ STATIC_CONTRACT_WRAPPER;
+ unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE;
+ unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE;
+
+ int indirectionsSize = (offsetOfIndirection > 0xFFF ? 12 : 4) + (offsetAfterIndirection > 0xFFF ? 12 : 4);
+ if (offsetOfIndirection > 0xFFF || offsetAfterIndirection > 0xFFF)
+ indirectionsSize += 8; // Save/restore r0 using red zone
+
+ return 6 + indirectionsSize + 4;
+ }
+
+ static VTableCallHolder* VTableCallHolder::FromVTableCallEntry(PCODE entry)
+ {
+ LIMITED_METHOD_CONTRACT;
+ return (VTableCallHolder*)(entry & ~THUMB_CODE);
+ }
+
+private:
+ // VTableCallStub follows here. It is dynamically sized on allocation because it could
+ // use short/long instruction sizes for the mov/jmp, depending on the slot value.
+};
+
#include <poppack.h>
@@ -324,6 +405,69 @@ ResolveHolder* ResolveHolder::FromResolveEntry(PCODE resolveEntry)
return resolveHolder;
}
+void MovRegImm(BYTE* p, int reg, TADDR imm);
+
+void VTableCallHolder::Initialize(unsigned slot)
+{
+ unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE;
+ unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE;
+ _ASSERTE(MethodTable::VTableIndir_t::isRelative == false /* TODO: NYI */);
+
+ VTableCallStub* pStub = stub();
+ BYTE* p = (BYTE*)(pStub->entryPoint() & ~THUMB_CODE);
+
+ // ldr r12,[r0] : r12 = MethodTable pointer
+ *(UINT32*)p = 0xc000f8d0; p += 4;
+
+ if (offsetOfIndirection > 0xFFF || offsetAfterIndirection > 0xFFF)
+ {
+ // str r0, [sp, #-4]. Save r0 in the red zone
+ *(UINT32*)p = 0x0c04f84d; p += 4;
+ }
+
+ if (offsetOfIndirection > 0xFFF)
+ {
+ // mov r0, offsetOfIndirection
+ MovRegImm(p, 0, offsetOfIndirection); p += 8;
+ // ldr r12, [r12, r0]
+ *(UINT32*)p = 0xc000f85c; p += 4;
+ }
+ else
+ {
+ // ldr r12, [r12 + offset]
+ *(WORD *)p = 0xf8dc; p += 2;
+ *(WORD *)p = (WORD)(offsetOfIndirection | 0xc000); p += 2;
+ }
+
+ if (offsetAfterIndirection > 0xFFF)
+ {
+ // mov r0, offsetAfterIndirection
+ MovRegImm(p, 0, offsetAfterIndirection); p += 8;
+ // ldr r12, [r12, r0]
+ *(UINT32*)p = 0xc000f85c; p += 4;
+ }
+ else
+ {
+ // ldr r12, [r12 + offset]
+ *(WORD *)p = 0xf8dc; p += 2;
+ *(WORD *)p = (WORD)(offsetAfterIndirection | 0xc000); p += 2;
+ }
+
+ if (offsetOfIndirection > 0xFFF || offsetAfterIndirection > 0xFFF)
+ {
+ // ldr r0, [sp, #-4]. Restore r0 from the red zone.
+ *(UINT32*)p = 0x0c04f85d; p += 4;
+ }
+
+ // bx r12
+ *(UINT16*)p = 0x4760; p += 2;
+
+ // Store the slot value here for convenience. Not a real instruction (unreachable anyways)
+ *(UINT32*)p = slot; p += 4;
+
+ _ASSERT(p == (BYTE*)(stub()->entryPoint() & ~THUMB_CODE) + VTableCallHolder::GetHolderSize(slot));
+ _ASSERT(stub()->size() == VTableCallHolder::GetHolderSize(slot));
+}
#endif // DACCESS_COMPILE
@@ -347,23 +491,35 @@ VirtualCallStubManager::StubKind VirtualCallStubManager::predictStubKind(PCODE s
WORD firstWord = *((WORD*) pInstr);
- //Assuming that RESOLVE_STUB_FIRST_WORD & DISPATCH_STUB_FIRST_WORD have same values
- if (firstWord == DISPATCH_STUB_FIRST_WORD)
+ if (*((UINT32*)pInstr) == 0xc000f8d0)
{
+ // Confirm the third word belongs to the vtable stub pattern
WORD thirdWord = ((WORD*)pInstr)[2];
- if(thirdWord == 0xf84d)
+ if (thirdWord == 0xf84d /* Part of str r0, [sp, #-4] */ ||
+ thirdWord == 0xf8dc /* Part of ldr r12, [r12 + offset] */)
+ stubKind = SK_VTABLECALL;
+ }
+
+ if (stubKind == SK_UNKNOWN)
+ {
+ //Assuming that RESOLVE_STUB_FIRST_WORD & DISPATCH_STUB_FIRST_WORD have same values
+ if (firstWord == DISPATCH_STUB_FIRST_WORD)
{
- stubKind = SK_DISPATCH;
+ WORD thirdWord = ((WORD*)pInstr)[2];
+ if (thirdWord == 0xf84d)
+ {
+ stubKind = SK_DISPATCH;
+ }
+ else if (thirdWord == 0xb460)
+ {
+ stubKind = SK_RESOLVE;
+ }
}
- else if(thirdWord == 0xb460)
+ else if (firstWord == 0xf8df)
{
- stubKind = SK_RESOLVE;
+ stubKind = SK_LOOKUP;
}
}
- else if (firstWord == 0xf8df)
- {
- stubKind = SK_LOOKUP;
- }
}
EX_CATCH
{
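As a quick reference (not part of the patch), the short form of the Thumb-2 stub emitted by the ARM Initialize above is:

    ldr r12, [r0]                            ; r12 = MethodTable
    ldr r12, [r12, #offsetOfIndirection]     ; r12 = vtable chunk
    ldr r12, [r12, #offsetAfterIndirection]  ; r12 = target code pointer
    bx  r12
    <slot, UINT32>                           ; data only, read back by token()

When either offset exceeds 0xFFF, the stub additionally spills r0 to the red zone (str r0, [sp, #-4]), materializes the offset into r0 with MovRegImm, uses ldr r12, [r12, r0] for that indirection, and restores r0 before the bx; this is the save/restore accounted for in GetHolderSize.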
diff --git a/src/vm/arm64/virtualcallstubcpu.hpp b/src/vm/arm64/virtualcallstubcpu.hpp
index 3f225186a7..c7b3f75e68 100644
--- a/src/vm/arm64/virtualcallstubcpu.hpp
+++ b/src/vm/arm64/virtualcallstubcpu.hpp
@@ -9,6 +9,7 @@
#define DISPATCH_STUB_FIRST_DWORD 0xf940000d
#define RESOLVE_STUB_FIRST_DWORD 0xF940000C
+#define VTABLECALL_STUB_FIRST_DWORD 0xF9400009
struct ARM64EncodeHelpers
{
@@ -386,6 +387,87 @@ private:
ResolveStub _stub;
};
+
+/*VTableCallStub**************************************************************************************
+These are jump stubs that perform a vtable-based virtual call. These stubs assume that an object is placed
+in the first argument register (this pointer). From there, the stub extracts the MethodTable pointer, followed by the
+vtable pointer, and finally jumps to the target method at a given slot in the vtable.
+*/
+struct VTableCallStub
+{
+ friend struct VTableCallHolder;
+
+ inline size_t size()
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ BYTE* pStubCode = (BYTE *)this;
+
+ int numDataSlots = 0;
+
+ size_t cbSize = 4; // First ldr instruction
+
+ for (int i = 0; i < 2; i++)
+ {
+ if (((*(DWORD*)(&pStubCode[cbSize])) & 0xFFC003FF) == 0xF9400129)
+ {
+ // ldr x9, [x9, #offsetOfIndirection]
+ cbSize += 4;
+ }
+ else
+ {
+ // These 2 instructions used when the indirection offset is >= 0x8000
+ // ldr w10, [PC, #dataOffset]
+ // ldr x9, [x9, x10]
+ numDataSlots++;
+ cbSize += 8;
+ }
+ }
+ return cbSize +
+ 4 + // Last 'br x9' instruction
+ (numDataSlots * 4) + // Data slots containing indirection offset values
+ 4; // Slot value (data storage, not a real instruction)
+ }
+
+ inline PCODE entryPoint() const { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; }
+
+ inline size_t token()
+ {
+ LIMITED_METHOD_CONTRACT;
+ DWORD slot = *(DWORD*)(reinterpret_cast<BYTE*>(this) + size() - 4);
+ return DispatchToken::CreateDispatchToken(slot).To_SIZE_T();
+ }
+
+private:
+ BYTE _entryPoint[0]; // Dynamically sized stub. See Initialize() for more details.
+};
+
+/* VTableCallHolders are the containers for VTableCallStubs, they provide for any alignment of
+stubs as necessary. */
+struct VTableCallHolder
+{
+ void Initialize(unsigned slot);
+
+ VTableCallStub* stub() { LIMITED_METHOD_CONTRACT; return reinterpret_cast<VTableCallStub *>(this); }
+
+ static size_t GetHolderSize(unsigned slot)
+ {
+ STATIC_CONTRACT_WRAPPER;
+ unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE;
+ unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE;
+ int indirectionsCodeSize = (offsetOfIndirection >= 0x8000 ? 8 : 4) + (offsetAfterIndirection >= 0x8000 ? 8 : 4);
+ int indirectionsDataSize = (offsetOfIndirection >= 0x8000 ? 4 : 0) + (offsetAfterIndirection >= 0x8000 ? 4 : 0);
+ return 8 + indirectionsCodeSize + indirectionsDataSize + 4;
+ }
+
+ static VTableCallHolder* VTableCallHolder::FromVTableCallEntry(PCODE entry) { LIMITED_METHOD_CONTRACT; return (VTableCallHolder*)entry; }
+
+private:
+ // VTableCallStub follows here. It is dynamically sized on allocation because it could
+ // use short/long instruction sizes for LDR, depending on the slot value.
+};
+
+
#ifdef DECLARE_DATA
#ifndef DACCESS_COMPILE
@@ -403,6 +485,78 @@ ResolveHolder* ResolveHolder::FromResolveEntry(PCODE resolveEntry)
return resolveHolder;
}
+void VTableCallHolder::Initialize(unsigned slot)
+{
+ unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE;
+ unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE;
+ _ASSERTE(MethodTable::VTableIndir_t::isRelative == false /* TODO: NYI */);
+
+ int indirectionsCodeSize = (offsetOfIndirection >= 0x8000 ? 8 : 4) + (offsetAfterIndirection >= 0x8000 ? 8 : 4);
+ int indirectionsDataSize = (offsetOfIndirection >= 0x8000 ? 4 : 0) + (offsetAfterIndirection >= 0x8000 ? 4 : 0);
+ int codeSize = 8 + indirectionsCodeSize + indirectionsDataSize;
+
+ VTableCallStub* pStub = stub();
+ BYTE* p = (BYTE*)pStub->entryPoint();
+
+ // ldr x9,[x0] : x9 = MethodTable pointer
+ *(UINT32*)p = 0xF9400009; p += 4;
+
+ // moving offset value wrt PC. Currently points to first indirection offset data.
+ uint dataOffset = codeSize - indirectionsDataSize - 4;
+
+ if (offsetOfIndirection >= 0x8000)
+ {
+ // ldr w10, [PC, #dataOffset]
+ *(DWORD*)p = 0x1800000a | ((dataOffset >> 2) << 5); p += 4;
+ // ldr x9, [x9, x10]
+ *(DWORD*)p = 0xf86a6929; p += 4;
+
+ // move to next indirection offset data
+ dataOffset = dataOffset - 8 + 4; // subtract 8 as we have moved PC by 8 and add 4 as next data is at 4 bytes from previous data
+ }
+ else
+ {
+ // ldr x9, [x9, #offsetOfIndirection]
+ *(DWORD*)p = 0xf9400129 | (((UINT32)offsetOfIndirection >> 3) << 10);
+ p += 4;
+ }
+
+ if (offsetAfterIndirection >= 0x8000)
+ {
+ // ldr w10, [PC, #dataOffset]
+ *(DWORD*)p = 0x1800000a | ((dataOffset >> 2) << 5); p += 4;
+ // ldr x9, [x9, x10]
+ *(DWORD*)p = 0xf86a6929; p += 4;
+ }
+ else
+ {
+ // ldr x9, [x9, #offsetAfterIndirection]
+ *(DWORD*)p = 0xf9400129 | (((UINT32)offsetAfterIndirection >> 3) << 10);
+ p += 4;
+ }
+
+ // br x9
+ *(UINT32*)p = 0xd61f0120; p += 4;
+
+ // data labels:
+ if (offsetOfIndirection >= 0x8000)
+ {
+ *(UINT32*)p = (UINT32)offsetOfIndirection;
+ p += 4;
+ }
+ if (offsetAfterIndirection >= 0x8000)
+ {
+ *(UINT32*)p = (UINT32)offsetAfterIndirection;
+ p += 4;
+ }
+
+ // Store the slot value here for convenience. Not a real instruction (unreachable anyways)
+ // NOTE: Not counted in codeSize above.
+ *(UINT32*)p = slot; p += 4;
+
+ _ASSERT(p == (BYTE*)stub()->entryPoint() + VTableCallHolder::GetHolderSize(slot));
+ _ASSERT(stub()->size() == VTableCallHolder::GetHolderSize(slot));
+}
#endif // DACCESS_COMPILE
@@ -435,6 +589,10 @@ VirtualCallStubManager::StubKind VirtualCallStubManager::predictStubKind(PCODE s
{
stubKind = SK_RESOLVE;
}
+ else if (firstDword == VTABLECALL_STUB_FIRST_DWORD) // assembly of first instruction of VTableCallStub : ldr x9, [x0]
+ {
+ stubKind = SK_VTABLECALL;
+ }
else if (firstDword == 0x10000089) // assembly of first instruction of LookupStub : adr x9, _resolveWorkerTarget
{
stubKind = SK_LOOKUP;
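Similarly (not part of the patch), the common-case ARM64 stub emitted above, when both offsets fit in the scaled 12-bit ldr immediate, is:

    ldr x9, [x0]                             ; x9 = MethodTable
    ldr x9, [x9, #offsetOfIndirection]       ; x9 = vtable chunk
    ldr x9, [x9, #offsetAfterIndirection]    ; x9 = target code pointer
    br  x9
    <slot, UINT32>                           ; data only, read back by token()

For an offset >= 0x8000, the corresponding ldr-immediate is replaced by the pair ldr w10, [pc, #dataOffset] / ldr x9, [x9, x10], with the 32-bit offset value stored as a data word after the br (the indirectionsDataSize accounted for in GetHolderSize).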
diff --git a/src/vm/i386/virtualcallstubcpu.hpp b/src/vm/i386/virtualcallstubcpu.hpp
index 67737a2d72..3bdae8c3ec 100644
--- a/src/vm/i386/virtualcallstubcpu.hpp
+++ b/src/vm/i386/virtualcallstubcpu.hpp
@@ -57,9 +57,9 @@ get quickly changed to point to another kind of stub.
*/
struct LookupStub
{
- inline PCODE entryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; }
- inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; }
- inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(LookupStub); }
+ inline PCODE entryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; }
+ inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; }
+ inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(LookupStub); }
private:
friend struct LookupHolder;
@@ -357,6 +357,66 @@ private:
BYTE pad[(sizeof(void*)-((sizeof(ResolveStub))%sizeof(void*))+offsetof(ResolveStub,_token))%sizeof(void*)]; //fill out DWORD
//#endif
};
+
+/*VTableCallStub**************************************************************************************
+These are jump stubs that perform a vtable-based virtual call. These stubs assume that an object is placed
+in the first argument register (this pointer). From there, the stub extracts the MethodTable pointer, followed by the
+vtable pointer, and finally jumps to the target method at a given slot in the vtable.
+*/
+struct VTableCallStub
+{
+ friend struct VTableCallHolder;
+
+ inline size_t size()
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ BYTE* pStubCode = (BYTE *)this;
+
+ size_t cbSize = 2; // First mov instruction
+ cbSize += (pStubCode[cbSize + 1] == 0x80 ? 6 : 3); // Either 8B 80 or 8B 40: mov eax,[eax+offset]
+ cbSize += (pStubCode[cbSize + 1] == 0xa0 ? 6 : 3); // Either FF A0 or FF 60: jmp dword ptr [eax+slot]
+ cbSize += 4; // Slot value (data storage, not a real instruction)
+
+ return cbSize;
+ }
+
+ inline PCODE entryPoint() const { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; }
+
+ inline size_t token()
+ {
+ LIMITED_METHOD_CONTRACT;
+ DWORD slot = *(DWORD*)(reinterpret_cast<BYTE*>(this) + size() - 4);
+ return DispatchToken::CreateDispatchToken(slot).To_SIZE_T();
+ }
+
+private:
+ BYTE _entryPoint[0]; // Dynamically sized stub. See Initialize() for more details.
+};
+
+/* VTableCallHolders are the containers for VTableCallStubs, they provide for any alignment of
+stubs as necessary. */
+struct VTableCallHolder
+{
+ void Initialize(unsigned slot);
+
+ VTableCallStub* stub() { LIMITED_METHOD_CONTRACT; return reinterpret_cast<VTableCallStub *>(this); }
+
+ static size_t GetHolderSize(unsigned slot)
+ {
+ STATIC_CONTRACT_WRAPPER;
+ unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE;
+ unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE;
+ return 2 + (offsetOfIndirection >= 0x80 ? 6 : 3) + (offsetAfterIndirection >= 0x80 ? 6 : 3) + 4;
+ }
+
+ static VTableCallHolder* VTableCallHolder::FromVTableCallEntry(PCODE entry) { LIMITED_METHOD_CONTRACT; return (VTableCallHolder*)entry; }
+
+private:
+ // VTableCallStub follows here. It is dynamically sized on allocation because it could
+ // use short/long instruction sizes for the mov/jmp, depending on the slot value.
+};
+
#include <poppack.h>
@@ -895,6 +955,49 @@ ResolveHolder* ResolveHolder::FromResolveEntry(PCODE resolveEntry)
return resolveHolder;
}
+void VTableCallHolder::Initialize(unsigned slot)
+{
+ unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE;
+ unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE;
+ _ASSERTE(MethodTable::VTableIndir_t::isRelative == false /* TODO: NYI */);
+
+ VTableCallStub* pStub = stub();
+ BYTE* p = (BYTE*)pStub->entryPoint();
+
+ // mov eax,[ecx] : eax = MethodTable pointer
+ *(UINT16*)p = 0x018b; p += 2;
+
+ // mov eax,[eax+vtable offset] : eax = vtable pointer
+ if (offsetOfIndirection >= 0x80)
+ {
+ *(UINT16*)p = 0x808b; p += 2;
+ *(UINT32*)p = offsetOfIndirection; p += 4;
+ }
+ else
+ {
+ *(UINT16*)p = 0x408b; p += 2;
+ *p++ = (BYTE)offsetOfIndirection;
+ }
+
+ // jmp dword ptr [eax+slot]
+ if (offsetAfterIndirection >= 0x80)
+ {
+ *(UINT16*)p = 0xa0ff; p += 2;
+ *(UINT32*)p = offsetAfterIndirection; p += 4;
+ }
+ else
+ {
+ *(UINT16*)p = 0x60ff; p += 2;
+ *p++ = (BYTE)offsetAfterIndirection;
+ }
+
+ // Store the slot value here for convenience. Not a real instruction (unreachable anyways)
+ *(UINT32*)p = slot; p += 4;
+
+ _ASSERT(p == (BYTE*)stub()->entryPoint() + VTableCallHolder::GetHolderSize(slot));
+ _ASSERT(stub()->size() == VTableCallHolder::GetHolderSize(slot));
+}
+
#endif // DACCESS_COMPILE
VirtualCallStubManager::StubKind VirtualCallStubManager::predictStubKind(PCODE stubStartAddress)
@@ -932,6 +1035,10 @@ VirtualCallStubManager::StubKind VirtualCallStubManager::predictStubKind(PCODE s
{
stubKind = SK_RESOLVE;
}
+ else if (firstWord == 0x018b)
+ {
+ stubKind = SK_VTABLECALL;
+ }
else
{
BYTE firstByte = ((BYTE*) stubStartAddress)[0];
diff --git a/src/vm/jitinterface.cpp b/src/vm/jitinterface.cpp
index efaa340151..4423c98f8a 100644
--- a/src/vm/jitinterface.cpp
+++ b/src/vm/jitinterface.cpp
@@ -5155,6 +5155,8 @@ void CEEInfo::getCallInfo(
INDEBUG(memset(pResult, 0xCC, sizeof(*pResult)));
+ pResult->stubLookup.lookupKind.needsRuntimeLookup = false;
+
MethodDesc* pMD = (MethodDesc *)pResolvedToken->hMethod;
TypeHandle th(pResolvedToken->hClass);
@@ -5460,13 +5462,18 @@ void CEEInfo::getCallInfo(
pResult->nullInstanceCheck = TRUE;
}
// Non-interface dispatches go through the vtable.
- // We'll special virtual calls to target methods in the corelib assembly when compiling in R2R mode and generate fragile-NI-like callsites for improved performance. We
- // can do that because today we'll always service the corelib assembly and the runtime in one bundle. Any caller in the corelib version bubble can benefit from this
- // performance optimization.
- else if (!pTargetMD->IsInterface() && (!IsReadyToRunCompilation() || CallerAndCalleeInSystemVersionBubble((MethodDesc*)callerHandle, pTargetMD)))
+ else if (!pTargetMD->IsInterface())
{
pResult->kind = CORINFO_VIRTUALCALL_VTABLE;
pResult->nullInstanceCheck = TRUE;
+
+ // We'll special-case virtual calls to target methods in the corelib assembly when compiling in R2R mode, and generate fragile-NI-like callsites for improved performance. We
+ // can do that because today we'll always service the corelib assembly and the runtime in one bundle. Any caller in the corelib version bubble can benefit from this
+ // performance optimization.
+ if (IsReadyToRunCompilation() && !CallerAndCalleeInSystemVersionBubble((MethodDesc*)callerHandle, pTargetMD))
+ {
+ pResult->kind = CORINFO_VIRTUALCALL_STUB;
+ }
}
else
{
@@ -5504,8 +5511,6 @@ void CEEInfo::getCallInfo(
}
else
{
- pResult->stubLookup.lookupKind.needsRuntimeLookup = false;
-
BYTE * indcell = NULL;
if (!(flags & CORINFO_CALLINFO_KINDONLY) && !isVerifyOnly())
diff --git a/src/vm/loaderallocator.cpp b/src/vm/loaderallocator.cpp
index 7033abf8f2..acb9bcacf3 100644
--- a/src/vm/loaderallocator.cpp
+++ b/src/vm/loaderallocator.cpp
@@ -1027,7 +1027,8 @@ void LoaderAllocator::ActivateManagedTracking()
#endif // !CROSSGEN_COMPILE
-// We don't actually allocate a low frequency heap for collectible types
+// We don't actually allocate a low frequency heap for collectible types.
+// This is carefully tuned to sum up to 16 pages to reduce waste.
#define COLLECTIBLE_LOW_FREQUENCY_HEAP_SIZE (0 * GetOsPageSize())
#define COLLECTIBLE_HIGH_FREQUENCY_HEAP_SIZE (3 * GetOsPageSize())
#define COLLECTIBLE_STUB_HEAP_SIZE GetOsPageSize()
diff --git a/src/vm/prestub.cpp b/src/vm/prestub.cpp
index b614e341a2..78e9f22e17 100644
--- a/src/vm/prestub.cpp
+++ b/src/vm/prestub.cpp
@@ -2286,20 +2286,24 @@ EXTERN_C PCODE STDCALL ExternalMethodFixupWorker(TransitionBlock * pTransitionBl
// Get the stub manager for this module
VirtualCallStubManager *pMgr = pModule->GetLoaderAllocator()->GetVirtualCallStubManager();
- DispatchToken token;
- if (pMT->IsInterface())
- token = pMT->GetLoaderAllocator()->GetDispatchToken(pMT->GetTypeID(), slot);
- else
- token = DispatchToken::CreateDispatchToken(slot);
-
OBJECTREF *protectedObj = pEMFrame->GetThisPtr();
_ASSERTE(protectedObj != NULL);
if (*protectedObj == NULL) {
COMPlusThrow(kNullReferenceException);
}
-
- StubCallSite callSite(pIndirection, pEMFrame->GetReturnAddress());
- pCode = pMgr->ResolveWorker(&callSite, protectedObj, token, VirtualCallStubManager::SK_LOOKUP);
+
+ DispatchToken token;
+ if (pMT->IsInterface() || MethodTable::VTableIndir_t::isRelative)
+ {
+ token = pMT->GetLoaderAllocator()->GetDispatchToken(pMT->GetTypeID(), slot);
+ StubCallSite callSite(pIndirection, pEMFrame->GetReturnAddress());
+ pCode = pMgr->ResolveWorker(&callSite, protectedObj, token, VirtualCallStubManager::SK_LOOKUP);
+ }
+ else
+ {
+ pCode = pMgr->GetVTableCallStub(slot);
+ *EnsureWritableExecutablePages((TADDR *)pIndirection) = pCode;
+ }
_ASSERTE(pCode != NULL);
}
else
diff --git a/src/vm/virtualcallstub.cpp b/src/vm/virtualcallstub.cpp
index 657200b2ee..78a37483b5 100644
--- a/src/vm/virtualcallstub.cpp
+++ b/src/vm/virtualcallstub.cpp
@@ -36,6 +36,7 @@ UINT32 g_site_write_mono = 0; //# of call site backpatch writes to poi
UINT32 g_stub_lookup_counter = 0; //# of lookup stubs
UINT32 g_stub_mono_counter = 0; //# of dispatch stubs
UINT32 g_stub_poly_counter = 0; //# of resolve stubs
+UINT32 g_stub_vtable_counter = 0; //# of vtable call stubs
UINT32 g_stub_space = 0; //# of bytes of stubs
UINT32 g_reclaim_counter = 0; //# of times a ReclaimAll was performed
@@ -239,6 +240,8 @@ void VirtualCallStubManager::LoggingDump()
WriteFile (g_hStubLogFile, szPrintStr, (DWORD) strlen(szPrintStr), &dwWriteByte, NULL);
sprintf_s(szPrintStr, COUNTOF(szPrintStr), OUTPUT_FORMAT_INT, "stub_poly_counter", g_stub_poly_counter);
WriteFile (g_hStubLogFile, szPrintStr, (DWORD) strlen(szPrintStr), &dwWriteByte, NULL);
+ sprintf_s(szPrintStr, COUNTOF(szPrintStr), OUTPUT_FORMAT_INT, "stub_vtable_counter", g_stub_vtable_counter);
+ WriteFile(g_hStubLogFile, szPrintStr, (DWORD)strlen(szPrintStr), &dwWriteByte, NULL);
sprintf_s(szPrintStr, COUNTOF(szPrintStr), OUTPUT_FORMAT_INT, "stub_space", g_stub_space);
WriteFile (g_hStubLogFile, szPrintStr, (DWORD) strlen(szPrintStr), &dwWriteByte, NULL);
@@ -501,6 +504,7 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA
NewHolder<BucketTable> resolvers_holder(new BucketTable(CALL_STUB_MIN_BUCKETS));
NewHolder<BucketTable> dispatchers_holder(new BucketTable(CALL_STUB_MIN_BUCKETS*2));
NewHolder<BucketTable> lookups_holder(new BucketTable(CALL_STUB_MIN_BUCKETS));
+ NewHolder<BucketTable> vtableCallers_holder(new BucketTable(CALL_STUB_MIN_BUCKETS));
NewHolder<BucketTable> cache_entries_holder(new BucketTable(CALL_STUB_MIN_BUCKETS));
//
@@ -521,6 +525,8 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA
DWORD dispatch_heap_commit_size;
DWORD resolve_heap_reserve_size;
DWORD resolve_heap_commit_size;
+ DWORD vtable_heap_reserve_size;
+ DWORD vtable_heap_commit_size;
//
// Setup an expected number of items to commit and reserve
@@ -538,6 +544,7 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA
lookup_heap_commit_size = 24; lookup_heap_reserve_size = 250;
dispatch_heap_commit_size = 24; dispatch_heap_reserve_size = 600;
resolve_heap_commit_size = 24; resolve_heap_reserve_size = 300;
+ vtable_heap_commit_size = 24; vtable_heap_reserve_size = 600;
}
else if (parentDomain->IsSharedDomain())
{
@@ -550,6 +557,7 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA
lookup_heap_commit_size = 24; lookup_heap_reserve_size = 200;
dispatch_heap_commit_size = 24; dispatch_heap_reserve_size = 450;
resolve_heap_commit_size = 24; resolve_heap_reserve_size = 200;
+ vtable_heap_commit_size = 24; vtable_heap_reserve_size = 450;
}
else
{
@@ -559,6 +567,7 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA
lookup_heap_commit_size = 8; lookup_heap_reserve_size = 8;
dispatch_heap_commit_size = 8; dispatch_heap_reserve_size = 8;
resolve_heap_commit_size = 8; resolve_heap_reserve_size = 8;
+ vtable_heap_commit_size = 8; vtable_heap_reserve_size = 8;
}
#ifdef _WIN64
@@ -571,7 +580,7 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA
#endif
//
- // Convert the number of items into a size in bytes to commit abd reserve
+ // Convert the number of items into a size in bytes to commit and reserve
//
indcell_heap_reserve_size *= sizeof(void *);
indcell_heap_commit_size *= sizeof(void *);
@@ -593,6 +602,9 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA
resolve_heap_reserve_size *= sizeof(ResolveHolder);
resolve_heap_commit_size *= sizeof(ResolveHolder);
+ vtable_heap_reserve_size *= static_cast<DWORD>(VTableCallHolder::GetHolderSize(0));
+ vtable_heap_commit_size *= static_cast<DWORD>(VTableCallHolder::GetHolderSize(0));
+
//
// Align up all of the commit and reserve sizes
//
@@ -611,6 +623,9 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA
resolve_heap_reserve_size = (DWORD) ALIGN_UP(resolve_heap_reserve_size, GetOsPageSize());
resolve_heap_commit_size = (DWORD) ALIGN_UP(resolve_heap_commit_size, GetOsPageSize());
+ vtable_heap_reserve_size = (DWORD) ALIGN_UP(vtable_heap_reserve_size, GetOsPageSize());
+ vtable_heap_commit_size = (DWORD) ALIGN_UP(vtable_heap_commit_size, GetOsPageSize());
+
BYTE * initReservedMem = NULL;
if (!isCollectible)
@@ -619,7 +634,8 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA
cache_entry_heap_reserve_size +
lookup_heap_reserve_size +
dispatch_heap_reserve_size +
- resolve_heap_reserve_size;
+ resolve_heap_reserve_size +
+ vtable_heap_reserve_size;
DWORD dwTotalReserveMemSize = (DWORD) ALIGN_UP(dwTotalReserveMemSizeCalc, VIRTUAL_ALLOC_RESERVE_GRANULARITY);
@@ -629,13 +645,14 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA
if (dwWastedReserveMemSize != 0)
{
DWORD cWastedPages = dwWastedReserveMemSize / GetOsPageSize();
- DWORD cPagesPerHeap = cWastedPages / 5;
- DWORD cPagesRemainder = cWastedPages % 5; // We'll throw this at the resolve heap
+ DWORD cPagesPerHeap = cWastedPages / 6;
+ DWORD cPagesRemainder = cWastedPages % 6; // We'll throw this at the resolve heap
indcell_heap_reserve_size += cPagesPerHeap * GetOsPageSize();
cache_entry_heap_reserve_size += cPagesPerHeap * GetOsPageSize();
lookup_heap_reserve_size += cPagesPerHeap * GetOsPageSize();
dispatch_heap_reserve_size += cPagesPerHeap * GetOsPageSize();
+ vtable_heap_reserve_size += cPagesPerHeap * GetOsPageSize();
resolve_heap_reserve_size += cPagesPerHeap * GetOsPageSize();
resolve_heap_reserve_size += cPagesRemainder * GetOsPageSize();
}
@@ -644,7 +661,8 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA
cache_entry_heap_reserve_size +
lookup_heap_reserve_size +
dispatch_heap_reserve_size +
- resolve_heap_reserve_size) ==
+ resolve_heap_reserve_size +
+ vtable_heap_reserve_size) ==
dwTotalReserveMemSize);
}
@@ -672,12 +690,20 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA
resolve_heap_reserve_size = GetOsPageSize();
resolve_heap_commit_size = GetOsPageSize();
+ // Heap for the collectible case is carefully tuned to sum up to 16 pages. Today, we only use the
+ // vtable jump stubs in the R2R scenario, which is unlikely to be loaded in the collectible context,
+ // so we'll keep the heap numbers at zero for now. If we ever use vtable stubs in the collectible
+ // scenario, we'll just allocate the memory on demand.
+ vtable_heap_reserve_size = 0;
+ vtable_heap_commit_size = 0;
+
#ifdef _DEBUG
DWORD dwTotalReserveMemSizeCalc = indcell_heap_reserve_size +
cache_entry_heap_reserve_size +
lookup_heap_reserve_size +
dispatch_heap_reserve_size +
- resolve_heap_reserve_size;
+ resolve_heap_reserve_size +
+ vtable_heap_reserve_size;
#endif
DWORD dwActualVSDSize = 0;
@@ -756,6 +782,19 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA
initReservedMem += resolve_heap_reserve_size;
+ // Hot memory, Writable, Execute, write exactly once
+ NewHolder<LoaderHeap> vtable_heap_holder(
+ new LoaderHeap(vtable_heap_reserve_size, vtable_heap_commit_size,
+ initReservedMem, vtable_heap_reserve_size,
+#ifdef ENABLE_PERF_COUNTERS
+ &(GetPerfCounters().m_Loading.cbLoaderHeapSize),
+#else
+ NULL,
+#endif
+ &vtable_rangeList, TRUE));
+
+ initReservedMem += vtable_heap_reserve_size;
+
// Allocate the initial counter block
NewHolder<counter_block> m_counters_holder(new counter_block);
@@ -767,12 +806,13 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA
lookup_heap = lookup_heap_holder; lookup_heap_holder.SuppressRelease();
dispatch_heap = dispatch_heap_holder; dispatch_heap_holder.SuppressRelease();
resolve_heap = resolve_heap_holder; resolve_heap_holder.SuppressRelease();
+ vtable_heap = vtable_heap_holder; vtable_heap_holder.SuppressRelease();
cache_entry_heap = cache_entry_heap_holder; cache_entry_heap_holder.SuppressRelease();
resolvers = resolvers_holder; resolvers_holder.SuppressRelease();
dispatchers = dispatchers_holder; dispatchers_holder.SuppressRelease();
lookups = lookups_holder; lookups_holder.SuppressRelease();
-
+ vtableCallers = vtableCallers_holder; vtableCallers_holder.SuppressRelease();
cache_entries = cache_entries_holder; cache_entries_holder.SuppressRelease();
m_counters = m_counters_holder; m_counters_holder.SuppressRelease();
@@ -832,11 +872,13 @@ VirtualCallStubManager::~VirtualCallStubManager()
if (lookup_heap) { delete lookup_heap; lookup_heap = NULL;}
if (dispatch_heap) { delete dispatch_heap; dispatch_heap = NULL;}
if (resolve_heap) { delete resolve_heap; resolve_heap = NULL;}
+ if (vtable_heap) { delete vtable_heap; vtable_heap = NULL;}
if (cache_entry_heap) { delete cache_entry_heap; cache_entry_heap = NULL;}
if (resolvers) { delete resolvers; resolvers = NULL;}
if (dispatchers) { delete dispatchers; dispatchers = NULL;}
if (lookups) { delete lookups; lookups = NULL;}
+ if (vtableCallers) { delete vtableCallers; vtableCallers = NULL;}
if (cache_entries) { delete cache_entries; cache_entries = NULL;}
// Now get rid of the memory taken by the counter_blocks
@@ -1075,6 +1117,8 @@ BOOL VirtualCallStubManager::DoTraceStub(PCODE stubStartAddress, TraceDestinatio
{
LIMITED_METHOD_CONTRACT;
+ LOG((LF_CORDB, LL_EVERYTHING, "VirtualCallStubManager::DoTraceStub called\n"));
+
_ASSERTE(CheckIsStub_Internal(stubStartAddress));
#ifdef FEATURE_PREJIT
@@ -1191,6 +1235,68 @@ PCODE VirtualCallStubManager::GetCallStub(TypeHandle ownerType, DWORD slot)
RETURN (stub);
}
+PCODE VirtualCallStubManager::GetVTableCallStub(DWORD slot)
+{
+ CONTRACT(PCODE) {
+ THROWS;
+ GC_TRIGGERS;
+ MODE_ANY;
+ INJECT_FAULT(COMPlusThrowOM(););
+ PRECONDITION(!MethodTable::VTableIndir_t::isRelative /* Not yet supported */);
+ POSTCONDITION(RETVAL != NULL);
+ } CONTRACT_END;
+
+ GCX_COOP(); // This is necessary for BucketTable synchronization
+
+ PCODE stub = CALL_STUB_EMPTY_ENTRY;
+
+ VTableCallEntry entry;
+ Prober probe(&entry);
+ if (vtableCallers->SetUpProber(DispatchToken::CreateDispatchToken(slot).To_SIZE_T(), 0, &probe))
+ {
+ if ((stub = (PCODE)(vtableCallers->Find(&probe))) == CALL_STUB_EMPTY_ENTRY)
+ {
+ VTableCallHolder *pHolder = GenerateVTableCallStub(slot);
+ stub = (PCODE)(vtableCallers->Add((size_t)(pHolder->stub()->entryPoint()), &probe));
+ }
+ }
+
+ _ASSERTE(stub != CALL_STUB_EMPTY_ENTRY);
+ RETURN(stub);
+}
+
+VTableCallHolder* VirtualCallStubManager::GenerateVTableCallStub(DWORD slot)
+{
+ CONTRACT(VTableCallHolder*) {
+ THROWS;
+ GC_TRIGGERS;
+ MODE_ANY;
+ INJECT_FAULT(COMPlusThrowOM(););
+ PRECONDITION(!MethodTable::VTableIndir_t::isRelative /* Not yet supported */);
+ POSTCONDITION(RETVAL != NULL);
+ } CONTRACT_END;
+
+ //allocate from the requisite heap and copy the template over it.
+ VTableCallHolder * pHolder = (VTableCallHolder*)(void*)vtable_heap->AllocAlignedMem(VTableCallHolder::GetHolderSize(slot), CODE_SIZE_ALIGN);
+
+ pHolder->Initialize(slot);
+ ClrFlushInstructionCache(pHolder->stub(), pHolder->stub()->size());
+
+ AddToCollectibleVSDRangeList(pHolder);
+
+ //incr our counters
+ stats.stub_vtable_counter++;
+ stats.stub_space += (UINT32)pHolder->stub()->size();
+ LOG((LF_STUBS, LL_INFO10000, "GenerateVTableCallStub for slot " FMT_ADDR " at " FMT_ADDR "\n",
+ DBG_ADDR(slot), DBG_ADDR(pHolder->stub())));
+
+#ifdef FEATURE_PERFMAP
+ PerfMap::LogStubs(__FUNCTION__, "GenerateVTableCallStub", (PCODE)pHolder->stub(), pHolder->stub()->size());
+#endif
+
+ RETURN(pHolder);
+}
+
#ifdef FEATURE_PREJIT
extern "C" PCODE STDCALL StubDispatchFixupWorker(TransitionBlock * pTransitionBlock,
TADDR siteAddrForRegisterIndirect,
@@ -1457,6 +1563,12 @@ size_t VirtualCallStubManager::GetTokenFromStubQuick(VirtualCallStubManager * pM
LookupHolder * lookupHolder = LookupHolder::FromLookupEntry(stub);
return lookupHolder->stub()->token();
}
+ else if (kind == SK_VTABLECALL)
+ {
+ _ASSERTE(pMgr->isVTableCallStub(stub));
+ VTableCallStub * vtableStub = (VTableCallStub *)PCODEToPINSTR(stub);
+ return vtableStub->token();
+ }
_ASSERTE(!"Should not get here.");
@@ -3023,12 +3135,14 @@ void VirtualCallStubManager::LogStats()
resolvers->LogStats();
dispatchers->LogStats();
lookups->LogStats();
+ vtableCallers->LogStats();
cache_entries->LogStats();
g_site_counter += stats.site_counter;
g_stub_lookup_counter += stats.stub_lookup_counter;
g_stub_poly_counter += stats.stub_poly_counter;
g_stub_mono_counter += stats.stub_mono_counter;
+ g_stub_vtable_counter += stats.stub_vtable_counter;
g_site_write += stats.site_write;
g_site_write_poly += stats.site_write_poly;
g_site_write_mono += stats.site_write_mono;
@@ -3043,6 +3157,7 @@ void VirtualCallStubManager::LogStats()
stats.stub_lookup_counter = 0;
stats.stub_poly_counter = 0;
stats.stub_mono_counter = 0;
+ stats.stub_vtable_counter = 0;
stats.site_write = 0;
stats.site_write_poly = 0;
stats.site_write_mono = 0;
@@ -3369,6 +3484,7 @@ void BucketTable::Reclaim()
// dispatchers token the expected MT
// resolver token the stub calling convention
// cache_entries token the expected method table
+// vtableCallers token unused (zero)
//
BOOL BucketTable::SetUpProber(size_t keyA, size_t keyB, Prober *prober)
{
diff --git a/src/vm/virtualcallstub.h b/src/vm/virtualcallstub.h
index b8984eab3c..1d22e467c4 100644
--- a/src/vm/virtualcallstub.h
+++ b/src/vm/virtualcallstub.h
@@ -38,6 +38,7 @@ class VirtualCallStubManagerManager;
struct LookupHolder;
struct DispatchHolder;
struct ResolveHolder;
+struct VTableCallHolder;
/////////////////////////////////////////////////////////////////////////////////////
// Forward function declarations
@@ -238,6 +239,9 @@ public:
PCODE GetCallStub(TypeHandle ownerType, MethodDesc *pMD);
PCODE GetCallStub(TypeHandle ownerType, DWORD slot);
+ // Stubs for vtable-based virtual calls with no lookups
+ PCODE GetVTableCallStub(DWORD slot);
+
// Generate an fresh indirection cell.
BYTE* GenerateStubIndirection(PCODE stub, BOOL fUseRecycledCell = FALSE);
@@ -272,6 +276,7 @@ public:
resolve_rangeList(),
dispatch_rangeList(),
cache_entry_rangeList(),
+ vtable_rangeList(),
parentDomain(NULL),
isCollectible(false),
m_initialReservedMemForHeaps(NULL),
@@ -308,6 +313,7 @@ public:
SK_LOOKUP, // Lookup Stubs are SLOW stubs that simply call into the runtime to do all work.
SK_DISPATCH, // Dispatch Stubs have a fast check for one type otherwise jumps to runtime. Works for monomorphic sites
SK_RESOLVE, // Resolve Stubs do a hash lookup before fallling back to the runtime. Works for polymorphic sites.
+ SK_VTABLECALL, // Stub that jumps to a target method using vtable-based indirections. Works for non-interface calls.
SK_BREAKPOINT
};
@@ -346,6 +352,11 @@ public:
if (isResolvingStub(stubStartAddress))
return SK_RESOLVE;
}
+ else if (predictedKind == SK_VTABLECALL)
+ {
+ if (isVTableCallStub(stubStartAddress))
+ return SK_VTABLECALL;
+ }
// This is the slow case. If the predict returned SK_UNKNOWN, SK_BREAKPOINT,
// or the predict was found to be incorrect when checked against the RangeLists
@@ -356,6 +367,8 @@ public:
return SK_LOOKUP;
else if (isResolvingStub(stubStartAddress))
return SK_RESOLVE;
+ else if (isVTableCallStub(stubStartAddress))
+ return SK_VTABLECALL;
return SK_UNKNOWN;
}
@@ -392,6 +405,14 @@ public:
return GetLookupRangeList()->IsInRange(stubStartAddress);
}
+ BOOL isVTableCallStub(PCODE stubStartAddress)
+ {
+ WRAPPER_NO_CONTRACT;
+ SUPPORTS_DAC;
+
+ return GetVTableCallRangeList()->IsInRange(stubStartAddress);
+ }
+
static BOOL isDispatchingStubStatic(PCODE addr)
{
WRAPPER_NO_CONTRACT;
@@ -416,11 +437,20 @@ public:
return stubKind == SK_LOOKUP;
}
+ static BOOL isVtableCallStubStatic(PCODE addr)
+ {
+ WRAPPER_NO_CONTRACT;
+ StubKind stubKind;
+ FindStubManager(addr, &stubKind);
+ return stubKind == SK_VTABLECALL;
+ }
+
//use range lists to track the chunks of memory that are part of each heap
LockedRangeList lookup_rangeList;
LockedRangeList resolve_rangeList;
LockedRangeList dispatch_rangeList;
LockedRangeList cache_entry_rangeList;
+ LockedRangeList vtable_rangeList;
// Get dac-ized pointers to rangelist.
RangeList* GetLookupRangeList()
@@ -450,6 +480,12 @@ public:
TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, cache_entry_rangeList);
return PTR_RangeList(addr);
}
+ RangeList* GetVTableCallRangeList()
+ {
+ SUPPORTS_DAC;
+ TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, vtable_rangeList);
+ return PTR_RangeList(addr);
+ }
private:
@@ -475,6 +511,8 @@ private:
LookupHolder *GenerateLookupStub(PCODE addrOfResolver,
size_t dispatchToken);
+ VTableCallHolder* GenerateVTableCallStub(DWORD slot);
+
template <typename STUB_HOLDER>
void AddToCollectibleVSDRangeList(STUB_HOLDER *holder)
{
@@ -687,6 +725,7 @@ private:
PTR_LoaderHeap lookup_heap; // lookup stubs go here
PTR_LoaderHeap dispatch_heap; // dispatch stubs go here
PTR_LoaderHeap resolve_heap; // resolve stubs go here
+ PTR_LoaderHeap vtable_heap; // vtable-based jump stubs go here
#ifdef _TARGET_AMD64_
// When we layout the stub heaps, we put them close together in a sequential order
@@ -707,6 +746,7 @@ private:
BucketTable * cache_entries; // hash table of dispatch token/target structs for dispatch cache
BucketTable * dispatchers; // hash table of dispatching stubs keyed by tokens/actualtype
BucketTable * resolvers; // hash table of resolvers keyed by tokens/resolverstub
+ BucketTable * vtableCallers; // hash table of vtable call stubs keyed by slot values
// This structure is used to keep track of the fail counters.
// We only need one fail counter per ResolveStub,
@@ -758,6 +798,7 @@ public:
UINT32 stub_lookup_counter; //# of lookup stubs
UINT32 stub_poly_counter; //# of resolve stubs
UINT32 stub_mono_counter; //# of dispatch stubs
+ UINT32 stub_vtable_counter; //# of vtable call stubs
UINT32 site_write; //# of call site backpatch writes
UINT32 site_write_poly; //# of call site backpatch writes to point to resolve stubs
UINT32 site_write_mono; //# of call site backpatch writes to point to dispatch stubs
@@ -1061,6 +1102,44 @@ private:
};
#endif // USES_LOOKUP_STUBS
+class VTableCallEntry : public Entry
+{
+public:
+ //Creates an entry that wraps vtable call stub
+ VTableCallEntry(size_t s)
+ {
+ LIMITED_METHOD_CONTRACT;
+ _ASSERTE(VirtualCallStubManager::isVtableCallStubStatic((PCODE)s));
+ stub = (VTableCallStub*)s;
+ }
+
+ //default constructor to allow stack and inline allocation of vtable call entries
+ VTableCallEntry() { LIMITED_METHOD_CONTRACT; stub = NULL; }
+
+ //implementations of abstract class Entry
+ BOOL Equals(size_t keyA, size_t keyB)
+ {
+ WRAPPER_NO_CONTRACT; return stub && (keyA == KeyA()) && (keyB == KeyB());
+ }
+
+ size_t KeyA() { WRAPPER_NO_CONTRACT; return Token(); }
+ size_t KeyB() { WRAPPER_NO_CONTRACT; return (size_t)0; }
+
+ void SetContents(size_t contents)
+ {
+ LIMITED_METHOD_CONTRACT;
+ _ASSERTE(VirtualCallStubManager::isVtableCallStubStatic((PCODE)contents));
+ stub = VTableCallHolder::FromVTableCallEntry((PCODE)contents)->stub();
+ }
+
+ //extract the token of the underlying vtable call stub
+
+ inline size_t Token() { LIMITED_METHOD_CONTRACT; return stub ? stub->token() : 0; }
+
+private:
+ VTableCallStub* stub; //the stub the entry is wrapping
+};
+
/**********************************************************************************************
ResolveCacheEntry wraps a ResolveCacheElem and provides lookup functionality for entries that
were created that may be added to the ResolveCache
diff --git a/src/zap/zapinfo.cpp b/src/zap/zapinfo.cpp
index 8efeedded6..e0acd819c9 100644
--- a/src/zap/zapinfo.cpp
+++ b/src/zap/zapinfo.cpp
@@ -2145,29 +2145,28 @@ void ZapInfo::getCallInfo(CORINFO_RESOLVED_TOKEN * pResolvedToken,
return;
}
-#ifdef FEATURE_READYTORUN_COMPILER
if (IsReadyToRunCompilation())
{
ZapImport * pImport = m_pImage->GetImportTable()->GetStubDispatchCell(pResolvedToken);
pResult->stubLookup.constLookup.accessType = IAT_PVALUE;
pResult->stubLookup.constLookup.addr = pImport;
- break;
}
-#endif
-
- CORINFO_CLASS_HANDLE calleeOwner = pResolvedToken->hClass;
- CORINFO_METHOD_HANDLE callee = pResolvedToken->hMethod;
- _ASSERTE(callee == pResult->hMethod);
+ else
+ {
- //
- // Create the indirection cell
- //
- pTarget = m_pImage->GetImportTable()->GetStubDispatchCell(calleeOwner, callee);
+ CORINFO_CLASS_HANDLE calleeOwner = pResolvedToken->hClass;
+ CORINFO_METHOD_HANDLE callee = pResolvedToken->hMethod;
+ _ASSERTE(callee == pResult->hMethod);
- pResult->stubLookup.constLookup.accessType = IAT_PVALUE;
+ //
+ // Create the indirection cell
+ //
+ pTarget = m_pImage->GetImportTable()->GetStubDispatchCell(calleeOwner, callee);
- pResult->stubLookup.constLookup.addr = pTarget;
+ pResult->stubLookup.constLookup.accessType = IAT_PVALUE;
+ pResult->stubLookup.constLookup.addr = pTarget;
+ }
}
break;
@@ -2183,7 +2182,6 @@ void ZapInfo::getCallInfo(CORINFO_RESOLVED_TOKEN * pResolvedToken,
return;
case CORINFO_CALL:
-#ifdef FEATURE_READYTORUN_COMPILER
if (IsReadyToRunCompilation())
{
// Constrained token is not interesting with this transforms
@@ -2207,12 +2205,11 @@ void ZapInfo::getCallInfo(CORINFO_RESOLVED_TOKEN * pResolvedToken,
pResult->codePointerLookup.constLookup.accessType = IAT_PVALUE;
pResult->codePointerLookup.constLookup.addr = pImport;
}
-#endif
break;
case CORINFO_VIRTUALCALL_VTABLE:
- // READYTORUN: FUTURE: support for vtable-based calls (currently, only calls within the CoreLib version bubble is supported, and the codegen we generate
- // is the same as the fragile NI (because CoreLib and the runtime will always be updated together anyways - this is a special case)
+ // Only calls within the CoreLib version bubble support fragile NI codegen with vtable based calls, for better performance (because
+ // CoreLib and the runtime will always be updated together anyways - this is a special case)
break;
case CORINFO_VIRTUALCALL_LDVIRTFTN:
@@ -2240,7 +2237,6 @@ void ZapInfo::getCallInfo(CORINFO_RESOLVED_TOKEN * pResolvedToken,
break;
}
-#ifdef FEATURE_READYTORUN_COMPILER
if (IsReadyToRunCompilation() && pResult->sig.hasTypeArg())
{
if (pResult->exactContextNeedsRuntimeLookup)
@@ -2272,8 +2268,8 @@ void ZapInfo::getCallInfo(CORINFO_RESOLVED_TOKEN * pResolvedToken,
AppendConditionalImport(pImport);
}
}
-#endif
}
+
BOOL ZapInfo::canAccessFamily(CORINFO_METHOD_HANDLE hCaller,
CORINFO_CLASS_HANDLE hInstanceType)
{
@@ -2285,7 +2281,6 @@ BOOL ZapInfo::isRIDClassDomainID (CORINFO_CLASS_HANDLE cls)
return m_pEEJitInfo->isRIDClassDomainID(cls);
}
-
unsigned ZapInfo::getClassDomainID (CORINFO_CLASS_HANDLE cls, void **ppIndirection)
{
_ASSERTE(ppIndirection != NULL);