-rw-r--r--  src/vm/amd64/virtualcallstubcpu.hpp | 113
-rw-r--r--  src/vm/arm/stubs.cpp                |  20
-rw-r--r--  src/vm/arm/virtualcallstubcpu.hpp   | 182
-rw-r--r--  src/vm/arm64/virtualcallstubcpu.hpp | 158
-rw-r--r--  src/vm/i386/virtualcallstubcpu.hpp  | 113
-rw-r--r--  src/vm/jitinterface.cpp             |  17
-rw-r--r--  src/vm/loaderallocator.cpp          |   3
-rw-r--r--  src/vm/prestub.cpp                  |  22
-rw-r--r--  src/vm/virtualcallstub.cpp          | 130
-rw-r--r--  src/vm/virtualcallstub.h            |  79
-rw-r--r--  src/zap/zapinfo.cpp                 |  35
11 files changed, 802 insertions(+), 70 deletions(-)
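Before the per-architecture hunks below: every VTableCallStub introduced in this change performs the same three loads regardless of architecture — object to MethodTable pointer, MethodTable to vtable indirection chunk, chunk to target code address — and then jumps to the target. The following sketch is only a plain-C++ illustration of that dereference chain, not the emitted machine code; the two offset parameters are placeholders for the values the patch derives from MethodTable::GetVtableOffset(), GetIndexOfVtableIndirection(slot) and GetIndexAfterVtableIndirection(slot).

#include <cstddef>
#include <cstdint>

using PCODE = uintptr_t;

// Illustration only: the logical work a VTableCallStub does once the two
// offsets for a given slot are known. The real stubs bake these offsets
// directly into their mov/ldr encodings at creation time and end with a jump
// rather than a return (see the VTableCallHolder::Initialize hunks below).
PCODE ResolveVTableTarget(void*  thisPtr,
                          size_t offsetOfIndirection,     // offset of the vtable chunk pointer inside the MethodTable
                          size_t offsetAfterIndirection)  // offset of the slot within that chunk
{
    // 1. The first pointer-sized field of the object is its MethodTable pointer.
    uint8_t* pMT = *reinterpret_cast<uint8_t**>(thisPtr);

    // 2. Load the vtable indirection (chunk) pointer out of the MethodTable.
    uint8_t* pChunk = *reinterpret_cast<uint8_t**>(pMT + offsetOfIndirection);

    // 3. Load the target code address from the chunk; the stub then jumps to it.
    return *reinterpret_cast<PCODE*>(pChunk + offsetAfterIndirection);
}

Because the offsets are encoded into the instructions themselves, the stubs are dynamically sized, which is why each architecture's VTableCallStub::size() re-derives the length by inspecting the emitted bytes.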
diff --git a/src/vm/amd64/virtualcallstubcpu.hpp b/src/vm/amd64/virtualcallstubcpu.hpp index 1bfe858d5f..7547559df0 100644 --- a/src/vm/amd64/virtualcallstubcpu.hpp +++ b/src/vm/amd64/virtualcallstubcpu.hpp @@ -63,7 +63,7 @@ struct LookupStub inline PCODE entryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; } inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; } - inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(LookupStub); } + inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(LookupStub); } private: friend struct LookupHolder; @@ -430,6 +430,65 @@ struct ResolveHolder private: ResolveStub _stub; }; + +/*VTableCallStub************************************************************************************** +These are jump stubs that perform a vtable-base virtual call. These stubs assume that an object is placed +in the first argument register (this pointer). From there, the stub extracts the MethodTable pointer, followed by the +vtable pointer, and finally jumps to the target method at a given slot in the vtable. +*/ +struct VTableCallStub +{ + friend struct VTableCallHolder; + + inline size_t size() + { + LIMITED_METHOD_CONTRACT; + + BYTE* pStubCode = (BYTE *)this; + + size_t cbSize = 3; // First mov instruction + cbSize += (pStubCode[cbSize + 2] == 0x80 ? 7 : 4); // Either 48 8B 80 or 48 8B 40: mov rax,[rax+offset] + cbSize += (pStubCode[cbSize + 1] == 0xa0 ? 6 : 3); // Either FF A0 or FF 60: jmp qword ptr [rax+slot] + cbSize += 4; // Slot value (data storage, not a real instruction) + + return cbSize; + } + + inline PCODE entryPoint() const { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; } + + inline size_t token() + { + LIMITED_METHOD_CONTRACT; + DWORD slot = *(DWORD*)(reinterpret_cast<BYTE*>(this) + size() - 4); + return DispatchToken::CreateDispatchToken(slot).To_SIZE_T(); + } + +private: + BYTE _entryPoint[0]; // Dynamically sized stub. See Initialize() for more details. +}; + +/* VTableCallHolders are the containers for VTableCallStubs, they provide for any alignment of +stubs as necessary. */ +struct VTableCallHolder +{ + void Initialize(unsigned slot); + + VTableCallStub* stub() { LIMITED_METHOD_CONTRACT; return reinterpret_cast<VTableCallStub *>(this); } + + static size_t GetHolderSize(unsigned slot) + { + STATIC_CONTRACT_WRAPPER; + unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE; + unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE; + return 3 + (offsetOfIndirection >= 0x80 ? 7 : 4) + (offsetAfterIndirection >= 0x80 ? 6 : 3) + 4; + } + + static VTableCallHolder* VTableCallHolder::FromVTableCallEntry(PCODE entry) { LIMITED_METHOD_CONTRACT; return (VTableCallHolder*)entry; } + +private: + // VTableCallStub follows here. It is dynamically sized on allocation because it could + // use short/long instruction sizes for mov/jmp, depending on the slot value. 
+}; #pragma pack(pop) #ifdef DECLARE_DATA @@ -732,6 +791,54 @@ ResolveHolder* ResolveHolder::FromResolveEntry(PCODE resolveEntry) return resolveHolder; } +void VTableCallHolder::Initialize(unsigned slot) +{ + unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE; + unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE; + _ASSERTE(MethodTable::VTableIndir_t::isRelative == false /* TODO: NYI */); + + VTableCallStub* pStub = stub(); + BYTE* p = (BYTE*)pStub->entryPoint(); + +#ifdef UNIX_AMD64_ABI + // mov rax,[rdi] : rax = MethodTable pointer + *(UINT32 *)p = 0x078b48; p += 3; +#else + // mov rax,[rcx] : rax = MethodTable pointer + *(UINT32 *)p = 0x018b48; p += 3; +#endif + + // mov rax,[rax+vtable offset] : rax = vtable pointer + if (offsetOfIndirection >= 0x80) + { + *(UINT32*)p = 0x00808b48; p += 3; + *(UINT32*)p = offsetOfIndirection; p += 4; + } + else + { + *(UINT32*)p = 0x00408b48; p += 3; + *p++ = (BYTE)offsetOfIndirection; + } + + // jmp qword ptr [rax+slot] + if (offsetAfterIndirection >= 0x80) + { + *(UINT32*)p = 0xa0ff; p += 2; + *(UINT32*)p = offsetAfterIndirection; p += 4; + } + else + { + *(UINT16*)p = 0x60ff; p += 2; + *p++ = (BYTE)offsetAfterIndirection; + } + + // Store the slot value here for convenience. Not a real instruction (unreachable anyways) + *(UINT32*)p = slot; p += 4; + + _ASSERT(p == (BYTE*)stub()->entryPoint() + VTableCallHolder::GetHolderSize(slot)); + _ASSERT(stub()->size() == VTableCallHolder::GetHolderSize(slot)); +} + VirtualCallStubManager::StubKind VirtualCallStubManager::predictStubKind(PCODE stubStartAddress) { #ifdef DACCESS_COMPILE @@ -763,6 +870,10 @@ VirtualCallStubManager::StubKind VirtualCallStubManager::predictStubKind(PCODE s { stubKind = SK_LOOKUP; } + else if (firstWord == 0x8B48) + { + stubKind = SK_VTABLECALL; + } else { BYTE firstByte = ((BYTE*) stubStartAddress)[0]; diff --git a/src/vm/arm/stubs.cpp b/src/vm/arm/stubs.cpp index 01d8f319d4..d863900eec 100644 --- a/src/vm/arm/stubs.cpp +++ b/src/vm/arm/stubs.cpp @@ -3387,6 +3387,16 @@ void emitCOMStubCall (ComCallMethodDesc *pCOMMethod, PCODE target) } #endif // FEATURE_COMINTEROP +void MovRegImm(BYTE* p, int reg, TADDR imm) +{ + LIMITED_METHOD_CONTRACT; + *(WORD *)(p + 0) = 0xF240; + *(WORD *)(p + 2) = (UINT16)(reg << 8); + *(WORD *)(p + 4) = 0xF2C0; + *(WORD *)(p + 6) = (UINT16)(reg << 8); + PutThumb2Mov32((UINT16 *)p, imm); +} + #ifndef DACCESS_COMPILE #ifndef CROSSGEN_COMPILE @@ -3411,16 +3421,6 @@ void emitCOMStubCall (ComCallMethodDesc *pCOMMethod, PCODE target) ClrFlushInstructionCache(pStart, cbAligned); \ return (PCODE)((TADDR)pStart | THUMB_CODE) -static void MovRegImm(BYTE* p, int reg, TADDR imm) -{ - LIMITED_METHOD_CONTRACT; - *(WORD *)(p + 0) = 0xF240; - *(WORD *)(p + 2) = (UINT16)(reg << 8); - *(WORD *)(p + 4) = 0xF2C0; - *(WORD *)(p + 6) = (UINT16)(reg << 8); - PutThumb2Mov32((UINT16 *)p, imm); -} - PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, PCODE target) { STANDARD_VM_CONTRACT; diff --git a/src/vm/arm/virtualcallstubcpu.hpp b/src/vm/arm/virtualcallstubcpu.hpp index a1e15d3661..6dc99e5093 100644 --- a/src/vm/arm/virtualcallstubcpu.hpp +++ b/src/vm/arm/virtualcallstubcpu.hpp @@ -55,9 +55,9 @@ get quickly changed to point to another kind of stub. 
*/ struct LookupStub { - inline PCODE entryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0] + THUMB_CODE; } - inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; } - inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(LookupStub); } + inline PCODE entryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0] + THUMB_CODE; } + inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; } + inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(LookupStub); } private: friend struct LookupHolder; @@ -259,6 +259,87 @@ struct ResolveHolder private: ResolveStub _stub; }; + +/*VTableCallStub************************************************************************************** +These are jump stubs that perform a vtable-base virtual call. These stubs assume that an object is placed +in the first argument register (this pointer). From there, the stub extracts the MethodTable pointer, followed by the +vtable pointer, and finally jumps to the target method at a given slot in the vtable. +*/ +struct VTableCallStub +{ + friend struct VTableCallHolder; + + inline size_t size() + { + LIMITED_METHOD_CONTRACT; + + BYTE* pStubCode = (BYTE *)this; + + size_t cbSize = 4; // First ldr instruction + + // If we never save r0 to the red zone, we have the short version of the stub + if (*(UINT32*)(&pStubCode[cbSize]) != 0x0c04f84d) + { + return + 4 + // ldr r12,[r0] + 4 + // ldr r12,[r12+offset] + 4 + // ldr r12,[r12+offset] + 2 + // bx r12 + 4; // Slot value (data storage, not a real instruction) + } + + cbSize += 4; // Saving r0 into red zone + cbSize += (*(WORD*)(&pStubCode[cbSize]) == 0xf8dc ? 4 : 12); // Loading of vtable into r12 + cbSize += (*(WORD*)(&pStubCode[cbSize]) == 0xf8dc ? 4 : 12); // Loading of targe address into r12 + + return cbSize + 6 /* Restore r0, bx*/ + 4 /* Slot value */; + } + + inline PCODE entryPoint() const { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0] + THUMB_CODE; } + + inline size_t token() + { + LIMITED_METHOD_CONTRACT; + DWORD slot = *(DWORD*)(reinterpret_cast<BYTE*>(this) + size() - 4); + return DispatchToken::CreateDispatchToken(slot).To_SIZE_T(); + } + +private: + BYTE _entryPoint[0]; // Dynamically sized stub. See Initialize() for more details. +}; + +/* VTableCallHolders are the containers for VTableCallStubs, they provide for any alignment of +stubs as necessary. */ +struct VTableCallHolder +{ + void Initialize(unsigned slot); + + VTableCallStub* stub() { LIMITED_METHOD_CONTRACT; return reinterpret_cast<VTableCallStub *>(this); } + + static size_t GetHolderSize(unsigned slot) + { + STATIC_CONTRACT_WRAPPER; + unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE; + unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE; + + int indirectionsSize = (offsetOfIndirection > 0xFFF ? 12 : 4) + (offsetAfterIndirection > 0xFFF ? 12 : 4); + if (offsetOfIndirection > 0xFFF || offsetAfterIndirection > 0xFFF) + indirectionsSize += 8; // Save/restore r0 using red zone + + return 6 + indirectionsSize + 4; + } + + static VTableCallHolder* VTableCallHolder::FromVTableCallEntry(PCODE entry) + { + LIMITED_METHOD_CONTRACT; + return (VTableCallHolder*)(entry & ~THUMB_CODE); + } + +private: + // VTableCallStub follows here. It is dynamically sized on allocation because it could + // use short/long instruction sizes for the mov/jmp, depending on the slot value. 
+}; + #include <poppack.h> @@ -324,6 +405,69 @@ ResolveHolder* ResolveHolder::FromResolveEntry(PCODE resolveEntry) return resolveHolder; } +void MovRegImm(BYTE* p, int reg, TADDR imm); + +void VTableCallHolder::Initialize(unsigned slot) +{ + unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE; + unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE; + _ASSERTE(MethodTable::VTableIndir_t::isRelative == false /* TODO: NYI */); + + VTableCallStub* pStub = stub(); + BYTE* p = (BYTE*)(pStub->entryPoint() & ~THUMB_CODE); + + // ldr r12,[r0] : r12 = MethodTable pointer + *(UINT32*)p = 0xc000f8d0; p += 4; + + if (offsetOfIndirection > 0xFFF || offsetAfterIndirection > 0xFFF) + { + // str r0, [sp, #-4]. Save r0 in the red zone + *(UINT32*)p = 0x0c04f84d; p += 4; + } + + if (offsetOfIndirection > 0xFFF) + { + // mov r0, offsetOfIndirection + MovRegImm(p, 0, offsetOfIndirection); p += 8; + // ldr r12, [r12, r0] + *(UINT32*)p = 0xc000f85c; p += 4; + } + else + { + // ldr r12, [r12 + offset] + *(WORD *)p = 0xf8dc; p += 2; + *(WORD *)p = (WORD)(offsetOfIndirection | 0xc000); p += 2; + } + + if (offsetAfterIndirection > 0xFFF) + { + // mov r0, offsetAfterIndirection + MovRegImm(p, 0, offsetAfterIndirection); p += 8; + // ldr r12, [r12, r0] + *(UINT32*)p = 0xc000f85c; p += 4; + } + else + { + // ldr r12, [r12 + offset] + *(WORD *)p = 0xf8dc; p += 2; + *(WORD *)p = (WORD)(offsetAfterIndirection | 0xc000); p += 2; + } + + if (offsetOfIndirection > 0xFFF || offsetAfterIndirection > 0xFFF) + { + // ldr r0, [sp, #-4]. Restore r0 from the red zone. + *(UINT32*)p = 0x0c04f85d; p += 4; + } + + // bx r12 + *(UINT16*)p = 0x4760; p += 2; + + // Store the slot value here for convenience. 
Not a real instruction (unreachable anyways) + *(UINT32*)p = slot; p += 4; + + _ASSERT(p == (BYTE*)(stub()->entryPoint() & ~THUMB_CODE) + VTableCallHolder::GetHolderSize(slot)); + _ASSERT(stub()->size() == VTableCallHolder::GetHolderSize(slot)); +} #endif // DACCESS_COMPILE @@ -347,23 +491,35 @@ VirtualCallStubManager::StubKind VirtualCallStubManager::predictStubKind(PCODE s WORD firstWord = *((WORD*) pInstr); - //Assuming that RESOLVE_STUB_FIRST_WORD & DISPATCH_STUB_FIRST_WORD have same values - if (firstWord == DISPATCH_STUB_FIRST_WORD) + if (*((UINT32*)pInstr) == 0xc000f8d0) { + // Confirm the thrid word belongs to the vtable stub pattern WORD thirdWord = ((WORD*)pInstr)[2]; - if(thirdWord == 0xf84d) + if (thirdWord == 0xf84d /* Part of str r0, [sp, #-4] */ || + thirdWord == 0xf8dc /* Part of ldr r12, [r12 + offset] */) + stubKind = SK_VTABLECALL; + } + + if (stubKind == SK_UNKNOWN) + { + //Assuming that RESOLVE_STUB_FIRST_WORD & DISPATCH_STUB_FIRST_WORD have same values + if (firstWord == DISPATCH_STUB_FIRST_WORD) { - stubKind = SK_DISPATCH; + WORD thirdWord = ((WORD*)pInstr)[2]; + if (thirdWord == 0xf84d) + { + stubKind = SK_DISPATCH; + } + else if (thirdWord == 0xb460) + { + stubKind = SK_RESOLVE; + } } - else if(thirdWord == 0xb460) + else if (firstWord == 0xf8df) { - stubKind = SK_RESOLVE; + stubKind = SK_LOOKUP; } } - else if (firstWord == 0xf8df) - { - stubKind = SK_LOOKUP; - } } EX_CATCH { diff --git a/src/vm/arm64/virtualcallstubcpu.hpp b/src/vm/arm64/virtualcallstubcpu.hpp index 3f225186a7..c7b3f75e68 100644 --- a/src/vm/arm64/virtualcallstubcpu.hpp +++ b/src/vm/arm64/virtualcallstubcpu.hpp @@ -9,6 +9,7 @@ #define DISPATCH_STUB_FIRST_DWORD 0xf940000d #define RESOLVE_STUB_FIRST_DWORD 0xF940000C +#define VTABLECALL_STUB_FIRST_DWORD 0xF9400009 struct ARM64EncodeHelpers { @@ -386,6 +387,87 @@ private: ResolveStub _stub; }; + +/*VTableCallStub************************************************************************************** +These are jump stubs that perform a vtable-base virtual call. These stubs assume that an object is placed +in the first argument register (this pointer). From there, the stub extracts the MethodTable pointer, followed by the +vtable pointer, and finally jumps to the target method at a given slot in the vtable. +*/ +struct VTableCallStub +{ + friend struct VTableCallHolder; + + inline size_t size() + { + LIMITED_METHOD_CONTRACT; + + BYTE* pStubCode = (BYTE *)this; + + int numDataSlots = 0; + + size_t cbSize = 4; // First ldr instruction + + for (int i = 0; i < 2; i++) + { + if (((*(DWORD*)(&pStubCode[cbSize])) & 0xFFC003FF) == 0xF9400129) + { + // ldr x9, [x9, #offsetOfIndirection] + cbSize += 4; + } + else + { + // These 2 instructions used when the indirection offset is >= 0x8000 + // ldr w10, [PC, #dataOffset] + // ldr x9, [x9, x10] + numDataSlots++; + cbSize += 8; + } + } + return cbSize + + 4 + // Last 'br x9' instruction + (numDataSlots * 4) + // Data slots containing indirection offset values + 4; // Slot value (data storage, not a real instruction) + } + + inline PCODE entryPoint() const { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; } + + inline size_t token() + { + LIMITED_METHOD_CONTRACT; + DWORD slot = *(DWORD*)(reinterpret_cast<BYTE*>(this) + size() - 4); + return DispatchToken::CreateDispatchToken(slot).To_SIZE_T(); + } + +private: + BYTE _entryPoint[0]; // Dynamically sized stub. See Initialize() for more details. 
+}; + +/* VTableCallHolders are the containers for VTableCallStubs, they provide for any alignment of +stubs as necessary. */ +struct VTableCallHolder +{ + void Initialize(unsigned slot); + + VTableCallStub* stub() { LIMITED_METHOD_CONTRACT; return reinterpret_cast<VTableCallStub *>(this); } + + static size_t GetHolderSize(unsigned slot) + { + STATIC_CONTRACT_WRAPPER; + unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE; + unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE; + int indirectionsCodeSize = (offsetOfIndirection >= 0x8000 ? 8 : 4) + (offsetAfterIndirection >= 0x8000 ? 8 : 4); + int indirectionsDataSize = (offsetOfIndirection >= 0x8000 ? 4 : 0) + (offsetAfterIndirection >= 0x8000 ? 4 : 0); + return 8 + indirectionsCodeSize + indirectionsDataSize + 4; + } + + static VTableCallHolder* VTableCallHolder::FromVTableCallEntry(PCODE entry) { LIMITED_METHOD_CONTRACT; return (VTableCallHolder*)entry; } + +private: + // VTableCallStub follows here. It is dynamically sized on allocation because it could + // use short/long instruction sizes for LDR, depending on the slot value. +}; + + #ifdef DECLARE_DATA #ifndef DACCESS_COMPILE @@ -403,6 +485,78 @@ ResolveHolder* ResolveHolder::FromResolveEntry(PCODE resolveEntry) return resolveHolder; } +void VTableCallHolder::Initialize(unsigned slot) +{ + unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE; + unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE; + _ASSERTE(MethodTable::VTableIndir_t::isRelative == false /* TODO: NYI */); + + int indirectionsCodeSize = (offsetOfIndirection >= 0x8000 ? 8 : 4) + (offsetAfterIndirection >= 0x8000 ? 8 : 4); + int indirectionsDataSize = (offsetOfIndirection >= 0x8000 ? 4 : 0) + (offsetAfterIndirection >= 0x8000 ? 4 : 0); + int codeSize = 8 + indirectionsCodeSize + indirectionsDataSize; + + VTableCallStub* pStub = stub(); + BYTE* p = (BYTE*)pStub->entryPoint(); + + // ldr x9,[x0] : x9 = MethodTable pointer + *(UINT32*)p = 0xF9400009; p += 4; + + // moving offset value wrt PC. Currently points to first indirection offset data. + uint dataOffset = codeSize - indirectionsDataSize - 4; + + if (offsetOfIndirection >= 0x8000) + { + // ldr w10, [PC, #dataOffset] + *(DWORD*)p = 0x1800000a | ((dataOffset >> 2) << 5); p += 4; + // ldr x9, [x9, x10] + *(DWORD*)p = 0xf86a6929; p += 4; + + // move to next indirection offset data + dataOffset = dataOffset - 8 + 4; // subtract 8 as we have moved PC by 8 and add 4 as next data is at 4 bytes from previous data + } + else + { + // ldr x9, [x9, #offsetOfIndirection] + *(DWORD*)p = 0xf9400129 | (((UINT32)offsetOfIndirection >> 3) << 10); + p += 4; + } + + if (offsetAfterIndirection >= 0x8000) + { + // ldr w10, [PC, #dataOffset] + *(DWORD*)p = 0x1800000a | ((dataOffset >> 2) << 5); p += 4; + // ldr x9, [x9, x10] + *(DWORD*)p = 0xf86a6929; p += 4; + } + else + { + // ldr x9, [x9, #offsetAfterIndirection] + *(DWORD*)p = 0xf9400129 | (((UINT32)offsetAfterIndirection >> 3) << 10); + p += 4; + } + + // br x9 + *(UINT32*)p = 0xd61f0120; p += 4; + + // data labels: + if (offsetOfIndirection >= 0x8000) + { + *(UINT32*)p = (UINT32)offsetOfIndirection; + p += 4; + } + if (offsetAfterIndirection >= 0x8000) + { + *(UINT32*)p = (UINT32)offsetAfterIndirection; + p += 4; + } + + // Store the slot value here for convenience. 
Not a real instruction (unreachable anyways) + // NOTE: Not counted in codeSize above. + *(UINT32*)p = slot; p += 4; + + _ASSERT(p == (BYTE*)stub()->entryPoint() + VTableCallHolder::GetHolderSize(slot)); + _ASSERT(stub()->size() == VTableCallHolder::GetHolderSize(slot)); +} #endif // DACCESS_COMPILE @@ -435,6 +589,10 @@ VirtualCallStubManager::StubKind VirtualCallStubManager::predictStubKind(PCODE s { stubKind = SK_RESOLVE; } + else if (firstDword == VTABLECALL_STUB_FIRST_DWORD) // assembly of first instruction of VTableCallStub : ldr x9, [x0] + { + stubKind = SK_VTABLECALL; + } else if (firstDword == 0x10000089) // assembly of first instruction of LookupStub : adr x9, _resolveWorkerTarget { stubKind = SK_LOOKUP; diff --git a/src/vm/i386/virtualcallstubcpu.hpp b/src/vm/i386/virtualcallstubcpu.hpp index 67737a2d72..3bdae8c3ec 100644 --- a/src/vm/i386/virtualcallstubcpu.hpp +++ b/src/vm/i386/virtualcallstubcpu.hpp @@ -57,9 +57,9 @@ get quickly changed to point to another kind of stub. */ struct LookupStub { - inline PCODE entryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; } - inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; } - inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(LookupStub); } + inline PCODE entryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; } + inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; } + inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(LookupStub); } private: friend struct LookupHolder; @@ -357,6 +357,66 @@ private: BYTE pad[(sizeof(void*)-((sizeof(ResolveStub))%sizeof(void*))+offsetof(ResolveStub,_token))%sizeof(void*)]; //fill out DWORD //#endif }; + +/*VTableCallStub************************************************************************************** +These are jump stubs that perform a vtable-base virtual call. These stubs assume that an object is placed +in the first argument register (this pointer). From there, the stub extracts the MethodTable pointer, followed by the +vtable pointer, and finally jumps to the target method at a given slot in the vtable. +*/ +struct VTableCallStub +{ + friend struct VTableCallHolder; + + inline size_t size() + { + LIMITED_METHOD_CONTRACT; + + BYTE* pStubCode = (BYTE *)this; + + size_t cbSize = 2; // First mov instruction + cbSize += (pStubCode[cbSize + 1] == 0x80 ? 6 : 3); // Either 8B 80 or 8B 40: mov eax,[eax+offset] + cbSize += (pStubCode[cbSize + 1] == 0xa0 ? 6 : 3); // Either FF A0 or FF 60: jmp dword ptr [eax+slot] + cbSize += 4; // Slot value (data storage, not a real instruction) + + return cbSize; + } + + inline PCODE entryPoint() const { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; } + + inline size_t token() + { + LIMITED_METHOD_CONTRACT; + DWORD slot = *(DWORD*)(reinterpret_cast<BYTE*>(this) + size() - 4); + return DispatchToken::CreateDispatchToken(slot).To_SIZE_T(); + } + +private: + BYTE _entryPoint[0]; // Dynamically sized stub. See Initialize() for more details. +}; + +/* VTableCallHolders are the containers for VTableCallStubs, they provide for any alignment of +stubs as necessary. 
*/ +struct VTableCallHolder +{ + void Initialize(unsigned slot); + + VTableCallStub* stub() { LIMITED_METHOD_CONTRACT; return reinterpret_cast<VTableCallStub *>(this); } + + static size_t GetHolderSize(unsigned slot) + { + STATIC_CONTRACT_WRAPPER; + unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE; + unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE; + return 2 + (offsetOfIndirection >= 0x80 ? 6 : 3) + (offsetAfterIndirection >= 0x80 ? 6 : 3) + 4; + } + + static VTableCallHolder* VTableCallHolder::FromVTableCallEntry(PCODE entry) { LIMITED_METHOD_CONTRACT; return (VTableCallHolder*)entry; } + +private: + // VTableCallStub follows here. It is dynamically sized on allocation because it could + // use short/long instruction sizes for the mov/jmp, depending on the slot value. +}; + #include <poppack.h> @@ -895,6 +955,49 @@ ResolveHolder* ResolveHolder::FromResolveEntry(PCODE resolveEntry) return resolveHolder; } +void VTableCallHolder::Initialize(unsigned slot) +{ + unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE; + unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE; + _ASSERTE(MethodTable::VTableIndir_t::isRelative == false /* TODO: NYI */); + + VTableCallStub* pStub = stub(); + BYTE* p = (BYTE*)pStub->entryPoint(); + + // mov eax,[ecx] : eax = MethodTable pointer + *(UINT16*)p = 0x018b; p += 2; + + // mov eax,[eax+vtable offset] : eax = vtable pointer + if (offsetOfIndirection >= 0x80) + { + *(UINT16*)p = 0x808b; p += 2; + *(UINT32*)p = offsetOfIndirection; p += 4; + } + else + { + *(UINT16*)p = 0x408b; p += 2; + *p++ = (BYTE)offsetOfIndirection; + } + + // jmp dword ptr [eax+slot] + if (offsetAfterIndirection >= 0x80) + { + *(UINT16*)p = 0xa0ff; p += 2; + *(UINT32*)p = offsetAfterIndirection; p += 4; + } + else + { + *(UINT16*)p = 0x60ff; p += 2; + *p++ = (BYTE)offsetAfterIndirection; + } + + // Store the slot value here for convenience. Not a real instruction (unreachable anyways) + *(UINT32*)p = slot; p += 4; + + _ASSERT(p == (BYTE*)stub()->entryPoint() + VTableCallHolder::GetHolderSize(slot)); + _ASSERT(stub()->size() == VTableCallHolder::GetHolderSize(slot)); +} + #endif // DACCESS_COMPILE VirtualCallStubManager::StubKind VirtualCallStubManager::predictStubKind(PCODE stubStartAddress) @@ -932,6 +1035,10 @@ VirtualCallStubManager::StubKind VirtualCallStubManager::predictStubKind(PCODE s { stubKind = SK_RESOLVE; } + else if (firstWord == 0x018b) + { + stubKind = SK_VTABLECALL; + } else { BYTE firstByte = ((BYTE*) stubStartAddress)[0]; diff --git a/src/vm/jitinterface.cpp b/src/vm/jitinterface.cpp index efaa340151..4423c98f8a 100644 --- a/src/vm/jitinterface.cpp +++ b/src/vm/jitinterface.cpp @@ -5155,6 +5155,8 @@ void CEEInfo::getCallInfo( INDEBUG(memset(pResult, 0xCC, sizeof(*pResult))); + pResult->stubLookup.lookupKind.needsRuntimeLookup = false; + MethodDesc* pMD = (MethodDesc *)pResolvedToken->hMethod; TypeHandle th(pResolvedToken->hClass); @@ -5460,13 +5462,18 @@ void CEEInfo::getCallInfo( pResult->nullInstanceCheck = TRUE; } // Non-interface dispatches go through the vtable. - // We'll special virtual calls to target methods in the corelib assembly when compiling in R2R mode and generate fragile-NI-like callsites for improved performance. 
We - // can do that because today we'll always service the corelib assembly and the runtime in one bundle. Any caller in the corelib version bubble can benefit from this - // performance optimization. - else if (!pTargetMD->IsInterface() && (!IsReadyToRunCompilation() || CallerAndCalleeInSystemVersionBubble((MethodDesc*)callerHandle, pTargetMD))) + else if (!pTargetMD->IsInterface()) { pResult->kind = CORINFO_VIRTUALCALL_VTABLE; pResult->nullInstanceCheck = TRUE; + + // We'll special virtual calls to target methods in the corelib assembly when compiling in R2R mode, and generate fragile-NI-like callsites for improved performance. We + // can do that because today we'll always service the corelib assembly and the runtime in one bundle. Any caller in the corelib version bubble can benefit from this + // performance optimization. + if (IsReadyToRunCompilation() && !CallerAndCalleeInSystemVersionBubble((MethodDesc*)callerHandle, pTargetMD)) + { + pResult->kind = CORINFO_VIRTUALCALL_STUB; + } } else { @@ -5504,8 +5511,6 @@ void CEEInfo::getCallInfo( } else { - pResult->stubLookup.lookupKind.needsRuntimeLookup = false; - BYTE * indcell = NULL; if (!(flags & CORINFO_CALLINFO_KINDONLY) && !isVerifyOnly()) diff --git a/src/vm/loaderallocator.cpp b/src/vm/loaderallocator.cpp index 7033abf8f2..acb9bcacf3 100644 --- a/src/vm/loaderallocator.cpp +++ b/src/vm/loaderallocator.cpp @@ -1027,7 +1027,8 @@ void LoaderAllocator::ActivateManagedTracking() #endif // !CROSSGEN_COMPILE -// We don't actually allocate a low frequency heap for collectible types +// We don't actually allocate a low frequency heap for collectible types. +// This is carefully tuned to sum up to 16 pages to reduce waste. #define COLLECTIBLE_LOW_FREQUENCY_HEAP_SIZE (0 * GetOsPageSize()) #define COLLECTIBLE_HIGH_FREQUENCY_HEAP_SIZE (3 * GetOsPageSize()) #define COLLECTIBLE_STUB_HEAP_SIZE GetOsPageSize() diff --git a/src/vm/prestub.cpp b/src/vm/prestub.cpp index b614e341a2..78e9f22e17 100644 --- a/src/vm/prestub.cpp +++ b/src/vm/prestub.cpp @@ -2286,20 +2286,24 @@ EXTERN_C PCODE STDCALL ExternalMethodFixupWorker(TransitionBlock * pTransitionBl // Get the stub manager for this module VirtualCallStubManager *pMgr = pModule->GetLoaderAllocator()->GetVirtualCallStubManager(); - DispatchToken token; - if (pMT->IsInterface()) - token = pMT->GetLoaderAllocator()->GetDispatchToken(pMT->GetTypeID(), slot); - else - token = DispatchToken::CreateDispatchToken(slot); - OBJECTREF *protectedObj = pEMFrame->GetThisPtr(); _ASSERTE(protectedObj != NULL); if (*protectedObj == NULL) { COMPlusThrow(kNullReferenceException); } - - StubCallSite callSite(pIndirection, pEMFrame->GetReturnAddress()); - pCode = pMgr->ResolveWorker(&callSite, protectedObj, token, VirtualCallStubManager::SK_LOOKUP); + + DispatchToken token; + if (pMT->IsInterface() || MethodTable::VTableIndir_t::isRelative) + { + token = pMT->GetLoaderAllocator()->GetDispatchToken(pMT->GetTypeID(), slot); + StubCallSite callSite(pIndirection, pEMFrame->GetReturnAddress()); + pCode = pMgr->ResolveWorker(&callSite, protectedObj, token, VirtualCallStubManager::SK_LOOKUP); + } + else + { + pCode = pMgr->GetVTableCallStub(slot); + *EnsureWritableExecutablePages((TADDR *)pIndirection) = pCode; + } _ASSERTE(pCode != NULL); } else diff --git a/src/vm/virtualcallstub.cpp b/src/vm/virtualcallstub.cpp index 657200b2ee..78a37483b5 100644 --- a/src/vm/virtualcallstub.cpp +++ b/src/vm/virtualcallstub.cpp @@ -36,6 +36,7 @@ UINT32 g_site_write_mono = 0; //# of call site backpatch writes to poi UINT32 
g_stub_lookup_counter = 0; //# of lookup stubs UINT32 g_stub_mono_counter = 0; //# of dispatch stubs UINT32 g_stub_poly_counter = 0; //# of resolve stubs +UINT32 g_stub_vtable_counter = 0; //# of vtable call stubs UINT32 g_stub_space = 0; //# of bytes of stubs UINT32 g_reclaim_counter = 0; //# of times a ReclaimAll was performed @@ -239,6 +240,8 @@ void VirtualCallStubManager::LoggingDump() WriteFile (g_hStubLogFile, szPrintStr, (DWORD) strlen(szPrintStr), &dwWriteByte, NULL); sprintf_s(szPrintStr, COUNTOF(szPrintStr), OUTPUT_FORMAT_INT, "stub_poly_counter", g_stub_poly_counter); WriteFile (g_hStubLogFile, szPrintStr, (DWORD) strlen(szPrintStr), &dwWriteByte, NULL); + sprintf_s(szPrintStr, COUNTOF(szPrintStr), OUTPUT_FORMAT_INT, "stub_vtable_counter", g_stub_vtable_counter); + WriteFile(g_hStubLogFile, szPrintStr, (DWORD)strlen(szPrintStr), &dwWriteByte, NULL); sprintf_s(szPrintStr, COUNTOF(szPrintStr), OUTPUT_FORMAT_INT, "stub_space", g_stub_space); WriteFile (g_hStubLogFile, szPrintStr, (DWORD) strlen(szPrintStr), &dwWriteByte, NULL); @@ -501,6 +504,7 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA NewHolder<BucketTable> resolvers_holder(new BucketTable(CALL_STUB_MIN_BUCKETS)); NewHolder<BucketTable> dispatchers_holder(new BucketTable(CALL_STUB_MIN_BUCKETS*2)); NewHolder<BucketTable> lookups_holder(new BucketTable(CALL_STUB_MIN_BUCKETS)); + NewHolder<BucketTable> vtableCallers_holder(new BucketTable(CALL_STUB_MIN_BUCKETS)); NewHolder<BucketTable> cache_entries_holder(new BucketTable(CALL_STUB_MIN_BUCKETS)); // @@ -521,6 +525,8 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA DWORD dispatch_heap_commit_size; DWORD resolve_heap_reserve_size; DWORD resolve_heap_commit_size; + DWORD vtable_heap_reserve_size; + DWORD vtable_heap_commit_size; // // Setup an expected number of items to commit and reserve @@ -538,6 +544,7 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA lookup_heap_commit_size = 24; lookup_heap_reserve_size = 250; dispatch_heap_commit_size = 24; dispatch_heap_reserve_size = 600; resolve_heap_commit_size = 24; resolve_heap_reserve_size = 300; + vtable_heap_commit_size = 24; vtable_heap_reserve_size = 600; } else if (parentDomain->IsSharedDomain()) { @@ -550,6 +557,7 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA lookup_heap_commit_size = 24; lookup_heap_reserve_size = 200; dispatch_heap_commit_size = 24; dispatch_heap_reserve_size = 450; resolve_heap_commit_size = 24; resolve_heap_reserve_size = 200; + vtable_heap_commit_size = 24; vtable_heap_reserve_size = 450; } else { @@ -559,6 +567,7 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA lookup_heap_commit_size = 8; lookup_heap_reserve_size = 8; dispatch_heap_commit_size = 8; dispatch_heap_reserve_size = 8; resolve_heap_commit_size = 8; resolve_heap_reserve_size = 8; + vtable_heap_commit_size = 8; vtable_heap_reserve_size = 8; } #ifdef _WIN64 @@ -571,7 +580,7 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA #endif // - // Convert the number of items into a size in bytes to commit abd reserve + // Convert the number of items into a size in bytes to commit and reserve // indcell_heap_reserve_size *= sizeof(void *); indcell_heap_commit_size *= sizeof(void *); @@ -593,6 +602,9 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA resolve_heap_reserve_size *= sizeof(ResolveHolder); 
resolve_heap_commit_size *= sizeof(ResolveHolder); + vtable_heap_reserve_size *= static_cast<DWORD>(VTableCallHolder::GetHolderSize(0)); + vtable_heap_commit_size *= static_cast<DWORD>(VTableCallHolder::GetHolderSize(0)); + // // Align up all of the commit and reserve sizes // @@ -611,6 +623,9 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA resolve_heap_reserve_size = (DWORD) ALIGN_UP(resolve_heap_reserve_size, GetOsPageSize()); resolve_heap_commit_size = (DWORD) ALIGN_UP(resolve_heap_commit_size, GetOsPageSize()); + vtable_heap_reserve_size = (DWORD) ALIGN_UP(vtable_heap_reserve_size, GetOsPageSize()); + vtable_heap_commit_size = (DWORD) ALIGN_UP(vtable_heap_commit_size, GetOsPageSize()); + BYTE * initReservedMem = NULL; if (!isCollectible) @@ -619,7 +634,8 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA cache_entry_heap_reserve_size + lookup_heap_reserve_size + dispatch_heap_reserve_size + - resolve_heap_reserve_size; + resolve_heap_reserve_size + + vtable_heap_reserve_size; DWORD dwTotalReserveMemSize = (DWORD) ALIGN_UP(dwTotalReserveMemSizeCalc, VIRTUAL_ALLOC_RESERVE_GRANULARITY); @@ -629,13 +645,14 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA if (dwWastedReserveMemSize != 0) { DWORD cWastedPages = dwWastedReserveMemSize / GetOsPageSize(); - DWORD cPagesPerHeap = cWastedPages / 5; - DWORD cPagesRemainder = cWastedPages % 5; // We'll throw this at the resolve heap + DWORD cPagesPerHeap = cWastedPages / 6; + DWORD cPagesRemainder = cWastedPages % 6; // We'll throw this at the resolve heap indcell_heap_reserve_size += cPagesPerHeap * GetOsPageSize(); cache_entry_heap_reserve_size += cPagesPerHeap * GetOsPageSize(); lookup_heap_reserve_size += cPagesPerHeap * GetOsPageSize(); dispatch_heap_reserve_size += cPagesPerHeap * GetOsPageSize(); + vtable_heap_reserve_size += cPagesPerHeap * GetOsPageSize(); resolve_heap_reserve_size += cPagesPerHeap * GetOsPageSize(); resolve_heap_reserve_size += cPagesRemainder * GetOsPageSize(); } @@ -644,7 +661,8 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA cache_entry_heap_reserve_size + lookup_heap_reserve_size + dispatch_heap_reserve_size + - resolve_heap_reserve_size) == + resolve_heap_reserve_size + + vtable_heap_reserve_size) == dwTotalReserveMemSize); } @@ -672,12 +690,20 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA resolve_heap_reserve_size = GetOsPageSize(); resolve_heap_commit_size = GetOsPageSize(); + // Heap for the collectible case is carefully tuned to sum up to 16 pages. Today, we only use the + // vtable jump stubs in the R2R scenario, which is unlikely to be loaded in the collectible context, + // so we'll keep the heap numbers at zero for now. If we ever use vtable stubs in the collectible + // scenario, we'll just allocate the memory on demand. 
+ vtable_heap_reserve_size = 0; + vtable_heap_commit_size = 0; + #ifdef _DEBUG DWORD dwTotalReserveMemSizeCalc = indcell_heap_reserve_size + cache_entry_heap_reserve_size + lookup_heap_reserve_size + dispatch_heap_reserve_size + - resolve_heap_reserve_size; + resolve_heap_reserve_size + + vtable_heap_reserve_size; #endif DWORD dwActualVSDSize = 0; @@ -756,6 +782,19 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA initReservedMem += resolve_heap_reserve_size; + // Hot memory, Writable, Execute, write exactly once + NewHolder<LoaderHeap> vtable_heap_holder( + new LoaderHeap(vtable_heap_reserve_size, vtable_heap_commit_size, + initReservedMem, vtable_heap_reserve_size, +#ifdef ENABLE_PERF_COUNTERS + &(GetPerfCounters().m_Loading.cbLoaderHeapSize), +#else + NULL, +#endif + &vtable_rangeList, TRUE)); + + initReservedMem += vtable_heap_reserve_size; + // Allocate the initial counter block NewHolder<counter_block> m_counters_holder(new counter_block); @@ -767,12 +806,13 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA lookup_heap = lookup_heap_holder; lookup_heap_holder.SuppressRelease(); dispatch_heap = dispatch_heap_holder; dispatch_heap_holder.SuppressRelease(); resolve_heap = resolve_heap_holder; resolve_heap_holder.SuppressRelease(); + vtable_heap = vtable_heap_holder; vtable_heap_holder.SuppressRelease(); cache_entry_heap = cache_entry_heap_holder; cache_entry_heap_holder.SuppressRelease(); resolvers = resolvers_holder; resolvers_holder.SuppressRelease(); dispatchers = dispatchers_holder; dispatchers_holder.SuppressRelease(); lookups = lookups_holder; lookups_holder.SuppressRelease(); - + vtableCallers = vtableCallers_holder; vtableCallers_holder.SuppressRelease(); cache_entries = cache_entries_holder; cache_entries_holder.SuppressRelease(); m_counters = m_counters_holder; m_counters_holder.SuppressRelease(); @@ -832,11 +872,13 @@ VirtualCallStubManager::~VirtualCallStubManager() if (lookup_heap) { delete lookup_heap; lookup_heap = NULL;} if (dispatch_heap) { delete dispatch_heap; dispatch_heap = NULL;} if (resolve_heap) { delete resolve_heap; resolve_heap = NULL;} + if (vtable_heap) { delete vtable_heap; vtable_heap = NULL;} if (cache_entry_heap) { delete cache_entry_heap; cache_entry_heap = NULL;} if (resolvers) { delete resolvers; resolvers = NULL;} if (dispatchers) { delete dispatchers; dispatchers = NULL;} if (lookups) { delete lookups; lookups = NULL;} + if (vtableCallers) { delete vtableCallers; vtableCallers = NULL;} if (cache_entries) { delete cache_entries; cache_entries = NULL;} // Now get rid of the memory taken by the counter_blocks @@ -1075,6 +1117,8 @@ BOOL VirtualCallStubManager::DoTraceStub(PCODE stubStartAddress, TraceDestinatio { LIMITED_METHOD_CONTRACT; + LOG((LF_CORDB, LL_EVERYTHING, "VirtualCallStubManager::DoTraceStub called\n")); + _ASSERTE(CheckIsStub_Internal(stubStartAddress)); #ifdef FEATURE_PREJIT @@ -1191,6 +1235,68 @@ PCODE VirtualCallStubManager::GetCallStub(TypeHandle ownerType, DWORD slot) RETURN (stub); } +PCODE VirtualCallStubManager::GetVTableCallStub(DWORD slot) +{ + CONTRACT(PCODE) { + THROWS; + GC_TRIGGERS; + MODE_ANY; + INJECT_FAULT(COMPlusThrowOM();); + PRECONDITION(!MethodTable::VTableIndir_t::isRelative /* Not yet supported */); + POSTCONDITION(RETVAL != NULL); + } CONTRACT_END; + + GCX_COOP(); // This is necessary for BucketTable synchronization + + PCODE stub = CALL_STUB_EMPTY_ENTRY; + + VTableCallEntry entry; + Prober probe(&entry); + if 
(vtableCallers->SetUpProber(DispatchToken::CreateDispatchToken(slot).To_SIZE_T(), 0, &probe)) + { + if ((stub = (PCODE)(vtableCallers->Find(&probe))) == CALL_STUB_EMPTY_ENTRY) + { + VTableCallHolder *pHolder = GenerateVTableCallStub(slot); + stub = (PCODE)(vtableCallers->Add((size_t)(pHolder->stub()->entryPoint()), &probe)); + } + } + + _ASSERTE(stub != CALL_STUB_EMPTY_ENTRY); + RETURN(stub); +} + +VTableCallHolder* VirtualCallStubManager::GenerateVTableCallStub(DWORD slot) +{ + CONTRACT(VTableCallHolder*) { + THROWS; + GC_TRIGGERS; + MODE_ANY; + INJECT_FAULT(COMPlusThrowOM();); + PRECONDITION(!MethodTable::VTableIndir_t::isRelative /* Not yet supported */); + POSTCONDITION(RETVAL != NULL); + } CONTRACT_END; + + //allocate from the requisite heap and copy the template over it. + VTableCallHolder * pHolder = (VTableCallHolder*)(void*)vtable_heap->AllocAlignedMem(VTableCallHolder::GetHolderSize(slot), CODE_SIZE_ALIGN); + + pHolder->Initialize(slot); + ClrFlushInstructionCache(pHolder->stub(), pHolder->stub()->size()); + + AddToCollectibleVSDRangeList(pHolder); + + //incr our counters + stats.stub_vtable_counter++; + stats.stub_space += (UINT32)pHolder->stub()->size(); + LOG((LF_STUBS, LL_INFO10000, "GenerateVTableCallStub for slot " FMT_ADDR "at" FMT_ADDR "\n", + DBG_ADDR(slot), DBG_ADDR(pHolder->stub()))); + +#ifdef FEATURE_PERFMAP + PerfMap::LogStubs(__FUNCTION__, "GenerateVTableCallStub", (PCODE)pHolder->stub(), pHolder->stub()->size()); +#endif + + RETURN(pHolder); +} + #ifdef FEATURE_PREJIT extern "C" PCODE STDCALL StubDispatchFixupWorker(TransitionBlock * pTransitionBlock, TADDR siteAddrForRegisterIndirect, @@ -1457,6 +1563,12 @@ size_t VirtualCallStubManager::GetTokenFromStubQuick(VirtualCallStubManager * pM LookupHolder * lookupHolder = LookupHolder::FromLookupEntry(stub); return lookupHolder->stub()->token(); } + else if (kind == SK_VTABLECALL) + { + _ASSERTE(pMgr->isVTableCallStub(stub)); + VTableCallStub * vtableStub = (VTableCallStub *)PCODEToPINSTR(stub); + return vtableStub->token(); + } _ASSERTE(!"Should not get here."); @@ -3023,12 +3135,14 @@ void VirtualCallStubManager::LogStats() resolvers->LogStats(); dispatchers->LogStats(); lookups->LogStats(); + vtableCallers->LogStats(); cache_entries->LogStats(); g_site_counter += stats.site_counter; g_stub_lookup_counter += stats.stub_lookup_counter; g_stub_poly_counter += stats.stub_poly_counter; g_stub_mono_counter += stats.stub_mono_counter; + g_stub_vtable_counter += stats.stub_vtable_counter; g_site_write += stats.site_write; g_site_write_poly += stats.site_write_poly; g_site_write_mono += stats.site_write_mono; @@ -3043,6 +3157,7 @@ void VirtualCallStubManager::LogStats() stats.stub_lookup_counter = 0; stats.stub_poly_counter = 0; stats.stub_mono_counter = 0; + stats.stub_vtable_counter = 0; stats.site_write = 0; stats.site_write_poly = 0; stats.site_write_mono = 0; @@ -3369,6 +3484,7 @@ void BucketTable::Reclaim() // dispatchers token the expected MT // resolver token the stub calling convention // cache_entries token the expected method table +// vtableCallers token unused (zero) // BOOL BucketTable::SetUpProber(size_t keyA, size_t keyB, Prober *prober) { diff --git a/src/vm/virtualcallstub.h b/src/vm/virtualcallstub.h index b8984eab3c..1d22e467c4 100644 --- a/src/vm/virtualcallstub.h +++ b/src/vm/virtualcallstub.h @@ -38,6 +38,7 @@ class VirtualCallStubManagerManager; struct LookupHolder; struct DispatchHolder; struct ResolveHolder; +struct VTableCallHolder; 
///////////////////////////////////////////////////////////////////////////////////// // Forward function declarations @@ -238,6 +239,9 @@ public: PCODE GetCallStub(TypeHandle ownerType, MethodDesc *pMD); PCODE GetCallStub(TypeHandle ownerType, DWORD slot); + // Stubs for vtable-based virtual calls with no lookups + PCODE GetVTableCallStub(DWORD slot); + // Generate an fresh indirection cell. BYTE* GenerateStubIndirection(PCODE stub, BOOL fUseRecycledCell = FALSE); @@ -272,6 +276,7 @@ public: resolve_rangeList(), dispatch_rangeList(), cache_entry_rangeList(), + vtable_rangeList(), parentDomain(NULL), isCollectible(false), m_initialReservedMemForHeaps(NULL), @@ -308,6 +313,7 @@ public: SK_LOOKUP, // Lookup Stubs are SLOW stubs that simply call into the runtime to do all work. SK_DISPATCH, // Dispatch Stubs have a fast check for one type otherwise jumps to runtime. Works for monomorphic sites SK_RESOLVE, // Resolve Stubs do a hash lookup before fallling back to the runtime. Works for polymorphic sites. + SK_VTABLECALL, // Stub that jumps to a target method using vtable-based indirections. Works for non-interface calls. SK_BREAKPOINT }; @@ -346,6 +352,11 @@ public: if (isResolvingStub(stubStartAddress)) return SK_RESOLVE; } + else if (predictedKind == SK_VTABLECALL) + { + if (isVTableCallStub(stubStartAddress)) + return SK_VTABLECALL; + } // This is the slow case. If the predict returned SK_UNKNOWN, SK_BREAKPOINT, // or the predict was found to be incorrect when checked against the RangeLists @@ -356,6 +367,8 @@ public: return SK_LOOKUP; else if (isResolvingStub(stubStartAddress)) return SK_RESOLVE; + else if (isVTableCallStub(stubStartAddress)) + return SK_VTABLECALL; return SK_UNKNOWN; } @@ -392,6 +405,14 @@ public: return GetLookupRangeList()->IsInRange(stubStartAddress); } + BOOL isVTableCallStub(PCODE stubStartAddress) + { + WRAPPER_NO_CONTRACT; + SUPPORTS_DAC; + + return GetVTableCallRangeList()->IsInRange(stubStartAddress); + } + static BOOL isDispatchingStubStatic(PCODE addr) { WRAPPER_NO_CONTRACT; @@ -416,11 +437,20 @@ public: return stubKind == SK_LOOKUP; } + static BOOL isVtableCallStubStatic(PCODE addr) + { + WRAPPER_NO_CONTRACT; + StubKind stubKind; + FindStubManager(addr, &stubKind); + return stubKind == SK_VTABLECALL; + } + //use range lists to track the chunks of memory that are part of each heap LockedRangeList lookup_rangeList; LockedRangeList resolve_rangeList; LockedRangeList dispatch_rangeList; LockedRangeList cache_entry_rangeList; + LockedRangeList vtable_rangeList; // Get dac-ized pointers to rangelist. 
RangeList* GetLookupRangeList() @@ -450,6 +480,12 @@ public: TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, cache_entry_rangeList); return PTR_RangeList(addr); } + RangeList* GetVTableCallRangeList() + { + SUPPORTS_DAC; + TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, vtable_rangeList); + return PTR_RangeList(addr); + } private: @@ -475,6 +511,8 @@ private: LookupHolder *GenerateLookupStub(PCODE addrOfResolver, size_t dispatchToken); + VTableCallHolder* GenerateVTableCallStub(DWORD slot); + template <typename STUB_HOLDER> void AddToCollectibleVSDRangeList(STUB_HOLDER *holder) { @@ -687,6 +725,7 @@ private: PTR_LoaderHeap lookup_heap; // lookup stubs go here PTR_LoaderHeap dispatch_heap; // dispatch stubs go here PTR_LoaderHeap resolve_heap; // resolve stubs go here + PTR_LoaderHeap vtable_heap; // vtable-based jump stubs go here #ifdef _TARGET_AMD64_ // When we layout the stub heaps, we put them close together in a sequential order @@ -707,6 +746,7 @@ private: BucketTable * cache_entries; // hash table of dispatch token/target structs for dispatch cache BucketTable * dispatchers; // hash table of dispatching stubs keyed by tokens/actualtype BucketTable * resolvers; // hash table of resolvers keyed by tokens/resolverstub + BucketTable * vtableCallers; // hash table of vtable call stubs keyed by slot values // This structure is used to keep track of the fail counters. // We only need one fail counter per ResolveStub, @@ -758,6 +798,7 @@ public: UINT32 stub_lookup_counter; //# of lookup stubs UINT32 stub_poly_counter; //# of resolve stubs UINT32 stub_mono_counter; //# of dispatch stubs + UINT32 stub_vtable_counter; //# of vtable call stubs UINT32 site_write; //# of call site backpatch writes UINT32 site_write_poly; //# of call site backpatch writes to point to resolve stubs UINT32 site_write_mono; //# of call site backpatch writes to point to dispatch stubs @@ -1061,6 +1102,44 @@ private: }; #endif // USES_LOOKUP_STUBS +class VTableCallEntry : public Entry +{ +public: + //Creates an entry that wraps vtable call stub + VTableCallEntry(size_t s) + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(VirtualCallStubManager::isVtableCallStubStatic((PCODE)s)); + stub = (VTableCallStub*)s; + } + + //default contructor to allow stack and inline allocation of vtable call entries + VTableCallEntry() { LIMITED_METHOD_CONTRACT; stub = NULL; } + + //implementations of abstract class Entry + BOOL Equals(size_t keyA, size_t keyB) + { + WRAPPER_NO_CONTRACT; return stub && (keyA == KeyA()) && (keyB == KeyB()); + } + + size_t KeyA() { WRAPPER_NO_CONTRACT; return Token(); } + size_t KeyB() { WRAPPER_NO_CONTRACT; return (size_t)0; } + + void SetContents(size_t contents) + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(VirtualCallStubManager::isVtableCallStubStatic((PCODE)contents)); + stub = VTableCallHolder::FromVTableCallEntry((PCODE)contents)->stub(); + } + + //extract the token of the underlying lookup stub + + inline size_t Token() { LIMITED_METHOD_CONTRACT; return stub ? 
stub->token() : 0; } + +private: + VTableCallStub* stub; //the stub the entry wrapping +}; + /********************************************************************************************** ResolveCacheEntry wraps a ResolveCacheElem and provides lookup functionality for entries that were created that may be added to the ResolveCache diff --git a/src/zap/zapinfo.cpp b/src/zap/zapinfo.cpp index 8efeedded6..e0acd819c9 100644 --- a/src/zap/zapinfo.cpp +++ b/src/zap/zapinfo.cpp @@ -2145,29 +2145,28 @@ void ZapInfo::getCallInfo(CORINFO_RESOLVED_TOKEN * pResolvedToken, return; } -#ifdef FEATURE_READYTORUN_COMPILER if (IsReadyToRunCompilation()) { ZapImport * pImport = m_pImage->GetImportTable()->GetStubDispatchCell(pResolvedToken); pResult->stubLookup.constLookup.accessType = IAT_PVALUE; pResult->stubLookup.constLookup.addr = pImport; - break; } -#endif - - CORINFO_CLASS_HANDLE calleeOwner = pResolvedToken->hClass; - CORINFO_METHOD_HANDLE callee = pResolvedToken->hMethod; - _ASSERTE(callee == pResult->hMethod); + else + { - // - // Create the indirection cell - // - pTarget = m_pImage->GetImportTable()->GetStubDispatchCell(calleeOwner, callee); + CORINFO_CLASS_HANDLE calleeOwner = pResolvedToken->hClass; + CORINFO_METHOD_HANDLE callee = pResolvedToken->hMethod; + _ASSERTE(callee == pResult->hMethod); - pResult->stubLookup.constLookup.accessType = IAT_PVALUE; + // + // Create the indirection cell + // + pTarget = m_pImage->GetImportTable()->GetStubDispatchCell(calleeOwner, callee); - pResult->stubLookup.constLookup.addr = pTarget; + pResult->stubLookup.constLookup.accessType = IAT_PVALUE; + pResult->stubLookup.constLookup.addr = pTarget; + } } break; @@ -2183,7 +2182,6 @@ void ZapInfo::getCallInfo(CORINFO_RESOLVED_TOKEN * pResolvedToken, return; case CORINFO_CALL: -#ifdef FEATURE_READYTORUN_COMPILER if (IsReadyToRunCompilation()) { // Constrained token is not interesting with this transforms @@ -2207,12 +2205,11 @@ void ZapInfo::getCallInfo(CORINFO_RESOLVED_TOKEN * pResolvedToken, pResult->codePointerLookup.constLookup.accessType = IAT_PVALUE; pResult->codePointerLookup.constLookup.addr = pImport; } -#endif break; case CORINFO_VIRTUALCALL_VTABLE: - // READYTORUN: FUTURE: support for vtable-based calls (currently, only calls within the CoreLib version bubble is supported, and the codegen we generate - // is the same as the fragile NI (because CoreLib and the runtime will always be updated together anyways - this is a special case) + // Only calls within the CoreLib version bubble support fragile NI codegen with vtable based calls, for better performance (because + // CoreLib and the runtime will always be updated together anyways - this is a special case) break; case CORINFO_VIRTUALCALL_LDVIRTFTN: @@ -2240,7 +2237,6 @@ void ZapInfo::getCallInfo(CORINFO_RESOLVED_TOKEN * pResolvedToken, break; } -#ifdef FEATURE_READYTORUN_COMPILER if (IsReadyToRunCompilation() && pResult->sig.hasTypeArg()) { if (pResult->exactContextNeedsRuntimeLookup) @@ -2272,8 +2268,8 @@ void ZapInfo::getCallInfo(CORINFO_RESOLVED_TOKEN * pResolvedToken, AppendConditionalImport(pImport); } } -#endif } + BOOL ZapInfo::canAccessFamily(CORINFO_METHOD_HANDLE hCaller, CORINFO_CLASS_HANDLE hInstanceType) { @@ -2285,7 +2281,6 @@ BOOL ZapInfo::isRIDClassDomainID (CORINFO_CLASS_HANDLE cls) return m_pEEJitInfo->isRIDClassDomainID(cls); } - unsigned ZapInfo::getClassDomainID (CORINFO_CLASS_HANDLE cls, void **ppIndirection) { _ASSERTE(ppIndirection != NULL); |
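A closing note on the dynamically sized stubs: on amd64 the holder size follows directly from the encodings chosen in VTableCallHolder::Initialize — 3 bytes for mov rax,[rcx] (or [rdi] on Unix), 4 or 7 bytes for the vtable-chunk load depending on whether the offset fits in a disp8, 3 or 6 bytes for the final jmp, plus 4 bytes of trailing data holding the slot. The standalone sketch below simply mirrors that arithmetic; the offsets passed in are made-up examples, since the real values depend on the MethodTable layout for the slot.

#include <cstdio>

// Mirrors the amd64 VTableCallHolder::GetHolderSize arithmetic from the patch.
// The offsets are illustrative; in the runtime they come from
// MethodTable::GetVtableOffset() and the vtable-indirection index of the slot.
static unsigned Amd64VTableStubSize(unsigned offsetOfIndirection, unsigned offsetAfterIndirection)
{
    return 3                                          // mov rax,[rcx] / mov rax,[rdi]
         + (offsetOfIndirection    >= 0x80 ? 7 : 4)   // mov rax,[rax+off]: disp32 vs disp8
         + (offsetAfterIndirection >= 0x80 ? 6 : 3)   // jmp qword ptr [rax+off]: disp32 vs disp8
         + 4;                                         // slot value stored after the code
}

int main()
{
    // Small offsets: everything fits in a disp8 -> 3 + 4 + 3 + 4 = 14 bytes.
    printf("small offsets: %u bytes\n", Amd64VTableStubSize(0x10, 0x20));
    // Large offsets: disp32 forms are needed   -> 3 + 7 + 6 + 4 = 20 bytes.
    printf("large offsets: %u bytes\n", Amd64VTableStubSize(0x200, 0x100));
    return 0;
}

The i386, ARM and ARM64 GetHolderSize implementations in the hunks above follow the same pattern with their own short/long encoding thresholds (0x80, 0xFFF and 0x8000 respectively).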