// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/****************************************************************************/
/*                          gccover.cpp                                     */
/****************************************************************************/

/* This file holds code that is designed to test GC pointer tracking in
   fully interruptible code.  We basically do a GC everywhere we can in
   jitted code
 */
/****************************************************************************/


#include "common.h"

#ifdef HAVE_GCCOVER

#pragma warning(disable:4663)

#include "eeconfig.h"
#include "gms.h"
#include "utsem.h"
#include "gccover.h"
#include "virtualcallstub.h"
#include "threadsuspend.h"

#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM_)
#include "gcinfodecoder.h"
#endif

#include "disassembler.h"

/****************************************************************************/

MethodDesc* AsMethodDesc(size_t addr);
static SLOT getTargetOfCall(SLOT instrPtr, PCONTEXT regs, SLOT* nextInstr);
bool isCallToStopForGCJitHelper(SLOT instrPtr);
#if defined(_TARGET_ARM_) || defined(_TARGET_ARM64_)
static void replaceSafePointInstructionWithGcStressInstr(UINT32 safePointOffset, LPVOID codeStart);
static bool replaceInterruptibleRangesWithGcStressInstr(UINT32 startOffset, UINT32 stopOffset, LPVOID codeStart);
#endif

// Resolves the MethodDesc for a call target.  The target may be jitted code
// or a virtual-stub-dispatch stub, so fall back to the VSD stub managers when
// the execution manager does not recognize the address.
static MethodDesc* getTargetMethodDesc(PCODE target)
{
    MethodDesc* targetMD = ExecutionManager::GetCodeMethodDesc(target);
    if (targetMD == 0)
    {
        VirtualCallStubManager::StubKind vsdStubKind = VirtualCallStubManager::SK_UNKNOWN;
        VirtualCallStubManager *pVSDStubManager = VirtualCallStubManager::FindStubManager(target, &vsdStubKind);
        if (vsdStubKind != VirtualCallStubManager::SK_BREAKPOINT && vsdStubKind != VirtualCallStubManager::SK_UNKNOWN)
        {
            DispatchToken token = VirtualCallStubManager::GetTokenFromStubQuick(pVSDStubManager, target, vsdStubKind);
            _ASSERTE(token.IsValid());
            targetMD = VirtualCallStubManager::GetInterfaceMethodDescFromToken(token);
        }
        else
        {
            targetMD = AsMethodDesc(size_t(MethodDesc::GetMethodDescFromStubAddr(target, TRUE)));
        }
    }
    return targetMD;
}

void SetupAndSprinkleBreakpoints(
    MethodDesc                    * pMD,
    EECodeInfo                    * pCodeInfo,
    IJitManager::MethodRegionInfo   methodRegionInfo,
    BOOL                            fZapped
    )
{
    // Allocate room for the GCCoverageInfo and copy of the method instructions
    size_t memSize = sizeof(GCCoverageInfo) + methodRegionInfo.hotSize + methodRegionInfo.coldSize;
    GCCoverageInfo* gcCover = (GCCoverageInfo*)(void*)
        pMD->GetLoaderAllocatorForCode()->GetHighFrequencyHeap()->AllocAlignedMem(memSize, CODE_SIZE_ALIGN);

    memset(gcCover, 0, sizeof(GCCoverageInfo));

    gcCover->methodRegion      = methodRegionInfo;
    gcCover->codeMan           = pCodeInfo->GetCodeManager();
    gcCover->gcInfoToken       = pCodeInfo->GetGCInfoToken();
    gcCover->callerThread      = 0;
    gcCover->doingEpilogChecks = true;

    gcCover->lastMD = pMD;   /* pass pMD to SprinkleBreakpoints */

    gcCover->SprinkleBreakpoints(gcCover->savedCode,
                                 gcCover->methodRegion.hotStartAddress,
                                 gcCover->methodRegion.hotSize,
                                 0,
                                 fZapped);

    // This is not required for ARM* as the above call does the work for both hot & cold regions
#if !defined(_TARGET_ARM_) && !defined(_TARGET_ARM64_)
    if (gcCover->methodRegion.coldSize != 0)
    {
        gcCover->SprinkleBreakpoints(gcCover->savedCode + gcCover->methodRegion.hotSize,
                                     gcCover->methodRegion.coldStartAddress,
                                     gcCover->methodRegion.coldSize,
                                     gcCover->methodRegion.hotSize,
                                     fZapped);
    }
#endif

    gcCover->lastMD = NULL;  /* clear lastMD */
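    // Publish the GCCoverageInfo on the MethodDesc.  The MethodDesc may live on
    // a write-protected page, so make it writable before storing the pointer.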
    _ASSERTE(!pMD->m_GcCover);
    *EnsureWritablePages(&pMD->m_GcCover) = gcCover;
}

void SetupAndSprinkleBreakpointsForJittedMethod(MethodDesc * pMD,
                                                PCODE codeStart
                                               )
{
    EECodeInfo codeInfo(codeStart);
    _ASSERTE(codeInfo.IsValid());
    _ASSERTE(codeInfo.GetRelOffset() == 0);

    IJitManager::MethodRegionInfo methodRegionInfo;
    codeInfo.GetMethodRegionInfo(&methodRegionInfo);

    _ASSERTE(PCODEToPINSTR(codeStart) == methodRegionInfo.hotStartAddress);

#ifdef _DEBUG
    if (!g_pConfig->SkipGCCoverage(pMD->GetModule()->GetSimpleName()))
#endif
    SetupAndSprinkleBreakpoints(pMD,
                                &codeInfo,
                                methodRegionInfo,
                                FALSE
                               );
}

/****************************************************************************/
/* called when a method is first jitted when GCStress level 4 or 8 is on */

void SetupGcCoverage(MethodDesc* pMD, BYTE* methodStartPtr)
{
#ifdef _DEBUG
    if (!g_pConfig->ShouldGcCoverageOnMethod(pMD->m_pszDebugMethodName))
    {
        return;
    }
#endif

    if (pMD->m_GcCover)
        return;

    //
    // In the gcstress=4 case, we can easily piggy-back onto the JITLock because we
    // have a JIT operation that needs to take that lock already.  But in the case of
    // gcstress=8, we cannot do this because the code already exists, and if gccoverage
    // were not in the picture, we're happy to race to do the prestub work because all
    // threads end up with the same answer and don't leak any resources in the process.
    //
    // However, with gccoverage, we need to exclude all other threads from mucking with
    // the code while we fill in the breakpoints and make our shadow copy of the code.
    //
    {
        BaseDomain* pDomain = pMD->GetDomain();

        // Enter the global lock which protects the list of all functions being JITd
        JitListLock::LockHolder pJitLock(pDomain->GetJitLock());

        // It is possible that another thread stepped in before we entered the global lock for the first time.
        if (pMD->m_GcCover)
        {
            // We came in to jit but someone beat us so return the jitted method!
            return;
        }
        else
        {
            const char *description = "jit lock (gc cover)";
#ifdef _DEBUG
            description = pMD->m_pszDebugMethodName;
#endif
            ReleaseHolder<JitListLockEntry> pEntry(JitListLockEntry::Find(pJitLock, pMD->GetInitialCodeVersion(), description));

            // We have an entry now, we can release the global lock
            pJitLock.Release();

            // Take the entry lock
            {
                JitListLockEntry::LockHolder pEntryLock(pEntry, FALSE);

                if (pEntryLock.DeadlockAwareAcquire())
                {
                    // we have the lock...
                }
                else
                {
                    // Note that at this point we don't have the lock, but that's OK because the
                    // thread which does have the lock is blocked waiting for us.
                }

                if (pMD->m_GcCover)
                {
                    return;
                }

                PCODE codeStart = (PCODE) methodStartPtr;

                SetupAndSprinkleBreakpointsForJittedMethod(pMD,
                                                           codeStart
                                                          );
            }
        }
    }
}

#ifdef FEATURE_PREJIT

void SetupGcCoverageForNativeMethod(MethodDesc* pMD,
                                    PCODE codeStart,
                                    IJitManager::MethodRegionInfo& methodRegionInfo
                                   )
{
    EECodeInfo codeInfo(codeStart);
    _ASSERTE(codeInfo.IsValid());
    _ASSERTE(codeInfo.GetRelOffset() == 0);

    _ASSERTE(PCODEToPINSTR(codeStart) == methodRegionInfo.hotStartAddress);

    SetupAndSprinkleBreakpoints(pMD,
                                &codeInfo,
                                methodRegionInfo,
                                TRUE
                               );
}

void SetupGcCoverageForNativeImage(Module* module)
{
    // Disable IBC logging here because the NGen image is not fully initialized yet.  Eager bound
    // indirection cells are not initialized yet and so IBC logging would crash while attempting to dereference them.
    IBCLoggingDisabler disableLogging;

#if 0
    // Debug code
    LPWSTR wszSetupGcCoverage = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_SetupGcCoverage);

    if (!wszSetupGcCoverage)
    {
        printf("wszSetupGcCoverage is NULL. Will not SetupGcCoverage for any module.\n");
        return;
    }
    else
    {
        if ((wcscmp(W("*"), wszSetupGcCoverage) == 0) ||                       // "*" means gcstress all modules
            (wcsstr(module->GetDebugName(), wszSetupGcCoverage) != NULL))
        {
            printf("[%ws] matched %ws\n", wszSetupGcCoverage, module->GetDebugName());
            // Fall through
        }
        else
        {
            printf("[%ws] NOT match %ws\n", wszSetupGcCoverage, module->GetDebugName());
            return;
        }
    }
#endif

#ifdef _DEBUG
    if (g_pConfig->SkipGCCoverage(module->GetSimpleName()))
        return;
#endif

    MethodIterator mi(module);
    while (mi.Next())
    {
        PTR_MethodDesc pMD = mi.GetMethodDesc();
        PCODE pMethodStart = mi.GetMethodStartAddress();

        IJitManager::MethodRegionInfo methodRegionInfo;
        mi.GetMethodRegionInfo(&methodRegionInfo);

        SetupGcCoverageForNativeMethod(pMD, pMethodStart, methodRegionInfo);
    }
}
#endif

#ifdef _TARGET_AMD64_

class GCCoverageRangeEnumerator
{
private:

    ICodeManager *m_pCodeManager;
    GCInfoToken m_pvGCTable;
    BYTE *m_codeStart;
    BYTE *m_codeEnd;
    BYTE *m_curFuncletEnd;
    BYTE *m_nextFunclet;

    BYTE* GetNextFunclet ()
    {
        if (m_nextFunclet == NULL)
            return m_codeEnd;

        BYTE *pCurFunclet = (BYTE*)EECodeInfo::findNextFunclet(m_nextFunclet, m_codeEnd - m_nextFunclet, (LPVOID*)&m_curFuncletEnd);
        m_nextFunclet = (pCurFunclet != NULL) ? m_curFuncletEnd : NULL;

        if (pCurFunclet == NULL)
            return m_codeEnd;

        LOG((LF_JIT, LL_INFO1000, "funclet range %p-%p\n", pCurFunclet, m_curFuncletEnd));

        //
        // workaround - adjust the funclet end address to exclude uninterruptible
        // code at the end of each funclet.  The jit currently puts data like
        // jump tables in the code portion of the allocation, instead of the
        // read-only portion.
        //
        // TODO: If the entire range is uninterruptible, we should skip the
        // entire funclet.
        //
        unsigned ofsLastInterruptible = m_pCodeManager->FindEndOfLastInterruptibleRegion(
                static_cast<unsigned int>(pCurFunclet     - m_codeStart),
                static_cast<unsigned int>(m_curFuncletEnd - m_codeStart),
                m_pvGCTable);

        if (ofsLastInterruptible)
        {
            m_curFuncletEnd = m_codeStart + ofsLastInterruptible;
            LOG((LF_JIT, LL_INFO1000, "adjusted end to %p\n", m_curFuncletEnd));
        }

        return pCurFunclet;
    }

public:

    GCCoverageRangeEnumerator (ICodeManager *pCodeManager, GCInfoToken pvGCTable, BYTE *codeStart, SIZE_T codeSize)
    {
        m_pCodeManager = pCodeManager;
        m_pvGCTable = pvGCTable;
        m_codeStart = codeStart;
        m_codeEnd = codeStart + codeSize;
        m_nextFunclet = codeStart;

        GetNextFunclet();
    }

    // Checks that the given pointer is inside of a range where gc should be
    // tested.  If not, increments the pointer until it is, and returns the
    // new pointer.
    BYTE *EnsureInRange (BYTE *cur)
    {
        if (cur >= m_curFuncletEnd)
        {
            cur = GetNextFunclet();
        }
        return cur;
    }

    BYTE *SkipToNextRange ()
    {
        return GetNextFunclet();
    }
};

#endif // _TARGET_AMD64_
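// A sketch of how the enumerator above is used (it mirrors the AMD64
// disassembly loop in SprinkleBreakpoints below; names are illustrative):
//
//     GCCoverageRangeEnumerator rangeEnum(codeMan, gcInfoToken, codeStart, codeSize);
//     BYTE* cur = codeStart;
//     while (cur < codeEnd)
//     {
//         // ... instrument the instruction at cur ...
//         cur += instrLen;
//         cur = rangeEnum.EnsureInRange(cur); // hop over uninterruptible funclet tails
//     }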
// When sprinkling break points, we must make sure that certain calls to
// Thread-suspension routines inlined into the managed method are not
// converted to GC-Stress points.  Otherwise, this will lead to race
// conditions with the GC.
//
// For example, for an inlined PInvoke stub, the JIT generates the following code
//
//    call    CORINFO_HELP_INIT_PINVOKE_FRAME // Obtain the thread pointer
//
//    mov     byte  ptr[rsi+12], 0    // Switch to preemptive mode [thread->preemptiveGcDisabled = 0]
//    call    rax                     // The actual native call, in preemptive mode
//    mov     byte  ptr[rsi+12], 1    // Switch the thread to Cooperative mode
//    cmp     dword ptr[(reloc 0x7ffd1bb77148)], 0    // if(g_TrapReturningThreads)
//    je      SHORT G_M40565_IG05
//    call    [CORINFO_HELP_STOP_FOR_GC]  // Call JIT_RareDisableHelper()
//
//
// For the SprinkleBreakPoints() routine, the JIT_RareDisableHelper() itself will
// look like an ordinary indirect call/safepoint.  So, it may rewrite it with
// a TRAP to perform GC
//
//    call    CORINFO_HELP_INIT_PINVOKE_FRAME // Obtain the thread pointer
//
//    mov     byte  ptr[rsi+12], 0    // Switch to preemptive mode [thread->preemptiveGcDisabled = 0]
//    cli                             // INTERRUPT_INSTR_CALL
//    mov     byte  ptr[rsi+12], 1    // Switch the thread to Cooperative mode
//    cmp     dword ptr[(reloc 0x7ffd1bb77148)], 0    // if(g_TrapReturningThreads)
//    je      SHORT G_M40565_IG05
//    cli                             // INTERRUPT_INSTR_CALL
//
//
// Now, a managed thread (T) can race with the GC as follows:
// 1) At the first safepoint, we notice that T is in preemptive mode during the call for GCStress.
//    So, it is put in cooperative mode for the purpose of GCStress (fPreemptiveGcDisabledForGcStress)
// 2) We DoGCStress().  Start off background GC in a different thread.
// 3) Then the thread T is put back to preemptive mode (because that's where it was).
//    Thread T continues execution along with the GC thread.
// 4) The jitted code puts thread T to cooperative mode, as part of the PInvoke epilog
// 5) Now instead of CORINFO_HELP_STOP_FOR_GC(), we hit the GCStress trap and start
//    another round of GCStress while in Cooperative mode.
// 6) Now, thread T can modify the stack (e.g., RedirectionFrame setup) while the GC thread is scanning it.
//
// This problem can be avoided by not inserting traps-for-GC in place of calls to CORINFO_HELP_STOP_FOR_GC()
//
// How do we identify the calls to CORINFO_HELP_STOP_FOR_GC()?
// Since this is a GCStress-only requirement, it's not worth special identification in the GcInfo.
// Since CORINFO_HELP_STOP_FOR_GC() calls are realized as indirect calls by the JIT, we cannot identify
// them by address at the time of SprinkleBreakpoints().
// So, we actually let SprinkleBreakpoints() replace the call to CORINFO_HELP_STOP_FOR_GC() with a trap,
// and revert it back to the original instruction the first time we hit the trap in OnGcCoverageInterrupt().
//
// Similarly, inserting breakpoints can be avoided for JIT_PollGC() and JIT_StressGC().
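// A minimal sketch of the revert-on-first-hit idea described above, as it
// would appear in the interrupt handler (illustrative only; restoreInstr is a
// hypothetical helper that copies the original bytes back from the shadow copy):
//
//     if (isCallToStopForGCJitHelper(savedInstrPtr))
//     {
//         // This trap stands in for a call to CORINFO_HELP_STOP_FOR_GC():
//         // put the original call back instead of doing a stress GC here.
//         restoreInstr(instrPtr, savedInstrPtr);
//         return;
//     }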
#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)

extern "C" FCDECL0(VOID, JIT_RareDisableHelper);

#else

FCDECL0(VOID, JIT_RareDisableHelper);

#endif

/****************************************************************************/
/* sprinkle interrupt instructions that will stop on every GC-safe location

   regionOffsetAdj - Represents the offset of the current region from the
                     beginning of the method (is 0 for hot region)
*/

void GCCoverageInfo::SprinkleBreakpoints(
        BYTE * saveAddr,
        PCODE  pCode,
        size_t codeSize,
        size_t regionOffsetAdj,
        BOOL   fZapped)
{
#if (defined(_TARGET_X86_) || defined(_TARGET_AMD64_)) && USE_DISASSEMBLER

    BYTE * codeStart = (BYTE *)pCode;

    memcpy(saveAddr, codeStart, codeSize);

    // For prejitted code we have to remove the write-protect on the code page
    if (fZapped)
    {
        DWORD oldProtect;
        ClrVirtualProtect(codeStart, codeSize, PAGE_EXECUTE_READWRITE, &oldProtect);
    }

    SLOT cur;
    BYTE* codeEnd = codeStart + codeSize;

    EECodeInfo codeInfo((PCODE)codeStart);

    static ConfigDWORD fGcStressOnDirectCalls; // ConfigDWORD must be a static variable

#ifdef _TARGET_AMD64_
    GCCoverageRangeEnumerator rangeEnum(codeMan, gcInfoToken, codeStart, codeSize);

    GcInfoDecoder safePointDecoder(gcInfoToken, (GcInfoDecoderFlags)0, 0);
    bool fSawPossibleSwitch = false;
#endif

    cur = codeStart;
    Disassembler disassembler;

    // When we find a direct call instruction and we are partially-interruptible,
    // we determine the target and place a breakpoint after the call
    // to simulate the hijack.
    // However, we need to wait until we disassemble the instruction
    // after the call in order to put the breakpoint or we'll mess up
    // the disassembly.
    // This variable is non-null if the previous instruction was a direct call,
    // and we have found its target MethodDesc.
    MethodDesc* prevDirectCallTargetMD = NULL;

    /* TODO.  Simulating the hijack could cause problems in cases where the
       return register is not always a valid GC ref on the return offset.
       That could happen if we got to the return offset via a branch
       and not via return from the preceding call.  However, this has not been
       an issue so far.

       Example:
        mov eax, someval
        test eax, eax
        jCC AFTERCALL
        call MethodWhichReturnsGCobject // return value is not used
        AFTERCALL:
    */

    while (cur < codeEnd)
    {
        _ASSERTE(*cur != INTERRUPT_INSTR && *cur != INTERRUPT_INSTR_CALL);

        MethodDesc* targetMD = NULL;
        InstructionType instructionType;
        size_t len = disassembler.DisassembleInstruction(cur, codeEnd - cur, &instructionType);

#ifdef _TARGET_AMD64_
        // REVISIT_TODO apparently the jit does not use the entire RUNTIME_FUNCTION range
        // for code.  It uses some for switch tables.  Because the first few offsets
        // may be decodable as instructions, we can't reason about where we should
        // encounter invalid instructions.  However, we do not want to silently skip
        // large chunks of methods just because the JIT started emitting a new
        // instruction, so only assume it is a switch table if we've seen the switch
        // code (an indirect unconditional jump)
        if ((len == 0) && fSawPossibleSwitch)
        {
            LOG((LF_JIT, LL_WARNING, "invalid instruction at %p (possibly start of switch table)\n", cur));
            cur = rangeEnum.SkipToNextRange();
            prevDirectCallTargetMD = NULL;
            fSawPossibleSwitch = false;
            continue;
        }
#endif

        _ASSERTE(len > 0);
        _ASSERTE(len <= (size_t)(codeEnd-cur));

        switch(instructionType)
        {
        case InstructionType::Call_IndirectUnconditional:
#ifdef _TARGET_AMD64_
            if(safePointDecoder.IsSafePoint((UINT32)(cur + len - codeStart + regionOffsetAdj)))
#endif
            {
                *cur = INTERRUPT_INSTR_CALL;    // return value.  May need to protect
            }
            break;
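        // For direct calls we cannot simply whack the call site; we first
        // resolve the callee so that, one instruction later, we know whether
        // the return value is a GC ref that must be protected (see
        // prevDirectCallTargetMD below).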
        case InstructionType::Call_DirectUnconditional:
            if(fGcStressOnDirectCalls.val(CLRConfig::INTERNAL_GcStressOnDirectCalls))
            {
#ifdef _TARGET_AMD64_
                if(safePointDecoder.IsSafePoint((UINT32)(cur + len - codeStart + regionOffsetAdj)))
#endif
                {
                    SLOT nextInstr;
                    SLOT target = getTargetOfCall(cur, NULL, &nextInstr);

                    if (target != 0)
                    {
                        // JIT_RareDisableHelper() is expected to be an indirect call.
                        // If we encounter a direct call (in future), skip the call
                        _ASSERTE(target != (SLOT)JIT_RareDisableHelper);
                        targetMD = getTargetMethodDesc((PCODE)target);
                    }
                }
            }
            break;

#ifdef _TARGET_AMD64_
        case InstructionType::Branch_IndirectUnconditional:
            fSawPossibleSwitch = true;
            break;
#endif

        default:
            // Clang issues an error saying that some enum values are not handled in the switch, that's intended
            break;
        }

        if (prevDirectCallTargetMD != 0)
        {
            if (prevDirectCallTargetMD->ReturnsObject(true) != MetaSig::RETNONOBJ)
                *cur = INTERRUPT_INSTR_PROTECT_RET;
            else
                *cur = INTERRUPT_INSTR;
        }

        // For fully interruptible code, we end up whacking every instruction
        // to INTERRUPT_INSTR.  For non-fully interruptible code, we end
        // up only touching the call instructions (specifically so that we
        // can really do the GC on the instruction just after the call).
        _ASSERTE(FitsIn<DWORD>((cur - codeStart) + regionOffsetAdj));
        if (codeMan->IsGcSafe(&codeInfo, static_cast<DWORD>((cur - codeStart) + regionOffsetAdj)))
            *cur = INTERRUPT_INSTR;

#ifdef _TARGET_X86_
        // we will whack every instruction in the prolog and epilog to make certain
        // our unwinding logic works there.
        if (codeMan->IsInPrologOrEpilog((cur - codeStart) + (DWORD)regionOffsetAdj, gcInfoToken, NULL))
        {
            *cur = INTERRUPT_INSTR;
        }
#endif

        // If we couldn't find the method desc targetMD is zero
        prevDirectCallTargetMD = targetMD;

        cur += len;

#ifdef _TARGET_AMD64_
        SLOT newCur = rangeEnum.EnsureInRange(cur);
        if(newCur != cur)
        {
            prevDirectCallTargetMD = NULL;
            cur = newCur;
            fSawPossibleSwitch = false;
        }
#endif
    }
    // If we are not able to place an interrupt at the first instruction, this means that
    // we are partially interruptible with no prolog.  Just don't bother to do the
    // epilog checks, since the epilog will be trivial (a single return instr)
    assert(codeSize > 0);
    if ((regionOffsetAdj==0) && (*codeStart != INTERRUPT_INSTR))
        doingEpilogChecks = false;

#elif defined(_TARGET_ARM_) || defined(_TARGET_ARM64_)

    //Save the method code from hotRegion
    memcpy(saveAddr, (BYTE*)methodRegion.hotStartAddress, methodRegion.hotSize);

    if (methodRegion.coldSize > 0)
    {
        //Save the method code from coldRegion
        memcpy(saveAddr+methodRegion.hotSize, (BYTE*)methodRegion.coldStartAddress, methodRegion.coldSize);
    }

    // For prejitted code we have to remove the write-protect on the code page
    if (fZapped)
    {
        DWORD oldProtect;
        ClrVirtualProtect((BYTE*)methodRegion.hotStartAddress, methodRegion.hotSize, PAGE_EXECUTE_READWRITE, &oldProtect);

        if (methodRegion.coldSize > 0)
        {
            ClrVirtualProtect((BYTE*)methodRegion.coldStartAddress, methodRegion.coldSize, PAGE_EXECUTE_READWRITE, &oldProtect);
        }
    }

    GcInfoDecoder safePointDecoder(gcInfoToken, (GcInfoDecoderFlags)0, 0);

    assert(methodRegion.hotSize > 0);

#ifdef PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED
    safePointDecoder.EnumerateSafePoints(&replaceSafePointInstructionWithGcStressInstr, this);
#endif // PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED

    safePointDecoder.EnumerateInterruptibleRanges(&replaceInterruptibleRangesWithGcStressInstr, this);

    FlushInstructionCache(GetCurrentProcess(), (BYTE*)methodRegion.hotStartAddress, methodRegion.hotSize);

    if (methodRegion.coldSize > 0)
    {
        FlushInstructionCache(GetCurrentProcess(), (BYTE*)methodRegion.coldStartAddress, methodRegion.coldSize);
    }

#else
    _ASSERTE(!"not implemented for platform");
#endif // _TARGET_X86_
}

#if defined(_TARGET_ARM_) || defined(_TARGET_ARM64_)

#ifdef PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED

void replaceSafePointInstructionWithGcStressInstr(UINT32 safePointOffset, LPVOID pGCCover)
{
    PCODE pCode = NULL;
    IJitManager::MethodRegionInfo *ptr = &(((GCCoverageInfo*)pGCCover)->methodRegion);

    //Get code address from offset
    if (safePointOffset < ptr->hotSize)
        pCode = ptr->hotStartAddress + safePointOffset;
    else if(safePointOffset - ptr->hotSize < ptr->coldSize)
    {
        SIZE_T coldOffset = safePointOffset - ptr->hotSize;
        pCode = ptr->coldStartAddress + coldOffset;
    }
    else
    {
        //For some methods (e.g. MCCTest.MyClass.GetSum2 in test file jit\jit64\mcc\interop\mcc_i07.il) gcinfo
        //points to a safepoint beyond the length of the method, so the assert below is commented out.
        //_ASSERTE(safePointOffset - ptr->hotSize < ptr->coldSize);
        return;
    }

    SLOT instrPtr = (BYTE*)PCODEToPINSTR(pCode);

    // For code sequences of the type
    //     BL func1
    //     BL func2     // Safe point 1
    //     mov r1 r0    // Safe point 2
    // both of the above safe points must be replaced with the gcStress instruction.
    // However, because the first safe point has already been replaced with the gcstress
    // instruction, decoding of the call instruction will fail when processing the 2nd
    // safe point.  Therefore the saved instruction must be used instead of
    // instrPtr for decoding the call instruction.
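    // Index into the shadow copy made by SprinkleBreakpoints: savedCode holds
    // the hot region followed by the cold region, so a method-relative safe
    // point offset locates the same (unmodified) instruction in the copy.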
    SLOT savedInstrPtr = ((GCCoverageInfo*)pGCCover)->savedCode + safePointOffset;

    //Determine if the instruction before the safe point is a call using an immediate (BLX Imm)
    //or a call by register (BLX Rm)
    BOOL instructionIsACallThroughRegister = FALSE;
    BOOL instructionIsACallThroughImmediate = FALSE;

#if defined(_TARGET_ARM_)

    // call by register instruction is two bytes (BL Reg T1 encoding)
    WORD instr = *((WORD*)savedInstrPtr - 1);

    instr = instr & 0xff87;
    if((instr ^ 0x4780) == 0)
        // It is call by register
        instructionIsACallThroughRegister = TRUE;

    // call using immediate instructions are 4 bytes (BL