author | Koundinya Veluri <kouvel@users.noreply.github.com> | 2018-01-25 12:01:32 -0800
committer | GitHub <noreply@github.com> | 2018-01-25 12:01:32 -0800
commit | 209415618ca5d1a5d1d9e39ca78d643d0935534e (patch)
tree | d83c946783390afbb52e3e0f968018c38dfd2560
parent | e9985126acb0f1efd7c780faac4e66bc798b73c0 (diff)
Enable tiered jitting for R2R methods (#15967)
- Included R2R methods, and generics over value types in CoreLib, in tiered jitting. Tier 0 for an R2R method is the precompiled code when available, and tier 1 is scheduled selectively based on call counting.
- Added a delay before call counting for tier 1 promotion begins. The delay is a short duration after frequent tier 0 jitting stops (the current heuristic for identifying startup); a simplified model of this heuristic is sketched just after this list.
- Startup time and steady-state performance have improved on JitBench. There is a brief regression shortly after startup, caused by call counting and tier 1 jitting, before performance settles at steady state.
- Added two new config values: one to configure the call count threshold for promoting to tier 1, and another to specify the delay from the last tier 0 JIT invocation before call counting begins. A combined sketch that exercises both values follows the diff at the end of this page.
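
As a rough illustration of the delay heuristic described above, here is a minimal standalone sketch (the class name `CountingDelay` and its interface are invented for illustration, not the runtime's actual types): each tier 0 JIT invocation sets a flag, and when the delay timer fires, the delay is extended instead of starting call counting if the flag was set in the meantime. The real logic lives in `TieredCompilationManager::Tier1DelayTimerCallbackWorker` in the diff below.

```cpp
// Minimal model of the call-counting delay heuristic in this commit:
// keep extending the delay while tier 0 jitting is still frequent, and
// only begin call counting once a full delay passes with no tier 0 JIT.
#include <atomic>
#include <cstdio>

class CountingDelay
{
    std::atomic<bool> m_wasTier0JitInvokedSinceReset{false};

public:
    // Called on every tier 0 JIT invocation (models OnTier0JitInvoked below).
    void OnTier0JitInvoked()
    {
        m_wasTier0JitInvokedSinceReset.store(true, std::memory_order_relaxed);
    }

    // Called when the delay timer fires. Returns true if the timer should be
    // rescheduled for another full delay because tier 0 jitting was still
    // active; false once startup appears to have quiesced, at which point
    // call counting may begin. exchange(false) both tests and resets the
    // flag, mirroring the reset-then-reschedule sequence in the patch.
    bool ShouldRescheduleOnTimerFired()
    {
        return m_wasTier0JitInvokedSinceReset.exchange(false, std::memory_order_relaxed);
    }
};

int main()
{
    CountingDelay delay;
    delay.OnTier0JitInvoked();                                  // startup jitting still happening
    std::printf("%d\n", delay.ShouldRescheduleOnTimerFired()); // 1: extend the delay
    std::printf("%d\n", delay.ShouldRescheduleOnTimerFired()); // 0: start call counting
    return 0;
}
```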
-rw-r--r-- | src/inc/clrconfigvalues.h | 2
-rw-r--r-- | src/vm/arm/cgencpu.h | 28
-rw-r--r-- | src/vm/arm64/cgencpu.h | 27
-rw-r--r-- | src/vm/callcounter.cpp | 11
-rw-r--r-- | src/vm/callcounter.h | 2
-rw-r--r-- | src/vm/ceemain.cpp | 15
-rw-r--r-- | src/vm/codeversion.cpp | 15
-rw-r--r-- | src/vm/eeconfig.cpp | 10
-rw-r--r-- | src/vm/eeconfig.h | 4
-rw-r--r-- | src/vm/i386/stublinkerx86.cpp | 42
-rw-r--r-- | src/vm/i386/stublinkerx86.h | 17
-rw-r--r-- | src/vm/methodtablebuilder.cpp | 1
-rw-r--r-- | src/vm/precode.cpp | 23
-rw-r--r-- | src/vm/precode.h | 1
-rw-r--r-- | src/vm/prestub.cpp | 24
-rw-r--r-- | src/vm/tieredcompilation.cpp | 179
-rw-r--r-- | src/vm/tieredcompilation.h | 18
17 files changed, 395 insertions, 24 deletions
diff --git a/src/inc/clrconfigvalues.h b/src/inc/clrconfigvalues.h
index a0b205a4f5..95179d8ed2 100644
--- a/src/inc/clrconfigvalues.h
+++ b/src/inc/clrconfigvalues.h
@@ -649,6 +649,8 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_HillClimbing_GainExponent,
 ///
 #ifdef FEATURE_TIERED_COMPILATION
 RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredCompilation, W("EXPERIMENTAL_TieredCompilation"), 0, "Enables tiered compilation")
+RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredCompilation_Tier1CallCountThreshold, W("TieredCompilation_Tier1CallCountThreshold"), 30, "Number of times a method must be called after which it is promoted to tier 1.")
+RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredCompilation_Tier1CallCountingDelayMs, W("TieredCompilation_Tier1CallCountingDelayMs"), 100, "Delay in milliseconds since process startup or the last tier 0 JIT before call counting begins for tier 1 promotion.")
 #endif
diff --git a/src/vm/arm/cgencpu.h b/src/vm/arm/cgencpu.h
index 8da2b2b3cc..3997dbf75b 100644
--- a/src/vm/arm/cgencpu.h
+++ b/src/vm/arm/cgencpu.h
@@ -29,6 +29,8 @@ class BaseDomain;
 class ZapNode;
 struct ArgLocDesc;
 
+extern PCODE GetPreStubEntryPoint();
+
 #define USE_REDIRECT_FOR_GCSTRESS
 
 // CPU-dependent functions
@@ -1113,6 +1115,19 @@ struct StubPrecode {
         return m_pTarget;
     }
 
+    void ResetTargetInterlocked()
+    {
+        CONTRACTL
+        {
+            THROWS;
+            GC_TRIGGERS;
+        }
+        CONTRACTL_END;
+
+        EnsureWritableExecutablePages(&m_pTarget);
+        InterlockedExchange((LONG*)&m_pTarget, (LONG)GetPreStubEntryPoint());
+    }
+
     BOOL SetTargetInterlocked(TADDR target, TADDR expected)
     {
         CONTRACTL
@@ -1206,6 +1221,19 @@ struct FixupPrecode {
         return m_pTarget;
     }
 
+    void ResetTargetInterlocked()
+    {
+        CONTRACTL
+        {
+            THROWS;
+            GC_TRIGGERS;
+        }
+        CONTRACTL_END;
+
+        EnsureWritableExecutablePages(&m_pTarget);
+        InterlockedExchange((LONG*)&m_pTarget, (LONG)GetEEFuncEntryPoint(PrecodeFixupThunk));
+    }
+
     BOOL SetTargetInterlocked(TADDR target, TADDR expected)
     {
         CONTRACTL
diff --git a/src/vm/arm64/cgencpu.h b/src/vm/arm64/cgencpu.h
index a168cdc162..7e3d62056a 100644
--- a/src/vm/arm64/cgencpu.h
+++ b/src/vm/arm64/cgencpu.h
@@ -24,6 +24,7 @@ EXTERN_C void setFPReturn(int fpSize, INT64 retVal);
 
 class ComCallMethodDesc;
 
+extern PCODE GetPreStubEntryPoint();
 
 #define COMMETHOD_PREPAD 24 // # extra bytes to allocate in addition to sizeof(ComCallMethodDesc)
 #ifdef FEATURE_COMINTEROP
@@ -572,6 +573,19 @@ struct StubPrecode {
         return m_pTarget;
     }
 
+    void ResetTargetInterlocked()
+    {
+        CONTRACTL
+        {
+            THROWS;
+            GC_TRIGGERS;
+        }
+        CONTRACTL_END;
+
+        EnsureWritableExecutablePages(&m_pTarget);
+        InterlockedExchange64((LONGLONG*)&m_pTarget, (TADDR)GetPreStubEntryPoint());
+    }
+
     BOOL SetTargetInterlocked(TADDR target, TADDR expected)
     {
         CONTRACTL
@@ -685,6 +699,19 @@ struct FixupPrecode {
         return m_pTarget;
     }
 
+    void ResetTargetInterlocked()
+    {
+        CONTRACTL
+        {
+            THROWS;
+            GC_TRIGGERS;
+        }
+        CONTRACTL_END;
+
+        EnsureWritableExecutablePages(&m_pTarget);
+        InterlockedExchange64((LONGLONG*)&m_pTarget, (TADDR)GetEEFuncEntryPoint(PrecodeFixupThunk));
+    }
+
     BOOL SetTargetInterlocked(TADDR target, TADDR expected)
     {
         CONTRACTL
diff --git a/src/vm/callcounter.cpp b/src/vm/callcounter.cpp
index 14d9e6e6a4..641b61198a 100644
--- a/src/vm/callcounter.cpp
+++ b/src/vm/callcounter.cpp
@@ -32,11 +32,18 @@ CallCounter::CallCounter()
 // Returns TRUE if no future invocations are needed (we reached the count we cared about)
 // and FALSE otherwise. It is permissible to keep calling even when TRUE was previously
 // returned and multi-threaded race conditions will surely cause this to occur.
-BOOL CallCounter::OnMethodCalled(MethodDesc* pMethodDesc)
+void CallCounter::OnMethodCalled(
+    MethodDesc* pMethodDesc,
+    TieredCompilationManager *pTieredCompilationManager,
+    BOOL* shouldStopCountingCallsRef,
+    BOOL* wasPromotedToTier1Ref)
 {
     STANDARD_VM_CONTRACT;
 
     _ASSERTE(pMethodDesc->IsEligibleForTieredCompilation());
+    _ASSERTE(pTieredCompilationManager != nullptr);
+    _ASSERTE(shouldStopCountingCallsRef != nullptr);
+    _ASSERTE(wasPromotedToTier1Ref != nullptr);
 
     // PERF: This as a simple to implement, but not so performant, call counter
     // Currently this is only called until we reach a fixed call count and then
@@ -75,7 +82,7 @@ BOOL CallCounter::OnMethodCalled(MethodDesc* pMethodDesc)
         }
     }
 
-    return GetAppDomain()->GetTieredCompilationManager()->OnMethodCalled(pMethodDesc, callCount);
+    pTieredCompilationManager->OnMethodCalled(pMethodDesc, callCount, shouldStopCountingCallsRef, wasPromotedToTier1Ref);
 }
 
 #endif // FEATURE_TIERED_COMPILATION
diff --git a/src/vm/callcounter.h b/src/vm/callcounter.h
index ed98ccb1c8..4e9a5d32ec 100644
--- a/src/vm/callcounter.h
+++ b/src/vm/callcounter.h
@@ -70,7 +70,7 @@ public:
     CallCounter();
 #endif
 
-    BOOL OnMethodCalled(MethodDesc* pMethodDesc);
+    void OnMethodCalled(MethodDesc* pMethodDesc, TieredCompilationManager *pTieredCompilationManager, BOOL* shouldStopCountingCallsRef, BOOL* wasPromotedToTier1Ref);
 
 private:
diff --git a/src/vm/ceemain.cpp b/src/vm/ceemain.cpp
index 3f6492bbe5..e9b914e0cf 100644
--- a/src/vm/ceemain.cpp
+++ b/src/vm/ceemain.cpp
@@ -1102,7 +1102,16 @@ void EEStartupHelper(COINITIEE fFlags)
     hr = S_OK;
     STRESS_LOG0(LF_STARTUP, LL_ALWAYS, "===================EEStartup Completed===================");
 
-#if defined(_DEBUG) && !defined(CROSSGEN_COMPILE)
+#ifndef CROSSGEN_COMPILE
+
+#ifdef FEATURE_TIERED_COMPILATION
+    if (g_pConfig->TieredCompilation())
+    {
+        SystemDomain::System()->DefaultDomain()->GetTieredCompilationManager()->InitiateTier1CountingDelay();
+    }
+#endif
+
+#ifdef _DEBUG
 
     //if g_fEEStarted was false when we loaded the System Module, we did not run ExpandAll on it. In
     //this case, make sure we run ExpandAll here. The rationale is that if we Jit before g_fEEStarted
@@ -1120,7 +1129,9 @@ void EEStartupHelper(COINITIEE fFlags)
     // Perform mscorlib consistency check if requested
     g_Mscorlib.CheckExtended();
 
-#endif // _DEBUG && !CROSSGEN_COMPILE
+#endif // _DEBUG
+
+#endif // !CROSSGEN_COMPILE
 
 ErrExit: ;
     }
diff --git a/src/vm/codeversion.cpp b/src/vm/codeversion.cpp
index 10d3013f35..da808e8839 100644
--- a/src/vm/codeversion.cpp
+++ b/src/vm/codeversion.cpp
@@ -2177,12 +2177,14 @@ PCODE CodeVersionManager::PublishVersionableCodeIfNecessary(MethodDesc* pMethodD
             // attempt to publish the active version still under the lock
             if (FAILED(hr = PublishNativeCodeVersion(pMethodDesc, activeVersion, fEESuspend)))
             {
-                // if we need an EESuspend to publish then start over. We have to leave the lock in order to suspend,
-                // and when we leave the lock the active version might change again. However now we know that suspend
+                // If we need an EESuspend to publish then start over. We have to leave the lock in order to suspend,
+                // and when we leave the lock the active version might change again. However now we know that suspend is
+                // necessary.
                 if (hr == CORPROF_E_RUNTIME_SUSPEND_REQUIRED)
                 {
                     _ASSERTE(!fEESuspend);
                     fEESuspend = true;
+                    continue; // skip RestartEE() below since SuspendEE() has not been called yet
                 }
                 else
                 {
@@ -2215,6 +2217,8 @@ PCODE CodeVersionManager::PublishVersionableCodeIfNecessary(MethodDesc* pMethodD
 
 HRESULT CodeVersionManager::PublishNativeCodeVersion(MethodDesc* pMethod, NativeCodeVersion nativeCodeVersion, BOOL fEESuspended)
 {
+    // TODO: This function needs to make sure it does not change the precode's target if call counting is in progress. Track
+    // whether call counting is currently being done for the method, and use a lock to ensure the expected precode target.
     LIMITED_METHOD_CONTRACT;
     _ASSERTE(LockOwnedByCurrentThread());
     _ASSERTE(pMethod->IsVersionable());
@@ -2236,7 +2240,12 @@ HRESULT CodeVersionManager::PublishNativeCodeVersion(MethodDesc* pMethod, Native
     {
         EX_TRY
         {
-            hr = pPrecode->SetTargetInterlocked(pCode, FALSE) ? S_OK : E_FAIL;
+            pPrecode->SetTargetInterlocked(pCode, FALSE);
+
+            // SetTargetInterlocked() would return false if it lost the race with another thread. That is fine, this thread
+            // can continue assuming it was successful, similarly to it successfully updating the target and another thread
+            // updating the target again shortly afterwards.
+            hr = S_OK;
         }
         EX_CATCH_HRESULT(hr);
         return hr;
diff --git a/src/vm/eeconfig.cpp b/src/vm/eeconfig.cpp
index 95a7133bb9..ab83463256 100644
--- a/src/vm/eeconfig.cpp
+++ b/src/vm/eeconfig.cpp
@@ -376,6 +376,8 @@ HRESULT EEConfig::Init()
 
 #if defined(FEATURE_TIERED_COMPILATION)
     fTieredCompilation = false;
+    tieredCompilation_tier1CallCountThreshold = 1;
+    tieredCompilation_tier1CallCountingDelayMs = 0;
 #endif
 
 #if defined(FEATURE_GDBJIT) && defined(_DEBUG)
@@ -1239,6 +1241,14 @@ HRESULT EEConfig::sync()
 
 #if defined(FEATURE_TIERED_COMPILATION)
     fTieredCompilation = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TieredCompilation) != 0;
+    tieredCompilation_tier1CallCountThreshold =
+        CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TieredCompilation_Tier1CallCountThreshold);
+    if (tieredCompilation_tier1CallCountThreshold < 1)
+    {
+        tieredCompilation_tier1CallCountThreshold = 1;
+    }
+    tieredCompilation_tier1CallCountingDelayMs =
+        CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TieredCompilation_Tier1CallCountingDelayMs);
 #endif
 
 #if defined(FEATURE_GDBJIT) && defined(_DEBUG)
diff --git a/src/vm/eeconfig.h b/src/vm/eeconfig.h
index 9bc0073bf3..5c88f42ac7 100644
--- a/src/vm/eeconfig.h
+++ b/src/vm/eeconfig.h
@@ -285,6 +285,8 @@ public:
     // Tiered Compilation config
 #if defined(FEATURE_TIERED_COMPILATION)
     bool TieredCompilation(void) const {LIMITED_METHOD_CONTRACT; return fTieredCompilation; }
+    DWORD TieredCompilation_Tier1CallCountThreshold() const { LIMITED_METHOD_CONTRACT; return tieredCompilation_tier1CallCountThreshold; }
+    DWORD TieredCompilation_Tier1CallCountingDelayMs() const { LIMITED_METHOD_CONTRACT; return tieredCompilation_tier1CallCountingDelayMs; }
 #endif
 
 #if defined(FEATURE_GDBJIT) && defined(_DEBUG)
@@ -1107,6 +1109,8 @@ private:
     //----------------------------------------------------------------
 #if defined(FEATURE_TIERED_COMPILATION)
     bool fTieredCompilation;
+    DWORD tieredCompilation_tier1CallCountThreshold;
+    DWORD tieredCompilation_tier1CallCountingDelayMs;
 #endif
 
 #if defined(FEATURE_GDBJIT) && defined(_DEBUG)
diff --git a/src/vm/i386/stublinkerx86.cpp b/src/vm/i386/stublinkerx86.cpp
index 14b9701a29..a11c6a3212 100644
--- a/src/vm/i386/stublinkerx86.cpp
+++ b/src/vm/i386/stublinkerx86.cpp
@@ -6416,6 +6416,21 @@ void FixupPrecode::EnumMemoryRegions(CLRDataEnumMemoryFlags flags)
 
 #ifndef DACCESS_COMPILE
 
+void rel32SetInterlocked(/*PINT32*/ PVOID pRel32, TADDR target, MethodDesc* pMD)
+{
+    CONTRACTL
+    {
+        THROWS;         // Creating a JumpStub could throw OutOfMemory
+        GC_TRIGGERS;
+    }
+    CONTRACTL_END;
+
+    INT32 targetRel32 = rel32UsingJumpStub((INT32*)pRel32, target, pMD);
+
+    _ASSERTE(IS_ALIGNED(pRel32, sizeof(INT32)));
+    FastInterlockExchange((LONG*)pRel32, (LONG)targetRel32);
+}
+
 BOOL rel32SetInterlocked(/*PINT32*/ PVOID pRel32, TADDR target, TADDR expected, MethodDesc* pMD)
 {
     CONTRACTL
@@ -6535,6 +6550,33 @@ void FixupPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator, int
     }
 }
 
+void FixupPrecode::ResetTargetInterlocked()
+{
+    CONTRACTL
+    {
+        THROWS;         // Creating a JumpStub could throw OutOfMemory
+        GC_NOTRIGGER;
+    }
+    CONTRACTL_END;
+
+    FixupPrecode newValue = *this;
+    newValue.m_op = X86_INSTR_CALL_REL32; // call PrecodeFixupThunk
+    newValue.m_type = FixupPrecode::TypePrestub;
+
+    PCODE target = (PCODE)GetEEFuncEntryPoint(PrecodeFixupThunk);
+    MethodDesc* pMD = (MethodDesc*)GetMethodDesc();
+    newValue.m_rel32 =
+#ifdef FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS
+        pMD->IsLCGMethod() ?
+            rel32UsingPreallocatedJumpStub(&m_rel32, target, GetDynamicMethodEntryJumpStub()) :
+#endif // FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS
+            rel32UsingJumpStub(&m_rel32, target, pMD);
+
+    _ASSERTE(IS_ALIGNED(this, sizeof(INT64)));
+    EnsureWritableExecutablePages(this, sizeof(INT64));
+    FastInterlockExchangeLong((INT64*)this, *(INT64*)&newValue);
+}
+
 BOOL FixupPrecode::SetTargetInterlocked(TADDR target, TADDR expected)
 {
     CONTRACTL
diff --git a/src/vm/i386/stublinkerx86.h b/src/vm/i386/stublinkerx86.h
index 50dc3b35a5..229ab0767b 100644
--- a/src/vm/i386/stublinkerx86.h
+++ b/src/vm/i386/stublinkerx86.h
@@ -10,6 +10,8 @@ struct ArrayOpScript;
 class MetaSig;
 
+extern PCODE GetPreStubEntryPoint();
+
 //=======================================================================
 
 #define X86_INSTR_CALL_REL32    0xE8    // call rel32
@@ -454,6 +456,7 @@ inline TADDR rel32Decode(/*PTR_INT32*/ TADDR pRel32)
     return pRel32 + 4 + *PTR_INT32(pRel32);
 }
 
+void rel32SetInterlocked(/*PINT32*/ PVOID pRel32, TADDR target, MethodDesc* pMD);
 BOOL rel32SetInterlocked(/*PINT32*/ PVOID pRel32, TADDR target, TADDR expected, MethodDesc* pMD);
 
 //------------------------------------------------------------------------
@@ -533,6 +536,19 @@ struct StubPrecode {
         return rel32Decode(PTR_HOST_MEMBER_TADDR(StubPrecode, this, m_rel32));
     }
 
+    void ResetTargetInterlocked()
+    {
+        CONTRACTL
+        {
+            THROWS;
+            GC_TRIGGERS;
+        }
+        CONTRACTL_END;
+
+        EnsureWritableExecutablePages(&m_rel32);
+        return rel32SetInterlocked(&m_rel32, GetPreStubEntryPoint(), (MethodDesc*)GetMethodDesc());
+    }
+
     BOOL SetTargetInterlocked(TADDR target, TADDR expected)
     {
         CONTRACTL
@@ -714,6 +730,7 @@ struct FixupPrecode {
         return rel32Decode(PTR_HOST_MEMBER_TADDR(FixupPrecode, this, m_rel32));
     }
 
+    void ResetTargetInterlocked();
     BOOL SetTargetInterlocked(TADDR target, TADDR expected);
 
     static BOOL IsFixupPrecodeByASM(TADDR addr)
diff --git a/src/vm/methodtablebuilder.cpp b/src/vm/methodtablebuilder.cpp
index 83116f94f5..43071609cc 100644
--- a/src/vm/methodtablebuilder.cpp
+++ b/src/vm/methodtablebuilder.cpp
@@ -6983,7 +6983,6 @@ MethodTableBuilder::NeedsNativeCodeSlot(bmtMDMethod * pMDMethod)
 #ifdef FEATURE_TIERED_COMPILATION
     // Keep in-sync with MethodDesc::IsEligibleForTieredCompilation()
     if (g_pConfig->TieredCompilation() &&
-        !GetModule()->HasNativeOrReadyToRunImage() &&
         (pMDMethod->GetMethodType() == METHOD_TYPE_NORMAL || pMDMethod->GetMethodType() == METHOD_TYPE_INSTANTIATED))
     {
         return TRUE;
diff --git a/src/vm/precode.cpp b/src/vm/precode.cpp
index 8891d5a903..103fc03959 100644
--- a/src/vm/precode.cpp
+++ b/src/vm/precode.cpp
@@ -425,6 +425,29 @@ void Precode::Init(PrecodeType t, MethodDesc* pMD, LoaderAllocator *pLoaderAlloc
     _ASSERTE(IsValidType(GetType()));
 }
 
+void Precode::ResetTargetInterlocked()
+{
+    WRAPPER_NO_CONTRACT;
+
+    PrecodeType precodeType = GetType();
+    switch (precodeType)
+    {
+        case PRECODE_STUB:
+            AsStubPrecode()->ResetTargetInterlocked();
+            break;
+
+#ifdef HAS_FIXUP_PRECODE
+        case PRECODE_FIXUP:
+            AsFixupPrecode()->ResetTargetInterlocked();
+            break;
+#endif // HAS_FIXUP_PRECODE
+
+        default:
+            UnexpectedPrecodeType("Precode::ResetTargetInterlocked", precodeType);
+            break;
+    }
+}
+
 BOOL Precode::SetTargetInterlocked(PCODE target, BOOL fOnlyRedirectFromPrestub)
 {
     WRAPPER_NO_CONTRACT;
diff --git a/src/vm/precode.h b/src/vm/precode.h
index 8947192482..1a61253921 100644
--- a/src/vm/precode.h
+++ b/src/vm/precode.h
@@ -261,6 +261,7 @@ public:
     void Init(PrecodeType t, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator);
 
 #ifndef DACCESS_COMPILE
+    void ResetTargetInterlocked();
     BOOL SetTargetInterlocked(PCODE target, BOOL fOnlyRedirectFromPrestub = TRUE);
 
     // Reset precode to point to prestub
diff --git a/src/vm/prestub.cpp b/src/vm/prestub.cpp
index 8934f25d67..cd857134ab 100644
--- a/src/vm/prestub.cpp
+++ b/src/vm/prestub.cpp
@@ -730,6 +730,13 @@ PCODE MethodDesc::JitCompileCodeLockedEventWrapper(PrepareCodeConfig* pConfig, J
         }
     }
 
+#ifdef FEATURE_TIERED_COMPILATION
+    if (g_pConfig->TieredCompilation() && !flags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_TIER1))
+    {
+        GetAppDomain()->GetTieredCompilationManager()->OnTier0JitInvoked();
+    }
+#endif // FEATURE_TIERED_COMPILATION
+
 #ifdef FEATURE_STACK_SAMPLING
     StackSampler::RecordJittingInfo(this, flags);
 #endif // FEATURE_STACK_SAMPLING
@@ -1699,11 +1706,14 @@ PCODE MethodDesc::DoPrestub(MethodTable *pDispatchingMT)
     // for this method only then do we back-patch it.
     BOOL fCanBackpatchPrestub = TRUE;
 #ifdef FEATURE_TIERED_COMPILATION
+    TieredCompilationManager* pTieredCompilationManager = nullptr;
     BOOL fEligibleForTieredCompilation = IsEligibleForTieredCompilation();
+    BOOL fWasPromotedToTier1 = FALSE;
     if (fEligibleForTieredCompilation)
     {
+        pTieredCompilationManager = GetAppDomain()->GetTieredCompilationManager();
         CallCounter * pCallCounter = GetCallCounter();
-        fCanBackpatchPrestub = pCallCounter->OnMethodCalled(this);
+        pCallCounter->OnMethodCalled(this, pTieredCompilationManager, &fCanBackpatchPrestub, &fWasPromotedToTier1);
     }
 #endif
 
@@ -1715,6 +1725,12 @@ PCODE MethodDesc::DoPrestub(MethodTable *pDispatchingMT)
         (!fIsPointingToPrestub && IsVersionableWithJumpStamp()))
     {
         pCode = GetCodeVersionManager()->PublishVersionableCodeIfNecessary(this, fCanBackpatchPrestub);
+
+        if (pTieredCompilationManager != nullptr && fCanBackpatchPrestub && pCode != NULL && !fWasPromotedToTier1)
+        {
+            pTieredCompilationManager->OnMethodCallCountingStoppedWithoutTier1Promotion(this);
+        }
+
         fIsPointingToPrestub = IsPointingToPrestub();
     }
 #endif
@@ -1733,10 +1749,10 @@ PCODE MethodDesc::DoPrestub(MethodTable *pDispatchingMT)
 
     if (pCode)
     {
-        // The only reason we are still pointing to prestub is because the call counter
-        // prevented it. We should still short circuit and return the code without
+        // The only reasons we are still pointing to prestub is because the call counter
+        // prevented it or this thread lost the race with another thread in updating the
+        // entry point. We should still short circuit and return the code without
         // backpatching.
-        _ASSERTE(!fCanBackpatchPrestub);
         RETURN pCode;
     }
diff --git a/src/vm/tieredcompilation.cpp b/src/vm/tieredcompilation.cpp
index 48c6670fb2..f89f4f2e6b 100644
--- a/src/vm/tieredcompilation.cpp
+++ b/src/vm/tieredcompilation.cpp
@@ -81,11 +81,16 @@ TieredCompilationManager::TieredCompilationManager() :
     m_isAppDomainShuttingDown(FALSE),
     m_countOptimizationThreadsRunning(0),
-    m_callCountOptimizationThreshhold(30),
-    m_optimizationQuantumMs(50)
+    m_callCountOptimizationThreshhold(1),
+    m_optimizationQuantumMs(50),
+    m_methodsPendingCountingForTier1(nullptr),
+    m_tier1CountingDelayTimerHandle(nullptr),
+    m_wasTier0JitInvokedSinceCountingDelayReset(false)
 {
     LIMITED_METHOD_CONTRACT;
     m_lock.Init(LOCK_TYPE_DEFAULT);
+
+    // On Unix, we can reach here before EEConfig is initialized, so defer config-based initialization to Init()
 }
 
 // Called at AppDomain Init
@@ -102,29 +107,115 @@ void TieredCompilationManager::Init(ADID appDomainId)
     SpinLockHolder holder(&m_lock);
     m_domainId = appDomainId;
+    m_callCountOptimizationThreshhold = g_pConfig->TieredCompilation_Tier1CallCountThreshold();
     m_asyncWorkDoneEvent.CreateManualEventNoThrow(TRUE);
 }
 
+void TieredCompilationManager::InitiateTier1CountingDelay()
+{
+    WRAPPER_NO_CONTRACT;
+    _ASSERTE(g_pConfig->TieredCompilation());
+    _ASSERTE(m_methodsPendingCountingForTier1 == nullptr);
+    _ASSERTE(m_tier1CountingDelayTimerHandle == nullptr);
+
+    DWORD delayMs = g_pConfig->TieredCompilation_Tier1CallCountingDelayMs();
+    if (delayMs == 0)
+    {
+        return;
+    }
+
+    m_tier1CountingDelayLock.Init(LOCK_TYPE_DEFAULT);
+
+    NewHolder<SArray<MethodDesc*>> methodsPendingCountingHolder = new(nothrow) SArray<MethodDesc*>();
+    if (methodsPendingCountingHolder == nullptr)
+    {
+        return;
+    }
+
+    NewHolder<ThreadpoolMgr::TimerInfoContext> timerContextHolder = new(nothrow) ThreadpoolMgr::TimerInfoContext();
+    if (timerContextHolder == nullptr)
+    {
+        return;
+    }
+
+    timerContextHolder->AppDomainId = m_domainId;
+    timerContextHolder->TimerId = 0;
+    if (!ThreadpoolMgr::CreateTimerQueueTimer(
+            &m_tier1CountingDelayTimerHandle,
+            Tier1DelayTimerCallback,
+            timerContextHolder,
+            delayMs,
+            (DWORD)-1 /* Period, non-repeating */,
+            0 /* flags */))
+    {
+        _ASSERTE(m_tier1CountingDelayTimerHandle == nullptr);
+        return;
+    }
+
+    m_methodsPendingCountingForTier1 = methodsPendingCountingHolder.Extract();
+    timerContextHolder.SuppressRelease(); // the timer context is automatically deleted by the timer infrastructure
+}
+
+void TieredCompilationManager::OnTier0JitInvoked()
+{
+    LIMITED_METHOD_CONTRACT;
+
+    if (m_methodsPendingCountingForTier1 != nullptr)
+    {
+        m_wasTier0JitInvokedSinceCountingDelayReset = true;
+    }
+}
+
 // Called each time code in this AppDomain has been run. This is our sole entrypoint to begin
 // tiered compilation for now. Returns TRUE if no more notifications are necessary, but
 // more notifications may come anyways.
 //
 // currentCallCount is pre-incremented, that is to say the value is 1 on first call for a given
 // method.
-BOOL TieredCompilationManager::OnMethodCalled(MethodDesc* pMethodDesc, DWORD currentCallCount)
+void TieredCompilationManager::OnMethodCalled(
+    MethodDesc* pMethodDesc,
+    DWORD currentCallCount,
+    BOOL* shouldStopCountingCallsRef,
+    BOOL* wasPromotedToTier1Ref)
 {
-    STANDARD_VM_CONTRACT;
+    WRAPPER_NO_CONTRACT;
+    _ASSERTE(pMethodDesc->IsEligibleForTieredCompilation());
+    _ASSERTE(shouldStopCountingCallsRef != nullptr);
+    _ASSERTE(wasPromotedToTier1Ref != nullptr);
+
+    *shouldStopCountingCallsRef =
+        m_methodsPendingCountingForTier1 != nullptr || currentCallCount >= m_callCountOptimizationThreshhold;
+    *wasPromotedToTier1Ref = currentCallCount >= m_callCountOptimizationThreshhold;
+
+    if (currentCallCount == m_callCountOptimizationThreshhold)
+    {
+        AsyncPromoteMethodToTier1(pMethodDesc);
+    }
+}
+
+void TieredCompilationManager::OnMethodCallCountingStoppedWithoutTier1Promotion(MethodDesc* pMethodDesc)
+{
+    WRAPPER_NO_CONTRACT;
+    _ASSERTE(pMethodDesc != nullptr);
+    _ASSERTE(pMethodDesc->IsEligibleForTieredCompilation());
 
-    if (currentCallCount < m_callCountOptimizationThreshhold)
+    if (g_pConfig->TieredCompilation_Tier1CallCountingDelayMs() == 0)
     {
-        return FALSE; // continue notifications for this method
+        return;
     }
-    else if (currentCallCount > m_callCountOptimizationThreshhold)
+
     {
-        return TRUE; // stop notifications for this method
+        SpinLockHolder holder(&m_tier1CountingDelayLock);
+        if (m_methodsPendingCountingForTier1 != nullptr)
+        {
+            // Record the method to resume counting later (see Tier1DelayTimerCallback)
+            m_methodsPendingCountingForTier1->Append(pMethodDesc);
+            return;
+        }
     }
-    AsyncPromoteMethodToTier1(pMethodDesc);
-    return TRUE;
+
+    // Rare race condition with the timer callback
+    ResumeCountingCalls(pMethodDesc);
 }
 
 void TieredCompilationManager::AsyncPromoteMethodToTier1(MethodDesc* pMethodDesc)
@@ -258,6 +349,74 @@ void TieredCompilationManager::Shutdown(BOOL fBlockUntilAsyncWorkIsComplete)
     }
 }
 
+VOID WINAPI TieredCompilationManager::Tier1DelayTimerCallback(PVOID parameter, BOOLEAN timerFired)
+{
+    WRAPPER_NO_CONTRACT;
+    _ASSERTE(timerFired);
+
+    GCX_COOP();
+    ThreadpoolMgr::TimerInfoContext* timerContext = (ThreadpoolMgr::TimerInfoContext*)parameter;
+    ManagedThreadBase::ThreadPool(timerContext->AppDomainId, Tier1DelayTimerCallbackInAppDomain, nullptr);
+}
+
+void TieredCompilationManager::Tier1DelayTimerCallbackInAppDomain(LPVOID parameter)
+{
+    WRAPPER_NO_CONTRACT;
+    GetAppDomain()->GetTieredCompilationManager()->Tier1DelayTimerCallbackWorker();
+}
+
+void TieredCompilationManager::Tier1DelayTimerCallbackWorker()
+{
+    WRAPPER_NO_CONTRACT;
+
+    // Reschedule the timer if a tier 0 JIT has been invoked since the timer was started to further delay call counting
+    if (m_wasTier0JitInvokedSinceCountingDelayReset)
+    {
+        m_wasTier0JitInvokedSinceCountingDelayReset = false;
+
+        _ASSERTE(m_tier1CountingDelayTimerHandle != nullptr);
+        if (ThreadpoolMgr::ChangeTimerQueueTimer(
+                m_tier1CountingDelayTimerHandle,
+                g_pConfig->TieredCompilation_Tier1CallCountingDelayMs(),
+                (DWORD)-1 /* Period, non-repeating */))
+        {
+            return;
+        }
+    }
+
+    // Exchange the list of methods pending counting for tier 1
+    SArray<MethodDesc*>* methodsPendingCountingForTier1;
+    {
+        SpinLockHolder holder(&m_tier1CountingDelayLock);
+        methodsPendingCountingForTier1 = m_methodsPendingCountingForTier1;
+        _ASSERTE(methodsPendingCountingForTier1 != nullptr);
+        m_methodsPendingCountingForTier1 = nullptr;
+    }
+
+    // Install call counters
+    MethodDesc** methods = methodsPendingCountingForTier1->GetElements();
+    COUNT_T methodCount = methodsPendingCountingForTier1->GetCount();
+    for (COUNT_T i = 0; i < methodCount; ++i)
+    {
+        ResumeCountingCalls(methods[i]);
+    }
+    delete methodsPendingCountingForTier1;
+
+    // Delete the timer
+    _ASSERTE(m_tier1CountingDelayTimerHandle != nullptr);
+    ThreadpoolMgr::DeleteTimerQueueTimer(m_tier1CountingDelayTimerHandle, nullptr);
+    m_tier1CountingDelayTimerHandle = nullptr;
+}
+
+void TieredCompilationManager::ResumeCountingCalls(MethodDesc* pMethodDesc)
+{
+    WRAPPER_NO_CONTRACT;
+    _ASSERTE(pMethodDesc != nullptr);
+    _ASSERTE(pMethodDesc->IsVersionableWithPrecode());
+
+    pMethodDesc->GetPrecode()->ResetTargetInterlocked();
+}
+
 // This is the initial entrypoint for the background thread, called by
 // the threadpool.
 DWORD WINAPI TieredCompilationManager::StaticOptimizeMethodsCallback(void *args)
diff --git a/src/vm/tieredcompilation.h b/src/vm/tieredcompilation.h
index 9f6187244a..95dbb741fc 100644
--- a/src/vm/tieredcompilation.h
+++ b/src/vm/tieredcompilation.h
@@ -25,7 +25,12 @@ public:
 #endif
 
     void Init(ADID appDomainId);
-    BOOL OnMethodCalled(MethodDesc* pMethodDesc, DWORD currentCallCount);
+
+    void InitiateTier1CountingDelay();
+    void OnTier0JitInvoked();
+
+    void OnMethodCalled(MethodDesc* pMethodDesc, DWORD currentCallCount, BOOL* shouldStopCountingCallsRef, BOOL* wasPromotedToTier1Ref);
+    void OnMethodCallCountingStoppedWithoutTier1Promotion(MethodDesc* pMethodDesc);
     void AsyncPromoteMethodToTier1(MethodDesc* pMethodDesc);
     static void ShutdownAllDomains();
     void Shutdown(BOOL fBlockUntilAsyncWorkIsComplete);
@@ -33,6 +38,11 @@ public:
 
 private:
 
+    static VOID WINAPI Tier1DelayTimerCallback(PVOID parameter, BOOLEAN timerFired);
+    static void Tier1DelayTimerCallbackInAppDomain(LPVOID parameter);
+    void Tier1DelayTimerCallbackWorker();
+    static void ResumeCountingCalls(MethodDesc* pMethodDesc);
+
     static DWORD StaticOptimizeMethodsCallback(void* args);
     void OptimizeMethodsCallback();
     void OptimizeMethod(NativeCodeVersion nativeCodeVersion);
@@ -50,6 +60,12 @@ private:
     DWORD m_countOptimizationThreadsRunning;
     DWORD m_callCountOptimizationThreshhold;
    DWORD m_optimizationQuantumMs;
+
+    SpinLock m_tier1CountingDelayLock;
+    SArray<MethodDesc*>* m_methodsPendingCountingForTier1;
+    HANDLE m_tier1CountingDelayTimerHandle;
+    bool m_wasTier0JitInvokedSinceCountingDelayReset;
+
     CLREvent m_asyncWorkDoneEvent;
 };
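
Putting the two new config values together, the end-to-end behavior this patch aims for can be modeled in a few lines. This is a simplified, single-threaded sketch with invented names (`TieringModel`, `OnCall`); the real runtime counts calls per method via precode backpatching and promotes asynchronously, and calls made during the delay are requeued for counting rather than simply skipped.

```cpp
// Simplified model of the tiering flow in this patch: calls made while the
// startup delay is pending do not advance toward promotion, and once counting
// is active the method is promoted to tier 1 when the call count reaches the
// threshold (default 30 per TieredCompilation_Tier1CallCountThreshold).
#include <cstdio>

struct TieringModel
{
    unsigned threshold = 30;   // models TieredCompilation_Tier1CallCountThreshold
    bool delayPending = true;  // true while the counting-delay timer is armed
    unsigned callCount = 0;
    bool promotedToTier1 = false;

    // Models the CallCounter::OnMethodCalled -> TieredCompilationManager
    // notification path for a single method.
    void OnCall()
    {
        if (delayPending || promotedToTier1)
            return;                   // counting suppressed or already at tier 1
        if (++callCount >= threshold)
            promotedToTier1 = true;   // models AsyncPromoteMethodToTier1
    }
};

int main()
{
    TieringModel method;
    for (int i = 0; i < 100; ++i)
        method.OnCall();              // startup-phase calls: not counted
    method.delayPending = false;      // delay timer fired: counting begins
    for (int i = 0; i < 100; ++i)
        method.OnCall();
    std::printf("promoted=%d after %u counted calls\n",
                method.promotedToTier1, method.callCount);
    return 0;
}
```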