author     Koundinya Veluri <kouvel@users.noreply.github.com>   2018-01-25 12:01:32 -0800
committer  GitHub <noreply@github.com>                          2018-01-25 12:01:32 -0800
commit     209415618ca5d1a5d1d9e39ca78d643d0935534e (patch)
tree       d83c946783390afbb52e3e0f968018c38dfd2560
parent     e9985126acb0f1efd7c780faac4e66bc798b73c0 (diff)
Enable tiered jitting for R2R methods (#15967)
- Included R2R methods and generics over value types in CoreLib for tiered jitting. Tier 0 for R2R methods is the precompiled code if available, and tier 1 is selectively scheduled based on call counting.
- Added a delay before starting to count calls for tier 1 promotion. The delay is a short duration after frequent tier 0 jitting stops (current heuristic for identifying startup).
- Startup time and steady-state performance have improved on JitBench. There is a regression shortly following startup due to call counting and tier 1 jitting, for a short duration before steady-state performance stabilizes.
- Added two new config values, one for configuring the call count threshold for promoting to tier 1, and another for specifying the delay from the last tier 0 JIT invocation before starting to count calls.
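Both new knobs are ordinary CLRConfig DWORDs (defined in the first hunk below), so, assuming the usual COMPlus_ environment-variable mapping for CLRConfig values and noting that tiering overall is still gated behind the experimental switch, an experiment might be configured like this (values shown are the defaults):

    COMPlus_EXPERIMENTAL_TieredCompilation=1
    COMPlus_TieredCompilation_Tier1CallCountThreshold=30
    COMPlus_TieredCompilation_Tier1CallCountingDelayMs=100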
-rw-r--r--  src/inc/clrconfigvalues.h        2
-rw-r--r--  src/vm/arm/cgencpu.h            28
-rw-r--r--  src/vm/arm64/cgencpu.h          27
-rw-r--r--  src/vm/callcounter.cpp          11
-rw-r--r--  src/vm/callcounter.h             2
-rw-r--r--  src/vm/ceemain.cpp              15
-rw-r--r--  src/vm/codeversion.cpp          15
-rw-r--r--  src/vm/eeconfig.cpp             10
-rw-r--r--  src/vm/eeconfig.h                4
-rw-r--r--  src/vm/i386/stublinkerx86.cpp   42
-rw-r--r--  src/vm/i386/stublinkerx86.h     17
-rw-r--r--  src/vm/methodtablebuilder.cpp    1
-rw-r--r--  src/vm/precode.cpp              23
-rw-r--r--  src/vm/precode.h                 1
-rw-r--r--  src/vm/prestub.cpp              24
-rw-r--r--  src/vm/tieredcompilation.cpp   179
-rw-r--r--  src/vm/tieredcompilation.h      18
17 files changed, 395 insertions(+), 24 deletions(-)
diff --git a/src/inc/clrconfigvalues.h b/src/inc/clrconfigvalues.h
index a0b205a4f5..95179d8ed2 100644
--- a/src/inc/clrconfigvalues.h
+++ b/src/inc/clrconfigvalues.h
@@ -649,6 +649,8 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_HillClimbing_GainExponent,
///
#ifdef FEATURE_TIERED_COMPILATION
RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredCompilation, W("EXPERIMENTAL_TieredCompilation"), 0, "Enables tiered compilation")
+RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredCompilation_Tier1CallCountThreshold, W("TieredCompilation_Tier1CallCountThreshold"), 30, "Number of times a method must be called after which it is promoted to tier 1.")
+RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredCompilation_Tier1CallCountingDelayMs, W("TieredCompilation_Tier1CallCountingDelayMs"), 100, "Delay in milliseconds since process startup or the last tier 0 JIT before call counting begins for tier 1 promotion.")
#endif
diff --git a/src/vm/arm/cgencpu.h b/src/vm/arm/cgencpu.h
index 8da2b2b3cc..3997dbf75b 100644
--- a/src/vm/arm/cgencpu.h
+++ b/src/vm/arm/cgencpu.h
@@ -29,6 +29,8 @@ class BaseDomain;
class ZapNode;
struct ArgLocDesc;
+extern PCODE GetPreStubEntryPoint();
+
#define USE_REDIRECT_FOR_GCSTRESS
// CPU-dependent functions
@@ -1113,6 +1115,19 @@ struct StubPrecode {
return m_pTarget;
}
+ void ResetTargetInterlocked()
+ {
+ CONTRACTL
+ {
+ THROWS;
+ GC_TRIGGERS;
+ }
+ CONTRACTL_END;
+
+ EnsureWritableExecutablePages(&m_pTarget);
+ InterlockedExchange((LONG*)&m_pTarget, (LONG)GetPreStubEntryPoint());
+ }
+
BOOL SetTargetInterlocked(TADDR target, TADDR expected)
{
CONTRACTL
@@ -1206,6 +1221,19 @@ struct FixupPrecode {
return m_pTarget;
}
+ void ResetTargetInterlocked()
+ {
+ CONTRACTL
+ {
+ THROWS;
+ GC_TRIGGERS;
+ }
+ CONTRACTL_END;
+
+ EnsureWritableExecutablePages(&m_pTarget);
+ InterlockedExchange((LONG*)&m_pTarget, (LONG)GetEEFuncEntryPoint(PrecodeFixupThunk));
+ }
+
BOOL SetTargetInterlocked(TADDR target, TADDR expected)
{
CONTRACTL
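The ResetTargetInterlocked() additions here (and the matching ones in the arm64 and x86 files below) are all the same idea: atomically swing the precode's target back to the prestub or fixup thunk so the next call re-enters the runtime and call counting can resume. A minimal toy model of the pattern, with hypothetical names and std::atomic standing in for the interlocked helpers:

    #include <atomic>
    #include <cstdint>

    struct ToyPrecode
    {
        std::atomic<uintptr_t> m_pTarget; // entry point this precode jumps to

        // Point the precode back at the prestub. A single atomic exchange keeps
        // racing readers safe: they observe either the old target or the prestub,
        // never a torn value.
        void ResetTarget(uintptr_t prestubEntryPoint)
        {
            m_pTarget.exchange(prestubEntryPoint);
        }
    };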
diff --git a/src/vm/arm64/cgencpu.h b/src/vm/arm64/cgencpu.h
index a168cdc162..7e3d62056a 100644
--- a/src/vm/arm64/cgencpu.h
+++ b/src/vm/arm64/cgencpu.h
@@ -24,6 +24,7 @@ EXTERN_C void setFPReturn(int fpSize, INT64 retVal);
class ComCallMethodDesc;
+extern PCODE GetPreStubEntryPoint();
#define COMMETHOD_PREPAD 24 // # extra bytes to allocate in addition to sizeof(ComCallMethodDesc)
#ifdef FEATURE_COMINTEROP
@@ -572,6 +573,19 @@ struct StubPrecode {
return m_pTarget;
}
+ void ResetTargetInterlocked()
+ {
+ CONTRACTL
+ {
+ THROWS;
+ GC_TRIGGERS;
+ }
+ CONTRACTL_END;
+
+ EnsureWritableExecutablePages(&m_pTarget);
+ InterlockedExchange64((LONGLONG*)&m_pTarget, (TADDR)GetPreStubEntryPoint());
+ }
+
BOOL SetTargetInterlocked(TADDR target, TADDR expected)
{
CONTRACTL
@@ -685,6 +699,19 @@ struct FixupPrecode {
return m_pTarget;
}
+ void ResetTargetInterlocked()
+ {
+ CONTRACTL
+ {
+ THROWS;
+ GC_TRIGGERS;
+ }
+ CONTRACTL_END;
+
+ EnsureWritableExecutablePages(&m_pTarget);
+ InterlockedExchange64((LONGLONG*)&m_pTarget, (TADDR)GetEEFuncEntryPoint(PrecodeFixupThunk));
+ }
+
BOOL SetTargetInterlocked(TADDR target, TADDR expected)
{
CONTRACTL
diff --git a/src/vm/callcounter.cpp b/src/vm/callcounter.cpp
index 14d9e6e6a4..641b61198a 100644
--- a/src/vm/callcounter.cpp
+++ b/src/vm/callcounter.cpp
@@ -32,11 +32,18 @@ CallCounter::CallCounter()
// Returns TRUE if no future invocations are needed (we reached the count we cared about)
// and FALSE otherwise. It is permissible to keep calling even when TRUE was previously
// returned and multi-threaded race conditions will surely cause this to occur.
-BOOL CallCounter::OnMethodCalled(MethodDesc* pMethodDesc)
+void CallCounter::OnMethodCalled(
+ MethodDesc* pMethodDesc,
+ TieredCompilationManager *pTieredCompilationManager,
+ BOOL* shouldStopCountingCallsRef,
+ BOOL* wasPromotedToTier1Ref)
{
STANDARD_VM_CONTRACT;
_ASSERTE(pMethodDesc->IsEligibleForTieredCompilation());
+ _ASSERTE(pTieredCompilationManager != nullptr);
+ _ASSERTE(shouldStopCountingCallsRef != nullptr);
+ _ASSERTE(wasPromotedToTier1Ref != nullptr);
// PERF: This is a simple-to-implement, but not so performant, call counter
// Currently this is only called until we reach a fixed call count and then
@@ -75,7 +82,7 @@ BOOL CallCounter::OnMethodCalled(MethodDesc* pMethodDesc)
}
}
- return GetAppDomain()->GetTieredCompilationManager()->OnMethodCalled(pMethodDesc, callCount);
+ pTieredCompilationManager->OnMethodCalled(pMethodDesc, callCount, shouldStopCountingCallsRef, wasPromotedToTier1Ref);
}
#endif // FEATURE_TIERED_COMPILATION
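With the BOOL return replaced by out-parameters, the caller learns separately whether counting should stop and whether the method was actually promoted. A condensed, slightly simplified sketch of the consuming pattern (it mirrors the prestub.cpp call site later in this diff; the wrapper function name is hypothetical):

    void OnPrestubEntered(MethodDesc* pMD,
                          CallCounter* pCallCounter,
                          TieredCompilationManager* pTieredMgr)
    {
        BOOL fStopCounting = FALSE;
        BOOL fPromoted = FALSE;
        pCallCounter->OnMethodCalled(pMD, pTieredMgr, &fStopCounting, &fPromoted);
        if (fStopCounting && !fPromoted)
        {
            // Counting stopped without crossing the tier 1 threshold (e.g. the
            // startup delay is active); park the method so counting can resume later.
            pTieredMgr->OnMethodCallCountingStoppedWithoutTier1Promotion(pMD);
        }
    }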
diff --git a/src/vm/callcounter.h b/src/vm/callcounter.h
index ed98ccb1c8..4e9a5d32ec 100644
--- a/src/vm/callcounter.h
+++ b/src/vm/callcounter.h
@@ -70,7 +70,7 @@ public:
CallCounter();
#endif
- BOOL OnMethodCalled(MethodDesc* pMethodDesc);
+ void OnMethodCalled(MethodDesc* pMethodDesc, TieredCompilationManager *pTieredCompilationManager, BOOL* shouldStopCountingCallsRef, BOOL* wasPromotedToTier1Ref);
private:
diff --git a/src/vm/ceemain.cpp b/src/vm/ceemain.cpp
index 3f6492bbe5..e9b914e0cf 100644
--- a/src/vm/ceemain.cpp
+++ b/src/vm/ceemain.cpp
@@ -1102,7 +1102,16 @@ void EEStartupHelper(COINITIEE fFlags)
hr = S_OK;
STRESS_LOG0(LF_STARTUP, LL_ALWAYS, "===================EEStartup Completed===================");
-#if defined(_DEBUG) && !defined(CROSSGEN_COMPILE)
+#ifndef CROSSGEN_COMPILE
+
+#ifdef FEATURE_TIERED_COMPILATION
+ if (g_pConfig->TieredCompilation())
+ {
+ SystemDomain::System()->DefaultDomain()->GetTieredCompilationManager()->InitiateTier1CountingDelay();
+ }
+#endif
+
+#ifdef _DEBUG
//if g_fEEStarted was false when we loaded the System Module, we did not run ExpandAll on it. In
//this case, make sure we run ExpandAll here. The rationale is that if we Jit before g_fEEStarted
@@ -1120,7 +1129,9 @@ void EEStartupHelper(COINITIEE fFlags)
// Perform mscorlib consistency check if requested
g_Mscorlib.CheckExtended();
-#endif // _DEBUG && !CROSSGEN_COMPILE
+#endif // _DEBUG
+
+#endif // !CROSSGEN_COMPILE
ErrExit: ;
}
diff --git a/src/vm/codeversion.cpp b/src/vm/codeversion.cpp
index 10d3013f35..da808e8839 100644
--- a/src/vm/codeversion.cpp
+++ b/src/vm/codeversion.cpp
@@ -2177,12 +2177,14 @@ PCODE CodeVersionManager::PublishVersionableCodeIfNecessary(MethodDesc* pMethodD
// attempt to publish the active version still under the lock
if (FAILED(hr = PublishNativeCodeVersion(pMethodDesc, activeVersion, fEESuspend)))
{
- // if we need an EESuspend to publish then start over. We have to leave the lock in order to suspend,
- // and when we leave the lock the active version might change again. However now we know that suspend
+ // If we need an EESuspend to publish then start over. We have to leave the lock in order to suspend,
+ // and when we leave the lock the active version might change again. However now we know that suspend is
+ // necessary.
if (hr == CORPROF_E_RUNTIME_SUSPEND_REQUIRED)
{
_ASSERTE(!fEESuspend);
fEESuspend = true;
+ continue; // skip RestartEE() below since SuspendEE() has not been called yet
}
else
{
@@ -2215,6 +2217,8 @@ PCODE CodeVersionManager::PublishVersionableCodeIfNecessary(MethodDesc* pMethodD
HRESULT CodeVersionManager::PublishNativeCodeVersion(MethodDesc* pMethod, NativeCodeVersion nativeCodeVersion, BOOL fEESuspended)
{
+ // TODO: This function needs to make sure it does not change the precode's target if call counting is in progress. Track
+ // whether call counting is currently being done for the method, and use a lock to ensure the expected precode target.
LIMITED_METHOD_CONTRACT;
_ASSERTE(LockOwnedByCurrentThread());
_ASSERTE(pMethod->IsVersionable());
@@ -2236,7 +2240,12 @@ HRESULT CodeVersionManager::PublishNativeCodeVersion(MethodDesc* pMethod, Native
{
EX_TRY
{
- hr = pPrecode->SetTargetInterlocked(pCode, FALSE) ? S_OK : E_FAIL;
+ pPrecode->SetTargetInterlocked(pCode, FALSE);
+
+ // SetTargetInterlocked() would return false if it lost the race with another thread. That is fine, this thread
+ // can continue assuming it was successful, similarly to it successfully updating the target and another thread
+ // updating the target again shortly afterwards.
+ hr = S_OK;
}
EX_CATCH_HRESULT(hr);
return hr;
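The added `continue` matters because each pass of the publish loop pairs SuspendEE() with RestartEE(): when publishing fails only because a suspension is required, SuspendEE() was never entered on that pass, so the loop must not fall through to RestartEE(). A compilable toy of that control flow (illustrative names, not the real runtime API surface):

    #include <cstdio>

    static bool g_needsSuspend = true; // pretend the first publish requires suspension

    static void SuspendEE() { std::puts("SuspendEE"); }
    static void RestartEE() { std::puts("RestartEE"); }
    static bool TryPublish(bool suspended) { return !g_needsSuspend || suspended; }

    int main()
    {
        bool fEESuspend = false;
        while (true)
        {
            if (fEESuspend)
                SuspendEE();
            if (!TryPublish(fEESuspend) && !fEESuspend)
            {
                fEESuspend = true;
                continue; // SuspendEE() was never called, so skip RestartEE()
            }
            if (fEESuspend)
                RestartEE();
            break;
        }
    }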
diff --git a/src/vm/eeconfig.cpp b/src/vm/eeconfig.cpp
index 95a7133bb9..ab83463256 100644
--- a/src/vm/eeconfig.cpp
+++ b/src/vm/eeconfig.cpp
@@ -376,6 +376,8 @@ HRESULT EEConfig::Init()
#if defined(FEATURE_TIERED_COMPILATION)
fTieredCompilation = false;
+ tieredCompilation_tier1CallCountThreshold = 1;
+ tieredCompilation_tier1CallCountingDelayMs = 0;
#endif
#if defined(FEATURE_GDBJIT) && defined(_DEBUG)
@@ -1239,6 +1241,14 @@ HRESULT EEConfig::sync()
#if defined(FEATURE_TIERED_COMPILATION)
fTieredCompilation = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TieredCompilation) != 0;
+ tieredCompilation_tier1CallCountThreshold =
+ CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TieredCompilation_Tier1CallCountThreshold);
+ if (tieredCompilation_tier1CallCountThreshold < 1)
+ {
+ tieredCompilation_tier1CallCountThreshold = 1;
+ }
+ tieredCompilation_tier1CallCountingDelayMs =
+ CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TieredCompilation_Tier1CallCountingDelayMs);
#endif
#if defined(FEATURE_GDBJIT) && defined(_DEBUG)
diff --git a/src/vm/eeconfig.h b/src/vm/eeconfig.h
index 9bc0073bf3..5c88f42ac7 100644
--- a/src/vm/eeconfig.h
+++ b/src/vm/eeconfig.h
@@ -285,6 +285,8 @@ public:
// Tiered Compilation config
#if defined(FEATURE_TIERED_COMPILATION)
bool TieredCompilation(void) const {LIMITED_METHOD_CONTRACT; return fTieredCompilation; }
+ DWORD TieredCompilation_Tier1CallCountThreshold() const { LIMITED_METHOD_CONTRACT; return tieredCompilation_tier1CallCountThreshold; }
+ DWORD TieredCompilation_Tier1CallCountingDelayMs() const { LIMITED_METHOD_CONTRACT; return tieredCompilation_tier1CallCountingDelayMs; }
#endif
#if defined(FEATURE_GDBJIT) && defined(_DEBUG)
@@ -1107,6 +1109,8 @@ private: //----------------------------------------------------------------
#if defined(FEATURE_TIERED_COMPILATION)
bool fTieredCompilation;
+ DWORD tieredCompilation_tier1CallCountThreshold;
+ DWORD tieredCompilation_tier1CallCountingDelayMs;
#endif
#if defined(FEATURE_GDBJIT) && defined(_DEBUG)
diff --git a/src/vm/i386/stublinkerx86.cpp b/src/vm/i386/stublinkerx86.cpp
index 14b9701a29..a11c6a3212 100644
--- a/src/vm/i386/stublinkerx86.cpp
+++ b/src/vm/i386/stublinkerx86.cpp
@@ -6416,6 +6416,21 @@ void FixupPrecode::EnumMemoryRegions(CLRDataEnumMemoryFlags flags)
#ifndef DACCESS_COMPILE
+void rel32SetInterlocked(/*PINT32*/ PVOID pRel32, TADDR target, MethodDesc* pMD)
+{
+ CONTRACTL
+ {
+ THROWS; // Creating a JumpStub could throw OutOfMemory
+ GC_TRIGGERS;
+ }
+ CONTRACTL_END;
+
+ INT32 targetRel32 = rel32UsingJumpStub((INT32*)pRel32, target, pMD);
+
+ _ASSERTE(IS_ALIGNED(pRel32, sizeof(INT32)));
+ FastInterlockExchange((LONG*)pRel32, (LONG)targetRel32);
+}
+
BOOL rel32SetInterlocked(/*PINT32*/ PVOID pRel32, TADDR target, TADDR expected, MethodDesc* pMD)
{
CONTRACTL
@@ -6535,6 +6550,33 @@ void FixupPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator, int
}
}
+void FixupPrecode::ResetTargetInterlocked()
+{
+ CONTRACTL
+ {
+ THROWS; // Creating a JumpStub could throw OutOfMemory
+ GC_NOTRIGGER;
+ }
+ CONTRACTL_END;
+
+ FixupPrecode newValue = *this;
+ newValue.m_op = X86_INSTR_CALL_REL32; // call PrecodeFixupThunk
+ newValue.m_type = FixupPrecode::TypePrestub;
+
+ PCODE target = (PCODE)GetEEFuncEntryPoint(PrecodeFixupThunk);
+ MethodDesc* pMD = (MethodDesc*)GetMethodDesc();
+ newValue.m_rel32 =
+#ifdef FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS
+ pMD->IsLCGMethod() ?
+ rel32UsingPreallocatedJumpStub(&m_rel32, target, GetDynamicMethodEntryJumpStub()) :
+#endif // FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS
+ rel32UsingJumpStub(&m_rel32, target, pMD);
+
+ _ASSERTE(IS_ALIGNED(this, sizeof(INT64)));
+ EnsureWritableExecutablePages(this, sizeof(INT64));
+ FastInterlockExchangeLong((INT64*)this, *(INT64*)&newValue);
+}
+
BOOL FixupPrecode::SetTargetInterlocked(TADDR target, TADDR expected)
{
CONTRACTL
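The x86/x64 FixupPrecode reset differs from the stub precode case: the opcode, type byte, and rel32 displacement all change together, so the new precode image is composed in a local copy and published with a single 8-byte interlocked exchange, never exposing a half-written instruction. A compilable toy of that publish step (hypothetical packed layout; std::atomic stands in for FastInterlockExchangeLong):

    #include <atomic>
    #include <cstdint>
    #include <cstring>

    #pragma pack(push, 1)
    struct ToyFixupPrecode
    {
        uint8_t m_op;         // call/jmp opcode byte
        int32_t m_rel32;      // displacement to the target thunk
        uint8_t m_type;       // precode kind tag
        uint8_t m_indexes[2]; // chunk bookkeeping
    };
    #pragma pack(pop)
    static_assert(sizeof(ToyFixupPrecode) == 8, "must fit one 64-bit exchange");

    // 'live' must be 8-byte aligned, as the real code asserts with IS_ALIGNED.
    void PublishWholePrecode(ToyFixupPrecode* live, const ToyFixupPrecode& newValue)
    {
        uint64_t bits;
        std::memcpy(&bits, &newValue, sizeof bits);
        // One atomic 8-byte store: readers observe the old or the new precode, whole.
        reinterpret_cast<std::atomic<uint64_t>*>(live)->store(bits);
    }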
diff --git a/src/vm/i386/stublinkerx86.h b/src/vm/i386/stublinkerx86.h
index 50dc3b35a5..229ab0767b 100644
--- a/src/vm/i386/stublinkerx86.h
+++ b/src/vm/i386/stublinkerx86.h
@@ -10,6 +10,8 @@
struct ArrayOpScript;
class MetaSig;
+extern PCODE GetPreStubEntryPoint();
+
//=======================================================================
#define X86_INSTR_CALL_REL32 0xE8 // call rel32
@@ -454,6 +456,7 @@ inline TADDR rel32Decode(/*PTR_INT32*/ TADDR pRel32)
return pRel32 + 4 + *PTR_INT32(pRel32);
}
+void rel32SetInterlocked(/*PINT32*/ PVOID pRel32, TADDR target, MethodDesc* pMD);
BOOL rel32SetInterlocked(/*PINT32*/ PVOID pRel32, TADDR target, TADDR expected, MethodDesc* pMD);
//------------------------------------------------------------------------
@@ -533,6 +536,19 @@ struct StubPrecode {
return rel32Decode(PTR_HOST_MEMBER_TADDR(StubPrecode, this, m_rel32));
}
+ void ResetTargetInterlocked()
+ {
+ CONTRACTL
+ {
+ THROWS;
+ GC_TRIGGERS;
+ }
+ CONTRACTL_END;
+
+ EnsureWritableExecutablePages(&m_rel32);
+ return rel32SetInterlocked(&m_rel32, GetPreStubEntryPoint(), (MethodDesc*)GetMethodDesc());
+ }
+
BOOL SetTargetInterlocked(TADDR target, TADDR expected)
{
CONTRACTL
@@ -714,6 +730,7 @@ struct FixupPrecode {
return rel32Decode(PTR_HOST_MEMBER_TADDR(FixupPrecode, this, m_rel32));
}
+ void ResetTargetInterlocked();
BOOL SetTargetInterlocked(TADDR target, TADDR expected);
static BOOL IsFixupPrecodeByASM(TADDR addr)
diff --git a/src/vm/methodtablebuilder.cpp b/src/vm/methodtablebuilder.cpp
index 83116f94f5..43071609cc 100644
--- a/src/vm/methodtablebuilder.cpp
+++ b/src/vm/methodtablebuilder.cpp
@@ -6983,7 +6983,6 @@ MethodTableBuilder::NeedsNativeCodeSlot(bmtMDMethod * pMDMethod)
#ifdef FEATURE_TIERED_COMPILATION
// Keep in-sync with MethodDesc::IsEligibleForTieredCompilation()
if (g_pConfig->TieredCompilation() &&
- !GetModule()->HasNativeOrReadyToRunImage() &&
(pMDMethod->GetMethodType() == METHOD_TYPE_NORMAL || pMDMethod->GetMethodType() == METHOD_TYPE_INSTANTIATED))
{
return TRUE;
diff --git a/src/vm/precode.cpp b/src/vm/precode.cpp
index 8891d5a903..103fc03959 100644
--- a/src/vm/precode.cpp
+++ b/src/vm/precode.cpp
@@ -425,6 +425,29 @@ void Precode::Init(PrecodeType t, MethodDesc* pMD, LoaderAllocator *pLoaderAlloc
_ASSERTE(IsValidType(GetType()));
}
+void Precode::ResetTargetInterlocked()
+{
+ WRAPPER_NO_CONTRACT;
+
+ PrecodeType precodeType = GetType();
+ switch (precodeType)
+ {
+ case PRECODE_STUB:
+ AsStubPrecode()->ResetTargetInterlocked();
+ break;
+
+#ifdef HAS_FIXUP_PRECODE
+ case PRECODE_FIXUP:
+ AsFixupPrecode()->ResetTargetInterlocked();
+ break;
+#endif // HAS_FIXUP_PRECODE
+
+ default:
+ UnexpectedPrecodeType("Precode::ResetTargetInterlocked", precodeType);
+ break;
+ }
+}
+
BOOL Precode::SetTargetInterlocked(PCODE target, BOOL fOnlyRedirectFromPrestub)
{
WRAPPER_NO_CONTRACT;
diff --git a/src/vm/precode.h b/src/vm/precode.h
index 8947192482..1a61253921 100644
--- a/src/vm/precode.h
+++ b/src/vm/precode.h
@@ -261,6 +261,7 @@ public:
void Init(PrecodeType t, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator);
#ifndef DACCESS_COMPILE
+ void ResetTargetInterlocked();
BOOL SetTargetInterlocked(PCODE target, BOOL fOnlyRedirectFromPrestub = TRUE);
// Reset precode to point to prestub
diff --git a/src/vm/prestub.cpp b/src/vm/prestub.cpp
index 8934f25d67..cd857134ab 100644
--- a/src/vm/prestub.cpp
+++ b/src/vm/prestub.cpp
@@ -730,6 +730,13 @@ PCODE MethodDesc::JitCompileCodeLockedEventWrapper(PrepareCodeConfig* pConfig, J
}
+#ifdef FEATURE_TIERED_COMPILATION
+ if (g_pConfig->TieredCompilation() && !flags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_TIER1))
+ {
+ GetAppDomain()->GetTieredCompilationManager()->OnTier0JitInvoked();
+ }
+#endif // FEATURE_TIERED_COMPILATION
+
#ifdef FEATURE_STACK_SAMPLING
StackSampler::RecordJittingInfo(this, flags);
#endif // FEATURE_STACK_SAMPLING
@@ -1699,11 +1706,14 @@ PCODE MethodDesc::DoPrestub(MethodTable *pDispatchingMT)
// for this method only then do we back-patch it.
BOOL fCanBackpatchPrestub = TRUE;
#ifdef FEATURE_TIERED_COMPILATION
+ TieredCompilationManager* pTieredCompilationManager = nullptr;
BOOL fEligibleForTieredCompilation = IsEligibleForTieredCompilation();
+ BOOL fWasPromotedToTier1 = FALSE;
if (fEligibleForTieredCompilation)
{
+ pTieredCompilationManager = GetAppDomain()->GetTieredCompilationManager();
CallCounter * pCallCounter = GetCallCounter();
- fCanBackpatchPrestub = pCallCounter->OnMethodCalled(this);
+ pCallCounter->OnMethodCalled(this, pTieredCompilationManager, &fCanBackpatchPrestub, &fWasPromotedToTier1);
}
#endif
@@ -1715,6 +1725,12 @@ PCODE MethodDesc::DoPrestub(MethodTable *pDispatchingMT)
(!fIsPointingToPrestub && IsVersionableWithJumpStamp()))
{
pCode = GetCodeVersionManager()->PublishVersionableCodeIfNecessary(this, fCanBackpatchPrestub);
+
+ if (pTieredCompilationManager != nullptr && fCanBackpatchPrestub && pCode != NULL && !fWasPromotedToTier1)
+ {
+ pTieredCompilationManager->OnMethodCallCountingStoppedWithoutTier1Promotion(this);
+ }
+
fIsPointingToPrestub = IsPointingToPrestub();
}
#endif
@@ -1733,10 +1749,10 @@ PCODE MethodDesc::DoPrestub(MethodTable *pDispatchingMT)
if (pCode)
{
- // The only reason we are still pointing to prestub is because the call counter
- // prevented it. We should still short circuit and return the code without
+ // The only reasons we are still pointing to prestub are that the call counter
+ // prevented it or this thread lost the race with another thread in updating the
+ // entry point. We should still short circuit and return the code without
// backpatching.
- _ASSERTE(!fCanBackpatchPrestub);
RETURN pCode;
}
diff --git a/src/vm/tieredcompilation.cpp b/src/vm/tieredcompilation.cpp
index 48c6670fb2..f89f4f2e6b 100644
--- a/src/vm/tieredcompilation.cpp
+++ b/src/vm/tieredcompilation.cpp
@@ -81,11 +81,16 @@
TieredCompilationManager::TieredCompilationManager() :
m_isAppDomainShuttingDown(FALSE),
m_countOptimizationThreadsRunning(0),
- m_callCountOptimizationThreshhold(30),
- m_optimizationQuantumMs(50)
+ m_callCountOptimizationThreshhold(1),
+ m_optimizationQuantumMs(50),
+ m_methodsPendingCountingForTier1(nullptr),
+ m_tier1CountingDelayTimerHandle(nullptr),
+ m_wasTier0JitInvokedSinceCountingDelayReset(false)
{
LIMITED_METHOD_CONTRACT;
m_lock.Init(LOCK_TYPE_DEFAULT);
+
+ // On Unix, we can reach here before EEConfig is initialized, so defer config-based initialization to Init()
}
// Called at AppDomain Init
@@ -102,29 +107,115 @@ void TieredCompilationManager::Init(ADID appDomainId)
SpinLockHolder holder(&m_lock);
m_domainId = appDomainId;
+ m_callCountOptimizationThreshhold = g_pConfig->TieredCompilation_Tier1CallCountThreshold();
m_asyncWorkDoneEvent.CreateManualEventNoThrow(TRUE);
}
+void TieredCompilationManager::InitiateTier1CountingDelay()
+{
+ WRAPPER_NO_CONTRACT;
+ _ASSERTE(g_pConfig->TieredCompilation());
+ _ASSERTE(m_methodsPendingCountingForTier1 == nullptr);
+ _ASSERTE(m_tier1CountingDelayTimerHandle == nullptr);
+
+ DWORD delayMs = g_pConfig->TieredCompilation_Tier1CallCountingDelayMs();
+ if (delayMs == 0)
+ {
+ return;
+ }
+
+ m_tier1CountingDelayLock.Init(LOCK_TYPE_DEFAULT);
+
+ NewHolder<SArray<MethodDesc*>> methodsPendingCountingHolder = new(nothrow) SArray<MethodDesc*>();
+ if (methodsPendingCountingHolder == nullptr)
+ {
+ return;
+ }
+
+ NewHolder<ThreadpoolMgr::TimerInfoContext> timerContextHolder = new(nothrow) ThreadpoolMgr::TimerInfoContext();
+ if (timerContextHolder == nullptr)
+ {
+ return;
+ }
+
+ timerContextHolder->AppDomainId = m_domainId;
+ timerContextHolder->TimerId = 0;
+ if (!ThreadpoolMgr::CreateTimerQueueTimer(
+ &m_tier1CountingDelayTimerHandle,
+ Tier1DelayTimerCallback,
+ timerContextHolder,
+ delayMs,
+ (DWORD)-1 /* Period, non-repeating */,
+ 0 /* flags */))
+ {
+ _ASSERTE(m_tier1CountingDelayTimerHandle == nullptr);
+ return;
+ }
+
+ m_methodsPendingCountingForTier1 = methodsPendingCountingHolder.Extract();
+ timerContextHolder.SuppressRelease(); // the timer context is automatically deleted by the timer infrastructure
+}
+
+void TieredCompilationManager::OnTier0JitInvoked()
+{
+ LIMITED_METHOD_CONTRACT;
+
+ if (m_methodsPendingCountingForTier1 != nullptr)
+ {
+ m_wasTier0JitInvokedSinceCountingDelayReset = true;
+ }
+}
+
// Called each time code in this AppDomain has been run. This is our sole entrypoint to begin
// tiered compilation for now. Returns TRUE if no more notifications are necessary, but
// more notifications may come anyways.
//
// currentCallCount is pre-incremented, that is to say the value is 1 on first call for a given
// method.
-BOOL TieredCompilationManager::OnMethodCalled(MethodDesc* pMethodDesc, DWORD currentCallCount)
+void TieredCompilationManager::OnMethodCalled(
+ MethodDesc* pMethodDesc,
+ DWORD currentCallCount,
+ BOOL* shouldStopCountingCallsRef,
+ BOOL* wasPromotedToTier1Ref)
{
- STANDARD_VM_CONTRACT;
+ WRAPPER_NO_CONTRACT;
+ _ASSERTE(pMethodDesc->IsEligibleForTieredCompilation());
+ _ASSERTE(shouldStopCountingCallsRef != nullptr);
+ _ASSERTE(wasPromotedToTier1Ref != nullptr);
+
+ *shouldStopCountingCallsRef =
+ m_methodsPendingCountingForTier1 != nullptr || currentCallCount >= m_callCountOptimizationThreshhold;
+ *wasPromotedToTier1Ref = currentCallCount >= m_callCountOptimizationThreshhold;
+
+ if (currentCallCount == m_callCountOptimizationThreshhold)
+ {
+ AsyncPromoteMethodToTier1(pMethodDesc);
+ }
+}
+
+void TieredCompilationManager::OnMethodCallCountingStoppedWithoutTier1Promotion(MethodDesc* pMethodDesc)
+{
+ WRAPPER_NO_CONTRACT;
+ _ASSERTE(pMethodDesc != nullptr);
+ _ASSERTE(pMethodDesc->IsEligibleForTieredCompilation());
- if (currentCallCount < m_callCountOptimizationThreshhold)
+ if (g_pConfig->TieredCompilation_Tier1CallCountingDelayMs() == 0)
{
- return FALSE; // continue notifications for this method
+ return;
}
- else if (currentCallCount > m_callCountOptimizationThreshhold)
+
{
- return TRUE; // stop notifications for this method
+ SpinLockHolder holder(&m_tier1CountingDelayLock);
+ if (m_methodsPendingCountingForTier1 != nullptr)
+ {
+ // Record the method to resume counting later (see Tier1DelayTimerCallback)
+ m_methodsPendingCountingForTier1->Append(pMethodDesc);
+ return;
+ }
}
- AsyncPromoteMethodToTier1(pMethodDesc);
- return TRUE;
+
+ // Rare race condition with the timer callback
+ ResumeCountingCalls(pMethodDesc);
}
void TieredCompilationManager::AsyncPromoteMethodToTier1(MethodDesc* pMethodDesc)
@@ -258,6 +349,74 @@ void TieredCompilationManager::Shutdown(BOOL fBlockUntilAsyncWorkIsComplete)
}
}
+VOID WINAPI TieredCompilationManager::Tier1DelayTimerCallback(PVOID parameter, BOOLEAN timerFired)
+{
+ WRAPPER_NO_CONTRACT;
+ _ASSERTE(timerFired);
+
+ GCX_COOP();
+ ThreadpoolMgr::TimerInfoContext* timerContext = (ThreadpoolMgr::TimerInfoContext*)parameter;
+ ManagedThreadBase::ThreadPool(timerContext->AppDomainId, Tier1DelayTimerCallbackInAppDomain, nullptr);
+}
+
+void TieredCompilationManager::Tier1DelayTimerCallbackInAppDomain(LPVOID parameter)
+{
+ WRAPPER_NO_CONTRACT;
+ GetAppDomain()->GetTieredCompilationManager()->Tier1DelayTimerCallbackWorker();
+}
+
+void TieredCompilationManager::Tier1DelayTimerCallbackWorker()
+{
+ WRAPPER_NO_CONTRACT;
+
+ // Reschedule the timer if a tier 0 JIT has been invoked since the timer was started to further delay call counting
+ if (m_wasTier0JitInvokedSinceCountingDelayReset)
+ {
+ m_wasTier0JitInvokedSinceCountingDelayReset = false;
+
+ _ASSERTE(m_tier1CountingDelayTimerHandle != nullptr);
+ if (ThreadpoolMgr::ChangeTimerQueueTimer(
+ m_tier1CountingDelayTimerHandle,
+ g_pConfig->TieredCompilation_Tier1CallCountingDelayMs(),
+ (DWORD)-1 /* Period, non-repeating */))
+ {
+ return;
+ }
+ }
+
+ // Exchange the list of methods pending counting for tier 1
+ SArray<MethodDesc*>* methodsPendingCountingForTier1;
+ {
+ SpinLockHolder holder(&m_tier1CountingDelayLock);
+ methodsPendingCountingForTier1 = m_methodsPendingCountingForTier1;
+ _ASSERTE(methodsPendingCountingForTier1 != nullptr);
+ m_methodsPendingCountingForTier1 = nullptr;
+ }
+
+ // Install call counters
+ MethodDesc** methods = methodsPendingCountingForTier1->GetElements();
+ COUNT_T methodCount = methodsPendingCountingForTier1->GetCount();
+ for (COUNT_T i = 0; i < methodCount; ++i)
+ {
+ ResumeCountingCalls(methods[i]);
+ }
+ delete methodsPendingCountingForTier1;
+
+ // Delete the timer
+ _ASSERTE(m_tier1CountingDelayTimerHandle != nullptr);
+ ThreadpoolMgr::DeleteTimerQueueTimer(m_tier1CountingDelayTimerHandle, nullptr);
+ m_tier1CountingDelayTimerHandle = nullptr;
+}
+
+void TieredCompilationManager::ResumeCountingCalls(MethodDesc* pMethodDesc)
+{
+ WRAPPER_NO_CONTRACT;
+ _ASSERTE(pMethodDesc != nullptr);
+ _ASSERTE(pMethodDesc->IsVersionableWithPrecode());
+
+ pMethodDesc->GetPrecode()->ResetTargetInterlocked();
+}
+
// This is the initial entrypoint for the background thread, called by
// the threadpool.
DWORD WINAPI TieredCompilationManager::StaticOptimizeMethodsCallback(void *args)
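Taken together, the new tieredcompilation.cpp pieces form a small state machine: while the startup delay is active, a method whose counting stops without promotion is parked in m_methodsPendingCountingForTier1; each tier 0 JIT pushes the timer out by another delay period; and when the timer finally fires on a quiet process, every parked method's precode is reset so its calls run through the prestub again and count toward the tier 1 threshold. A compact single-threaded model of that hand-off (hypothetical types, locking and the timer reschedule omitted):

    #include <cstdio>
    #include <vector>

    struct ToyMethod { const char* name; bool counting = false; };

    static std::vector<ToyMethod*> g_pendingCounting; // ~ m_methodsPendingCountingForTier1
    static bool g_delayActive = true;                 // the delay timer has not fired yet

    // Counting stopped before the method reached the tier 1 threshold.
    void OnCountingStoppedWithoutPromotion(ToyMethod* m)
    {
        if (g_delayActive)
            g_pendingCounting.push_back(m); // park until startup looks over
        else
            m->counting = true;             // rare race with the timer: resume now
    }

    // Timer callback: no recent tier 0 jitting, so resume counting everywhere.
    void OnDelayTimerFired()
    {
        g_delayActive = false;
        for (ToyMethod* m : g_pendingCounting)
        {
            m->counting = true;             // ~ Precode::ResetTargetInterlocked()
            std::printf("resumed counting for %s\n", m->name);
        }
        g_pendingCounting.clear();
    }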
diff --git a/src/vm/tieredcompilation.h b/src/vm/tieredcompilation.h
index 9f6187244a..95dbb741fc 100644
--- a/src/vm/tieredcompilation.h
+++ b/src/vm/tieredcompilation.h
@@ -25,7 +25,12 @@ public:
#endif
void Init(ADID appDomainId);
- BOOL OnMethodCalled(MethodDesc* pMethodDesc, DWORD currentCallCount);
+
+ void InitiateTier1CountingDelay();
+ void OnTier0JitInvoked();
+
+ void OnMethodCalled(MethodDesc* pMethodDesc, DWORD currentCallCount, BOOL* shouldStopCountingCallsRef, BOOL* wasPromotedToTier1Ref);
+ void OnMethodCallCountingStoppedWithoutTier1Promotion(MethodDesc* pMethodDesc);
void AsyncPromoteMethodToTier1(MethodDesc* pMethodDesc);
static void ShutdownAllDomains();
void Shutdown(BOOL fBlockUntilAsyncWorkIsComplete);
@@ -33,6 +38,11 @@ public:
private:
+ static VOID WINAPI Tier1DelayTimerCallback(PVOID parameter, BOOLEAN timerFired);
+ static void Tier1DelayTimerCallbackInAppDomain(LPVOID parameter);
+ void Tier1DelayTimerCallbackWorker();
+ static void ResumeCountingCalls(MethodDesc* pMethodDesc);
+
static DWORD StaticOptimizeMethodsCallback(void* args);
void OptimizeMethodsCallback();
void OptimizeMethod(NativeCodeVersion nativeCodeVersion);
@@ -50,6 +60,12 @@ private:
DWORD m_countOptimizationThreadsRunning;
DWORD m_callCountOptimizationThreshhold;
DWORD m_optimizationQuantumMs;
+
+ SpinLock m_tier1CountingDelayLock;
+ SArray<MethodDesc*>* m_methodsPendingCountingForTier1;
+ HANDLE m_tier1CountingDelayTimerHandle;
+ bool m_wasTier0JitInvokedSinceCountingDelayReset;
+
CLREvent m_asyncWorkDoneEvent;
};