diff options
author | Noah Falk <noahfalk@users.noreply.github.com> | 2019-05-21 21:52:18 -0700 |
---|---|---|
committer | David Mason <davmason@microsoft.com> | 2019-05-21 21:52:18 -0700 |
commit | 671772c20a27c050df3d7d11391ea4f7de05165c (patch) | |
tree | 6e20b07feb90c3ef3b7b5bc1634e445b3c7f5f98 | |
parent | 29810a78e5b93d8da9fb921d096226d249fc75a5 (diff) | |
download | coreclr-671772c20a27c050df3d7d11391ea4f7de05165c.tar.gz coreclr-671772c20a27c050df3d7d11391ea4f7de05165c.tar.bz2 coreclr-671772c20a27c050df3d7d11391ea4f7de05165c.zip |
Fix profiler crash on shutdown (#22712)
Fixes issue #22176. Use the profiler evacuation counters to ensure that we
don't callback into the profiler when it has already been released.
Previously we only did this as part of the attach/detach feature, but this
is required for correctness during standard shutdown given that managed
threads are still running concurrently.
-rw-r--r-- | src/vm/ceemain.cpp | 9 | ||||
-rw-r--r-- | src/vm/eetoprofinterfaceimpl.cpp | 4 | ||||
-rw-r--r-- | src/vm/profdetach.cpp | 91 | ||||
-rw-r--r-- | src/vm/profdetach.h | 1 | ||||
-rw-r--r-- | src/vm/profilinghelper.cpp | 98 | ||||
-rw-r--r-- | src/vm/profilinghelper.h | 1 | ||||
-rw-r--r-- | src/vm/threads.cpp | 4 | ||||
-rw-r--r-- | src/vm/threads.h | 8 |
8 files changed, 112 insertions, 104 deletions
diff --git a/src/vm/ceemain.cpp b/src/vm/ceemain.cpp index 32dceea5e6..e03c78bcff 100644 --- a/src/vm/ceemain.cpp +++ b/src/vm/ceemain.cpp @@ -1605,6 +1605,15 @@ void STDMETHODCALLTYPE EEShutDownHelper(BOOL fIsDllUnloading) // profiler can make any last calls it needs to. Do this only if we // are not detaching + // NOTE: We haven't stopped other threads at this point and nothing is stopping + // callbacks from coming into the profiler even after Shutdown() has been called. + // See https://github.com/dotnet/coreclr/issues/22176 for an example of how that + // happens. + // Callbacks will be prevented when ProfilingAPIUtility::Terminate() changes the state + // to detached, which occurs shortly afterwards. It might be kinder to make the detaching + // transition before calling Shutdown(), but if we do we'd have to be very careful not + // to break profilers that were relying on being able to call various APIs during + // Shutdown(). I suspect this isn't something we'll ever do unless we get complaints. if (CORProfilerPresent()) { // If EEShutdown is not being called due to a ProcessDetach event, so diff --git a/src/vm/eetoprofinterfaceimpl.cpp b/src/vm/eetoprofinterfaceimpl.cpp index faae65f2eb..30b35a1cf6 100644 --- a/src/vm/eetoprofinterfaceimpl.cpp +++ b/src/vm/eetoprofinterfaceimpl.cpp @@ -99,13 +99,9 @@ enum ClrToProfEntrypointFlags kEE2PNoTrigger = 0x00000004, }; -#ifdef FEATURE_PROFAPI_ATTACH_DETACH #define ASSERT_EVAC_COUNTER_NONZERO() \ _ASSERTE((GetThreadNULLOk() == NULL) || \ (GetThreadNULLOk()->GetProfilerEvacuationCounter() != 0U)) -#else // FEATURE_PROFAPI_ATTACH_DETACH -#define ASSERT_EVAC_COUNTER_NONZERO() -#endif // FEATURE_PROFAPI_ATTACH_DETACH #define CHECK_PROFILER_STATUS(ee2pFlags) \ /* If one of these asserts fires, perhaps you forgot to use */ \ diff --git a/src/vm/profdetach.cpp b/src/vm/profdetach.cpp index 0181e83d7b..1594d42008 100644 --- a/src/vm/profdetach.cpp +++ b/src/vm/profdetach.cpp @@ -324,7 +324,7 @@ void ProfilingAPIDetach::ExecuteEvacuationLoop() // Give profiler a chance to return from its procs SleepWhileProfilerEvacuates(); } - while (!IsProfilerEvacuated()); + while (!ProfilingAPIUtility::IsProfilerEvacuated()); UnloadProfiler(); } @@ -442,96 +442,7 @@ void ProfilingAPIDetach::SleepWhileProfilerEvacuates() ClrSleepEx((DWORD) ui64SleepMilliseconds, FALSE /* alertable */); } -//--------------------------------------------------------------------------------------- -// -// Performs the evacuation checks by grabbing the thread store lock, iterating through -// all EE Threads, and querying each one's evacuation counter. If they're all 0, the -// profiler is ready to be unloaded. -// -// Return Value: -// Nonzero iff the profiler is fully evacuated and ready to be unloaded. -// - -// static -BOOL ProfilingAPIDetach::IsProfilerEvacuated() -{ - CONTRACTL - { - NOTHROW; - GC_TRIGGERS; - MODE_ANY; - CAN_TAKE_LOCK; - } - CONTRACTL_END; - _ASSERTE(g_profControlBlock.curProfStatus.Get() == kProfStatusDetaching); - - // Check evacuation counters on all the threads (see - // code:ProfilingAPIUtility::InitializeProfiling#LoadUnloadCallbackSynchronization - // for details). Doing this under the thread store lock not only ensures we can - // iterate through the Thread objects safely, but also forces us to serialize with - // the GC. The latter is important, as server GC enters the profiler on non-EE - // Threads, and so no evacuation counters might be incremented during server GC even - // though control could be entering the profiler. - { - ThreadStoreLockHolder TSLockHolder; - - Thread * pThread = ThreadStore::GetAllThreadList( - NULL, // cursor thread; always NULL to begin with - 0, // mask to AND with Thread::m_State to filter returned threads - 0); // bits to match the result of the above AND. (m_State & 0 == 0, - // so we won't filter out any threads) - - // Note that, by not filtering out any of the threads, we're intentionally including - // stuff like TS_Dead or TS_Unstarted. But that keeps us on the safe - // side. If an EE Thread object exists, we want to check its counters to be - // absolutely certain it isn't executing in a profiler. - - while (pThread != NULL) - { - // Note that pThread is still in motion as we check its evacuation counter. - // This is ok, because we've already changed the profiler status to - // kProfStatusDetaching and flushed CPU buffers. So at this point the counter - // will typically only go down to 0 (and not increment anymore), with one - // small exception (below). So if we get a read of 0 below, the counter will - // typically stay there. Specifically: - // * pThread is most likely not about to increment its evacuation counter - // from 0 to 1 because pThread sees that the status is - // kProfStatusDetaching. - // * Note that there is a small race where pThread might actually - // increment its evac counter from 0 to 1 (if it dirty-read the - // profiler status a tad too early), but that implies that when - // pThread rechecks the profiler status (clean read) then pThread - // will immediately decrement the evac counter back to 0 and avoid - // calling into the EEToProfInterfaceImpl pointer. - // - // (see - // code:ProfilingAPIUtility::InitializeProfiling#LoadUnloadCallbackSynchronization - // for details) - DWORD dwEvacCounter = pThread->GetProfilerEvacuationCounter(); - if (dwEvacCounter != 0) - { - LOG(( - LF_CORPROF, - LL_INFO100, - "**PROF: Profiler not yet evacuated because OS Thread ID 0x%x has evac counter of %d (decimal).\n", - pThread->GetOSThreadId(), - dwEvacCounter)); - return FALSE; - } - - pThread = ThreadStore::GetAllThreadList(pThread, 0, 0); - } - } - - // FUTURE: When rejit feature crew complete, add code to verify all rejitted - // functions are fully reverted and off of all stacks. If this is very easy to - // verify (e.g., checking a single value), consider putting it above the loop - // above so we can early-out quicker if rejitted code is still around. - - // We got this far without returning, so the profiler is fully evacuated - return TRUE; -} // --------------------------------------------------------------------------------------- // After we've verified a detaching profiler has fully evacuated, call this to unload the diff --git a/src/vm/profdetach.h b/src/vm/profdetach.h index f3d13b8f9e..a3026b12f1 100644 --- a/src/vm/profdetach.h +++ b/src/vm/profdetach.h @@ -54,7 +54,6 @@ public: static HRESULT CreateDetachThread(); static DWORD WINAPI ProfilingAPIDetachThreadStart(LPVOID lpParameter); static void ExecuteEvacuationLoop(); - static BOOL IsProfilerEvacuated(); static EEToProfInterfaceImpl * GetEEToProfPtr(); diff --git a/src/vm/profilinghelper.cpp b/src/vm/profilinghelper.cpp index ba410b1a70..7a686b6c2f 100644 --- a/src/vm/profilinghelper.cpp +++ b/src/vm/profilinghelper.cpp @@ -94,7 +94,7 @@ // use g_profControlBlock.pProfInterface. And if the reader clean-reads the status before // the buffers were flushed, then the reader will have incremented its evacuation counter // first, which the writer will be sure to see in (c). For more details about how the -// evacuation counters work, see code:ProfilingAPIDetach::IsProfilerEvacuated. +// evacuation counters work, see code:ProfilingAPIUtility::IsProfilerEvacuated. // // WHEN ARE BEGIN/END_PIN_PROFILER REQUIRED? // @@ -1246,6 +1246,97 @@ HRESULT ProfilingAPIUtility::LoadProfiler( //--------------------------------------------------------------------------------------- // +// Performs the evacuation checks by grabbing the thread store lock, iterating through +// all EE Threads, and querying each one's evacuation counter. If they're all 0, the +// profiler is ready to be unloaded. +// +// Return Value: +// Nonzero iff the profiler is fully evacuated and ready to be unloaded. +// + +// static +BOOL ProfilingAPIUtility::IsProfilerEvacuated() +{ + CONTRACTL + { + NOTHROW; + GC_TRIGGERS; + MODE_ANY; + CAN_TAKE_LOCK; + } + CONTRACTL_END; + + _ASSERTE(g_profControlBlock.curProfStatus.Get() == kProfStatusDetaching); + + // Check evacuation counters on all the threads (see + // code:ProfilingAPIUtility::InitializeProfiling#LoadUnloadCallbackSynchronization + // for details). Doing this under the thread store lock not only ensures we can + // iterate through the Thread objects safely, but also forces us to serialize with + // the GC. The latter is important, as server GC enters the profiler on non-EE + // Threads, and so no evacuation counters might be incremented during server GC even + // though control could be entering the profiler. + { + ThreadStoreLockHolder TSLockHolder; + + Thread * pThread = ThreadStore::GetAllThreadList( + NULL, // cursor thread; always NULL to begin with + 0, // mask to AND with Thread::m_State to filter returned threads + 0); // bits to match the result of the above AND. (m_State & 0 == 0, + // so we won't filter out any threads) + + // Note that, by not filtering out any of the threads, we're intentionally including + // stuff like TS_Dead or TS_Unstarted. But that keeps us on the safe + // side. If an EE Thread object exists, we want to check its counters to be + // absolutely certain it isn't executing in a profiler. + + while (pThread != NULL) + { + // Note that pThread is still in motion as we check its evacuation counter. + // This is ok, because we've already changed the profiler status to + // kProfStatusDetaching and flushed CPU buffers. So at this point the counter + // will typically only go down to 0 (and not increment anymore), with one + // small exception (below). So if we get a read of 0 below, the counter will + // typically stay there. Specifically: + // * pThread is most likely not about to increment its evacuation counter + // from 0 to 1 because pThread sees that the status is + // kProfStatusDetaching. + // * Note that there is a small race where pThread might actually + // increment its evac counter from 0 to 1 (if it dirty-read the + // profiler status a tad too early), but that implies that when + // pThread rechecks the profiler status (clean read) then pThread + // will immediately decrement the evac counter back to 0 and avoid + // calling into the EEToProfInterfaceImpl pointer. + // + // (see + // code:ProfilingAPIUtility::InitializeProfiling#LoadUnloadCallbackSynchronization + // for details) + DWORD dwEvacCounter = pThread->GetProfilerEvacuationCounter(); + if (dwEvacCounter != 0) + { + LOG(( + LF_CORPROF, + LL_INFO100, + "**PROF: Profiler not yet evacuated because OS Thread ID 0x%x has evac counter of %d (decimal).\n", + pThread->GetOSThreadId(), + dwEvacCounter)); + return FALSE; + } + + pThread = ThreadStore::GetAllThreadList(pThread, 0, 0); + } + } + + // FUTURE: When rejit feature crew complete, add code to verify all rejitted + // functions are fully reverted and off of all stacks. If this is very easy to + // verify (e.g., checking a single value), consider putting it above the loop + // above so we can early-out quicker if rejitted code is still around. + + // We got this far without returning, so the profiler is fully evacuated + return TRUE; +} + +//--------------------------------------------------------------------------------------- +// // This is the top-most level of profiling API teardown, and is called directly by // EEShutDownHelper() (in ceemain.cpp). This cleans up internal structures relating to // the Profiling API. If we're not in process teardown, then this also releases the @@ -1300,6 +1391,7 @@ void ProfilingAPIUtility::TerminateProfiling() // them give a GetEEToProfPtr() equal to g_profControlBlock.pProfInterface. return; } +#endif // FEATURE_PROFAPI_ATTACH_DETACH if (g_profControlBlock.curProfStatus.Get() == kProfStatusActive) { @@ -1311,13 +1403,13 @@ void ProfilingAPIUtility::TerminateProfiling() // that the status has been changed to kProfStatusDetaching, no new threads will // attempt to enter the profiler. But use the detach evacuation counters to see // if other threads already began to enter the profiler. - if (!ProfilingAPIDetach::IsProfilerEvacuated()) + if (!ProfilingAPIUtility::IsProfilerEvacuated()) { // Other threads might be entering the profiler, so just skip cleanup return; } } -#endif // FEATURE_PROFAPI_ATTACH_DETACH + // If we have a profiler callback wrapper and / or info implementation // active, then terminate them. diff --git a/src/vm/profilinghelper.h b/src/vm/profilinghelper.h index 2682d8a6d1..6847a901d2 100644 --- a/src/vm/profilinghelper.h +++ b/src/vm/profilinghelper.h @@ -54,6 +54,7 @@ public: UINT cbClientData, DWORD dwConcurrentGCWaitTimeoutInMs); + static BOOL IsProfilerEvacuated(); static void TerminateProfiling(); static void LogProfError(int iStringResourceID, ...); static void LogProfInfo(int iStringResourceID, ...); diff --git a/src/vm/threads.cpp b/src/vm/threads.cpp index 4abc89155a..40e846bf40 100644 --- a/src/vm/threads.cpp +++ b/src/vm/threads.cpp @@ -1368,9 +1368,9 @@ Thread::Thread() m_debuggerCantStop = 0; m_fInteropDebuggingHijacked = FALSE; m_profilerCallbackState = 0; -#ifdef FEATURE_PROFAPI_ATTACH_DETACH +#ifdef PROFILING_SUPPORTED m_dwProfilerEvacuationCounter = 0; -#endif // FEATURE_PROFAPI_ATTACH_DETACH +#endif // PROFILING_SUPPORTED m_pProfilerFilterContext = NULL; diff --git a/src/vm/threads.h b/src/vm/threads.h index 6b7fe45368..2cbf0e0674 100644 --- a/src/vm/threads.h +++ b/src/vm/threads.h @@ -3848,7 +3848,7 @@ private: //--------------------------------------------------------------- DWORD m_profilerCallbackState; -#if defined(FEATURE_PROFAPI_ATTACH_DETACH) || defined(DATA_PROFAPI_ATTACH_DETACH) +#if defined(PROFILING_SUPPORTED) || defined(PROFILING_SUPPORTED_DATA) //--------------------------------------------------------------- // m_dwProfilerEvacuationCounter keeps track of how many profiler // callback calls remain on the stack @@ -3856,7 +3856,7 @@ private: // Why volatile? // See code:ProfilingAPIUtility::InitializeProfiling#LoadUnloadCallbackSynchronization. Volatile<DWORD> m_dwProfilerEvacuationCounter; -#endif // defined(FEATURE_PROFAPI_ATTACH_DETACH) || defined(DATA_PROFAPI_ATTACH_DETACH) +#endif // defined(PROFILING_SUPPORTED) || defined(PROFILING_SUPPORTED_DATA) private: UINT32 m_workerThreadPoolCompletionCount; @@ -4035,7 +4035,7 @@ public: return m_pProfilerFilterContext; } -#ifdef FEATURE_PROFAPI_ATTACH_DETACH +#ifdef PROFILING_SUPPORTED FORCEINLINE DWORD GetProfilerEvacuationCounter(void) { @@ -4057,7 +4057,7 @@ public: m_dwProfilerEvacuationCounter--; } -#endif // FEATURE_PROFAPI_ATTACH_DETACH +#endif // PROFILING_SUPPORTED //------------------------------------------------------------------------- // The hijack lock enforces that a thread on which a profiler is currently |