summaryrefslogtreecommitdiff
path: root/src/vm/win32threadpool.cpp
diff options
context:
space:
mode:
authorKoundinya Veluri <kouvel@users.noreply.github.com>2017-09-22 00:17:16 -0700
committerGitHub <noreply@github.com>2017-09-22 00:17:16 -0700
commit6cee2edabe83ebb4899c38859f6e36eb461a3759 (patch)
tree02da26ad16de120435d377c2182673bbf0a519e9 /src/vm/win32threadpool.cpp
parent43cf34fe74b435427ffb62f54fc730b4b46c4c22 (diff)
downloadcoreclr-6cee2edabe83ebb4899c38859f6e36eb461a3759.tar.gz
coreclr-6cee2edabe83ebb4899c38859f6e36eb461a3759.tar.bz2
coreclr-6cee2edabe83ebb4899c38859f6e36eb461a3759.zip
Improve thread pool worker thread's spinning for work (#13921)
Improve thread pool worker thread's spinning for work Closes https://github.com/dotnet/coreclr/issues/5928 Replaced UnfairSemaphore with a new implementation in CLRLifoSemaphore - UnfairSemaphore had some benefits: - It tracked the number of spinners and avoids waking up waiters as long as the signal count can be satisfied by spinners - Since spinners get priority over waiters, that's the main "unfair" part of it that allows hot threads to remain hot and cold threads to remain cold. However, waiters are still released in FIFO order. - Spinning helps with throughput when incoming work is bursty - All of the above benefits were retained in CLRLifoSemaphore and some were improved: - Similarly to UnfairSemaphore, the number of spinners is tracked and given preference to avoid waking up waiters - For waiting, on Windows, an I/O completion port is used since it releases waiters in LIFO order. For Unix, added a prioritized wait function to the PAL to register waiters in reverse order for LIFO release behavior. This allows cold waiters to time out more easily since they will be used less frequently. - Similarly to SemaphoreSlim, the number of waiters that were signaled to wake but have not yet woken is tracked to help avoid waking up an excessive number of waiters - Added some YieldProcessorNormalized() calls to the spin loop. This avoids thrashing on Sleep(0) by adding a delay to the spin loop to allow it to be more effective when there are no threads to switch to, or the only other threads to switch to are other similar spinners. - Removed the processor count multiplier on the max spin count and retuned the default max spin count. The processor count multiplier was causing excessive CPU usage on machines with many processors.
Diffstat (limited to 'src/vm/win32threadpool.cpp')
-rw-r--r--src/vm/win32threadpool.cpp119
1 files changed, 57 insertions, 62 deletions
diff --git a/src/vm/win32threadpool.cpp b/src/vm/win32threadpool.cpp
index eabbcb93ae..97c020a4b6 100644
--- a/src/vm/win32threadpool.cpp
+++ b/src/vm/win32threadpool.cpp
@@ -103,6 +103,7 @@ DWORD ThreadpoolMgr::NextCompletedWorkRequestsTime;
LARGE_INTEGER ThreadpoolMgr::CurrentSampleStartTime;
+unsigned int ThreadpoolMgr::WorkerThreadSpinLimit;
int ThreadpoolMgr::ThreadAdjustmentInterval;
#define INVALID_HANDLE ((HANDLE) -1)
@@ -136,8 +137,8 @@ CLREvent * ThreadpoolMgr::RetiredCPWakeupEvent; // wakeup event for comple
CrstStatic ThreadpoolMgr::WaitThreadsCriticalSection;
ThreadpoolMgr::LIST_ENTRY ThreadpoolMgr::WaitThreadsHead;
-ThreadpoolMgr::UnfairSemaphore* ThreadpoolMgr::WorkerSemaphore;
-CLRSemaphore* ThreadpoolMgr::RetiredWorkerSemaphore;
+CLRLifoSemaphore* ThreadpoolMgr::WorkerSemaphore;
+CLRLifoSemaphore* ThreadpoolMgr::RetiredWorkerSemaphore;
CrstStatic ThreadpoolMgr::TimerQueueCriticalSection;
HANDLE ThreadpoolMgr::TimerThread=NULL;
@@ -353,6 +354,7 @@ BOOL ThreadpoolMgr::Initialize()
EX_TRY
{
+ WorkerThreadSpinLimit = CLRConfig::GetConfigValue(CLRConfig::INTERNAL_ThreadPool_UnfairSemaphoreSpinLimit);
ThreadAdjustmentInterval = CLRConfig::GetConfigValue(CLRConfig::INTERNAL_HillClimbing_SampleIntervalLow);
pADTPCount->InitResources();
@@ -370,26 +372,26 @@ BOOL ThreadpoolMgr::Initialize()
RetiredCPWakeupEvent->CreateAutoEvent(FALSE);
_ASSERTE(RetiredCPWakeupEvent->IsValid());
- int spinLimitPerProcessor = CLRConfig::GetConfigValue(CLRConfig::INTERNAL_ThreadPool_UnfairSemaphoreSpinLimit);
- WorkerSemaphore = new UnfairSemaphore(ThreadCounter::MaxPossibleCount, spinLimitPerProcessor);
+ WorkerSemaphore = new CLRLifoSemaphore();
+ WorkerSemaphore->Create(0, ThreadCounter::MaxPossibleCount);
- RetiredWorkerSemaphore = new CLRSemaphore();
+ RetiredWorkerSemaphore = new CLRLifoSemaphore();
RetiredWorkerSemaphore->Create(0, ThreadCounter::MaxPossibleCount);
- //ThreadPool_CPUGroup
- if (CPUGroupInfo::CanEnableGCCPUGroups() && CPUGroupInfo::CanEnableThreadUseAllCpuGroups())
+ //ThreadPool_CPUGroup
+ if (CPUGroupInfo::CanEnableGCCPUGroups() && CPUGroupInfo::CanEnableThreadUseAllCpuGroups())
RecycledLists.Initialize( CPUGroupInfo::GetNumActiveProcessors() );
else
- RecycledLists.Initialize( g_SystemInfo.dwNumberOfProcessors );
- /*
- {
- SYSTEM_INFO sysInfo;
+ RecycledLists.Initialize( g_SystemInfo.dwNumberOfProcessors );
+ /*
+ {
+ SYSTEM_INFO sysInfo;
- ::GetSystemInfo( &sysInfo );
+ ::GetSystemInfo( &sysInfo );
- RecycledLists.Initialize( sysInfo.dwNumberOfProcessors );
- }
- */
+ RecycledLists.Initialize( sysInfo.dwNumberOfProcessors );
+ }
+ */
}
EX_CATCH
{
@@ -1034,9 +1036,7 @@ void ThreadpoolMgr::MaybeAddWorkingWorker()
if (toUnretire > 0)
{
- LONG previousCount;
- INDEBUG(BOOL success =) RetiredWorkerSemaphore->Release((LONG)toUnretire, &previousCount);
- _ASSERTE(success);
+ RetiredWorkerSemaphore->Release(toUnretire);
}
if (toRelease > 0)
@@ -2055,10 +2055,7 @@ Retire:
while (true)
{
RetryRetire:
- DWORD result = RetiredWorkerSemaphore->Wait(AppX::IsAppXProcess() ? WorkerTimeoutAppX : WorkerTimeout, FALSE);
- _ASSERTE(WAIT_OBJECT_0 == result || WAIT_TIMEOUT == result);
-
- if (WAIT_OBJECT_0 == result)
+ if (RetiredWorkerSemaphore->Wait(AppX::IsAppXProcess() ? WorkerTimeoutAppX : WorkerTimeout))
{
foundWork = true;
@@ -2134,59 +2131,57 @@ WaitForWork:
FireEtwThreadPoolWorkerThreadWait(counts.NumActive, counts.NumRetired, GetClrInstanceId());
RetryWaitForWork:
- if (!WorkerSemaphore->Wait(AppX::IsAppXProcess() ? WorkerTimeoutAppX : WorkerTimeout))
+ if (WorkerSemaphore->Wait(AppX::IsAppXProcess() ? WorkerTimeoutAppX : WorkerTimeout, WorkerThreadSpinLimit, NumberOfProcessors))
{
- if (!IsIoPending())
- {
- //
- // We timed out, and are about to exit. This puts us in a very similar situation to the
- // retirement case above - someone may think we're still waiting, and go ahead and:
- //
- // 1) Increment NumWorking
- // 2) Signal WorkerSemaphore
- //
- // The solution is much like retirement; when we're decrementing NumActive, we need to make
- // sure it doesn't drop below NumWorking. If it would, then we need to go back and wait
- // again.
- //
+ foundWork = true;
+ goto Work;
+ }
- DangerousNonHostedSpinLockHolder tal(&ThreadAdjustmentLock);
+ if (!IsIoPending())
+ {
+ //
+ // We timed out, and are about to exit. This puts us in a very similar situation to the
+ // retirement case above - someone may think we're still waiting, and go ahead and:
+ //
+ // 1) Increment NumWorking
+ // 2) Signal WorkerSemaphore
+ //
+ // The solution is much like retirement; when we're decrementing NumActive, we need to make
+ // sure it doesn't drop below NumWorking. If it would, then we need to go back and wait
+ // again.
+ //
- // counts volatile read paired with CompareExchangeCounts loop set
- counts = WorkerCounter.DangerousGetDirtyCounts();
- while (true)
- {
- if (counts.NumActive == counts.NumWorking)
- {
- goto RetryWaitForWork;
- }
+ DangerousNonHostedSpinLockHolder tal(&ThreadAdjustmentLock);
- newCounts = counts;
- newCounts.NumActive--;
+ // counts volatile read paired with CompareExchangeCounts loop set
+ counts = WorkerCounter.DangerousGetDirtyCounts();
+ while (true)
+ {
+ if (counts.NumActive == counts.NumWorking)
+ {
+ goto RetryWaitForWork;
+ }
- // if we timed out while active, then Hill Climbing needs to be told that we need fewer threads
- newCounts.MaxWorking = max(MinLimitTotalWorkerThreads, min(newCounts.NumActive, newCounts.MaxWorking));
+ newCounts = counts;
+ newCounts.NumActive--;
- oldCounts = WorkerCounter.CompareExchangeCounts(newCounts, counts);
+ // if we timed out while active, then Hill Climbing needs to be told that we need fewer threads
+ newCounts.MaxWorking = max(MinLimitTotalWorkerThreads, min(newCounts.NumActive, newCounts.MaxWorking));
- if (oldCounts == counts)
- {
- HillClimbingInstance.ForceChange(newCounts.MaxWorking, ThreadTimedOut);
- goto Exit;
- }
+ oldCounts = WorkerCounter.CompareExchangeCounts(newCounts, counts);
- counts = oldCounts;
+ if (oldCounts == counts)
+ {
+ HillClimbingInstance.ForceChange(newCounts.MaxWorking, ThreadTimedOut);
+ goto Exit;
}
- }
- else
- {
- goto RetryWaitForWork;
+
+ counts = oldCounts;
}
}
else
{
- foundWork = true;
- goto Work;
+ goto RetryWaitForWork;
}
Exit: