summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorKoundinya Veluri <kouvel@users.noreply.github.com>2017-10-31 12:04:44 -0700
committerGitHub <noreply@github.com>2017-10-31 12:04:44 -0700
commit0bbce0cd3bf5f1e811e727d3922fade3b3b87a03 (patch)
treefb4da4b55371bbdec623b46f496f1fd008d12289 /src
parentedc9a0370aabf55ee5ad8693d0a46171208ff549 (diff)
downloadcoreclr-0bbce0cd3bf5f1e811e727d3922fade3b3b87a03.tar.gz
coreclr-0bbce0cd3bf5f1e811e727d3922fade3b3b87a03.tar.bz2
coreclr-0bbce0cd3bf5f1e811e727d3922fade3b3b87a03.zip
Clean up YieldProcessorNormalized (#14739)
Move YieldProcessorNormalized into separate files. Clean up YieldProcessorNormalized.
Diffstat (limited to 'src')
-rw-r--r--src/vm/CMakeLists.txt1
-rw-r--r--src/vm/common.h1
-rw-r--r--src/vm/comsynchronizable.cpp8
-rw-r--r--src/vm/synch.cpp2
-rw-r--r--src/vm/threads.cpp105
-rw-r--r--src/vm/threads.h72
-rw-r--r--src/vm/yieldprocessornormalized.cpp105
-rw-r--r--src/vm/yieldprocessornormalized.h103
8 files changed, 214 insertions(+), 183 deletions(-)
diff --git a/src/vm/CMakeLists.txt b/src/vm/CMakeLists.txt
index 8b9219dee3..aefc77113c 100644
--- a/src/vm/CMakeLists.txt
+++ b/src/vm/CMakeLists.txt
@@ -116,6 +116,7 @@ set(VM_SOURCES_DAC_AND_WKS_COMMON
versionresilienthashcode.cpp
virtualcallstub.cpp
win32threadpool.cpp
+ yieldprocessornormalized.cpp
zapsig.cpp
)
diff --git a/src/vm/common.h b/src/vm/common.h
index 8f37574027..54ebf362c9 100644
--- a/src/vm/common.h
+++ b/src/vm/common.h
@@ -313,6 +313,7 @@ namespace Loader
#include "pedecoder.h"
#include "sstring.h"
#include "slist.h"
+#include "yieldprocessornormalized.h"
#include "eeconfig.h"
diff --git a/src/vm/comsynchronizable.cpp b/src/vm/comsynchronizable.cpp
index 1d7541a74a..472ca34feb 100644
--- a/src/vm/comsynchronizable.cpp
+++ b/src/vm/comsynchronizable.cpp
@@ -1658,9 +1658,7 @@ FCIMPL1(void, ThreadNative::SpinWait, int iterations)
//
if (iterations <= 100000)
{
- YieldProcessorNormalizationInfo normalizationInfo;
- for (int i = 0; i < iterations; i++)
- YieldProcessorNormalized(normalizationInfo);
+ YieldProcessorNormalized(YieldProcessorNormalizationInfo(), iterations);
return;
}
@@ -1670,9 +1668,7 @@ FCIMPL1(void, ThreadNative::SpinWait, int iterations)
HELPER_METHOD_FRAME_BEGIN_NOPOLL();
GCX_PREEMP();
- YieldProcessorNormalizationInfo normalizationInfo;
- for (int i = 0; i < iterations; i++)
- YieldProcessorNormalized(normalizationInfo);
+ YieldProcessorNormalized(YieldProcessorNormalizationInfo(), iterations);
HELPER_METHOD_FRAME_END();
}
diff --git a/src/vm/synch.cpp b/src/vm/synch.cpp
index 31ed23546a..c21e4f53a0 100644
--- a/src/vm/synch.cpp
+++ b/src/vm/synch.cpp
@@ -841,7 +841,7 @@ bool CLRLifoSemaphore::Wait(DWORD timeoutMs, UINT32 spinCount, UINT32 processorC
}
#else // !_TARGET_ARM64_
const UINT32 Sleep0Threshold = 10;
- YieldProcessorWithBackOffNormalizationInfo normalizationInfo;
+ YieldProcessorNormalizationInfo normalizationInfo;
#ifdef FEATURE_PAL
// The PAL's wait subsystem is quite slow, spin more to compensate for the more expensive wait
spinCount *= 2;
diff --git a/src/vm/threads.cpp b/src/vm/threads.cpp
index 941d3645be..d9ee637b1a 100644
--- a/src/vm/threads.cpp
+++ b/src/vm/threads.cpp
@@ -63,8 +63,6 @@ SPTR_IMPL(ThreadStore, ThreadStore, s_pThreadStore);
CONTEXT *ThreadStore::s_pOSContext = NULL;
CLREvent *ThreadStore::s_pWaitForStackCrawlEvent;
-static CrstStatic s_initializeYieldProcessorNormalizedCrst;
-
#ifndef DACCESS_COMPILE
@@ -1103,7 +1101,7 @@ void InitThreadManager()
}
CONTRACTL_END;
- s_initializeYieldProcessorNormalizedCrst.Init(CrstLeafLock);
+ InitializeYieldProcessorNormalizedCrst();
// All patched helpers should fit into one page.
// If you hit this assert on retail build, there is most likely problem with BBT script.
@@ -11412,104 +11410,3 @@ ULONGLONG Thread::QueryThreadProcessorUsage()
return ullCurrentUsage - ullPreviousUsage;
}
#endif // FEATURE_APPDOMAIN_RESOURCE_MONITORING
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-// YieldProcessorNormalized
-
-// Defaults are for when InitializeYieldProcessorNormalized has not yet been called or when no measurement is done, and are
-// tuned for Skylake processors
-int g_yieldsPerNormalizedYield = 1; // current value is for Skylake processors, this would be 9 for pre-Skylake
-int g_optimalMaxNormalizedYieldsPerSpinIteration = 7;
-
-static Volatile<bool> s_isYieldProcessorNormalizedInitialized = false;
-
-void InitializeYieldProcessorNormalized()
-{
- LIMITED_METHOD_CONTRACT;
-
- CrstHolder lock(&s_initializeYieldProcessorNormalizedCrst);
-
- if (s_isYieldProcessorNormalizedInitialized)
- {
- return;
- }
-
- // Intel pre-Skylake processor: measured typically 14-17 cycles per yield
- // Intel post-Skylake processor: measured typically 125-150 cycles per yield
- const int MeasureDurationMs = 10;
- const int MaxYieldsPerNormalizedYield = 10; // measured typically 8-9 on pre-Skylake
- const int MinNsPerNormalizedYield = 37; // measured typically 37-46 on post-Skylake
- const int NsPerOptimialMaxSpinIterationDuration = 272; // approx. 900 cycles, measured 281 on pre-Skylake, 263 on post-Skylake
- const int NsPerSecond = 1000 * 1000 * 1000;
-
- LARGE_INTEGER li;
- if (!QueryPerformanceFrequency(&li) || (ULONGLONG)li.QuadPart < 1000 / MeasureDurationMs)
- {
- // High precision clock not available or clock resolution is too low, resort to defaults
- s_isYieldProcessorNormalizedInitialized = true;
- return;
- }
- ULONGLONG ticksPerSecond = li.QuadPart;
-
- // Measure the nanosecond delay per yield
- ULONGLONG measureDurationTicks = ticksPerSecond / (1000 / MeasureDurationMs);
- unsigned int yieldCount = 0;
- QueryPerformanceCounter(&li);
- ULONGLONG startTicks = li.QuadPart;
- ULONGLONG elapsedTicks;
- do
- {
- // On some systems, querying the high performance counter has relatively significant overhead. Do enough yields to mask
- // the timing overhead. Assuming one yield has a delay of MinNsPerNormalizedYield, 1000 yields would have a delay in the
- // low microsecond range.
- for (int i = 0; i < 1000; ++i)
- {
- YieldProcessor();
- }
- yieldCount += 1000;
-
- QueryPerformanceCounter(&li);
- ULONGLONG nowTicks = li.QuadPart;
- elapsedTicks = nowTicks - startTicks;
- } while (elapsedTicks < measureDurationTicks);
- double nsPerYield = (double)elapsedTicks * NsPerSecond / ((double)yieldCount * ticksPerSecond);
- if (nsPerYield < 1)
- {
- nsPerYield = 1;
- }
-
- // Calculate the number of yields required to span the duration of a normalized yield
- int yieldsPerNormalizedYield = (int)(MinNsPerNormalizedYield / nsPerYield + 0.5);
- if (yieldsPerNormalizedYield < 1)
- {
- yieldsPerNormalizedYield = 1;
- }
- else if (yieldsPerNormalizedYield > MaxYieldsPerNormalizedYield)
- {
- yieldsPerNormalizedYield = MaxYieldsPerNormalizedYield;
- }
-
- // Calculate the maximum number of yields that would be optimal for a late spin iteration. Typically, we would not want to
- // spend excessive amounts of time (thousands of cycles) doing only YieldProcessor, as SwitchToThread/Sleep would do a
- // better job of allowing other work to run.
- int optimalMaxNormalizedYieldsPerSpinIteration =
- (int)(NsPerOptimialMaxSpinIterationDuration / (yieldsPerNormalizedYield * nsPerYield) + 0.5);
- if (optimalMaxNormalizedYieldsPerSpinIteration < 1)
- {
- optimalMaxNormalizedYieldsPerSpinIteration = 1;
- }
-
- g_yieldsPerNormalizedYield = yieldsPerNormalizedYield;
- g_optimalMaxNormalizedYieldsPerSpinIteration = optimalMaxNormalizedYieldsPerSpinIteration;
- s_isYieldProcessorNormalizedInitialized = true;
-}
-
-void EnsureYieldProcessorNormalizedInitialized()
-{
- WRAPPER_NO_CONTRACT;
-
- if (!s_isYieldProcessorNormalizedInitialized)
- {
- InitializeYieldProcessorNormalized();
- }
-}
diff --git a/src/vm/threads.h b/src/vm/threads.h
index 05e01b3004..bae1db49f6 100644
--- a/src/vm/threads.h
+++ b/src/vm/threads.h
@@ -7476,76 +7476,4 @@ private:
BOOL Debug_IsLockedViaThreadSuspension();
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-// YieldProcessorNormalized
-
-extern int g_yieldsPerNormalizedYield;
-extern int g_optimalMaxNormalizedYieldsPerSpinIteration;
-
-void InitializeYieldProcessorNormalized();
-void EnsureYieldProcessorNormalizedInitialized();
-
-class YieldProcessorNormalizationInfo
-{
-private:
- int yieldsPerNormalizedYield;
-
-public:
- YieldProcessorNormalizationInfo() : yieldsPerNormalizedYield(g_yieldsPerNormalizedYield)
- {
- }
-
- friend void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &);
-};
-
-FORCEINLINE void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &normalizationInfo)
-{
- LIMITED_METHOD_CONTRACT;
-
- int n = normalizationInfo.yieldsPerNormalizedYield;
- while (--n >= 0)
- {
- YieldProcessor();
- }
-}
-
-class YieldProcessorWithBackOffNormalizationInfo
-{
-private:
- int yieldsPerNormalizedYield;
- int optimalMaxNormalizedYieldsPerSpinIteration;
- int optimalMaxYieldsPerSpinIteration;
-
-public:
- YieldProcessorWithBackOffNormalizationInfo()
- : yieldsPerNormalizedYield(g_yieldsPerNormalizedYield),
- optimalMaxNormalizedYieldsPerSpinIteration(g_optimalMaxNormalizedYieldsPerSpinIteration),
- optimalMaxYieldsPerSpinIteration(yieldsPerNormalizedYield * optimalMaxNormalizedYieldsPerSpinIteration)
- {
- }
-
- friend void YieldProcessorWithBackOffNormalized(const YieldProcessorWithBackOffNormalizationInfo &, unsigned int);
-};
-
-FORCEINLINE void YieldProcessorWithBackOffNormalized(
- const YieldProcessorWithBackOffNormalizationInfo &normalizationInfo,
- unsigned int spinIteration)
-{
- LIMITED_METHOD_CONTRACT;
-
- int n;
- if (spinIteration <= 30 && (1 << spinIteration) < normalizationInfo.optimalMaxNormalizedYieldsPerSpinIteration)
- {
- n = (1 << spinIteration) * normalizationInfo.yieldsPerNormalizedYield;
- }
- else
- {
- n = normalizationInfo.optimalMaxYieldsPerSpinIteration;
- }
- while (--n >= 0)
- {
- YieldProcessor();
- }
-}
-
#endif //__threads_h__
diff --git a/src/vm/yieldprocessornormalized.cpp b/src/vm/yieldprocessornormalized.cpp
new file mode 100644
index 0000000000..94daeb42f5
--- /dev/null
+++ b/src/vm/yieldprocessornormalized.cpp
@@ -0,0 +1,105 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "common.h"
+
+// Defaults are for when InitializeYieldProcessorNormalized has not yet been called or when no measurement is done, and are
+// tuned for Skylake processors
+unsigned int g_yieldsPerNormalizedYield = 1; // current value is for Skylake processors, this is expected to be ~9 for pre-Skylake
+unsigned int g_optimalMaxNormalizedYieldsPerSpinIteration = 7;
+
+static Volatile<bool> s_isYieldProcessorNormalizedInitialized = false;
+static CrstStatic s_initializeYieldProcessorNormalizedCrst;
+
+void InitializeYieldProcessorNormalizedCrst()
+{
+ WRAPPER_NO_CONTRACT;
+ s_initializeYieldProcessorNormalizedCrst.Init(CrstLeafLock);
+}
+
+static void InitializeYieldProcessorNormalized()
+{
+ WRAPPER_NO_CONTRACT;
+
+ CrstHolder lock(&s_initializeYieldProcessorNormalizedCrst);
+
+ if (s_isYieldProcessorNormalizedInitialized)
+ {
+ return;
+ }
+
+ // Intel pre-Skylake processor: measured typically 14-17 cycles per yield
+ // Intel post-Skylake processor: measured typically 125-150 cycles per yield
+ const int MeasureDurationMs = 10;
+ const int NsPerSecond = 1000 * 1000 * 1000;
+
+ LARGE_INTEGER li;
+ if (!QueryPerformanceFrequency(&li) || (ULONGLONG)li.QuadPart < 1000 / MeasureDurationMs)
+ {
+ // High precision clock not available or clock resolution is too low, resort to defaults
+ s_isYieldProcessorNormalizedInitialized = true;
+ return;
+ }
+ ULONGLONG ticksPerSecond = li.QuadPart;
+
+ // Measure the nanosecond delay per yield
+ ULONGLONG measureDurationTicks = ticksPerSecond / (1000 / MeasureDurationMs);
+ unsigned int yieldCount = 0;
+ QueryPerformanceCounter(&li);
+ ULONGLONG startTicks = li.QuadPart;
+ ULONGLONG elapsedTicks;
+ do
+ {
+ // On some systems, querying the high performance counter has relatively significant overhead. Do enough yields to mask
+ // the timing overhead. Assuming one yield has a delay of MinNsPerNormalizedYield, 1000 yields would have a delay in the
+ // low microsecond range.
+ for (int i = 0; i < 1000; ++i)
+ {
+ YieldProcessor();
+ }
+ yieldCount += 1000;
+
+ QueryPerformanceCounter(&li);
+ ULONGLONG nowTicks = li.QuadPart;
+ elapsedTicks = nowTicks - startTicks;
+ } while (elapsedTicks < measureDurationTicks);
+ double nsPerYield = (double)elapsedTicks * NsPerSecond / ((double)yieldCount * ticksPerSecond);
+ if (nsPerYield < 1)
+ {
+ nsPerYield = 1;
+ }
+
+ // Calculate the number of yields required to span the duration of a normalized yield. Since nsPerYield is at least 1, this
+ // value is naturally limited to MinNsPerNormalizedYield.
+ int yieldsPerNormalizedYield = (int)(MinNsPerNormalizedYield / nsPerYield + 0.5);
+ if (yieldsPerNormalizedYield < 1)
+ {
+ yieldsPerNormalizedYield = 1;
+ }
+ _ASSERTE(yieldsPerNormalizedYield <= MinNsPerNormalizedYield);
+
+ // Calculate the maximum number of yields that would be optimal for a late spin iteration. Typically, we would not want to
+ // spend excessive amounts of time (thousands of cycles) doing only YieldProcessor, as SwitchToThread/Sleep would do a
+ // better job of allowing other work to run.
+ int optimalMaxNormalizedYieldsPerSpinIteration =
+ (int)(NsPerOptimalMaxSpinIterationDuration / (yieldsPerNormalizedYield * nsPerYield) + 0.5);
+ if (optimalMaxNormalizedYieldsPerSpinIteration < 1)
+ {
+ optimalMaxNormalizedYieldsPerSpinIteration = 1;
+ }
+
+ g_yieldsPerNormalizedYield = yieldsPerNormalizedYield;
+ g_optimalMaxNormalizedYieldsPerSpinIteration = optimalMaxNormalizedYieldsPerSpinIteration;
+ s_isYieldProcessorNormalizedInitialized = true;
+}
+
+void EnsureYieldProcessorNormalizedInitialized()
+{
+ WRAPPER_NO_CONTRACT;
+
+ if (!s_isYieldProcessorNormalizedInitialized)
+ {
+ InitializeYieldProcessorNormalized();
+ }
+}
diff --git a/src/vm/yieldprocessornormalized.h b/src/vm/yieldprocessornormalized.h
new file mode 100644
index 0000000000..8fcf10b7ca
--- /dev/null
+++ b/src/vm/yieldprocessornormalized.h
@@ -0,0 +1,103 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#pragma once
+
+const unsigned int MinNsPerNormalizedYield = 37; // measured typically 37-46 on post-Skylake
+const unsigned int NsPerOptimalMaxSpinIterationDuration = 272; // approx. 900 cycles, measured 281 on pre-Skylake, 263 on post-Skylake
+
+extern unsigned int g_yieldsPerNormalizedYield;
+extern unsigned int g_optimalMaxNormalizedYieldsPerSpinIteration;
+
+void InitializeYieldProcessorNormalizedCrst();
+void EnsureYieldProcessorNormalizedInitialized();
+
+class YieldProcessorNormalizationInfo
+{
+private:
+ unsigned int yieldsPerNormalizedYield;
+ unsigned int optimalMaxNormalizedYieldsPerSpinIteration;
+ unsigned int optimalMaxYieldsPerSpinIteration;
+
+public:
+ YieldProcessorNormalizationInfo()
+ : yieldsPerNormalizedYield(g_yieldsPerNormalizedYield),
+ optimalMaxNormalizedYieldsPerSpinIteration(g_optimalMaxNormalizedYieldsPerSpinIteration),
+ optimalMaxYieldsPerSpinIteration(yieldsPerNormalizedYield * optimalMaxNormalizedYieldsPerSpinIteration)
+ {
+ }
+
+ friend void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &);
+ friend void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &, unsigned int);
+ friend void YieldProcessorWithBackOffNormalized(const YieldProcessorNormalizationInfo &, unsigned int);
+};
+
+FORCEINLINE void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &normalizationInfo)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ unsigned int n = normalizationInfo.yieldsPerNormalizedYield;
+ _ASSERTE(n != 0);
+ do
+ {
+ YieldProcessor();
+ } while (--n != 0);
+}
+
+FORCEINLINE void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &normalizationInfo, unsigned int count)
+{
+ LIMITED_METHOD_CONTRACT;
+ _ASSERTE(count != 0);
+
+ if (sizeof(SIZE_T) <= sizeof(unsigned int))
+ {
+ // On platforms with a small SIZE_T, prevent overflow on the multiply below. normalizationInfo.yieldsPerNormalizedYield
+ // is limited to MinNsPerNormalizedYield by InitializeYieldProcessorNormalized().
+ const unsigned int MaxCount = (unsigned int)SIZE_T_MAX / MinNsPerNormalizedYield;
+ if (count > MaxCount)
+ {
+ count = MaxCount;
+ }
+ }
+
+ SIZE_T n = (SIZE_T)count * normalizationInfo.yieldsPerNormalizedYield;
+ _ASSERTE(n != 0);
+ do
+ {
+ YieldProcessor();
+ } while (--n != 0);
+}
+
+FORCEINLINE void YieldProcessorWithBackOffNormalized(
+ const YieldProcessorNormalizationInfo &normalizationInfo,
+ unsigned int spinIteration)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ // normalizationInfo.optimalMaxNormalizedYieldsPerSpinIteration cannot exceed the value below based on calculations done in
+ // InitializeYieldProcessorNormalized()
+ const unsigned int MaxOptimalMaxNormalizedYieldsPerSpinIteration =
+ NsPerOptimalMaxSpinIterationDuration * 3 / (MinNsPerNormalizedYield * 2) + 1;
+ _ASSERTE(normalizationInfo.optimalMaxNormalizedYieldsPerSpinIteration <= MaxOptimalMaxNormalizedYieldsPerSpinIteration);
+
+ // This shift value should be adjusted based on the asserted condition below
+ const UINT8 MaxShift = 3;
+ static_assert_no_msg(((unsigned int)1 << (MaxShift + 1)) >= MaxOptimalMaxNormalizedYieldsPerSpinIteration);
+
+ unsigned int n;
+ if (spinIteration <= MaxShift &&
+ ((unsigned int)1 << spinIteration) < normalizationInfo.optimalMaxNormalizedYieldsPerSpinIteration)
+ {
+ n = ((unsigned int)1 << spinIteration) * normalizationInfo.yieldsPerNormalizedYield;
+ }
+ else
+ {
+ n = normalizationInfo.optimalMaxYieldsPerSpinIteration;
+ }
+ _ASSERTE(n != 0);
+ do
+ {
+ YieldProcessor();
+ } while (--n != 0);
+}