// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. #pragma once // Undefine YieldProcessor to encourage using the normalized versions below instead. System_YieldProcessor() can be used where // the intention is to use the system-default implementation of YieldProcessor(). #define HAS_SYSTEM_YIELDPROCESSOR FORCEINLINE void System_YieldProcessor() { YieldProcessor(); } #ifdef YieldProcessor #undef YieldProcessor #endif #define YieldProcessor Dont_Use_YieldProcessor const unsigned int MinNsPerNormalizedYield = 37; // measured typically 37-46 on post-Skylake const unsigned int NsPerOptimalMaxSpinIterationDuration = 272; // approx. 900 cycles, measured 281 on pre-Skylake, 263 on post-Skylake extern unsigned int g_yieldsPerNormalizedYield; extern unsigned int g_optimalMaxNormalizedYieldsPerSpinIteration; void InitializeYieldProcessorNormalizedCrst(); void EnsureYieldProcessorNormalizedInitialized(); class YieldProcessorNormalizationInfo { private: unsigned int yieldsPerNormalizedYield; unsigned int optimalMaxNormalizedYieldsPerSpinIteration; unsigned int optimalMaxYieldsPerSpinIteration; public: YieldProcessorNormalizationInfo() : yieldsPerNormalizedYield(g_yieldsPerNormalizedYield), optimalMaxNormalizedYieldsPerSpinIteration(g_optimalMaxNormalizedYieldsPerSpinIteration), optimalMaxYieldsPerSpinIteration(yieldsPerNormalizedYield * optimalMaxNormalizedYieldsPerSpinIteration) { } friend void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &); friend void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &, unsigned int); friend void YieldProcessorNormalizedForPreSkylakeCount(const YieldProcessorNormalizationInfo &, unsigned int); friend void YieldProcessorWithBackOffNormalized(const YieldProcessorNormalizationInfo &, unsigned int); }; // See YieldProcessorNormalized() for preliminary info. Typical usage: // if (!condition) // { // YieldProcessorNormalizationInfo normalizationInfo; // do // { // YieldProcessorNormalized(normalizationInfo); // } while (!condition); // } FORCEINLINE void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &normalizationInfo) { unsigned int n = normalizationInfo.yieldsPerNormalizedYield; _ASSERTE(n != 0); do { System_YieldProcessor(); } while (--n != 0); } // Delays execution of the current thread for a short duration. Unlike YieldProcessor(), an effort is made to normalize the // delay across processors. The actual delay may be meaningful in several ways, including but not limited to the following: // - The delay should be long enough that a tiny spin-wait like the following has a decent likelihood of observing a new value // for the condition (when changed by a different thread) on each iteration, otherwise it may unnecessary increase CPU usage // and decrease scalability of the operation. // while(!condition) // { // YieldProcessorNormalized(); // } // - The delay should be short enough that a tiny spin-wait like above would not miss multiple cross-thread changes to the // condition, otherwise it may unnecessarily increase latency of the operation // - In reasonably short spin-waits, the actual delay may not matter much. In unreasonably long spin-waits that progress in // yield count per iteration for each failed check of the condition, the progression can significantly magnify the second // issue above on later iterations. // - This function and variants are intended to provide a decent balance between the above issues, as ideal solutions to each // issue have trade-offs between them. If latency of the operation is far more important in the scenario, consider using // System_YieldProcessor() instead, which would issue a delay that is typically <= the delay issued by this method. FORCEINLINE void YieldProcessorNormalized() { YieldProcessorNormalized(YieldProcessorNormalizationInfo()); } // See YieldProcessorNormalized(count) for preliminary info. Typical usage: // if (!moreExpensiveCondition) // { // YieldProcessorNormalizationInfo normalizationInfo; // do // { // YieldProcessorNormalized(normalizationInfo, 2); // } while (!moreExpensiveCondition); // } FORCEINLINE void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &normalizationInfo, unsigned int count) { _ASSERTE(count != 0); if (sizeof(SIZE_T) <= sizeof(unsigned int)) { // On platforms with a small SIZE_T, prevent overflow on the multiply below. normalizationInfo.yieldsPerNormalizedYield // is limited to MinNsPerNormalizedYield by InitializeYieldProcessorNormalized(). const unsigned int MaxCount = (unsigned int)SIZE_MAX / MinNsPerNormalizedYield; if (count > MaxCount) { count = MaxCount; } } SIZE_T n = (SIZE_T)count * normalizationInfo.yieldsPerNormalizedYield; _ASSERTE(n != 0); do { System_YieldProcessor(); } while (--n != 0); } // See YieldProcessorNormalized() for preliminary info. This function repeats the delay 'count' times. This overload is // preferred over the single-count overload when multiple yields are desired per spin-wait iteration. Typical usage: // while(!moreExpensiveCondition) // { // YieldProcessorNormalized(2); // } FORCEINLINE void YieldProcessorNormalized(unsigned int count) { YieldProcessorNormalized(YieldProcessorNormalizationInfo(), count); } // Please DO NOT use this function in new code! See YieldProcessorNormalizedForPreSkylakeCount(preSkylakeCount) for preliminary // info. Typical usage: // if (!condition) // { // YieldProcessorNormalizationInfo normalizationInfo; // do // { // YieldProcessorNormalizedForPreSkylakeCount(normalizationInfo, 100); // } while (!condition); // } FORCEINLINE void YieldProcessorNormalizedForPreSkylakeCount( const YieldProcessorNormalizationInfo &normalizationInfo, unsigned int preSkylakeCount) { _ASSERTE(preSkylakeCount != 0); if (sizeof(SIZE_T) <= sizeof(unsigned int)) { // On platforms with a small SIZE_T, prevent overflow on the multiply below. normalizationInfo.yieldsPerNormalizedYield // is limited to MinNsPerNormalizedYield by InitializeYieldProcessorNormalized(). const unsigned int MaxCount = (unsigned int)SIZE_MAX / MinNsPerNormalizedYield; if (preSkylakeCount > MaxCount) { preSkylakeCount = MaxCount; } } const unsigned int PreSkylakeCountToSkylakeCountDivisor = 8; SIZE_T n = (SIZE_T)preSkylakeCount * normalizationInfo.yieldsPerNormalizedYield / PreSkylakeCountToSkylakeCountDivisor; if (n == 0) { n = 1; } do { System_YieldProcessor(); } while (--n != 0); } // Please DO NOT use this function in new code! This function is to be used for old spin-wait loops that have not been retuned // for recent processors, and especially where the yield count may be unreasonably high. The function scales the yield count in // an attempt to normalize the total delay across processors, to approximately the total delay that would be issued on a // pre-Skylake processor. New code should be tuned with YieldProcessorNormalized() or variants instead. Typical usage: // while(!condition) // { // YieldProcessorNormalizedForPreSkylakeCount(100); // } FORCEINLINE void YieldProcessorNormalizedForPreSkylakeCount(unsigned int preSkylakeCount) { YieldProcessorNormalizedForPreSkylakeCount(YieldProcessorNormalizationInfo(), preSkylakeCount); } // See YieldProcessorNormalized() for preliminary info. This function is to be used when there is a decent possibility that the // condition would not be satisfied within a short duration. The current implementation increases the delay per spin-wait // iteration exponentially up to a limit. Typical usage: // if (!conditionThatMayNotBeSatisfiedSoon) // { // YieldProcessorNormalizationInfo normalizationInfo; // do // { // YieldProcessorWithBackOffNormalized(normalizationInfo); // maybe Sleep(0) occasionally // } while (!conditionThatMayNotBeSatisfiedSoon); // } FORCEINLINE void YieldProcessorWithBackOffNormalized( const YieldProcessorNormalizationInfo &normalizationInfo, unsigned int spinIteration) { // normalizationInfo.optimalMaxNormalizedYieldsPerSpinIteration cannot exceed the value below based on calculations done in // InitializeYieldProcessorNormalized() const unsigned int MaxOptimalMaxNormalizedYieldsPerSpinIteration = NsPerOptimalMaxSpinIterationDuration * 3 / (MinNsPerNormalizedYield * 2) + 1; _ASSERTE(normalizationInfo.optimalMaxNormalizedYieldsPerSpinIteration <= MaxOptimalMaxNormalizedYieldsPerSpinIteration); // This shift value should be adjusted based on the asserted condition below const UINT8 MaxShift = 3; static_assert_no_msg(((unsigned int)1 << (MaxShift + 1)) >= MaxOptimalMaxNormalizedYieldsPerSpinIteration); unsigned int n; if (spinIteration <= MaxShift && ((unsigned int)1 << spinIteration) < normalizationInfo.optimalMaxNormalizedYieldsPerSpinIteration) { n = ((unsigned int)1 << spinIteration) * normalizationInfo.yieldsPerNormalizedYield; } else { n = normalizationInfo.optimalMaxYieldsPerSpinIteration; } _ASSERTE(n != 0); do { System_YieldProcessor(); } while (--n != 0); }