summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tom Deseyn <tom.deseyn@gmail.com> 2018-01-29 21:11:10 +0100
committer: Maoni Stephens <Maoni0@users.noreply.github.com> 2018-01-29 12:11:10 -0800
commit: cb73944d6d159bd02adc29f0588b97f0f76da1a1 (patch)
tree: 9c4f406b9d5bf886c64fceb19213c25176e988cd
parent: 850a5bea5b8c317250f7f7ef85152d92468382b0 (diff)
download: coreclr-cb73944d6d159bd02adc29f0588b97f0f76da1a1.tar.gz
coreclr-cb73944d6d159bd02adc29f0588b97f0f76da1a1.tar.bz2
coreclr-cb73944d6d159bd02adc29f0588b97f0f76da1a1.zip
Perform PhysicalMemoryLimit check for workstation GC, refactor GetLargestOnDieCacheSize into GetCacheSizePerLogicalCpu (#15975)

* refactor: combine GetLargestOnDieCacheSize and GetLogicalCpuCount in GetCacheSizePerLogicalCpu
* Perform PhysicalMemoryLimit check also for workstation GC
-rw-r--r-- src/gc/env/gcenv.os.h 7
-rw-r--r-- src/gc/gc.cpp 24
-rw-r--r-- src/gc/unix/gcenv.unix.cpp 8
-rw-r--r-- src/gc/windows/gcenv.windows.cpp 9
-rw-r--r-- src/vm/CMakeLists.txt 1
-rw-r--r-- src/vm/amd64/cgenamd64.cpp 83
-rw-r--r-- src/vm/arm/stubs.cpp 7
-rw-r--r-- src/vm/arm64/cgenarm64.cpp 38
-rw-r--r-- src/vm/cgensys.h 6
-rw-r--r-- src/vm/gcenv.os.cpp 11
-rw-r--r-- src/vm/i386/cgenx86.cpp 83
-rw-r--r-- src/vm/util.cpp 32
12 files changed, 50 insertions, 259 deletions
diff --git a/src/gc/env/gcenv.os.h b/src/gc/env/gcenv.os.h
index 1707f0dabe..41e46f8f0f 100644
--- a/src/gc/env/gcenv.os.h
+++ b/src/gc/env/gcenv.os.h
@@ -282,16 +282,13 @@ public:
// Processor topology
//
- // Get number of logical processors
- static uint32_t GetLogicalCpuCount();
-
- // Get size of the largest cache on the processor die
+ // Get size of the on die cache per logical processor
// Parameters:
// trueSize - true to return true cache size, false to return scaled up size based on
// the processor architecture
// Return:
// Size of the cache
- static size_t GetLargestOnDieCacheSize(bool trueSize = true);
+ static size_t GetCacheSizePerLogicalCpu(bool trueSize = true);
// Get number of processors assigned to the current process
// Return:
diff --git a/src/gc/gc.cpp b/src/gc/gc.cpp
index 706c805db5..2b31388d38 100644
--- a/src/gc/gc.cpp
+++ b/src/gc/gc.cpp
@@ -15756,7 +15756,7 @@ void gc_heap::gc1()
size_t min_gc_size = dd_min_gc_size(dd);
// if min GC size larger than true on die cache, then don't bother
// limiting the desired size
- if ((min_gc_size <= GCToOSInterface::GetLargestOnDieCacheSize(TRUE) / GCToOSInterface::GetLogicalCpuCount()) &&
+ if ((min_gc_size <= GCToOSInterface::GetCacheSizePerLogicalCpu(TRUE)) &&
desired_per_heap <= 2*min_gc_size)
{
desired_per_heap = min_gc_size;
@@ -35523,19 +35523,26 @@ size_t GCHeap::GetValidGen0MaxSize(size_t seg_size)
#ifdef SERVER_GC
// performance data seems to indicate halving the size results
// in optimal perf. Ask for adjusted gen0 size.
- gen0size = max(GCToOSInterface::GetLargestOnDieCacheSize(FALSE)/GCToOSInterface::GetLogicalCpuCount(),(256*1024));
+ gen0size = max(GCToOSInterface::GetCacheSizePerLogicalCpu(FALSE),(256*1024));
// if gen0 size is too large given the available memory, reduce it.
// Get true cache size, as we don't want to reduce below this.
- size_t trueSize = max(GCToOSInterface::GetLargestOnDieCacheSize(TRUE)/GCToOSInterface::GetLogicalCpuCount(),(256*1024));
+ size_t trueSize = max(GCToOSInterface::GetCacheSizePerLogicalCpu(TRUE),(256*1024));
dprintf (2, ("cache: %Id-%Id, cpu: %Id",
- GCToOSInterface::GetLargestOnDieCacheSize(FALSE),
- GCToOSInterface::GetLargestOnDieCacheSize(TRUE),
- GCToOSInterface::GetLogicalCpuCount()));
+ GCToOSInterface::GetCacheSizePerLogicalCpu(FALSE),
+ GCToOSInterface::GetCacheSizePerLogicalCpu(TRUE)));
+
+ int n_heaps = gc_heap::n_heaps;
+#else //SERVER_GC
+ size_t trueSize = GCToOSInterface::GetCacheSizePerLogicalCpu(TRUE);
+ gen0size = max((4*trueSize/5),(256*1024));
+ trueSize = max(trueSize, (256*1024));
+ int n_heaps = 1;
+#endif //SERVER_GC
// if the total min GC across heaps will exceed 1/6th of available memory,
// then reduce the min GC size until it either fits or has been reduced to cache size.
- while ((gen0size * gc_heap::n_heaps) > GCToOSInterface::GetPhysicalMemoryLimit() / 6)
+ while ((gen0size * n_heaps) > GCToOSInterface::GetPhysicalMemoryLimit() / 6)
{
gen0size = gen0size / 2;
if (gen0size <= trueSize)
@@ -35544,9 +35551,6 @@ size_t GCHeap::GetValidGen0MaxSize(size_t seg_size)
break;
}
}
-#else //SERVER_GC
- gen0size = max((4*GCToOSInterface::GetLargestOnDieCacheSize(TRUE)/5),(256*1024));
-#endif //SERVER_GC
}
// Generation 0 must never be more than 1/2 the segment size.
diff --git a/src/gc/unix/gcenv.unix.cpp b/src/gc/unix/gcenv.unix.cpp
index 7bc6a37068..737c5efcf0 100644
--- a/src/gc/unix/gcenv.unix.cpp
+++ b/src/gc/unix/gcenv.unix.cpp
@@ -221,12 +221,6 @@ void GCToOSInterface::DebugBreak()
#endif
}
-// Get number of logical processors
-uint32_t GCToOSInterface::GetLogicalCpuCount()
-{
- return g_logicalCpuCount;
-}
-
// Causes the calling thread to sleep for the specified number of milliseconds
// Parameters:
// sleepMSec - time to sleep before switching to another thread
@@ -403,7 +397,7 @@ bool GCToOSInterface::GetWriteWatch(bool resetState, void* address, size_t size,
// the processor architecture
// Return:
// Size of the cache
-size_t GCToOSInterface::GetLargestOnDieCacheSize(bool trueSize)
+size_t GCToOSInterface::GetCacheSizePerLogicalCpu(bool trueSize)
{
// TODO(segilles) processor detection
return 0;
diff --git a/src/gc/windows/gcenv.windows.cpp b/src/gc/windows/gcenv.windows.cpp
index 69e5d7273a..e258834abc 100644
--- a/src/gc/windows/gcenv.windows.cpp
+++ b/src/gc/windows/gcenv.windows.cpp
@@ -228,13 +228,6 @@ void GCToOSInterface::DebugBreak()
::DebugBreak();
}
-// Get number of logical processors
-uint32_t GCToOSInterface::GetLogicalCpuCount()
-{
- // TODO(segilles) processor detection
- return 1;
-}
-
// Causes the calling thread to sleep for the specified number of milliseconds
// Parameters:
// sleepMSec - time to sleep before switching to another thread
@@ -381,7 +374,7 @@ bool GCToOSInterface::GetWriteWatch(bool resetState, void* address, size_t size,
// the processor architecture
// Return:
// Size of the cache
-size_t GCToOSInterface::GetLargestOnDieCacheSize(bool trueSize)
+size_t GCToOSInterface::GetCacheSizePerLogicalCpu(bool trueSize)
{
// TODO(segilles) processor detection (see src/vm/util.cpp:1935)
return 0;
diff --git a/src/vm/CMakeLists.txt b/src/vm/CMakeLists.txt
index 5a30bce6b8..cfa1b2ac43 100644
--- a/src/vm/CMakeLists.txt
+++ b/src/vm/CMakeLists.txt
@@ -477,7 +477,6 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM)
)
elseif(CLR_CMAKE_TARGET_ARCH_ARM64)
set(VM_SOURCES_DAC_AND_WKS_ARCH
- ${ARCH_SOURCES_DIR}/cgenarm64.cpp
${ARCH_SOURCES_DIR}/stubs.cpp
exceptionhandling.cpp
gcinfodecoder.cpp
diff --git a/src/vm/amd64/cgenamd64.cpp b/src/vm/amd64/cgenamd64.cpp
index 6075134943..56e3bfa738 100644
--- a/src/vm/amd64/cgenamd64.cpp
+++ b/src/vm/amd64/cgenamd64.cpp
@@ -458,89 +458,6 @@ BOOL GetAnyThunkTarget (CONTEXT *pctx, TADDR *pTarget, TADDR *pTargetMethodDesc)
// determine the number of logical cpus, or the machine is not populated uniformly with the same
// type of processors, this function returns 1.
-extern "C" DWORD __stdcall getcpuid(DWORD arg, unsigned char result[16]);
-
-// fix this if/when AMD does multicore or SMT
-DWORD GetLogicalCpuCount()
-{
- // No CONTRACT possible because GetLogicalCpuCount uses SEH
-
- STATIC_CONTRACT_THROWS;
- STATIC_CONTRACT_GC_NOTRIGGER;
-
- static DWORD val = 0;
-
- // cache value for later re-use
- if (val)
- {
- return val;
- }
-
- struct Param : DefaultCatchFilterParam
- {
- DWORD retVal;
- } param;
- param.pv = COMPLUS_EXCEPTION_EXECUTE_HANDLER;
- param.retVal = 1;
-
- PAL_TRY(Param *, pParam, &param)
- {
-
- unsigned char buffer[16];
- DWORD maxCpuId = getcpuid(0, buffer);
- DWORD* dwBuffer = (DWORD*)buffer;
-
- if (maxCpuId < 1)
- goto qExit;
-
- if (dwBuffer[1] == 'uneG') {
- if (dwBuffer[3] == 'Ieni') {
- if (dwBuffer[2] == 'letn') { // get SMT/multicore enumeration for Intel EM64T
-
-
- // TODO: Currently GetLogicalCpuCountFromOS() and GetLogicalCpuCountFallback() are broken on
- // multi-core processor, but we never call into those two functions since we don't halve the
- // gen0size when it's prescott and above processor. We keep the old version here for earlier
- // generation system(Northwood based), perf data suggests on those systems, halve gen0 size
- // still boost the performance(ex:Biztalk boosts about 17%). So on earlier systems(Northwood)
- // based, we still go ahead and halve gen0 size. The logic in GetLogicalCpuCountFromOS()
- // and GetLogicalCpuCountFallback() works fine for those earlier generation systems.
- // If it's a Prescott and above processor or Multi-core, perf data suggests not to halve gen0
- // size at all gives us overall better performance.
- // This is going to be fixed with a new version in orcas time frame.
-
- if( (maxCpuId > 3) && (maxCpuId < 0x80000000) )
- goto qExit;
-
- val = GetLogicalCpuCountFromOS(); //try to obtain HT enumeration from OS API
- if (val )
- {
- pParam->retVal = val; // OS API HT enumeration successful, we are Done
- goto qExit;
- }
-
- val = GetLogicalCpuCountFallback(); // Fallback to HT enumeration using CPUID
- if( val )
- pParam->retVal = val;
- }
- }
- }
-qExit: ;
- }
-
- PAL_EXCEPT_FILTER(DefaultCatchFilter)
- {
- }
- PAL_ENDTRY
-
- if (val == 0)
- {
- val = param.retVal;
- }
-
- return param.retVal;
-}
-
void EncodeLoadAndJumpThunk (LPBYTE pBuffer, LPVOID pv, LPVOID pTarget)
{
CONTRACTL
diff --git a/src/vm/arm/stubs.cpp b/src/vm/arm/stubs.cpp
index 38009167e3..9668d7b4f0 100644
--- a/src/vm/arm/stubs.cpp
+++ b/src/vm/arm/stubs.cpp
@@ -3369,13 +3369,6 @@ void emitCOMStubCall (ComCallMethodDesc *pCOMMethod, PCODE target)
#ifndef CROSSGEN_COMPILE
-DWORD GetLogicalCpuCount()
-{
- // Just use the OS to return this information (the APIs used exist on all versions of Windows which
- // support ARM).
- return GetLogicalCpuCountFromOS();
-}
-
#ifdef FEATURE_READYTORUN
//
diff --git a/src/vm/arm64/cgenarm64.cpp b/src/vm/arm64/cgenarm64.cpp
deleted file mode 100644
index 59905bf098..0000000000
--- a/src/vm/arm64/cgenarm64.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-//
-// Various helper routines for generating AMD64 assembly code.
-//
-
-// Precompiled Header
-
-#include "common.h"
-
-#include "stublink.h"
-#include "cgensys.h"
-#include "siginfo.hpp"
-#include "excep.h"
-#include "ecall.h"
-#include "dllimport.h"
-#include "dllimportcallback.h"
-#include "dbginterface.h"
-#include "fcall.h"
-#include "array.h"
-#include "virtualcallstub.h"
-
-#ifndef DACCESS_COMPILE
-
-// Note: This is only used on server GC on Windows.
-
-DWORD GetLogicalCpuCount()
-{
- LIMITED_METHOD_CONTRACT;
-
- // The contact with any callers of this function is that if we're unable to determine
- // the processor count, or the number of processors is not distributed evenly, then
- // we should return 1.
- return 1;
-}
-
-#endif // DACCESS_COMPILE
diff --git a/src/vm/cgensys.h b/src/vm/cgensys.h
index d55d15dd7d..b5158b722c 100644
--- a/src/vm/cgensys.h
+++ b/src/vm/cgensys.h
@@ -34,10 +34,6 @@ int CallJitEHFilter (CrawlFrame* pCf, BYTE* startPC, EE_ILEXCEPTION_CLAUSE *EHC
void CallJitEHFinally(CrawlFrame* pCf, BYTE* startPC, EE_ILEXCEPTION_CLAUSE *EHClausePtr, DWORD nestingLevel);
#endif // _TARGET_X86_
-
-// get number of logical to physical processors. Returns 1 on failure or non-intel x86 processors.
-DWORD GetLogicalCpuCount();
-
//These are in util.cpp
extern size_t GetLogicalProcessorCacheSizeFromOS();
extern size_t GetIntelDeterministicCacheEnum();
@@ -47,7 +43,7 @@ extern DWORD GetLogicalCpuCountFallback();
// Try to determine the largest last-level cache size of the machine - return 0 if unknown or no L2/L3 cache
-size_t GetLargestOnDieCacheSize(BOOL bTrueSize = TRUE);
+size_t GetCacheSizePerLogicalCpu(BOOL bTrueSize = TRUE);
#ifdef FEATURE_COMINTEROP
diff --git a/src/vm/gcenv.os.cpp b/src/vm/gcenv.os.cpp
index 78670b0af3..8d8630ec62 100644
--- a/src/vm/gcenv.os.cpp
+++ b/src/vm/gcenv.os.cpp
@@ -145,13 +145,6 @@ void GCToOSInterface::DebugBreak()
::DebugBreak();
}
-// Get number of logical processors
-uint32_t GCToOSInterface::GetLogicalCpuCount()
-{
- LIMITED_METHOD_CONTRACT;
- return ::GetLogicalCpuCount();
-}
-
// Causes the calling thread to sleep for the specified number of milliseconds
// Parameters:
// sleepMSec - time to sleep before switching to another thread
@@ -322,11 +315,11 @@ bool GCToOSInterface::GetWriteWatch(bool resetState, void* address, size_t size,
// the processor architecture
// Return:
// Size of the cache
-size_t GCToOSInterface::GetLargestOnDieCacheSize(bool trueSize)
+size_t GCToOSInterface::GetCacheSizePerLogicalCpu(bool trueSize)
{
LIMITED_METHOD_CONTRACT;
- return ::GetLargestOnDieCacheSize(trueSize);
+ return ::GetCacheSizePerLogicalCpu(trueSize);
}
// Sets the calling thread's affinity to only run on the processor specified
diff --git a/src/vm/i386/cgenx86.cpp b/src/vm/i386/cgenx86.cpp
index a43bc8558a..492cec4668 100644
--- a/src/vm/i386/cgenx86.cpp
+++ b/src/vm/i386/cgenx86.cpp
@@ -1513,89 +1513,6 @@ extern "C" DWORD __stdcall xmmYmmStateSupport()
#endif // !FEATURE_PAL
-// This function returns the number of logical processors on a given physical chip. If it cannot
-// determine the number of logical cpus, or the machine is not populated uniformly with the same
-// type of processors, this function returns 1.
-DWORD GetLogicalCpuCount()
-{
- // No CONTRACT possible because GetLogicalCpuCount uses SEH
-
- STATIC_CONTRACT_THROWS;
- STATIC_CONTRACT_GC_NOTRIGGER;
-
- static DWORD val = 0;
-
- // cache value for later re-use
- if (val)
- {
- return val;
- }
-
- struct Param : DefaultCatchFilterParam
- {
- DWORD retVal;
- } param;
- param.pv = COMPLUS_EXCEPTION_EXECUTE_HANDLER;
- param.retVal = 1;
-
- PAL_TRY(Param *, pParam, &param)
- {
- unsigned char buffer[16];
- DWORD* dwBuffer = NULL;
-
- DWORD maxCpuId = getcpuid(0, buffer);
-
- if (maxCpuId < 1)
- goto lDone;
-
- dwBuffer = (DWORD*)buffer;
-
- if (dwBuffer[1] == 'uneG') {
- if (dwBuffer[3] == 'Ieni') {
- if (dwBuffer[2] == 'letn') { // get SMT/multicore enumeration for Intel EM64T
-
- // TODO: Currently GetLogicalCpuCountFromOS() and GetLogicalCpuCountFallback() are broken on
- // multi-core processor, but we never call into those two functions since we don't halve the
- // gen0size when it's prescott and above processor. We keep the old version here for earlier
- // generation system(Northwood based), perf data suggests on those systems, halve gen0 size
- // still boost the performance(ex:Biztalk boosts about 17%). So on earlier systems(Northwood)
- // based, we still go ahead and halve gen0 size. The logic in GetLogicalCpuCountFromOS()
- // and GetLogicalCpuCountFallback() works fine for those earlier generation systems.
- // If it's a Prescott and above processor or Multi-core, perf data suggests not to halve gen0
- // size at all gives us overall better performance.
- // This is going to be fixed with a new version in orcas time frame.
-
- if( (maxCpuId > 3) && (maxCpuId < 0x80000000) )
- goto lDone;
-
- val = GetLogicalCpuCountFromOS(); //try to obtain HT enumeration from OS API
- if (val )
- {
- pParam->retVal = val; // OS API HT enumeration successful, we are Done
- goto lDone;
- }
-
- val = GetLogicalCpuCountFallback(); // OS API failed, Fallback to HT enumeration using CPUID
- if( val )
- pParam->retVal = val;
- }
- }
- }
-lDone: ;
- }
- PAL_EXCEPT_FILTER(DefaultCatchFilter)
- {
- }
- PAL_ENDTRY
-
- if (val == 0)
- {
- val = param.retVal;
- }
-
- return param.retVal;
-}
-
void UMEntryThunkCode::Encode(BYTE* pTargetCode, void* pvSecretParam)
{
LIMITED_METHOD_CONTRACT;
diff --git a/src/vm/util.cpp b/src/vm/util.cpp
index 692b72fc39..b9448dadbe 100644
--- a/src/vm/util.cpp
+++ b/src/vm/util.cpp
@@ -1854,9 +1854,10 @@ fDone:
#endif // _TARGET_X86_ || _TARGET_AMD64_
-size_t GetLargestOnDieCacheSize(BOOL bTrueSize)
+// fix this if/when AMD does multicore or SMT
+size_t GetCacheSizePerLogicalCpu(BOOL bTrueSize)
{
- // No CONTRACT possible because GetLargestOnDieCacheSize uses SEH
+ // No CONTRACT possible because GetCacheSizePerLogicalCpu uses SEH
STATIC_CONTRACT_NOTHROW;
STATIC_CONTRACT_GC_NOTRIGGER;
@@ -1911,6 +1912,31 @@ size_t GetLargestOnDieCacheSize(BOOL bTrueSize)
}
}
+ // TODO: Currently GetLogicalCpuCountFromOS() and GetLogicalCpuCountFallback() are broken on
+ // multi-core processor, but we never call into those two functions since we don't halve the
+ // gen0size when it's prescott and above processor. We keep the old version here for earlier
+ // generation system(Northwood based), perf data suggests on those systems, halve gen0 size
+ // still boost the performance(ex:Biztalk boosts about 17%). So on earlier systems(Northwood)
+ // based, we still go ahead and halve gen0 size. The logic in GetLogicalCpuCountFromOS()
+ // and GetLogicalCpuCountFallback() works fine for those earlier generation systems.
+ // If it's a Prescott and above processor or Multi-core, perf data suggests not to halve gen0
+ // size at all gives us overall better performance.
+ // This is going to be fixed with a new version in orcas time frame.
+ if (maxCpuId >= 2 && !((maxCpuId > 3) && (maxCpuId < 0x80000000)))
+ {
+ DWORD logicalProcessorCount = GetLogicalCpuCountFromOS(); //try to obtain HT enumeration from OS API
+
+ if (!logicalProcessorCount)
+ {
+ logicalProcessorCount = GetLogicalCpuCountFallback(); // OS API failed, Fallback to HT enumeration using CPUID
+ }
+
+ if (logicalProcessorCount)
+ {
+ tempSize = tempSize / logicalProcessorCount;
+ }
+ }
+
// update maxSize once with final value
maxTrueSize = tempSize;
@@ -2009,7 +2035,7 @@ size_t GetLargestOnDieCacheSize(BOOL bTrueSize)
maxSize = maxTrueSize * 3;
#endif
- // printf("GetLargestOnDieCacheSize returns %d, adjusted size %d\n", maxSize, maxTrueSize);
+ // printf("GetCacheSizePerLogicalCpu returns %d, adjusted size %d\n", maxSize, maxTrueSize);
if (bTrueSize)
return maxTrueSize;
else