From ee454e11d0c73820dde10ab710f9d6eccd4471f1 Mon Sep 17 00:00:00 2001 From: Tom Deseyn Date: Mon, 29 May 2017 15:46:10 +0200 Subject: Implement GetCurrentProcessCpuCount for Unix and merge NumberOfProcessors implementations. (#11742) * Implement GetCurrentProcessCpuCount for Unix and merge NumberOfProcessors implementations. * Split GetCurrentProcessCpuCount in PAL and non-PAL parts * gcenv.unix.cpp: GetCurrentProcessAffinityMask: revert, GetCurrentProcessCpuCount: add implementation * bugfix: #if instead of #ifdef HAVE_SCHED_GETAFFINITY * Remove PAL_GetCurrentProcessCpuCount * gcenv.unix: implement GetCurrentProcessAffinityMask, GetCurrentProcessCpuCount * numa.cpp: initialize g_cpuToAffinity[i].Number when numa.h is missing * cleanup * mscorwks_unixexports.src: Add GetProcessAffinityMask * PR feedback * Add GetProcessAffinityMask to mscordac_unixexports.src --- src/dlls/mscordac/mscordac_unixexports.src | 1 + src/dlls/mscoree/mscorwks_unixexports.src | 1 + src/gc/unix/config.h.in | 1 + src/gc/unix/configure.cmake | 2 + src/gc/unix/gcenv.unix.cpp | 96 ++++++++++++++++++++++++++++-- src/pal/src/misc/sysinfo.cpp | 24 ++------ src/pal/src/numa/numa.cpp | 18 +++++- src/utilcode/util.cpp | 27 ++------- 8 files changed, 120 insertions(+), 50 deletions(-) diff --git a/src/dlls/mscordac/mscordac_unixexports.src b/src/dlls/mscordac/mscordac_unixexports.src index 9881def613..b467ece16b 100644 --- a/src/dlls/mscordac/mscordac_unixexports.src +++ b/src/dlls/mscordac/mscordac_unixexports.src @@ -106,6 +106,7 @@ GetLastError GetLongPathNameW GetModuleFileNameW GetProcAddress +GetProcessAffinityMask GetProcessHeap GetShortPathNameW GetStdHandle diff --git a/src/dlls/mscoree/mscorwks_unixexports.src b/src/dlls/mscoree/mscorwks_unixexports.src index 28e9ac223f..271ff8ae7c 100644 --- a/src/dlls/mscoree/mscorwks_unixexports.src +++ b/src/dlls/mscoree/mscorwks_unixexports.src @@ -56,6 +56,7 @@ GetFileType GetFullPathNameW GetLongPathNameW GetProcAddress +GetProcessAffinityMask 
GetStdHandle GetSystemInfo GetTempFileNameW diff --git a/src/gc/unix/config.h.in b/src/gc/unix/config.h.in index 21980a7d08..3a56be9833 100644 --- a/src/gc/unix/config.h.in +++ b/src/gc/unix/config.h.in @@ -12,5 +12,6 @@ #cmakedefine01 HAVE_SCHED_GETCPU #cmakedefine01 HAVE_PTHREAD_CONDATTR_SETCLOCK #cmakedefine01 HAVE_MACH_ABSOLUTE_TIME +#cmakedefine01 HAVE_SCHED_GETAFFINITY #endif // __CONFIG_H__ diff --git a/src/gc/unix/configure.cmake b/src/gc/unix/configure.cmake index 5f2bdbd8b3..74ae70b1a4 100644 --- a/src/gc/unix/configure.cmake +++ b/src/gc/unix/configure.cmake @@ -52,4 +52,6 @@ check_cxx_source_runs(" } " HAVE_MACH_ABSOLUTE_TIME) +check_library_exists(c sched_getaffinity "" HAVE_SCHED_GETAFFINITY) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h) diff --git a/src/gc/unix/gcenv.unix.cpp b/src/gc/unix/gcenv.unix.cpp index 5fc63f47d3..bca0dfedf2 100644 --- a/src/gc/unix/gcenv.unix.cpp +++ b/src/gc/unix/gcenv.unix.cpp @@ -404,6 +404,18 @@ size_t GCToOSInterface::GetLargestOnDieCacheSize(bool trueSize) return 0; } +/*++ +Function: + GetFullAffinityMask + +Get affinity mask for the specified number of processors with all +the processors enabled. +--*/ +static uintptr_t GetFullAffinityMask(int cpuCount) +{ + return ((uintptr_t)1 << (cpuCount)) - 1; +} + // Get affinity mask of the current process // Parameters: // processMask - affinity mask for the specified process @@ -417,10 +429,62 @@ size_t GCToOSInterface::GetLargestOnDieCacheSize(bool trueSize) // A process affinity mask is a subset of the system affinity mask. A process is only allowed // to run on the processors configured into a system. Therefore, the process affinity mask cannot // specify a 1 bit for a processor when the system affinity mask specifies a 0 bit for that processor. 
-bool GCToOSInterface::GetCurrentProcessAffinityMask(uintptr_t* processMask, uintptr_t* systemMask) +bool GCToOSInterface::GetCurrentProcessAffinityMask(uintptr_t* processAffinityMask, uintptr_t* systemAffinityMask) { - // TODO(segilles) processor detection - return false; + if (g_logicalCpuCount > 64) + { + *processAffinityMask = 0; + *systemAffinityMask = 0; + return true; + } + + uintptr_t systemMask = GetFullAffinityMask(g_logicalCpuCount); + +#if HAVE_SCHED_GETAFFINITY + + int pid = getpid(); + cpu_set_t cpuSet; + int st = sched_getaffinity(pid, sizeof(cpu_set_t), &cpuSet); + if (st == 0) + { + uintptr_t processMask = 0; + + for (int i = 0; i < g_logicalCpuCount; i++) + { + if (CPU_ISSET(i, &cpuSet)) + { + processMask |= ((uintptr_t)1) << i; + } + } + + *processAffinityMask = processMask; + *systemAffinityMask = systemMask; + return true; + } + else if (errno == EINVAL) + { + // There are more processors than can fit in a cpu_set_t + // return zero in both masks. + *processAffinityMask = 0; + *systemAffinityMask = 0; + return true; + } + else + { + // We should not get any of the errors that the sched_getaffinity can return since none + // of them applies for the current thread, so this is an unexpected kind of failure. + return false; + } + +#else // HAVE_SCHED_GETAFFINITY + + // There is no API to manage thread affinity, so let's return both affinity masks + // with all the CPUs on the system set. 
+ *systemAffinityMask = systemMask; + *processAffinityMask = systemMask; + return true; + +#endif // HAVE_SCHED_GETAFFINITY } // Get number of processors assigned to the current process @@ -428,7 +492,31 @@ bool GCToOSInterface::GetCurrentProcessAffinityMask(uintptr_t* processMask, uint // The number of processors uint32_t GCToOSInterface::GetCurrentProcessCpuCount() { - return g_logicalCpuCount; + uintptr_t pmask, smask; + + if (!GetCurrentProcessAffinityMask(&pmask, &smask)) + return 1; + + pmask &= smask; + + int count = 0; + while (pmask) + { + pmask &= (pmask - 1); + count++; + } + + // GetProcessAffinityMask can return pmask=0 and smask=0 on systems with more + // than 64 processors, which would leave us with a count of 0. Since the GC + // expects there to be at least one processor to run on (and thus at least one + // heap), we'll return 64 here if count is 0, since there are likely a ton of + // processors available in that case. The GC also cannot (currently) handle + // the case where there are more than 64 processors, so we will return a + // maximum of 64 here. + if (count == 0 || count > 64) + count = 64; + + return count; } // Return the size of the user-mode portion of the virtual address space of this process. 
diff --git a/src/pal/src/misc/sysinfo.cpp b/src/pal/src/misc/sysinfo.cpp index 70fe3e65d2..e1785688dc 100644 --- a/src/pal/src/misc/sysinfo.cpp +++ b/src/pal/src/misc/sysinfo.cpp @@ -94,13 +94,10 @@ SET_DEFAULT_DEBUG_CHANNEL(MISC); #endif #endif // __APPLE__ -/*++ -Function: - GetNumberOfProcessors -Return number of processors available for the current process ---*/ -int GetNumberOfProcessors() +DWORD +PALAPI +PAL_GetLogicalCpuCountFromOS() { int nrcpus = 0; @@ -170,7 +167,7 @@ GetSystemInfo( lpSystemInfo->dwPageSize = pagesize; lpSystemInfo->dwActiveProcessorMask_PAL_Undefined = 0; - nrcpus = GetNumberOfProcessors(); + nrcpus = PAL_GetLogicalCpuCountFromOS(); TRACE("dwNumberOfProcessors=%d\n", nrcpus); lpSystemInfo->dwNumberOfProcessors = nrcpus; @@ -387,19 +384,6 @@ PAL_HasGetCurrentProcessorNumber() return HAVE_SCHED_GETCPU; } -DWORD -PALAPI -PAL_GetLogicalCpuCountFromOS() -{ - DWORD numLogicalCores = 0; - -#if HAVE_SYSCONF - numLogicalCores = sysconf(_SC_NPROCESSORS_ONLN); -#endif - - return numLogicalCores; -} - size_t PALAPI PAL_GetLogicalProcessorCacheSizeFromOS() diff --git a/src/pal/src/numa/numa.cpp b/src/pal/src/numa/numa.cpp index 549c10a71f..3289ea356e 100644 --- a/src/pal/src/numa/numa.cpp +++ b/src/pal/src/numa/numa.cpp @@ -42,8 +42,6 @@ using namespace CorUnix; typedef cpuset_t cpu_set_t; #endif -int GetNumberOfProcessors(); - // CPU affinity descriptor struct CpuAffinity { @@ -208,11 +206,17 @@ NUMASupportInitialize() #endif // HAVE_NUMA_H { // No NUMA - g_cpuCount = GetNumberOfProcessors(); + g_cpuCount = PAL_GetLogicalCpuCountFromOS(); g_groupCount = 1; g_highestNumaNode = 0; AllocateLookupArrays(); + + for (int i = 0; i < g_cpuCount; i++) + { + g_cpuToAffinity[i].Number = i; + g_cpuToAffinity[i].Group = 0; + } } return TRUE; @@ -599,6 +603,14 @@ GetProcessAffinityMask( *lpProcessAffinityMask = processMask; *lpSystemAffinityMask = systemMask; } + else if (errno == EINVAL) + { + // There are more processors than can fit in a cpu_set_t + // return 
zero in both masks. + *lpProcessAffinityMask = 0; + *lpSystemAffinityMask = 0; + success = TRUE; + } else { // We should not get any of the errors that the sched_getaffinity can return since none diff --git a/src/utilcode/util.cpp b/src/utilcode/util.cpp index b7fca3ed9b..068126bf60 100644 --- a/src/utilcode/util.cpp +++ b/src/utilcode/util.cpp @@ -1248,9 +1248,6 @@ found: return m_threadUseAllCpuGroups; } -//****************************************************************************** -// Returns the number of processors that a process has been configured to run on -//****************************************************************************** //****************************************************************************** // Returns the number of processors that a process has been configured to run on //****************************************************************************** @@ -1269,27 +1266,20 @@ int GetCurrentProcessCpuCount() if (cCPUs != 0) return cCPUs; -#ifndef FEATURE_PAL - DWORD_PTR pmask, smask; if (!GetProcessAffinityMask(GetCurrentProcess(), &pmask, &smask)) return 1; - if (pmask == 1) - return 1; - pmask &= smask; - + int count = 0; while (pmask) { - if (pmask & 1) - count++; - - pmask >>= 1; + pmask &= (pmask - 1); + count++; } - + // GetProcessAffinityMask can return pmask=0 and smask=0 on systems with more // than 64 processors, which would leave us with a count of 0. Since the GC // expects there to be at least one processor to run on (and thus at least one @@ -1303,15 +1293,6 @@ int GetCurrentProcessCpuCount() cCPUs = count; return count; - -#else // !FEATURE_PAL - - SYSTEM_INFO sysInfo; - ::GetSystemInfo(&sysInfo); - cCPUs = sysInfo.dwNumberOfProcessors; - return sysInfo.dwNumberOfProcessors; - -#endif // !FEATURE_PAL } DWORD_PTR GetCurrentProcessCpuMask() -- cgit v1.2.3