diff options
-rw-r--r-- | src/dlls/mscordac/mscordac_unixexports.src | 1 | ||||
-rw-r--r-- | src/gc/gc.cpp | 4 | ||||
-rw-r--r-- | src/inc/utilcode.h | 4 | ||||
-rw-r--r-- | src/pal/inc/pal.h | 155 | ||||
-rw-r--r-- | src/pal/src/CMakeLists.txt | 7 | ||||
-rw-r--r-- | src/pal/src/config.h.in | 5 | ||||
-rw-r--r-- | src/pal/src/configure.cmake | 11 | ||||
-rw-r--r-- | src/pal/src/include/pal/dbgmsg.h | 2 | ||||
-rw-r--r-- | src/pal/src/include/pal/numa.h | 39 | ||||
-rw-r--r-- | src/pal/src/init/pal.cpp | 8 | ||||
-rw-r--r-- | src/pal/src/map/virtual.cpp | 1 | ||||
-rw-r--r-- | src/pal/src/misc/sysinfo.cpp | 55 | ||||
-rw-r--r-- | src/pal/src/numa/numa.cpp | 692 | ||||
-rw-r--r-- | src/utilcode/util.cpp | 34 | ||||
-rw-r--r-- | src/vm/gcenv.os.cpp | 4 |
15 files changed, 977 insertions, 45 deletions
diff --git a/src/dlls/mscordac/mscordac_unixexports.src b/src/dlls/mscordac/mscordac_unixexports.src index c2c96fab10..b0c3b0463c 100644 --- a/src/dlls/mscordac/mscordac_unixexports.src +++ b/src/dlls/mscordac/mscordac_unixexports.src @@ -21,6 +21,7 @@ PAL_GetPALDirectoryW PAL_GetResourceString PAL_get_stdout PAL_get_stderr +PAL_GetCurrentThread PAL_GetSymbolModuleBase PAL_GetTransportPipeName PAL_InitializeDLL diff --git a/src/gc/gc.cpp b/src/gc/gc.cpp index 6d53c897fb..46820d1274 100644 --- a/src/gc/gc.cpp +++ b/src/gc/gc.cpp @@ -5151,7 +5151,6 @@ void gc_heap::destroy_thread_support () } } -#if !defined(FEATURE_PAL) void set_thread_group_affinity_for_heap(int heap_number, GCThreadAffinity* affinity) { affinity->Group = GCThreadAffinity::None; @@ -5231,7 +5230,6 @@ void set_thread_affinity_mask_for_heap(int heap_number, GCThreadAffinity* affini } } } -#endif // !FEATURE_PAL bool gc_heap::create_gc_thread () { @@ -5241,7 +5239,6 @@ bool gc_heap::create_gc_thread () affinity.Group = GCThreadAffinity::None; affinity.Processor = GCThreadAffinity::None; -#if !defined(FEATURE_PAL) if (!gc_thread_no_affinitize_p) { // We are about to set affinity for GC threads. 
It is a good place to set up NUMA and @@ -5252,7 +5249,6 @@ bool gc_heap::create_gc_thread () else set_thread_affinity_mask_for_heap(heap_number, &affinity); } -#endif // !FEATURE_PAL return GCToOSInterface::CreateThread(gc_thread_stub, this, &affinity); } diff --git a/src/inc/utilcode.h b/src/inc/utilcode.h index 5394f1beb2..06133cfc08 100644 --- a/src/inc/utilcode.h +++ b/src/inc/utilcode.h @@ -1380,7 +1380,7 @@ public: static BOOL CanEnableGCNumaAware(); static void InitNumaNodeInfo(); -#if !defined(FEATURE_REDHAWK)&& !defined(FEATURE_PAL) +#if !defined(FEATURE_REDHAWK) private: // apis types //GetNumaHighestNodeNumber() @@ -1448,7 +1448,7 @@ public: static DWORD CalculateCurrentProcessorNumber(); //static void PopulateCPUUsageArray(void * infoBuffer, ULONG infoSize); -#if !defined(FEATURE_REDHAWK) && !defined(FEATURE_PAL) +#if !defined(FEATURE_REDHAWK) private: //GetLogicalProcessorInforomationEx() typedef BOOL diff --git a/src/pal/inc/pal.h b/src/pal/inc/pal.h index 5d2739e7b8..9e97edc8f6 100644 --- a/src/pal/inc/pal.h +++ b/src/pal/inc/pal.h @@ -4826,6 +4826,161 @@ RegisterEventSourceW ( #endif // !UNICODE // +// NUMA related APIs +// + +typedef enum _PROCESSOR_CACHE_TYPE { + CacheUnified, + CacheInstruction, + CacheData, + CacheTrace +} PROCESSOR_CACHE_TYPE; + +typedef struct _PROCESSOR_NUMBER { + WORD Group; + BYTE Number; + BYTE Reserved; +} PROCESSOR_NUMBER, *PPROCESSOR_NUMBER; + +typedef enum _LOGICAL_PROCESSOR_RELATIONSHIP { + RelationProcessorCore, + RelationNumaNode, + RelationCache, + RelationProcessorPackage, + RelationGroup, + RelationAll = 0xffff +} LOGICAL_PROCESSOR_RELATIONSHIP; + +typedef ULONG_PTR KAFFINITY; + +#define ANYSIZE_ARRAY 1 + +typedef struct _GROUP_AFFINITY { + KAFFINITY Mask; + WORD Group; + WORD Reserved[3]; +} GROUP_AFFINITY, *PGROUP_AFFINITY; + +typedef struct _PROCESSOR_GROUP_INFO { + BYTE MaximumProcessorCount; + BYTE ActiveProcessorCount; + BYTE Reserved[38]; + KAFFINITY ActiveProcessorMask; +} PROCESSOR_GROUP_INFO, 
*PPROCESSOR_GROUP_INFO; + +typedef struct _PROCESSOR_RELATIONSHIP { + BYTE Flags; + BYTE EfficiencyClass; + BYTE Reserved[21]; + WORD GroupCount; + GROUP_AFFINITY GroupMask[ANYSIZE_ARRAY]; +} PROCESSOR_RELATIONSHIP, *PPROCESSOR_RELATIONSHIP; + +typedef struct _GROUP_RELATIONSHIP { + WORD MaximumGroupCount; + WORD ActiveGroupCount; + BYTE Reserved[20]; + PROCESSOR_GROUP_INFO GroupInfo[ANYSIZE_ARRAY]; +} GROUP_RELATIONSHIP, *PGROUP_RELATIONSHIP; + +typedef struct _NUMA_NODE_RELATIONSHIP { + DWORD NodeNumber; + BYTE Reserved[20]; + GROUP_AFFINITY GroupMask; +} NUMA_NODE_RELATIONSHIP, *PNUMA_NODE_RELATIONSHIP; + +typedef struct _CACHE_RELATIONSHIP { + BYTE Level; + BYTE Associativity; + WORD LineSize; + DWORD CacheSize; + PROCESSOR_CACHE_TYPE Type; + BYTE Reserved[20]; + GROUP_AFFINITY GroupMask; +} CACHE_RELATIONSHIP, *PCACHE_RELATIONSHIP; + +typedef struct _SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX { + LOGICAL_PROCESSOR_RELATIONSHIP Relationship; + DWORD Size; + union { + PROCESSOR_RELATIONSHIP Processor; + NUMA_NODE_RELATIONSHIP NumaNode; + CACHE_RELATIONSHIP Cache; + GROUP_RELATIONSHIP Group; + }; +} SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, *PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX; + + +PALIMPORT +BOOL +PALAPI +GetNumaHighestNodeNumber( + OUT PULONG HighestNodeNumber +); + +PALIMPORT +BOOL +PALAPI +GetNumaProcessorNodeEx( + IN PPROCESSOR_NUMBER Processor, + OUT PUSHORT NodeNumber +); + +PALIMPORT +LPVOID +PALAPI +VirtualAllocExNuma( + IN HANDLE hProcess, + IN OPTIONAL LPVOID lpAddress, + IN SIZE_T dwSize, + IN DWORD flAllocationType, + IN DWORD flProtect, + IN DWORD nndPreferred +); + +PALIMPORT +BOOL +PALAPI +GetLogicalProcessorInformationEx( + IN LOGICAL_PROCESSOR_RELATIONSHIP RelationshipType, + OUT OPTIONAL PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer, + IN OUT PDWORD ReturnedLength +); + +PALIMPORT +BOOL +PALAPI +SetThreadGroupAffinity( + IN HANDLE hThread, + IN const GROUP_AFFINITY *GroupAffinity, + OUT OPTIONAL PGROUP_AFFINITY PreviousGroupAffinity +); + 
+PALIMPORT +BOOL +PALAPI +GetThreadGroupAffinity( + IN HANDLE hThread, + OUT PGROUP_AFFINITY GroupAffinity +); + +PALIMPORT +VOID +PALAPI +GetCurrentProcessorNumberEx( + OUT PPROCESSOR_NUMBER ProcNumber +); + +PALIMPORT +BOOL +PALAPI +GetProcessAffinityMask( + IN HANDLE hProcess, + OUT PDWORD_PTR lpProcessAffinityMask, + OUT PDWORD_PTR lpSystemAffinityMask +); + +// // The types of events that can be logged. // #define EVENTLOG_SUCCESS 0x0000 diff --git a/src/pal/src/CMakeLists.txt b/src/pal/src/CMakeLists.txt index 5314cdf86b..145c2c9ed9 100644 --- a/src/pal/src/CMakeLists.txt +++ b/src/pal/src/CMakeLists.txt @@ -181,6 +181,7 @@ set(SOURCES misc/sysinfo.cpp misc/time.cpp misc/utils.cpp + numa/numa.cpp objmgr/palobjbase.cpp objmgr/shmobject.cpp objmgr/shmobjectmanager.cpp @@ -372,6 +373,12 @@ if(CMAKE_SYSTEM_NAME STREQUAL NetBSD) ) endif(CMAKE_SYSTEM_NAME STREQUAL NetBSD) +if(HAVE_NUMA_H) + target_link_libraries(coreclrpal + numa + ) +endif(HAVE_NUMA_H) + add_subdirectory(examples) if(FEATURE_EVENT_TRACE) diff --git a/src/pal/src/config.h.in b/src/pal/src/config.h.in index c2939f3011..e3024ac4d5 100644 --- a/src/pal/src/config.h.in +++ b/src/pal/src/config.h.in @@ -21,6 +21,8 @@ #cmakedefine01 HAVE_SYS_SYSCTL_H #cmakedefine01 HAVE_GNU_LIBNAMES_H #cmakedefine01 HAVE_PRCTL_H +#cmakedefine01 HAVE_NUMA_H +#cmakedefine01 HAVE_PTHREAD_NP_H #cmakedefine01 HAVE_KQUEUE #cmakedefine01 HAVE_GETPWUID_R @@ -33,6 +35,8 @@ #cmakedefine01 HAVE_PTHREAD_GETATTR_NP #cmakedefine01 HAVE_PTHREAD_GETCPUCLOCKID #cmakedefine01 HAVE_PTHREAD_SIGQUEUE +#cmakedefine01 HAVE_PTHREAD_GETAFFINITY_NP +#cmakedefine01 HAVE_CPUSET_T #cmakedefine01 HAVE_SIGRETURN #cmakedefine01 HAVE__THREAD_SYS_SIGRETURN #cmakedefine01 HAVE_COPYSIGN @@ -57,6 +61,7 @@ #cmakedefine01 HAS_SYSV_SEMAPHORES #cmakedefine01 HAS_PTHREAD_MUTEXES #cmakedefine01 HAVE_TTRACE +#cmakedefine01 HAVE_SCHED_GETAFFINITY #cmakedefine HAVE_UNW_GET_SAVE_LOC #cmakedefine HAVE_UNW_GET_ACCESSORS #cmakedefine01 HAVE_XSWDEV diff --git 
a/src/pal/src/configure.cmake b/src/pal/src/configure.cmake index 03c7343056..f9a23e8e60 100644 --- a/src/pal/src/configure.cmake +++ b/src/pal/src/configure.cmake @@ -35,6 +35,8 @@ check_include_files(libunwind.h HAVE_LIBUNWIND_H) check_include_files(runetype.h HAVE_RUNETYPE_H) check_include_files(semaphore.h HAVE_SEMAPHORE_H) check_include_files(sys/prctl.h HAVE_PRCTL_H) +check_include_files(numa.h HAVE_NUMA_H) +check_include_files(pthread_np.h HAVE_PTHREAD_NP_H) if(NOT CMAKE_SYSTEM_NAME STREQUAL FreeBSD AND NOT CMAKE_SYSTEM_NAME STREQUAL NetBSD) set(CMAKE_REQUIRED_FLAGS "-ldl") @@ -69,6 +71,7 @@ check_library_exists(${PTHREAD_LIBRARY} pthread_attr_get_np "" HAVE_PTHREAD_ATTR check_library_exists(${PTHREAD_LIBRARY} pthread_getattr_np "" HAVE_PTHREAD_GETATTR_NP) check_library_exists(${PTHREAD_LIBRARY} pthread_getcpuclockid "" HAVE_PTHREAD_GETCPUCLOCKID) check_library_exists(${PTHREAD_LIBRARY} pthread_sigqueue "" HAVE_PTHREAD_SIGQUEUE) +check_library_exists(${PTHREAD_LIBRARY} pthread_getaffinity_np "" HAVE_PTHREAD_GETAFFINITY_NP) check_function_exists(sigreturn HAVE_SIGRETURN) check_function_exists(_thread_sys_sigreturn HAVE__THREAD_SYS_SIGRETURN) @@ -119,6 +122,14 @@ int main(int argc, char **argv) { }" HAVE_UNW_GET_ACCESSORS) set(CMAKE_REQUIRED_LIBRARIES) +check_cxx_source_compiles(" +#include <pthread_np.h> +int main(int argc, char **argv) { + cpuset_t cpuSet; + + return 0; +}" HAVE_CPUSET_T) + check_struct_has_member ("struct stat" st_atimespec "sys/types.h;sys/stat.h" HAVE_STAT_TIMESPEC) check_struct_has_member ("struct stat" st_atimensec "sys/types.h;sys/stat.h" HAVE_STAT_NSEC) check_struct_has_member ("struct tm" tm_gmtoff time.h HAVE_TM_GMTOFF) diff --git a/src/pal/src/include/pal/dbgmsg.h b/src/pal/src/include/pal/dbgmsg.h index 7a49fc0ad6..052c6fa775 100644 --- a/src/pal/src/include/pal/dbgmsg.h +++ b/src/pal/src/include/pal/dbgmsg.h @@ -194,7 +194,7 @@ typedef enum #ifdef FEATURE_PAL_SXS DCI_SXS, #endif // FEATURE_PAL_SXS - + DCI_NUMA, DCI_LAST } 
DBG_CHANNEL_ID; diff --git a/src/pal/src/include/pal/numa.h b/src/pal/src/include/pal/numa.h new file mode 100644 index 0000000000..4fb2308a7d --- /dev/null +++ b/src/pal/src/include/pal/numa.h @@ -0,0 +1,39 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +/*++ + + + +Module Name: + + include/pal/numa.h + +Abstract: + + Header file for the NUMA functions. + + + +--*/ + +#ifndef _PAL_NUMA_H_ +#define _PAL_NUMA_H_ + +#ifdef __cplusplus +extern "C" +{ +#endif // __cplusplus + +BOOL +NUMASupportInitialize(); + +VOID +NUMASupportCleanup(); + +#ifdef __cplusplus +} +#endif // __cplusplus + +#endif /* _PAL_NUMA_H_ */ diff --git a/src/pal/src/init/pal.cpp b/src/pal/src/init/pal.cpp index 8b0e0f53f5..2fdafe4f8c 100644 --- a/src/pal/src/init/pal.cpp +++ b/src/pal/src/init/pal.cpp @@ -42,6 +42,7 @@ SET_DEFAULT_DEBUG_CHANNEL(PAL); // some headers have code with asserts, so do th #include "pal/debug.h" #include "pal/locale.h" #include "pal/init.h" +#include "pal/numa.h" #include "pal/stackstring.hpp" #if HAVE_MACH_EXCEPTIONS @@ -523,6 +524,12 @@ Initialize( goto CLEANUP15; } + if (FALSE == NUMASupportInitialize()) + { + ERROR("Unable to initialize NUMA support\n"); + goto CLEANUP15; + } + TRACE("First-time PAL initialization complete.\n"); init_count++; @@ -548,6 +555,7 @@ Initialize( } goto done; + NUMASupportCleanup(); /* No cleanup required for CRTInitStdStreams */ CLEANUP15: FILECleanupStdHandles(); diff --git a/src/pal/src/map/virtual.cpp b/src/pal/src/map/virtual.cpp index d52ba1e896..7e00843b7a 100644 --- a/src/pal/src/map/virtual.cpp +++ b/src/pal/src/map/virtual.cpp @@ -1350,7 +1350,6 @@ done: return pRetVal; } - /*++ Function: VirtualFree diff --git a/src/pal/src/misc/sysinfo.cpp b/src/pal/src/misc/sysinfo.cpp index fff051818f..70fe3e65d2 100644 --- a/src/pal/src/misc/sysinfo.cpp +++ 
b/src/pal/src/misc/sysinfo.cpp @@ -94,6 +94,39 @@ SET_DEFAULT_DEBUG_CHANNEL(MISC); #endif #endif // __APPLE__ +/*++ +Function: + GetNumberOfProcessors + +Return number of processors available for the current process +--*/ +int GetNumberOfProcessors() +{ + int nrcpus = 0; + +#if HAVE_SYSCONF + nrcpus = sysconf(_SC_NPROCESSORS_ONLN); + if (nrcpus < 1) + { + ASSERT("sysconf failed for _SC_NPROCESSORS_ONLN (%d)\n", errno); + } +#elif HAVE_SYSCTL + int rc; + size_t sz; + int mib[2]; + + sz = sizeof(nrcpus); + mib[0] = CTL_HW; + mib[1] = HW_NCPU; + rc = sysctl(mib, 2, &nrcpus, &sz, NULL, 0); + if (rc != 0) + { + ASSERT("sysctl failed for HW_NCPU (%d)\n", errno); + } +#endif // HAVE_SYSCONF + + return nrcpus; +} /*++ Function: @@ -137,27 +170,7 @@ GetSystemInfo( lpSystemInfo->dwPageSize = pagesize; lpSystemInfo->dwActiveProcessorMask_PAL_Undefined = 0; -#if HAVE_SYSCONF - nrcpus = sysconf(_SC_NPROCESSORS_ONLN); - if (nrcpus < 1) - { - ASSERT("sysconf failed for _SC_NPROCESSORS_ONLN (%d)\n", errno); - } -#elif HAVE_SYSCTL - int rc; - size_t sz; - int mib[2]; - - sz = sizeof(nrcpus); - mib[0] = CTL_HW; - mib[1] = HW_NCPU; - rc = sysctl(mib, 2, &nrcpus, &sz, NULL, 0); - if (rc != 0) - { - ASSERT("sysctl failed for HW_NCPU (%d)\n", errno); - } -#endif // HAVE_SYSCONF - + nrcpus = GetNumberOfProcessors(); TRACE("dwNumberOfProcessors=%d\n", nrcpus); lpSystemInfo->dwNumberOfProcessors = nrcpus; diff --git a/src/pal/src/numa/numa.cpp b/src/pal/src/numa/numa.cpp new file mode 100644 index 0000000000..549c10a71f --- /dev/null +++ b/src/pal/src/numa/numa.cpp @@ -0,0 +1,692 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +/*++ + + + +Module Name: + + numa.cpp + +Abstract: + + Implementation of NUMA related APIs + +--*/ + +#include "pal/dbgmsg.h" +SET_DEFAULT_DEBUG_CHANNEL(NUMA); + +#include "pal/palinternal.h" +#include "pal/dbgmsg.h" +#include "pal/numa.h" +#include "pal/corunix.hpp" +#include "pal/thread.hpp" + +#if HAVE_NUMA_H +#include <numa.h> +#include <numaif.h> +#endif + +#if HAVE_PTHREAD_NP_H +#include <pthread_np.h> +#endif + +#include <pthread.h> + +using namespace CorUnix; + +#if HAVE_CPUSET_T +typedef cpuset_t cpu_set_t; +#endif + +int GetNumberOfProcessors(); + +// CPU affinity descriptor +struct CpuAffinity +{ + // NUMA node + BYTE Node; + // CPU number relative to the group the CPU is in + BYTE Number; + // CPU group + WORD Group; +}; + +// Array mapping global CPU index to its affinity +CpuAffinity *g_cpuToAffinity = NULL; + +// Array mapping CPU group and index in the group to the global CPU index +short *g_groupAndIndexToCpu = NULL; +// Array mapping CPU group to the corresponding affinity mask of the CPUs in the group +KAFFINITY *g_groupToCpuMask = NULL; +// Array mapping CPU group to the number of processors in the group +BYTE *g_groupToCpuCount = NULL; + +// Total number of processors in the system +int g_cpuCount = 0; +// Total number of CPU groups +int g_groupCount = 0; +// The highest NUMA node available +int g_highestNumaNode = 0; + +static const int MaxCpusPerGroup = 8 * sizeof(KAFFINITY); +static const WORD NO_GROUP = 0xffff; + +/*++ +Function: + AllocateLookupArrays + +Allocate CPU and group lookup arrays +--*/ +VOID +AllocateLookupArrays() +{ + g_groupAndIndexToCpu = (short*)malloc(g_groupCount * MaxCpusPerGroup * sizeof(short)); + g_cpuToAffinity = (CpuAffinity*)malloc(g_cpuCount * sizeof(CpuAffinity)); + g_groupToCpuMask = (KAFFINITY*)malloc(g_groupCount * sizeof(KAFFINITY)); + g_groupToCpuCount = (BYTE*)malloc(g_groupCount * sizeof(BYTE)); + + memset(g_groupAndIndexToCpu, 0xff, g_groupCount * MaxCpusPerGroup * sizeof(short)); + 
memset(g_cpuToAffinity, 0xff, g_cpuCount * sizeof(CpuAffinity)); + memset(g_groupToCpuMask, 0, g_groupCount * sizeof(KAFFINITY)); + memset(g_groupToCpuCount, 0, g_groupCount * sizeof(BYTE)); +} + +/*++ +Function: + FreeLookupArrays + +Free CPU and group lookup arrays +--*/ +VOID +FreeLookupArrays() +{ + free(g_groupAndIndexToCpu); + free(g_cpuToAffinity); + free(g_groupToCpuMask); + free(g_groupToCpuCount); + + g_groupAndIndexToCpu = NULL; + g_cpuToAffinity = NULL; + g_groupToCpuMask = NULL; + g_groupToCpuCount = NULL; +} + +/*++ +Function: + GetFullAffinityMask + +Get affinity mask for the specified number of processors with all +the processors enabled. +--*/ +KAFFINITY GetFullAffinityMask(int cpuCount) +{ + return ((KAFFINITY)1 << (cpuCount)) - 1; +} + +/*++ +Function: + NUMASupportInitialize + +Initialize data structures for getting and setting thread affinities to processors and +querying NUMA related processor information. +On systems with no NUMA support, it behaves as if there was a single NUMA node with +a single group of processors. 
+--*/ +BOOL +NUMASupportInitialize() +{ +#if HAVE_NUMA_H + if (numa_available() != -1) + { + struct bitmask *mask = numa_allocate_cpumask(); + int numaNodesCount = numa_max_node() + 1; + + g_cpuCount = numa_num_possible_cpus(); + g_groupCount = 0; + + for (int i = 0; i < numaNodesCount; i++) + { + int st = numa_node_to_cpus(i, mask); + // The only failure that can happen is that the mask is not large enough + // but that cannot happen since the mask was allocated by numa_allocate_cpumask + _ASSERTE(st == 0); + unsigned int nodeCpuCount = numa_bitmask_weight(mask); + unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup; + g_groupCount += nodeGroupCount; + } + + AllocateLookupArrays(); + + WORD currentGroup = 0; + int currentGroupCpus = 0; + + for (int i = 0; i < numaNodesCount; i++) + { + int st = numa_node_to_cpus(i, mask); + // The only failure that can happen is that the mask is not large enough + // but that cannot happen since the mask was allocated by numa_allocate_cpumask + _ASSERTE(st == 0); + unsigned int nodeCpuCount = numa_bitmask_weight(mask); + unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup; + for (int j = 0; j < g_cpuCount; j++) + { + if (numa_bitmask_isbitset(mask, j)) + { + if (currentGroupCpus == MaxCpusPerGroup) + { + g_groupToCpuCount[currentGroup] = MaxCpusPerGroup; + g_groupToCpuMask[currentGroup] = GetFullAffinityMask(MaxCpusPerGroup); + currentGroupCpus = 0; + currentGroup++; + } + g_cpuToAffinity[j].Node = i; + g_cpuToAffinity[j].Group = currentGroup; + g_cpuToAffinity[j].Number = currentGroupCpus; + g_groupAndIndexToCpu[currentGroup * MaxCpusPerGroup + currentGroupCpus] = j; + currentGroupCpus++; + } + } + + if (currentGroupCpus != 0) + { + g_groupToCpuCount[currentGroup] = currentGroupCpus; + g_groupToCpuMask[currentGroup] = GetFullAffinityMask(currentGroupCpus); + currentGroupCpus = 0; + currentGroup++; + } + } + + numa_free_cpumask(mask); + + g_highestNumaNode = 
numa_max_node(); + } + else +#endif // HAVE_NUMA_H + { + // No NUMA + g_cpuCount = GetNumberOfProcessors(); + g_groupCount = 1; + g_highestNumaNode = 0; + + AllocateLookupArrays(); + } + + return TRUE; +} + +/*++ +Function: + NUMASupportCleanup + +Cleanup of the NUMA support data structures +--*/ +VOID +NUMASupportCleanup() +{ + FreeLookupArrays(); +} + +/*++ +Function: + GetNumaHighestNodeNumber + +See MSDN doc. +--*/ +BOOL +PALAPI +GetNumaHighestNodeNumber( + OUT PULONG HighestNodeNumber +) +{ + PERF_ENTRY(GetNumaHighestNodeNumber); + ENTRY("GetNumaHighestNodeNumber(HighestNodeNumber=%p)\n", HighestNodeNumber); + *HighestNodeNumber = (ULONG)g_highestNumaNode; + + BOOL success = TRUE; + + LOGEXIT("GetNumaHighestNodeNumber returns BOOL %d\n", success); + PERF_EXIT(GetNumaHighestNodeNumber); + + return success; +} + +/*++ +Function: + GetNumaProcessorNodeEx + +See MSDN doc. +--*/ +BOOL +PALAPI +GetNumaProcessorNodeEx( + IN PPROCESSOR_NUMBER Processor, + OUT PUSHORT NodeNumber +) +{ + PERF_ENTRY(GetNumaProcessorNodeEx); + ENTRY("GetNumaProcessorNodeEx(Processor=%p, NodeNumber=%p)\n", Processor, NodeNumber); + + BOOL success = FALSE; + + if ((Processor->Group < g_groupCount) && + (Processor->Number < MaxCpusPerGroup) && + (Processor->Reserved == 0)) + { + short cpu = g_groupAndIndexToCpu[Processor->Group * MaxCpusPerGroup + Processor->Number]; + if (cpu != -1) + { + *NodeNumber = g_cpuToAffinity[cpu].Node; + success = TRUE; + } + } + + if (!success) + { + *NodeNumber = 0xffff; + SetLastError(ERROR_INVALID_PARAMETER); + } + + LOGEXIT("GetNumaProcessorNodeEx returns BOOL %d\n", success); + PERF_EXIT(GetNumaProcessorNodeEx); + + return success; +} + +/*++ +Function: + GetLogicalProcessorInformationEx + +See MSDN doc. 
+--*/ +BOOL +PALAPI +GetLogicalProcessorInformationEx( + IN LOGICAL_PROCESSOR_RELATIONSHIP RelationshipType, + OUT OPTIONAL PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer, + IN OUT PDWORD ReturnedLength +) +{ + PERF_ENTRY(GetLogicalProcessorInformationEx); + ENTRY("GetLogicalProcessorInformationEx(RelationshipType=%d, Buffer=%p, ReturnedLength=%p)\n", RelationshipType, Buffer, ReturnedLength); + + BOOL success = FALSE; + + if (RelationshipType == RelationGroup) + { + size_t requiredSize = __builtin_offsetof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, Group); + requiredSize += __builtin_offsetof(GROUP_RELATIONSHIP, GroupInfo); + requiredSize += g_groupCount * sizeof(PROCESSOR_GROUP_INFO); + + if (*ReturnedLength >= requiredSize) + { + Buffer->Relationship = RelationGroup; + Buffer->Size = requiredSize; + Buffer->Group.MaximumGroupCount = g_groupCount; + Buffer->Group.ActiveGroupCount = g_groupCount; + for (int i = 0; i < g_groupCount; i++) + { + Buffer->Group.GroupInfo[i].MaximumProcessorCount = MaxCpusPerGroup; + Buffer->Group.GroupInfo[i].ActiveProcessorCount = g_groupToCpuCount[i]; + Buffer->Group.GroupInfo[i].ActiveProcessorMask = g_groupToCpuMask[i]; + } + + success = TRUE; + } + else + { + SetLastError(ERROR_INSUFFICIENT_BUFFER); + } + + *ReturnedLength = requiredSize; + } + else + { + // We only support the group relationship + SetLastError(ERROR_INVALID_PARAMETER); + } + + LOGEXIT("GetLogicalProcessorInformationEx returns BOOL %d\n", success); + PERF_EXIT(GetLogicalProcessorInformationEx); + + return success; +} + +/*++ +Function: + GetThreadGroupAffinityInternal + +Get the group affinity for the specified pthread +--*/ +BOOL +GetThreadGroupAffinityInternal( + IN pthread_t thread, + OUT PGROUP_AFFINITY GroupAffinity +) +{ + BOOL success = FALSE; + +#if HAVE_PTHREAD_GETAFFINITY_NP + cpu_set_t cpuSet; + + int st = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &cpuSet); + + if (st == 0) + { + WORD group = NO_GROUP; + KAFFINITY mask = 0; + + for (int i = 0; i 
< g_cpuCount; i++) + { + if (CPU_ISSET(i, &cpuSet)) + { + WORD g = g_cpuToAffinity[i].Group; + // Unless the thread affinity was already set by SetThreadGroupAffinity, it is possible that + // the current thread has affinity with processors from multiple groups. So we report just the + // first group we find. + if (group == NO_GROUP || g == group) + { + group = g; + mask |= ((KAFFINITY)1) << g_cpuToAffinity[i].Number; + } + } + } + + GroupAffinity->Group = group; + GroupAffinity->Mask = mask; + success = TRUE; + } + else + { + SetLastError(ERROR_GEN_FAILURE); + } +#else // HAVE_PTHREAD_GETAFFINITY_NP + // There is no API to manage thread affinity, so let's return a group affinity + // with all the CPUs on the system. + GroupAffinity->Group = 0; + GroupAffinity->Mask = GetFullAffinityMask(g_cpuCount); + success = TRUE; +#endif // HAVE_PTHREAD_GETAFFINITY_NP + + return success; +} + +/*++ +Function: + GetThreadGroupAffinity + +See MSDN doc. +--*/ +BOOL +PALAPI +GetThreadGroupAffinity( + IN HANDLE hThread, + OUT PGROUP_AFFINITY GroupAffinity +) +{ + PERF_ENTRY(GetThreadGroupAffinity); + ENTRY("GetThreadGroupAffinity(hThread=%p, GroupAffinity=%p)\n", hThread, GroupAffinity); + + CPalThread *palThread = InternalGetCurrentThread(); + + BOOL success = GetThreadGroupAffinityInternal(palThread->GetPThreadSelf(), GroupAffinity); + + LOGEXIT("GetThreadGroupAffinity returns BOOL %d\n", success); + PERF_EXIT(GetThreadGroupAffinity); + + return success; +} + + +/*++ +Function: + SetThreadGroupAffinity + +See MSDN doc. 
+--*/ +BOOL +PALAPI +SetThreadGroupAffinity( + IN HANDLE hThread, + IN const GROUP_AFFINITY *GroupAffinity, + OUT OPTIONAL PGROUP_AFFINITY PreviousGroupAffinity +) +{ + PERF_ENTRY(SetThreadGroupAffinity); + ENTRY("SetThreadGroupAffinity(hThread=%p, GroupAffinity=%p, PreviousGroupAffinity=%p)\n", hThread, GroupAffinity, PreviousGroupAffinity); + + CPalThread *palThread = InternalGetCurrentThread(); + + pthread_t thread = palThread->GetPThreadSelf(); + + if (PreviousGroupAffinity != NULL) + { + GetThreadGroupAffinityInternal(thread, PreviousGroupAffinity); + } + +#if HAVE_PTHREAD_GETAFFINITY_NP + int groupStartIndex = GroupAffinity->Group * MaxCpusPerGroup; + KAFFINITY mask = 1; + cpu_set_t cpuSet; + CPU_ZERO(&cpuSet); + + for (int i = 0; i < MaxCpusPerGroup; i++, mask <<= 1) + { + if (GroupAffinity->Mask & mask) + { + int cpu = g_groupAndIndexToCpu[groupStartIndex + i]; + if (cpu != -1) + { + CPU_SET(cpu, &cpuSet); + } + } + } + + int st = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuSet); + + if (st == -1) + { + switch (errno) + { + case EINVAL: + // There is no processor in the mask that is allowed to execute the process + SetLastError(ERROR_INVALID_PARAMETER); + break; + case EPERM: + SetLastError(ERROR_ACCESS_DENIED); + break; + default: + SetLastError(ERROR_GEN_FAILURE); + break; + } + } + + BOOL success = (st == 0); +#else // HAVE_PTHREAD_GETAFFINITY_NP + // There is no API to manage thread affinity, so let's ignore the request + BOOL success = TRUE; +#endif // HAVE_PTHREAD_GETAFFINITY_NP + + LOGEXIT("SetThreadGroupAffinity returns BOOL %d\n", success); + PERF_EXIT(SetThreadGroupAffinity); + + return success; +} + +/*++ +Function: + GetCurrentProcessorNumberEx + +See MSDN doc. 
+--*/ +VOID +PALAPI +GetCurrentProcessorNumberEx( + OUT PPROCESSOR_NUMBER ProcNumber +) +{ + PERF_ENTRY(GetCurrentProcessorNumberEx); + ENTRY("GetCurrentProcessorNumberEx(ProcNumber=%p\n", ProcNumber); + + DWORD cpu = GetCurrentProcessorNumber(); + _ASSERTE(cpu < g_cpuCount); + ProcNumber->Group = g_cpuToAffinity[cpu].Group; + ProcNumber->Number = g_cpuToAffinity[cpu].Number; + + LOGEXIT("GetCurrentProcessorNumberEx\n"); + PERF_EXIT(GetCurrentProcessorNumberEx); +} + +/*++ +Function: + GetProcessAffinityMask + +See MSDN doc. +--*/ +BOOL +PALAPI +GetProcessAffinityMask( + IN HANDLE hProcess, + OUT PDWORD_PTR lpProcessAffinityMask, + OUT PDWORD_PTR lpSystemAffinityMask +) +{ + PERF_ENTRY(GetProcessAffinityMask); + ENTRY("GetProcessAffinityMask(hProcess=%p, lpProcessAffinityMask=%p, lpSystemAffinityMask=%p\n", hProcess, lpProcessAffinityMask, lpSystemAffinityMask); + + BOOL success = FALSE; + + if (hProcess == GetCurrentProcess()) + { + DWORD_PTR systemMask = GetFullAffinityMask(g_cpuCount); + +#if HAVE_SCHED_GETAFFINITY + int pid = getpid(); + cpu_set_t cpuSet; + int st = sched_getaffinity(pid, sizeof(cpu_set_t), &cpuSet); + if (st == 0) + { + WORD group = NO_GROUP; + DWORD_PTR processMask = 0; + + for (int i = 0; i < g_cpuCount; i++) + { + if (CPU_ISSET(i, &cpuSet)) + { + WORD g = g_cpuToAffinity[i].Group; + if (group == NO_GROUP || g == group) + { + group = g; + processMask |= ((DWORD_PTR)1) << g_cpuToAffinity[i].Number; + } + else + { + // The process has affinity in more than one group, in such case + // the function needs to return zero in both masks. + processMask = 0; + systemMask = 0; + group = NO_GROUP; + break; + } + } + } + + success = TRUE; + + *lpProcessAffinityMask = processMask; + *lpSystemAffinityMask = systemMask; + } + else + { + // We should not get any of the errors that the sched_getaffinity can return since none + // of them applies for the current thread, so this is an unexpected kind of failure. 
+ SetLastError(ERROR_GEN_FAILURE); + } +#else // HAVE_SCHED_GETAFFINITY + // There is no API to manage thread affinity, so let's return both affinity masks + // with all the CPUs on the system set. + *lpSystemAffinityMask = systemMask; + *lpProcessAffinityMask = systemMask; + + success = TRUE; +#endif // HAVE_SCHED_GETAFFINITY + } + else + { + // PAL supports getting affinity mask for the current process only + SetLastError(ERROR_INVALID_PARAMETER); + } + + LOGEXIT("GetProcessAffinityMask returns BOOL %d\n", success); + PERF_EXIT(GetProcessAffinityMask); + + return success; +} + +/*++ +Function: + VirtualAllocExNuma + +See MSDN doc. +The memory is allocated by VirtualAlloc and then, when libnuma is available, +bound to the preferred NUMA node using mbind. The binding is just a hint, the +allocated memory is returned even if the binding cannot be established. +--*/ +LPVOID +PALAPI +VirtualAllocExNuma( + IN HANDLE hProcess, + IN OPTIONAL LPVOID lpAddress, + IN SIZE_T dwSize, + IN DWORD flAllocationType, + IN DWORD flProtect, + IN DWORD nndPreferred +) +{ + PERF_ENTRY(VirtualAllocExNuma); + ENTRY("VirtualAllocExNuma(hProcess=%p, lpAddress=%p, dwSize=%u, flAllocationType=%#x, flProtect=%#x, nndPreferred=%d\n", + hProcess, lpAddress, dwSize, flAllocationType, flProtect, nndPreferred); + + LPVOID result = NULL; + + if (hProcess == GetCurrentProcess()) + { + if (nndPreferred <= g_highestNumaNode) + { + result = VirtualAlloc(lpAddress, dwSize, flAllocationType, flProtect); +#if HAVE_NUMA_H + if (result != NULL) + { + // The node mask is an array of words holding one bit per NUMA node + const size_t bitsPerWord = 8 * sizeof(unsigned long); + const size_t nodeMaskLength = ((size_t)g_highestNumaNode + bitsPerWord) / bitsPerWord; + unsigned long *nodeMask = (unsigned long*)malloc(nodeMaskLength * sizeof(unsigned long)); + + if (nodeMask != NULL) + { + memset(nodeMask, 0, nodeMaskLength * sizeof(unsigned long)); + nodeMask[nndPreferred / bitsPerWord] = ((unsigned long)1) << (nndPreferred % bitsPerWord); + + // maxnode is passed as the mask size in bits plus one, the same way libnuma calls mbind + int st = mbind(result, dwSize, MPOL_PREFERRED, nodeMask, nodeMaskLength * bitsPerWord + 1, 0); + free(nodeMask); + _ASSERTE(st == 0); + } + // If the mask allocation or the mbind fails, we still return the allocated memory since the nndPreferred is just a hint + } +#endif // HAVE_NUMA_H + } + else + { + // The specified node number is larger than the maximum 
available one + SetLastError(ERROR_INVALID_PARAMETER); + } + } + else + { + // PAL supports allocating from the current process virtual space only + SetLastError(ERROR_INVALID_PARAMETER); + } + + LOGEXIT("VirtualAllocExNuma returns %p\n", result); + PERF_EXIT(VirtualAllocExNuma); + + return result; +} diff --git a/src/utilcode/util.cpp b/src/utilcode/util.cpp index c215a49213..a8786def92 100644 --- a/src/utilcode/util.cpp +++ b/src/utilcode/util.cpp @@ -728,7 +728,7 @@ BYTE * ClrVirtualAllocWithinRange(const BYTE *pMinAddr, //****************************************************************************** // NumaNodeInfo //****************************************************************************** -#if !defined(FEATURE_REDHAWK) && !defined(FEATURE_PAL) +#if !defined(FEATURE_REDHAWK) /*static*/ NumaNodeInfo::PGNHNN NumaNodeInfo::m_pGetNumaHighestNodeNumber = NULL; /*static*/ NumaNodeInfo::PVAExN NumaNodeInfo::m_pVirtualAllocExNuma = NULL; @@ -748,15 +748,19 @@ BYTE * ClrVirtualAllocWithinRange(const BYTE *pMinAddr, /*static*/ BOOL NumaNodeInfo::m_enableGCNumaAware = FALSE; /*static*/ BOOL NumaNodeInfo::InitNumaNodeInfoAPI() { -#if !defined(FEATURE_REDHAWK) && !defined(FEATURE_PAL) +#if !defined(FEATURE_REDHAWK) //check for numa support if multiple heaps are used ULONG highest = 0; if (CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_GCNumaAware) == 0) return FALSE; +#ifndef FEATURE_PAL // check if required APIs are supported HMODULE hMod = GetModuleHandleW(WINDOWS_KERNEL32_DLLNAME_W); +#else + HMODULE hMod = GetCLRModule(); +#endif if (hMod == NULL) return FALSE; @@ -795,7 +799,7 @@ BYTE * ClrVirtualAllocWithinRange(const BYTE *pMinAddr, //****************************************************************************** // NumaNodeInfo //****************************************************************************** -#if !defined(FEATURE_REDHAWK) && !defined(FEATURE_PAL) +#if !defined(FEATURE_REDHAWK) /*static*/ CPUGroupInfo::PGLPIEx 
CPUGroupInfo::m_pGetLogicalProcessorInformationEx = NULL; /*static*/ CPUGroupInfo::PSTGA CPUGroupInfo::m_pSetThreadGroupAffinity = NULL; /*static*/ CPUGroupInfo::PGTGA CPUGroupInfo::m_pGetThreadGroupAffinity = NULL; @@ -848,8 +852,12 @@ BYTE * ClrVirtualAllocWithinRange(const BYTE *pMinAddr, } CONTRACTL_END; -#if !defined(FEATURE_REDHAWK) && defined(_TARGET_AMD64_) && !defined(FEATURE_PAL) +#if !defined(FEATURE_REDHAWK) && defined(_TARGET_AMD64_) +#ifndef FEATURE_PAL HMODULE hMod = GetModuleHandleW(WINDOWS_KERNEL32_DLLNAME_W); +#else + HMODULE hMod = GetCLRModule(); +#endif if (hMod == NULL) return FALSE; @@ -869,17 +877,19 @@ BYTE * ClrVirtualAllocWithinRange(const BYTE *pMinAddr, if (m_pGetCurrentProcessorNumberEx == NULL) return FALSE; +#ifndef FEATURE_PAL m_pGetSystemTimes = (PGST)GetProcAddress(hMod, "GetSystemTimes"); if (m_pGetSystemTimes == NULL) return FALSE; - +#endif + return TRUE; #else return FALSE; #endif } -#if !defined(FEATURE_REDHAWK) && defined(_TARGET_AMD64_) && !defined(FEATURE_PAL) +#if !defined(FEATURE_REDHAWK) && defined(_TARGET_AMD64_) // Calculate greatest common divisor DWORD GCD(DWORD u, DWORD v) { @@ -910,7 +920,7 @@ DWORD LCM(DWORD u, DWORD v) } CONTRACTL_END; -#if !defined(FEATURE_REDHAWK) && defined(_TARGET_AMD64_) && !defined(FEATURE_PAL) +#if !defined(FEATURE_REDHAWK) && defined(_TARGET_AMD64_) BYTE *bBuffer = NULL; SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *pSLPIEx = NULL; SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *pRecord = NULL; @@ -985,7 +995,7 @@ DWORD LCM(DWORD u, DWORD v) { LIMITED_METHOD_CONTRACT; -#if !defined(FEATURE_REDHAWK) && defined(_TARGET_AMD64_) && !defined(FEATURE_PAL) +#if !defined(FEATURE_REDHAWK) && defined(_TARGET_AMD64_) WORD begin = 0; WORD nr_proc = 0; @@ -1012,7 +1022,7 @@ DWORD LCM(DWORD u, DWORD v) } CONTRACTL_END; -#if !defined(FEATURE_REDHAWK) && defined(_TARGET_AMD64_) && !defined(FEATURE_PAL) +#if !defined(FEATURE_REDHAWK) && defined(_TARGET_AMD64_) BOOL enableGCCPUGroups = 
CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_GCCpuGroup) != 0; BOOL threadUseAllCpuGroups = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_Thread_UseAllCpuGroups) != 0; @@ -1095,7 +1105,7 @@ retry: { LIMITED_METHOD_CONTRACT; -#if !defined(FEATURE_REDHAWK) && defined(_TARGET_AMD64_) && !defined(FEATURE_PAL) +#if !defined(FEATURE_REDHAWK) && defined(_TARGET_AMD64_) WORD bTemp = 0; WORD bDiff = processor_number - bTemp; @@ -1126,7 +1136,7 @@ retry: } CONTRACTL_END; -#if !defined(FEATURE_REDHAWK) && defined(_TARGET_AMD64_) && !defined(FEATURE_PAL) +#if !defined(FEATURE_REDHAWK) && defined(_TARGET_AMD64_) // m_enableGCCPUGroups and m_threadUseAllCpuGroups must be TRUE _ASSERTE(m_enableGCCPUGroups && m_threadUseAllCpuGroups); @@ -1147,7 +1157,7 @@ retry: #endif } -#if !defined(FEATURE_REDHAWK) && !defined(FEATURE_PAL) +#if !defined(FEATURE_REDHAWK) //Lock ThreadStore before calling this function, so that updates of weights/counts are consistent /*static*/ void CPUGroupInfo::ChooseCPUGroupAffinity(GROUP_AFFINITY *gf) { diff --git a/src/vm/gcenv.os.cpp b/src/vm/gcenv.os.cpp index d519c3cda4..5e820a6169 100644 --- a/src/vm/gcenv.os.cpp +++ b/src/vm/gcenv.os.cpp @@ -329,11 +329,7 @@ bool GCToOSInterface::GetCurrentProcessAffinityMask(uintptr_t* processMask, uint { LIMITED_METHOD_CONTRACT; -#ifndef FEATURE_PAL return !!::GetProcessAffinityMask(GetCurrentProcess(), (PDWORD_PTR)processMask, (PDWORD_PTR)systemMask); -#else - return false; -#endif } // Get number of processors assigned to the current process |