summaryrefslogtreecommitdiff
path: root/src/pal/src
diff options
context:
space:
mode:
authorJan Vorlicek <janvorli@microsoft.com>2017-04-19 15:13:30 -0700
committerGitHub <noreply@github.com>2017-04-19 15:13:30 -0700
commit48ebab8270a96e505a2be128a4875f903ef1c4b2 (patch)
tree78313aeb604ebecf715e6f4a5c2cd8b3a1ee907b /src/pal/src
parent9b9989da73f4ee8de5d3c1257d46f8d7295dc44f (diff)
downloadcoreclr-48ebab8270a96e505a2be128a4875f903ef1c4b2.tar.gz
coreclr-48ebab8270a96e505a2be128a4875f903ef1c4b2.tar.bz2
coreclr-48ebab8270a96e505a2be128a4875f903ef1c4b2.zip
Add NUMA and thread affinity support for Unix (#10938)
* Add NUMA and thread affinity support for Unix This change adds new PAL functions for NUMA and thread affinity support for Unix and also enables related code in GC and VM for FEATURE_PAL. It doesn't reflect the limits imposed by CGROUPS on systems with CGROUPS enabled yet.
Diffstat (limited to 'src/pal/src')
-rw-r--r--src/pal/src/CMakeLists.txt7
-rw-r--r--src/pal/src/config.h.in5
-rw-r--r--src/pal/src/configure.cmake11
-rw-r--r--src/pal/src/include/pal/dbgmsg.h2
-rw-r--r--src/pal/src/include/pal/numa.h39
-rw-r--r--src/pal/src/init/pal.cpp8
-rw-r--r--src/pal/src/map/virtual.cpp1
-rw-r--r--src/pal/src/misc/sysinfo.cpp55
-rw-r--r--src/pal/src/numa/numa.cpp692
9 files changed, 797 insertions, 23 deletions
diff --git a/src/pal/src/CMakeLists.txt b/src/pal/src/CMakeLists.txt
index 5314cdf86b..145c2c9ed9 100644
--- a/src/pal/src/CMakeLists.txt
+++ b/src/pal/src/CMakeLists.txt
@@ -181,6 +181,7 @@ set(SOURCES
misc/sysinfo.cpp
misc/time.cpp
misc/utils.cpp
+ numa/numa.cpp
objmgr/palobjbase.cpp
objmgr/shmobject.cpp
objmgr/shmobjectmanager.cpp
@@ -372,6 +373,12 @@ if(CMAKE_SYSTEM_NAME STREQUAL NetBSD)
)
endif(CMAKE_SYSTEM_NAME STREQUAL NetBSD)
+if(HAVE_NUMA_H)
+ target_link_libraries(coreclrpal
+ numa
+ )
+endif(HAVE_NUMA_H)
+
add_subdirectory(examples)
if(FEATURE_EVENT_TRACE)
diff --git a/src/pal/src/config.h.in b/src/pal/src/config.h.in
index c2939f3011..e3024ac4d5 100644
--- a/src/pal/src/config.h.in
+++ b/src/pal/src/config.h.in
@@ -21,6 +21,8 @@
#cmakedefine01 HAVE_SYS_SYSCTL_H
#cmakedefine01 HAVE_GNU_LIBNAMES_H
#cmakedefine01 HAVE_PRCTL_H
+#cmakedefine01 HAVE_NUMA_H
+#cmakedefine01 HAVE_PTHREAD_NP_H
#cmakedefine01 HAVE_KQUEUE
#cmakedefine01 HAVE_GETPWUID_R
@@ -33,6 +35,8 @@
#cmakedefine01 HAVE_PTHREAD_GETATTR_NP
#cmakedefine01 HAVE_PTHREAD_GETCPUCLOCKID
#cmakedefine01 HAVE_PTHREAD_SIGQUEUE
+#cmakedefine01 HAVE_PTHREAD_GETAFFINITY_NP
+#cmakedefine01 HAVE_CPUSET_T
#cmakedefine01 HAVE_SIGRETURN
#cmakedefine01 HAVE__THREAD_SYS_SIGRETURN
#cmakedefine01 HAVE_COPYSIGN
@@ -57,6 +61,7 @@
#cmakedefine01 HAS_SYSV_SEMAPHORES
#cmakedefine01 HAS_PTHREAD_MUTEXES
#cmakedefine01 HAVE_TTRACE
+#cmakedefine01 HAVE_SCHED_GETAFFINITY
#cmakedefine HAVE_UNW_GET_SAVE_LOC
#cmakedefine HAVE_UNW_GET_ACCESSORS
#cmakedefine01 HAVE_XSWDEV
diff --git a/src/pal/src/configure.cmake b/src/pal/src/configure.cmake
index 03c7343056..f9a23e8e60 100644
--- a/src/pal/src/configure.cmake
+++ b/src/pal/src/configure.cmake
@@ -35,6 +35,8 @@ check_include_files(libunwind.h HAVE_LIBUNWIND_H)
check_include_files(runetype.h HAVE_RUNETYPE_H)
check_include_files(semaphore.h HAVE_SEMAPHORE_H)
check_include_files(sys/prctl.h HAVE_PRCTL_H)
+check_include_files(numa.h HAVE_NUMA_H)
+check_include_files(pthread_np.h HAVE_PTHREAD_NP_H)
if(NOT CMAKE_SYSTEM_NAME STREQUAL FreeBSD AND NOT CMAKE_SYSTEM_NAME STREQUAL NetBSD)
set(CMAKE_REQUIRED_FLAGS "-ldl")
@@ -69,6 +71,7 @@ check_library_exists(${PTHREAD_LIBRARY} pthread_attr_get_np "" HAVE_PTHREAD_ATTR
check_library_exists(${PTHREAD_LIBRARY} pthread_getattr_np "" HAVE_PTHREAD_GETATTR_NP)
check_library_exists(${PTHREAD_LIBRARY} pthread_getcpuclockid "" HAVE_PTHREAD_GETCPUCLOCKID)
check_library_exists(${PTHREAD_LIBRARY} pthread_sigqueue "" HAVE_PTHREAD_SIGQUEUE)
+check_library_exists(${PTHREAD_LIBRARY} pthread_getaffinity_np "" HAVE_PTHREAD_GETAFFINITY_NP)
check_function_exists(sigreturn HAVE_SIGRETURN)
check_function_exists(_thread_sys_sigreturn HAVE__THREAD_SYS_SIGRETURN)
@@ -119,6 +122,14 @@ int main(int argc, char **argv) {
}" HAVE_UNW_GET_ACCESSORS)
set(CMAKE_REQUIRED_LIBRARIES)
+check_cxx_source_compiles("
+#include <pthread_np.h>
+int main(int argc, char **argv) {
+ cpuset_t cpuSet;
+
+ return 0;
+}" HAVE_CPUSET_T)
+
check_struct_has_member ("struct stat" st_atimespec "sys/types.h;sys/stat.h" HAVE_STAT_TIMESPEC)
check_struct_has_member ("struct stat" st_atimensec "sys/types.h;sys/stat.h" HAVE_STAT_NSEC)
check_struct_has_member ("struct tm" tm_gmtoff time.h HAVE_TM_GMTOFF)
diff --git a/src/pal/src/include/pal/dbgmsg.h b/src/pal/src/include/pal/dbgmsg.h
index 7a49fc0ad6..052c6fa775 100644
--- a/src/pal/src/include/pal/dbgmsg.h
+++ b/src/pal/src/include/pal/dbgmsg.h
@@ -194,7 +194,7 @@ typedef enum
#ifdef FEATURE_PAL_SXS
DCI_SXS,
#endif // FEATURE_PAL_SXS
-
+ DCI_NUMA,
DCI_LAST
} DBG_CHANNEL_ID;
diff --git a/src/pal/src/include/pal/numa.h b/src/pal/src/include/pal/numa.h
new file mode 100644
index 0000000000..4fb2308a7d
--- /dev/null
+++ b/src/pal/src/include/pal/numa.h
@@ -0,0 +1,39 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*++
+
+
+
+Module Name:
+
+ include/pal/numa.h
+
+Abstract:
+
+ Header file for the NUMA functions.
+
+
+
+--*/
+
+#ifndef _PAL_NUMA_H_
+#define _PAL_NUMA_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif // __cplusplus
+
+// Builds the processor / processor group / NUMA node lookup tables used by the
+// NUMA and thread affinity PAL functions. PAL initialization (init/pal.cpp) treats
+// a FALSE return value as a fatal initialization error.
+BOOL
+NUMASupportInitialize();
+
+// Releases the lookup tables created by NUMASupportInitialize.
+VOID
+NUMASupportCleanup();
+
+#ifdef __cplusplus
+}
+#endif // __cplusplus
+
+#endif /* _PAL_NUMA_H_ */
diff --git a/src/pal/src/init/pal.cpp b/src/pal/src/init/pal.cpp
index 8b0e0f53f5..2fdafe4f8c 100644
--- a/src/pal/src/init/pal.cpp
+++ b/src/pal/src/init/pal.cpp
@@ -42,6 +42,7 @@ SET_DEFAULT_DEBUG_CHANNEL(PAL); // some headers have code with asserts, so do th
#include "pal/debug.h"
#include "pal/locale.h"
#include "pal/init.h"
+#include "pal/numa.h"
#include "pal/stackstring.hpp"
#if HAVE_MACH_EXCEPTIONS
@@ -523,6 +524,12 @@ Initialize(
goto CLEANUP15;
}
+ if (FALSE == NUMASupportInitialize())
+ {
+ ERROR("Unable to initialize NUMA support\n");
+ goto CLEANUP15;
+ }
+
TRACE("First-time PAL initialization complete.\n");
init_count++;
@@ -548,6 +555,7 @@ Initialize(
}
goto done;
+ NUMASupportCleanup();
/* No cleanup required for CRTInitStdStreams */
CLEANUP15:
FILECleanupStdHandles();
diff --git a/src/pal/src/map/virtual.cpp b/src/pal/src/map/virtual.cpp
index d52ba1e896..7e00843b7a 100644
--- a/src/pal/src/map/virtual.cpp
+++ b/src/pal/src/map/virtual.cpp
@@ -1350,7 +1350,6 @@ done:
return pRetVal;
}
-
/*++
Function:
VirtualFree
diff --git a/src/pal/src/misc/sysinfo.cpp b/src/pal/src/misc/sysinfo.cpp
index fff051818f..70fe3e65d2 100644
--- a/src/pal/src/misc/sysinfo.cpp
+++ b/src/pal/src/misc/sysinfo.cpp
@@ -94,6 +94,39 @@ SET_DEFAULT_DEBUG_CHANNEL(MISC);
#endif
#endif // __APPLE__
+/*++
+Function:
+    GetNumberOfProcessors
+
+Ask the operating system for the processor count.
+
+With HAVE_SYSCONF the value is sysconf(_SC_NPROCESSORS_ONLN); otherwise, with
+HAVE_SYSCTL, it is the CTL_HW / HW_NCPU sysctl value. When neither is available
+the function returns 0. A failed query raises a PAL ASSERT and the function
+returns whatever the query left in the result.
+--*/
+int GetNumberOfProcessors()
+{
+    int processorCount = 0;
+
+#if HAVE_SYSCONF
+    processorCount = sysconf(_SC_NPROCESSORS_ONLN);
+    if (processorCount < 1)
+    {
+        ASSERT("sysconf failed for _SC_NPROCESSORS_ONLN (%d)\n", errno);
+    }
+#elif HAVE_SYSCTL
+    int name[2] = { CTL_HW, HW_NCPU };
+    size_t length = sizeof(processorCount);
+
+    if (sysctl(name, 2, &processorCount, &length, NULL, 0) != 0)
+    {
+        ASSERT("sysctl failed for HW_NCPU (%d)\n", errno);
+    }
+#endif // HAVE_SYSCONF
+
+    return processorCount;
+}
/*++
Function:
@@ -137,27 +170,7 @@ GetSystemInfo(
lpSystemInfo->dwPageSize = pagesize;
lpSystemInfo->dwActiveProcessorMask_PAL_Undefined = 0;
-#if HAVE_SYSCONF
- nrcpus = sysconf(_SC_NPROCESSORS_ONLN);
- if (nrcpus < 1)
- {
- ASSERT("sysconf failed for _SC_NPROCESSORS_ONLN (%d)\n", errno);
- }
-#elif HAVE_SYSCTL
- int rc;
- size_t sz;
- int mib[2];
-
- sz = sizeof(nrcpus);
- mib[0] = CTL_HW;
- mib[1] = HW_NCPU;
- rc = sysctl(mib, 2, &nrcpus, &sz, NULL, 0);
- if (rc != 0)
- {
- ASSERT("sysctl failed for HW_NCPU (%d)\n", errno);
- }
-#endif // HAVE_SYSCONF
-
+ nrcpus = GetNumberOfProcessors();
TRACE("dwNumberOfProcessors=%d\n", nrcpus);
lpSystemInfo->dwNumberOfProcessors = nrcpus;
diff --git a/src/pal/src/numa/numa.cpp b/src/pal/src/numa/numa.cpp
new file mode 100644
index 0000000000..549c10a71f
--- /dev/null
+++ b/src/pal/src/numa/numa.cpp
@@ -0,0 +1,692 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*++
+
+
+
+Module Name:
+
+ numa.cpp
+
+Abstract:
+
+ Implementation of NUMA related APIs
+
+--*/
+
+#include "pal/dbgmsg.h"
+SET_DEFAULT_DEBUG_CHANNEL(NUMA);
+
+#include "pal/palinternal.h"
+#include "pal/dbgmsg.h"
+#include "pal/numa.h"
+#include "pal/corunix.hpp"
+#include "pal/thread.hpp"
+
+#if HAVE_NUMA_H
+#include <numa.h>
+#include <numaif.h>
+#endif
+
+#if HAVE_PTHREAD_NP_H
+#include <pthread_np.h>
+#endif
+
+#include <pthread.h>
+
+using namespace CorUnix;
+
+#if HAVE_CPUSET_T
+typedef cpuset_t cpu_set_t;
+#endif
+
+int GetNumberOfProcessors();
+
+// CPU affinity descriptor: one element of the g_cpuToAffinity array, which is indexed
+// by the global CPU number. AllocateLookupArrays fills the whole array with 0xff bytes,
+// so an entry that NUMASupportInitialize never assigns reads back as Node 0xff,
+// Number 0xff and Group 0xffff (the NO_GROUP value).
+struct CpuAffinity
+{
+ // NUMA node the CPU belongs to
+ BYTE Node;
+ // CPU number relative to the group the CPU is in (also used as the CPU's bit
+ // position in the group's KAFFINITY mask)
+ BYTE Number;
+ // CPU group the CPU was assigned to
+ WORD Group;
+};
+
+// Array mapping global CPU index to its affinity (g_cpuCount elements)
+CpuAffinity *g_cpuToAffinity = NULL;
+
+// Array mapping CPU group and index in the group to the global CPU index.
+// It has g_groupCount * MaxCpusPerGroup elements and is indexed by
+// group * MaxCpusPerGroup + index; slots with no CPU hold -1 (the array is
+// filled with 0xff bytes when it is allocated).
+short *g_groupAndIndexToCpu = NULL;
+// Array mapping CPU group to the corresponding affinity mask of the CPUs in the group
+// (g_groupCount elements)
+KAFFINITY *g_groupToCpuMask = NULL;
+// Array mapping CPU group to the number of processors in the group
+// (g_groupCount elements)
+BYTE *g_groupToCpuCount = NULL;
+
+// Total number of processors in the system
+int g_cpuCount = 0;
+// Total number of CPU groups
+int g_groupCount = 0;
+// The highest NUMA node available
+int g_highestNumaNode = 0;
+
+// Largest number of CPUs a single group can hold: one per bit of a KAFFINITY mask
+static const int MaxCpusPerGroup = 8 * sizeof(KAFFINITY);
+// Sentinel group value used while scanning CPU sets, meaning "no group selected yet"
+static const WORD NO_GROUP = 0xffff;
+
+/*++
+Function:
+    AllocateLookupArrays
+
+Allocate the CPU and group lookup arrays, sized from g_cpuCount and g_groupCount,
+and reset them to their empty state: every g_groupAndIndexToCpu and g_cpuToAffinity
+byte is 0xff (so unused slots read as -1 / NO_GROUP), and the per-group masks and
+counts are zero.
+
+If any allocation fails, all four arrays are released and the globals are left
+NULL, so the caller can detect the failure by checking them.
+--*/
+VOID
+AllocateLookupArrays()
+{
+    // Never request zero bytes: malloc(0) may legitimately return NULL, which would be
+    // indistinguishable from an allocation failure. A non-positive count (e.g. a failed
+    // processor count query) therefore still gets a one element array.
+    size_t cpuCount = (g_cpuCount > 0) ? (size_t)g_cpuCount : 1;
+    size_t groupCount = (g_groupCount > 0) ? (size_t)g_groupCount : 1;
+
+    size_t groupAndIndexToCpuSize = groupCount * MaxCpusPerGroup * sizeof(short);
+    size_t cpuToAffinitySize = cpuCount * sizeof(CpuAffinity);
+    size_t groupToCpuMaskSize = groupCount * sizeof(KAFFINITY);
+    size_t groupToCpuCountSize = groupCount * sizeof(BYTE);
+
+    g_groupAndIndexToCpu = (short*)malloc(groupAndIndexToCpuSize);
+    g_cpuToAffinity = (CpuAffinity*)malloc(cpuToAffinitySize);
+    g_groupToCpuMask = (KAFFINITY*)malloc(groupToCpuMaskSize);
+    g_groupToCpuCount = (BYTE*)malloc(groupToCpuCountSize);
+
+    if ((g_groupAndIndexToCpu == NULL) ||
+        (g_cpuToAffinity == NULL) ||
+        (g_groupToCpuMask == NULL) ||
+        (g_groupToCpuCount == NULL))
+    {
+        // Out of memory: release whatever was allocated instead of writing through NULL
+        free(g_groupAndIndexToCpu);
+        free(g_cpuToAffinity);
+        free(g_groupToCpuMask);
+        free(g_groupToCpuCount);
+
+        g_groupAndIndexToCpu = NULL;
+        g_cpuToAffinity = NULL;
+        g_groupToCpuMask = NULL;
+        g_groupToCpuCount = NULL;
+        return;
+    }
+
+    memset(g_groupAndIndexToCpu, 0xff, groupAndIndexToCpuSize);
+    memset(g_cpuToAffinity, 0xff, cpuToAffinitySize);
+    memset(g_groupToCpuMask, 0, groupToCpuMaskSize);
+    memset(g_groupToCpuCount, 0, groupToCpuCountSize);
+}
+
+/*++
+Function:
+    FreeLookupArrays
+
+Release the four CPU / group lookup arrays and reset their global pointers to NULL.
+--*/
+VOID
+FreeLookupArrays()
+{
+    free(g_groupAndIndexToCpu);
+    g_groupAndIndexToCpu = NULL;
+
+    free(g_cpuToAffinity);
+    g_cpuToAffinity = NULL;
+
+    free(g_groupToCpuMask);
+    g_groupToCpuMask = NULL;
+
+    free(g_groupToCpuCount);
+    g_groupToCpuCount = NULL;
+}
+
+/*++
+Function:
+    GetFullAffinityMask
+
+Get affinity mask for the specified number of processors with all
+the processors enabled, i.e. a mask with the lowest cpuCount bits set.
+
+A count of zero or less yields an empty mask. A count that reaches or exceeds
+the number of bits in KAFFINITY yields a mask with every bit set.
+--*/
+KAFFINITY GetFullAffinityMask(int cpuCount)
+{
+    if (cpuCount <= 0)
+    {
+        return 0;
+    }
+
+    // Shifting by the full width of the type is undefined behavior (and produces an
+    // empty mask on common hardware), so a completely filled group is handled explicitly.
+    if (cpuCount >= (int)(8 * sizeof(KAFFINITY)))
+    {
+        return ~(KAFFINITY)0;
+    }
+
+    return ((KAFFINITY)1 << cpuCount) - 1;
+}
+
+/*++
+Function:
+    RecordCpuAffinity
+
+Register one CPU in all lookup arrays: its affinity descriptor, the
+(group, index) -> CPU table and the per-group processor count and mask.
+--*/
+static
+VOID
+RecordCpuAffinity(int cpu, int node, WORD group, int indexInGroup)
+{
+    g_cpuToAffinity[cpu].Node = (BYTE)node;
+    g_cpuToAffinity[cpu].Group = group;
+    g_cpuToAffinity[cpu].Number = (BYTE)indexInGroup;
+    g_groupAndIndexToCpu[group * MaxCpusPerGroup + indexInGroup] = (short)cpu;
+
+    // CPUs are added to a group in index order, so the mask always ends up with the
+    // lowest g_groupToCpuCount[group] bits set. indexInGroup is below MaxCpusPerGroup,
+    // which keeps the shift within the width of KAFFINITY.
+    g_groupToCpuCount[group]++;
+    g_groupToCpuMask[group] |= ((KAFFINITY)1) << indexInGroup;
+}
+
+/*++
+Function:
+    NUMASupportInitialize
+
+Initialize data structures for getting and setting thread affinities to processors and
+querying NUMA related processor information.
+
+When libnuma is present and reports NUMA as available, the CPUs of every NUMA node are
+split into groups of at most MaxCpusPerGroup processors; a group never spans two nodes.
+On systems with no NUMA support, it behaves as if there was a single NUMA node whose
+processors are assigned to groups in CPU index order.
+
+Return value:
+    TRUE on success, FALSE when the lookup arrays could not be allocated.
+--*/
+BOOL
+NUMASupportInitialize()
+{
+#if HAVE_NUMA_H
+    if (numa_available() != -1)
+    {
+        struct bitmask *mask = numa_allocate_cpumask();
+        int numaNodesCount = numa_max_node() + 1;
+
+        g_cpuCount = numa_num_possible_cpus();
+        g_groupCount = 0;
+
+        // First pass: count the groups needed so that the lookup arrays can be sized
+        for (int i = 0; i < numaNodesCount; i++)
+        {
+            int st = numa_node_to_cpus(i, mask);
+            // The only failure that can happen is that the mask is not large enough
+            // but that cannot happen since the mask was allocated by numa_allocate_cpumask
+            _ASSERTE(st == 0);
+            unsigned int nodeCpuCount = numa_bitmask_weight(mask);
+            unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup;
+            g_groupCount += nodeGroupCount;
+        }
+
+        AllocateLookupArrays();
+
+        if ((g_groupAndIndexToCpu == NULL) ||
+            (g_cpuToAffinity == NULL) ||
+            (g_groupToCpuMask == NULL) ||
+            (g_groupToCpuCount == NULL))
+        {
+            numa_free_cpumask(mask);
+            FreeLookupArrays();
+            return FALSE;
+        }
+
+        // Second pass: assign every CPU of every node to a group
+        WORD currentGroup = 0;
+        int currentGroupCpus = 0;
+
+        for (int i = 0; i < numaNodesCount; i++)
+        {
+            int st = numa_node_to_cpus(i, mask);
+            // The only failure that can happen is that the mask is not large enough
+            // but that cannot happen since the mask was allocated by numa_allocate_cpumask
+            _ASSERTE(st == 0);
+
+            for (int j = 0; j < g_cpuCount; j++)
+            {
+                if (numa_bitmask_isbitset(mask, j))
+                {
+                    if (currentGroupCpus == MaxCpusPerGroup)
+                    {
+                        // The current group is full, continue with the next one
+                        currentGroupCpus = 0;
+                        currentGroup++;
+                    }
+
+                    RecordCpuAffinity(j, i, currentGroup, currentGroupCpus);
+                    currentGroupCpus++;
+                }
+            }
+
+            if (currentGroupCpus != 0)
+            {
+                // Groups do not span NUMA nodes, so close a partially filled group
+                currentGroupCpus = 0;
+                currentGroup++;
+            }
+        }
+
+        numa_free_cpumask(mask);
+
+        g_highestNumaNode = numa_max_node();
+    }
+    else
+#endif // HAVE_NUMA_H
+    {
+        // No NUMA: a single node with the CPUs split into groups in index order
+        g_cpuCount = GetNumberOfProcessors();
+        g_groupCount = (g_cpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup;
+        if (g_groupCount < 1)
+        {
+            g_groupCount = 1;
+        }
+        g_highestNumaNode = 0;
+
+        AllocateLookupArrays();
+
+        if ((g_groupAndIndexToCpu == NULL) ||
+            (g_cpuToAffinity == NULL) ||
+            (g_groupToCpuMask == NULL) ||
+            (g_groupToCpuCount == NULL))
+        {
+            FreeLookupArrays();
+            return FALSE;
+        }
+
+        // Without this the tables would keep their "empty" contents and every CPU
+        // would be reported as belonging to no group at all.
+        for (int cpu = 0; cpu < g_cpuCount; cpu++)
+        {
+            RecordCpuAffinity(cpu, 0, (WORD)(cpu / MaxCpusPerGroup), cpu % MaxCpusPerGroup);
+        }
+    }
+
+    return TRUE;
+}
+
+/*++
+Function:
+    NUMASupportCleanup
+
+Release the lookup arrays that NUMASupportInitialize allocated.
+--*/
+VOID
+NUMASupportCleanup()
+{
+    // All of the NUMA support state that needs releasing lives in the lookup arrays
+    FreeLookupArrays();
+}
+
+/*++
+Function:
+    GetNumaHighestNodeNumber
+
+Store the highest NUMA node number recorded by NUMASupportInitialize into
+*HighestNodeNumber. Always returns TRUE.
+
+See MSDN doc.
+--*/
+BOOL
+PALAPI
+GetNumaHighestNodeNumber(
+    OUT PULONG HighestNodeNumber
+)
+{
+    PERF_ENTRY(GetNumaHighestNodeNumber);
+    ENTRY("GetNumaHighestNodeNumber(HighestNodeNumber=%p)\n", HighestNodeNumber);
+
+    // The value was cached during initialization, so this query cannot fail
+    const BOOL success = TRUE;
+    *HighestNodeNumber = (ULONG)g_highestNumaNode;
+
+    LOGEXIT("GetNumaHighestNodeNumber returns BOOL %d\n", success);
+    PERF_EXIT(GetNumaHighestNodeNumber);
+
+    return success;
+}
+
+/*++
+Function:
+    GetNumaProcessorNodeEx
+
+Look up the NUMA node of the processor identified by its group and its number
+within that group. On failure (group or number out of range, non-zero Reserved
+field, or no CPU at that position) *NodeNumber is set to 0xffff and the last
+error is set to ERROR_INVALID_PARAMETER.
+
+See MSDN doc.
+--*/
+BOOL
+PALAPI
+GetNumaProcessorNodeEx(
+    IN PPROCESSOR_NUMBER Processor,
+    OUT PUSHORT NodeNumber
+)
+{
+    PERF_ENTRY(GetNumaProcessorNodeEx);
+    ENTRY("GetNumaProcessorNodeEx(Processor=%p, NodeNumber=%p)\n", Processor, NodeNumber);
+
+    // Global index of the requested CPU, -1 while it is not known to exist
+    short cpuIndex = -1;
+
+    if ((Processor->Group < g_groupCount) &&
+        (Processor->Number < MaxCpusPerGroup) &&
+        (Processor->Reserved == 0))
+    {
+        cpuIndex = g_groupAndIndexToCpu[Processor->Group * MaxCpusPerGroup + Processor->Number];
+    }
+
+    BOOL success = (cpuIndex != -1);
+
+    if (success)
+    {
+        *NodeNumber = g_cpuToAffinity[cpuIndex].Node;
+    }
+    else
+    {
+        *NodeNumber = 0xffff;
+        SetLastError(ERROR_INVALID_PARAMETER);
+    }
+
+    LOGEXIT("GetNumaProcessorNodeEx returns BOOL %d\n", success);
+    PERF_EXIT(GetNumaProcessorNodeEx);
+
+    return success;
+}
+
+/*++
+Function:
+    GetLogicalProcessorInformationEx
+
+Report the processor groups known to the PAL. Only RelationGroup is supported;
+any other relationship type fails with ERROR_INVALID_PARAMETER.
+
+On return *ReturnedLength holds the number of bytes required for the group
+information. When Buffer is NULL or *ReturnedLength is smaller than that, nothing
+is written to Buffer and the call fails with ERROR_INSUFFICIENT_BUFFER, which
+lets a caller query the required size first.
+
+See MSDN doc.
+--*/
+BOOL
+PALAPI
+GetLogicalProcessorInformationEx(
+    IN LOGICAL_PROCESSOR_RELATIONSHIP RelationshipType,
+    OUT OPTIONAL PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer,
+    IN OUT PDWORD ReturnedLength
+)
+{
+    PERF_ENTRY(GetLogicalProcessorInformationEx);
+    ENTRY("GetLogicalProcessorInformationEx(RelationshipType=%d, Buffer=%p, ReturnedLength=%p)\n", RelationshipType, Buffer, ReturnedLength);
+
+    BOOL success = FALSE;
+
+    if (RelationshipType == RelationGroup)
+    {
+        // Header of the info structure + header of the group relationship + one
+        // PROCESSOR_GROUP_INFO per group
+        size_t requiredSize = __builtin_offsetof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, Group);
+        requiredSize += __builtin_offsetof(GROUP_RELATIONSHIP, GroupInfo);
+        requiredSize += g_groupCount * sizeof(PROCESSOR_GROUP_INFO);
+
+        // Buffer is optional, so it must not be touched when the caller passed NULL,
+        // no matter what length was supplied along with it.
+        if ((Buffer != NULL) && (*ReturnedLength >= requiredSize))
+        {
+            Buffer->Relationship = RelationGroup;
+            Buffer->Size = requiredSize;
+            Buffer->Group.MaximumGroupCount = g_groupCount;
+            Buffer->Group.ActiveGroupCount = g_groupCount;
+            for (int i = 0; i < g_groupCount; i++)
+            {
+                Buffer->Group.GroupInfo[i].MaximumProcessorCount = MaxCpusPerGroup;
+                Buffer->Group.GroupInfo[i].ActiveProcessorCount = g_groupToCpuCount[i];
+                Buffer->Group.GroupInfo[i].ActiveProcessorMask = g_groupToCpuMask[i];
+            }
+
+            success = TRUE;
+        }
+        else
+        {
+            SetLastError(ERROR_INSUFFICIENT_BUFFER);
+        }
+
+        *ReturnedLength = requiredSize;
+    }
+    else
+    {
+        // We only support the group relationship
+        SetLastError(ERROR_INVALID_PARAMETER);
+    }
+
+    LOGEXIT("GetLogicalProcessorInformationEx returns BOOL %d\n", success);
+    PERF_EXIT(GetLogicalProcessorInformationEx);
+
+    return success;
+}
+
+/*++
+Function:
+    GetThreadGroupAffinityInternal
+
+Get the group affinity for the specified pthread.
+
+With pthread_getaffinity_np available, the result describes the first group that
+contains a CPU the thread may run on, together with the mask of the allowed CPUs
+of that group; a failing pthread_getaffinity_np call sets the last error to
+ERROR_GEN_FAILURE and returns FALSE. Without that API the result is group 0 with
+the full mask for g_cpuCount processors.
+--*/
+BOOL
+GetThreadGroupAffinityInternal(
+    IN pthread_t thread,
+    OUT PGROUP_AFFINITY GroupAffinity
+)
+{
+#if HAVE_PTHREAD_GETAFFINITY_NP
+    cpu_set_t threadCpus;
+
+    if (pthread_getaffinity_np(thread, sizeof(cpu_set_t), &threadCpus) != 0)
+    {
+        SetLastError(ERROR_GEN_FAILURE);
+        return FALSE;
+    }
+
+    WORD reportedGroup = NO_GROUP;
+    KAFFINITY reportedMask = 0;
+
+    for (int cpu = 0; cpu < g_cpuCount; cpu++)
+    {
+        if (!CPU_ISSET(cpu, &threadCpus))
+        {
+            continue;
+        }
+
+        const CpuAffinity &affinity = g_cpuToAffinity[cpu];
+
+        // Unless the thread affinity was already set by SetThreadGroupAffinity, it is possible that
+        // the current thread has affinity with processors from multiple groups. So we report just the
+        // first group we find and skip the CPUs of all other groups.
+        if ((reportedGroup != NO_GROUP) && (affinity.Group != reportedGroup))
+        {
+            continue;
+        }
+
+        reportedGroup = affinity.Group;
+        reportedMask |= ((KAFFINITY)1) << affinity.Number;
+    }
+
+    GroupAffinity->Group = reportedGroup;
+    GroupAffinity->Mask = reportedMask;
+
+    return TRUE;
+#else // HAVE_PTHREAD_GETAFFINITY_NP
+    // There is no API to manage thread affinity, so let's return a group affinity
+    // with all the CPUs on the system.
+    GroupAffinity->Group = 0;
+    GroupAffinity->Mask = GetFullAffinityMask(g_cpuCount);
+
+    return TRUE;
+#endif // HAVE_PTHREAD_GETAFFINITY_NP
+}
+
+/*++
+Function:
+    GetThreadGroupAffinity
+
+Retrieve the group affinity through GetThreadGroupAffinityInternal.
+
+Note that hThread is only logged: the query is made for the CPalThread returned
+by InternalGetCurrentThread(), not for the thread the handle refers to.
+
+See MSDN doc.
+--*/
+BOOL
+PALAPI
+GetThreadGroupAffinity(
+    IN HANDLE hThread,
+    OUT PGROUP_AFFINITY GroupAffinity
+)
+{
+    PERF_ENTRY(GetThreadGroupAffinity);
+    ENTRY("GetThreadGroupAffinity(hThread=%p, GroupAffinity=%p)\n", hThread, GroupAffinity);
+
+    pthread_t self = InternalGetCurrentThread()->GetPThreadSelf();
+    BOOL success = GetThreadGroupAffinityInternal(self, GroupAffinity);
+
+    LOGEXIT("GetThreadGroupAffinity returns BOOL %d\n", success);
+    PERF_EXIT(GetThreadGroupAffinity);
+
+    return success;
+}
+
+
+/*++
+Function:
+    SetThreadGroupAffinity
+
+Restrict the thread to the processors selected by GroupAffinity->Mask within the
+group GroupAffinity->Group. When PreviousGroupAffinity is not NULL, it receives
+the affinity in effect before the change.
+
+Note that hThread is only logged: the affinity is changed for the CPalThread
+returned by InternalGetCurrentThread(), not for the thread the handle refers to.
+
+Last error on failure:
+    ERROR_INVALID_PARAMETER - the group does not exist or pthread_setaffinity_np
+                              rejected the CPU set (EINVAL)
+    ERROR_ACCESS_DENIED     - pthread_setaffinity_np reported EPERM
+    ERROR_GEN_FAILURE       - any other pthread_setaffinity_np failure
+
+When there is no API to manage thread affinity, the request is ignored and the
+function returns TRUE.
+
+See MSDN doc.
+--*/
+BOOL
+PALAPI
+SetThreadGroupAffinity(
+    IN HANDLE hThread,
+    IN const GROUP_AFFINITY *GroupAffinity,
+    OUT OPTIONAL PGROUP_AFFINITY PreviousGroupAffinity
+)
+{
+    PERF_ENTRY(SetThreadGroupAffinity);
+    ENTRY("SetThreadGroupAffinity(hThread=%p, GroupAffinity=%p, PreviousGroupAffinity=%p)\n", hThread, GroupAffinity, PreviousGroupAffinity);
+
+    CPalThread *palThread = InternalGetCurrentThread();
+
+    pthread_t thread = palThread->GetPThreadSelf();
+
+    if (PreviousGroupAffinity != NULL)
+    {
+        GetThreadGroupAffinityInternal(thread, PreviousGroupAffinity);
+    }
+
+#if HAVE_PTHREAD_GETAFFINITY_NP
+    BOOL success = FALSE;
+
+    // The group indexes g_groupAndIndexToCpu, so it has to be validated first
+    if (GroupAffinity->Group < g_groupCount)
+    {
+        int groupStartIndex = GroupAffinity->Group * MaxCpusPerGroup;
+        KAFFINITY mask = 1;
+        cpu_set_t cpuSet;
+        CPU_ZERO(&cpuSet);
+
+        // Translate the group relative mask to a set of global CPU indices
+        for (int i = 0; i < MaxCpusPerGroup; i++, mask <<= 1)
+        {
+            if (GroupAffinity->Mask & mask)
+            {
+                int cpu = g_groupAndIndexToCpu[groupStartIndex + i];
+                if (cpu != -1)
+                {
+                    CPU_SET(cpu, &cpuSet);
+                }
+            }
+        }
+
+        // pthread_setaffinity_np reports a failure through its return value (an error
+        // number); it does not return -1 and does not set errno.
+        int st = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuSet);
+
+        switch (st)
+        {
+            case 0:
+                success = TRUE;
+                break;
+            case EINVAL:
+                // There is no processor in the mask that is allowed to execute the process
+                SetLastError(ERROR_INVALID_PARAMETER);
+                break;
+            case EPERM:
+                SetLastError(ERROR_ACCESS_DENIED);
+                break;
+            default:
+                SetLastError(ERROR_GEN_FAILURE);
+                break;
+        }
+    }
+    else
+    {
+        // There is no such processor group
+        SetLastError(ERROR_INVALID_PARAMETER);
+    }
+#else // HAVE_PTHREAD_GETAFFINITY_NP
+    // There is no API to manage thread affinity, so let's ignore the request
+    BOOL success = TRUE;
+#endif // HAVE_PTHREAD_GETAFFINITY_NP
+
+    LOGEXIT("SetThreadGroupAffinity returns BOOL %d\n", success);
+    PERF_EXIT(SetThreadGroupAffinity);
+
+    return success;
+}
+
+/*++
+Function:
+    GetCurrentProcessorNumberEx
+
+Fill *ProcNumber with the group, and the number within that group, of the
+processor reported by GetCurrentProcessorNumber. The Reserved field is set to
+zero so that the result can be passed directly to GetNumaProcessorNodeEx, which
+rejects a processor number whose Reserved field is not zero.
+
+See MSDN doc.
+--*/
+VOID
+PALAPI
+GetCurrentProcessorNumberEx(
+    OUT PPROCESSOR_NUMBER ProcNumber
+)
+{
+    PERF_ENTRY(GetCurrentProcessorNumberEx);
+    ENTRY("GetCurrentProcessorNumberEx(ProcNumber=%p\n", ProcNumber);
+
+    DWORD cpu = GetCurrentProcessorNumber();
+    _ASSERTE(cpu < (DWORD)g_cpuCount);
+    ProcNumber->Group = g_cpuToAffinity[cpu].Group;
+    ProcNumber->Number = g_cpuToAffinity[cpu].Number;
+    // Do not leave caller provided garbage in the field
+    ProcNumber->Reserved = 0;
+
+    LOGEXIT("GetCurrentProcessorNumberEx\n");
+    PERF_EXIT(GetCurrentProcessorNumberEx);
+}
+
+/*++
+Function:
+    GetProcessAffinityMask
+
+Get the affinity mask of the current process and the system affinity mask.
+Only the handle returned by GetCurrentProcess() is accepted; any other handle
+fails with ERROR_INVALID_PARAMETER.
+
+The system mask is the full mask for g_cpuCount processors. With
+sched_getaffinity available, the process mask is built from the CPUs the process
+is allowed to run on; if those CPUs belong to more than one group, both masks
+are reported as zero. Without sched_getaffinity the process mask equals the
+system mask.
+
+See MSDN doc.
+--*/
+BOOL
+PALAPI
+GetProcessAffinityMask(
+    IN HANDLE hProcess,
+    OUT PDWORD_PTR lpProcessAffinityMask,
+    OUT PDWORD_PTR lpSystemAffinityMask
+)
+{
+    PERF_ENTRY(GetProcessAffinityMask);
+    ENTRY("GetProcessAffinityMask(hProcess=%p, lpProcessAffinityMask=%p, lpSystemAffinityMask=%p\n", hProcess, lpProcessAffinityMask, lpSystemAffinityMask);
+
+    BOOL success = FALSE;
+
+    if (hProcess != GetCurrentProcess())
+    {
+        // PAL supports getting affinity mask for the current process only
+        SetLastError(ERROR_INVALID_PARAMETER);
+    }
+    else
+    {
+        DWORD_PTR allCpusMask = GetFullAffinityMask(g_cpuCount);
+
+#if HAVE_SCHED_GETAFFINITY
+        cpu_set_t processCpus;
+
+        if (sched_getaffinity(getpid(), sizeof(cpu_set_t), &processCpus) != 0)
+        {
+            // We should not get any of the errors that the sched_getaffinity can return since none
+            // of them applies for the current thread, so this is an unexpected kind of failure.
+            SetLastError(ERROR_GEN_FAILURE);
+        }
+        else
+        {
+            WORD firstGroup = NO_GROUP;
+            DWORD_PTR allowedMask = 0;
+
+            for (int cpu = 0; cpu < g_cpuCount; cpu++)
+            {
+                if (!CPU_ISSET(cpu, &processCpus))
+                {
+                    continue;
+                }
+
+                WORD cpuGroup = g_cpuToAffinity[cpu].Group;
+                if ((firstGroup != NO_GROUP) && (cpuGroup != firstGroup))
+                {
+                    // The process has affinity in more than one group, in such case
+                    // the function needs to return zero in both masks.
+                    allowedMask = 0;
+                    allCpusMask = 0;
+                    break;
+                }
+
+                firstGroup = cpuGroup;
+                allowedMask |= ((DWORD_PTR)1) << g_cpuToAffinity[cpu].Number;
+            }
+
+            *lpProcessAffinityMask = allowedMask;
+            *lpSystemAffinityMask = allCpusMask;
+
+            success = TRUE;
+        }
+#else // HAVE_SCHED_GETAFFINITY
+        // There is no API to manage thread affinity, so let's return both affinity masks
+        // with all the CPUs on the system set.
+        *lpSystemAffinityMask = allCpusMask;
+        *lpProcessAffinityMask = allCpusMask;
+
+        success = TRUE;
+#endif // HAVE_SCHED_GETAFFINITY
+    }
+
+    LOGEXIT("GetProcessAffinityMask returns BOOL %d\n", success);
+    PERF_EXIT(GetProcessAffinityMask);
+
+    return success;
+}
+
+/*++
+Function:
+    VirtualAllocExNuma
+
+Allocate virtual memory with VirtualAlloc and, when libnuma is present and NUMA
+is available, ask the kernel to prefer the NUMA node nndPreferred for the pages
+of the allocation.
+
+Only the handle returned by GetCurrentProcess() is accepted and nndPreferred
+must not exceed the highest NUMA node; otherwise the function returns NULL with
+the last error set to ERROR_INVALID_PARAMETER. The node preference is just a
+hint: the allocated memory is returned even if the preference could not be set.
+
+See MSDN doc.
+--*/
+LPVOID
+PALAPI
+VirtualAllocExNuma(
+    IN HANDLE hProcess,
+    IN OPTIONAL LPVOID lpAddress,
+    IN SIZE_T dwSize,
+    IN DWORD flAllocationType,
+    IN DWORD flProtect,
+    IN DWORD nndPreferred
+)
+{
+    PERF_ENTRY(VirtualAllocExNuma);
+    ENTRY("VirtualAllocExNuma(hProcess=%p, lpAddress=%p, dwSize=%u, flAllocationType=%#x, flProtect=%#x, nndPreferred=%d\n",
+        hProcess, lpAddress, dwSize, flAllocationType, flProtect, nndPreferred);
+
+    LPVOID result = NULL;
+
+    if (hProcess == GetCurrentProcess())
+    {
+        if (nndPreferred <= (DWORD)g_highestNumaNode)
+        {
+            result = VirtualAlloc(lpAddress, dwSize, flAllocationType, flProtect);
+#if HAVE_NUMA_H
+            // The memory policy calls are not usable when NUMA is not available
+            if ((result != NULL) && (numa_available() != -1))
+            {
+                // The node mask is a bit array with one bit per NUMA node
+                const unsigned long bitsPerWord = 8 * sizeof(unsigned long);
+
+                // One bit more than the number of nodes is announced to mbind: Linux is
+                // known to examine one bit less than the maxnode it is given (libnuma
+                // passes its mask size + 1 for the same reason). The extra bit stays zero.
+                unsigned long maxNode = (unsigned long)g_highestNumaNode + 2;
+                size_t nodeMaskLength = (maxNode + bitsPerWord - 1) / bitsPerWord;
+                size_t nodeMaskSize = nodeMaskLength * sizeof(unsigned long);
+
+                unsigned long *nodeMask = (unsigned long*)malloc(nodeMaskSize);
+                if (nodeMask != NULL)
+                {
+                    memset(nodeMask, 0, nodeMaskSize);
+                    nodeMask[nndPreferred / bitsPerWord] = ((unsigned long)1) << (nndPreferred % bitsPerWord);
+
+                    int st = mbind(result, dwSize, MPOL_PREFERRED, nodeMask, maxNode, 0);
+                    free(nodeMask);
+                    _ASSERTE(st == 0);
+                    // If the mbind fails, we still return the allocated memory since the nndPreferred is just a hint
+                }
+                // If the mask cannot be allocated, the hint is skipped for the same reason
+            }
+#endif // HAVE_NUMA_H
+        }
+        else
+        {
+            // The specified node number is larger than the maximum available one
+            SetLastError(ERROR_INVALID_PARAMETER);
+        }
+    }
+    else
+    {
+        // PAL supports allocating from the current process virtual space only
+        SetLastError(ERROR_INVALID_PARAMETER);
+    }
+
+    LOGEXIT("VirtualAllocExNuma returns %p\n", result);
+    PERF_EXIT(VirtualAllocExNuma);
+
+    return result;
+}