summary | refs | log | tree | commit | diff
path: root/src/pal/src/numa/numa.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/pal/src/numa/numa.cpp')
-rw-r--r-- src/pal/src/numa/numa.cpp 692
1 files changed, 692 insertions, 0 deletions
diff --git a/src/pal/src/numa/numa.cpp b/src/pal/src/numa/numa.cpp
new file mode 100644
index 0000000000..549c10a71f
--- /dev/null
+++ b/src/pal/src/numa/numa.cpp
@@ -0,0 +1,692 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*++
+
+
+
+Module Name:
+
+ numa.cpp
+
+Abstract:
+
+ Implementation of NUMA related APIs
+
+--*/
+
+#include "pal/dbgmsg.h"
+SET_DEFAULT_DEBUG_CHANNEL(NUMA);
+
+#include "pal/palinternal.h"
+#include "pal/dbgmsg.h"
+#include "pal/numa.h"
+#include "pal/corunix.hpp"
+#include "pal/thread.hpp"
+
+#if HAVE_NUMA_H
+#include <numa.h>
+#include <numaif.h>
+#endif
+
+#if HAVE_PTHREAD_NP_H
+#include <pthread_np.h>
+#endif
+
+#include <pthread.h>
+
+using namespace CorUnix;
+
+#if HAVE_CPUSET_T
+typedef cpuset_t cpu_set_t; // when HAVE_CPUSET_T is set, make cpuset_t usable under the cpu_set_t name used below
+#endif
+
+int GetNumberOfProcessors(); // not defined in this file; supplies g_cpuCount when NUMA support is unavailable
+
+// CPU affinity descriptor (element type of g_cpuToAffinity; entries are all 0xff bytes until populated)
+struct CpuAffinity
+{
+ // NUMA node the CPU belongs to (stored in a BYTE, so only node numbers up to 255 fit)
+ BYTE Node;
+ // CPU number relative to the group the CPU is in; used as the bit position in the group's affinity mask
+ BYTE Number;
+ // CPU group; reads as 0xffff (NO_GROUP) while the entry has not been populated
+ WORD Group;
+};
+
+// Array of g_cpuCount entries mapping global CPU index to its affinity (node, group, number within the group)
+CpuAffinity *g_cpuToAffinity = NULL;
+
+// Array mapping CPU group and index in the group to the global CPU index; indexed as group * MaxCpusPerGroup + index, -1 marks an unused slot
+short *g_groupAndIndexToCpu = NULL;
+// Array of g_groupCount entries mapping CPU group to the corresponding affinity mask of the CPUs in the group
+KAFFINITY *g_groupToCpuMask = NULL;
+// Array of g_groupCount entries mapping CPU group to the number of processors in the group
+BYTE *g_groupToCpuCount = NULL;
+
+// Total number of processors (numa_num_possible_cpus() when NUMA is available, GetNumberOfProcessors() otherwise)
+int g_cpuCount = 0;
+// Total number of CPU groups (sizes the per-group arrays above)
+int g_groupCount = 0;
+// The highest NUMA node available (0 when NUMA is not available)
+int g_highestNumaNode = 0;
+
+static const int MaxCpusPerGroup = 8 * sizeof(KAFFINITY); // one CPU per bit of a KAFFINITY mask
+static const WORD NO_GROUP = 0xffff; // sentinel for "no group assigned / no group found yet"
+
+/*++
+Function:
+    AllocateLookupArrays
+
+Allocate the CPU and group lookup arrays and fill them with their initial
+values. g_cpuCount and g_groupCount determine the array sizes, so they have
+to be set before this function is called.
+
+Initial contents:
+    g_groupAndIndexToCpu - all bytes 0xff, so every slot reads as -1 (no CPU)
+    g_cpuToAffinity      - all bytes 0xff, so Group reads as NO_GROUP
+    g_groupToCpuMask     - zero
+    g_groupToCpuCount    - zero
+
+If any of the allocations fails, everything that was allocated is released
+and all four pointers are left NULL, so the caller can detect the failure by
+checking the pointers.
+--*/
+VOID
+AllocateLookupArrays()
+{
+    const size_t groupAndIndexToCpuSize = (size_t)g_groupCount * MaxCpusPerGroup * sizeof(short);
+    const size_t cpuToAffinitySize = (size_t)g_cpuCount * sizeof(CpuAffinity);
+    const size_t groupToCpuMaskSize = (size_t)g_groupCount * sizeof(KAFFINITY);
+    const size_t groupToCpuCountSize = (size_t)g_groupCount * sizeof(BYTE);
+
+    g_groupAndIndexToCpu = (short*)malloc(groupAndIndexToCpuSize);
+    g_cpuToAffinity = (CpuAffinity*)malloc(cpuToAffinitySize);
+    g_groupToCpuMask = (KAFFINITY*)malloc(groupToCpuMaskSize);
+    g_groupToCpuCount = (BYTE*)malloc(groupToCpuCountSize);
+
+    if ((g_groupAndIndexToCpu == NULL) ||
+        (g_cpuToAffinity == NULL) ||
+        (g_groupToCpuMask == NULL) ||
+        (g_groupToCpuCount == NULL))
+    {
+        // Never hand a NULL pointer to memset; drop the partial allocation instead.
+        free(g_groupAndIndexToCpu);
+        free(g_cpuToAffinity);
+        free(g_groupToCpuMask);
+        free(g_groupToCpuCount);
+
+        g_groupAndIndexToCpu = NULL;
+        g_cpuToAffinity = NULL;
+        g_groupToCpuMask = NULL;
+        g_groupToCpuCount = NULL;
+        return;
+    }
+
+    memset(g_groupAndIndexToCpu, 0xff, groupAndIndexToCpuSize);
+    memset(g_cpuToAffinity, 0xff, cpuToAffinitySize);
+    memset(g_groupToCpuMask, 0, groupToCpuMaskSize);
+    memset(g_groupToCpuCount, 0, groupToCpuCountSize);
+}
+
+/*++
+Function:
+    FreeLookupArrays
+
+Release the four CPU and group lookup arrays and reset their pointers to NULL,
+so that a repeated call is harmless.
+--*/
+VOID
+FreeLookupArrays()
+{
+    free(g_groupAndIndexToCpu);
+    g_groupAndIndexToCpu = NULL;
+
+    free(g_cpuToAffinity);
+    g_cpuToAffinity = NULL;
+
+    free(g_groupToCpuMask);
+    g_groupToCpuMask = NULL;
+
+    free(g_groupToCpuCount);
+    g_groupToCpuCount = NULL;
+}
+
+/*++
+Function:
+    GetFullAffinityMask
+
+Get affinity mask for the specified number of processors with all
+the processors enabled, i.e. a mask with the lowest cpuCount bits set.
+
+A count that is equal to or larger than the number of bits in KAFFINITY
+yields a mask with all bits set; shifting by the full width of the type
+would be undefined behavior. A count of zero or less yields an empty mask.
+--*/
+KAFFINITY GetFullAffinityMask(int cpuCount)
+{
+    const int maskBits = (int)(8 * sizeof(KAFFINITY));
+
+    if (cpuCount >= maskBits)
+    {
+        return ~(KAFFINITY)0;
+    }
+
+    if (cpuCount <= 0)
+    {
+        return 0;
+    }
+
+    return ((KAFFINITY)1 << cpuCount) - 1;
+}
+
+/*++
+Function:
+    LookupArraysAllocated
+
+Check whether all four lookup arrays are allocated.
+--*/
+static bool
+LookupArraysAllocated()
+{
+    return (g_groupAndIndexToCpu != NULL) &&
+           (g_cpuToAffinity != NULL) &&
+           (g_groupToCpuMask != NULL) &&
+           (g_groupToCpuCount != NULL);
+}
+
+/*++
+Function:
+    NUMASupportInitialize
+
+Initialize data structures for getting and setting thread affinities to processors and
+querying NUMA related processor information.
+On systems with no NUMA support, it behaves as if there was a single NUMA node with
+a single group of processors.
+
+Return value:
+    TRUE on success, FALSE if the lookup arrays could not be allocated.
+--*/
+BOOL
+NUMASupportInitialize()
+{
+#if HAVE_NUMA_H
+    if (numa_available() != -1)
+    {
+        struct bitmask *mask = numa_allocate_cpumask();
+        int numaNodesCount = numa_max_node() + 1;
+
+        g_cpuCount = numa_num_possible_cpus();
+        g_groupCount = 0;
+
+        // First pass: count the groups. Each node is split into as many groups of at most
+        // MaxCpusPerGroup CPUs as are needed to hold the CPUs of the node.
+        for (int i = 0; i < numaNodesCount; i++)
+        {
+            int st = numa_node_to_cpus(i, mask);
+            // The only failure that can happen is that the mask is not large enough
+            // but that cannot happen since the mask was allocated by numa_allocate_cpumask
+            _ASSERTE(st == 0);
+            unsigned int nodeCpuCount = numa_bitmask_weight(mask);
+            unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup;
+            g_groupCount += nodeGroupCount;
+        }
+
+        AllocateLookupArrays();
+        if (!LookupArraysAllocated())
+        {
+            FreeLookupArrays();
+            numa_free_cpumask(mask);
+            return FALSE;
+        }
+
+        WORD currentGroup = 0;
+        int currentGroupCpus = 0;
+
+        // Second pass: assign each CPU of each node to a group and record the mappings.
+        for (int i = 0; i < numaNodesCount; i++)
+        {
+            int st = numa_node_to_cpus(i, mask);
+            // The only failure that can happen is that the mask is not large enough
+            // but that cannot happen since the mask was allocated by numa_allocate_cpumask
+            _ASSERTE(st == 0);
+
+            for (int j = 0; j < g_cpuCount; j++)
+            {
+                if (numa_bitmask_isbitset(mask, j))
+                {
+                    if (currentGroupCpus == MaxCpusPerGroup)
+                    {
+                        // The current group is full, close it and start a new one.
+                        // All bits of the mask are set; shifting 1 by the full width of
+                        // KAFFINITY to compute it would be undefined behavior.
+                        g_groupToCpuCount[currentGroup] = MaxCpusPerGroup;
+                        g_groupToCpuMask[currentGroup] = ~(KAFFINITY)0;
+                        currentGroupCpus = 0;
+                        currentGroup++;
+                    }
+                    g_cpuToAffinity[j].Node = i;
+                    g_cpuToAffinity[j].Group = currentGroup;
+                    g_cpuToAffinity[j].Number = currentGroupCpus;
+                    g_groupAndIndexToCpu[currentGroup * MaxCpusPerGroup + currentGroupCpus] = j;
+                    currentGroupCpus++;
+                }
+            }
+
+            // Groups never span nodes, so close the last (possibly partial) group of this node.
+            if (currentGroupCpus != 0)
+            {
+                g_groupToCpuCount[currentGroup] = currentGroupCpus;
+                g_groupToCpuMask[currentGroup] = (currentGroupCpus == MaxCpusPerGroup)
+                    ? ~(KAFFINITY)0
+                    : GetFullAffinityMask(currentGroupCpus);
+                currentGroupCpus = 0;
+                currentGroup++;
+            }
+        }
+
+        numa_free_cpumask(mask);
+
+        g_highestNumaNode = numa_max_node();
+    }
+    else
+#endif // HAVE_NUMA_H
+    {
+        // No NUMA
+        g_cpuCount = GetNumberOfProcessors();
+        g_groupCount = 1;
+        g_highestNumaNode = 0;
+
+        AllocateLookupArrays();
+        if (!LookupArraysAllocated())
+        {
+            FreeLookupArrays();
+            return FALSE;
+        }
+
+        // Populate the single group on node 0. Without this every CPU would stay marked
+        // as unassigned and the group would report no processors.
+        // A single group can describe at most MaxCpusPerGroup CPUs; CPUs beyond that
+        // limit are left unassigned.
+        int groupCpus = (g_cpuCount < MaxCpusPerGroup) ? g_cpuCount : MaxCpusPerGroup;
+
+        for (int i = 0; i < groupCpus; i++)
+        {
+            g_cpuToAffinity[i].Node = 0;
+            g_cpuToAffinity[i].Group = 0;
+            g_cpuToAffinity[i].Number = (BYTE)i;
+            g_groupAndIndexToCpu[i] = (short)i;
+        }
+
+        g_groupToCpuCount[0] = (BYTE)groupCpus;
+        g_groupToCpuMask[0] = (groupCpus == MaxCpusPerGroup)
+            ? ~(KAFFINITY)0
+            : GetFullAffinityMask(groupCpus);
+    }
+
+    return TRUE;
+}
+
+/*++
+Function:
+    NUMASupportCleanup
+
+Counterpart of NUMASupportInitialize: releases the lookup arrays that back
+the NUMA support data structures.
+--*/
+VOID
+NUMASupportCleanup()
+{
+    // The lookup arrays are the only resources that are released here.
+    FreeLookupArrays();
+}
+
+/*++
+Function:
+    GetNumaHighestNodeNumber
+
+See MSDN doc.
+
+Stores the highest NUMA node number recorded by NUMASupportInitialize into
+*HighestNodeNumber. The function always returns TRUE.
+--*/
+BOOL
+PALAPI
+GetNumaHighestNodeNumber(
+    OUT PULONG HighestNodeNumber
+)
+{
+    PERF_ENTRY(GetNumaHighestNodeNumber);
+    ENTRY("GetNumaHighestNodeNumber(HighestNodeNumber=%p)\n", HighestNodeNumber);
+
+    // The value is just read from a global, so there is no failure path.
+    const BOOL success = TRUE;
+    *HighestNodeNumber = (ULONG)g_highestNumaNode;
+
+    LOGEXIT("GetNumaHighestNodeNumber returns BOOL %d\n", success);
+    PERF_EXIT(GetNumaHighestNodeNumber);
+
+    return success;
+}
+
+/*++
+Function:
+    GetNumaProcessorNodeEx
+
+See MSDN doc.
+
+Looks up the NUMA node of the processor identified by its group and its
+number within the group. When the processor descriptor is not valid or does
+not identify a known CPU, *NodeNumber is set to 0xffff, the last error is set
+to ERROR_INVALID_PARAMETER and FALSE is returned.
+--*/
+BOOL
+PALAPI
+GetNumaProcessorNodeEx(
+    IN PPROCESSOR_NUMBER Processor,
+    OUT PUSHORT NodeNumber
+)
+{
+    PERF_ENTRY(GetNumaProcessorNodeEx);
+    ENTRY("GetNumaProcessorNodeEx(Processor=%p, NodeNumber=%p)\n", Processor, NodeNumber);
+
+    // -1 is the marker of a slot with no CPU in g_groupAndIndexToCpu
+    short globalCpuIndex = -1;
+
+    if ((Processor->Group < g_groupCount) &&
+        (Processor->Number < MaxCpusPerGroup) &&
+        (Processor->Reserved == 0))
+    {
+        globalCpuIndex = g_groupAndIndexToCpu[Processor->Group * MaxCpusPerGroup + Processor->Number];
+    }
+
+    BOOL success;
+
+    if (globalCpuIndex != -1)
+    {
+        *NodeNumber = g_cpuToAffinity[globalCpuIndex].Node;
+        success = TRUE;
+    }
+    else
+    {
+        *NodeNumber = 0xffff;
+        SetLastError(ERROR_INVALID_PARAMETER);
+        success = FALSE;
+    }
+
+    LOGEXIT("GetNumaProcessorNodeEx returns BOOL %d\n", success);
+    PERF_EXIT(GetNumaProcessorNodeEx);
+
+    return success;
+}
+
+/*++
+Function:
+    GetLogicalProcessorInformationEx
+
+See MSDN doc.
+
+Only the RelationGroup relationship is supported; any other relationship
+fails with ERROR_INVALID_PARAMETER.
+
+For RelationGroup, *ReturnedLength is always updated to the size that is
+needed to describe all the groups. When Buffer is NULL or *ReturnedLength is
+smaller than that size on input, nothing is written to Buffer and the call
+fails with ERROR_INSUFFICIENT_BUFFER, so the caller can query the size first.
+--*/
+BOOL
+PALAPI
+GetLogicalProcessorInformationEx(
+    IN LOGICAL_PROCESSOR_RELATIONSHIP RelationshipType,
+    OUT OPTIONAL PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer,
+    IN OUT PDWORD ReturnedLength
+)
+{
+    PERF_ENTRY(GetLogicalProcessorInformationEx);
+    ENTRY("GetLogicalProcessorInformationEx(RelationshipType=%d, Buffer=%p, ReturnedLength=%p)\n", RelationshipType, Buffer, ReturnedLength);
+
+    BOOL success = FALSE;
+
+    if (RelationshipType == RelationGroup)
+    {
+        // Header of the record + header of the group relationship + one entry per group
+        size_t requiredSize = __builtin_offsetof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, Group);
+        requiredSize += __builtin_offsetof(GROUP_RELATIONSHIP, GroupInfo);
+        requiredSize += g_groupCount * sizeof(PROCESSOR_GROUP_INFO);
+
+        // Buffer is optional, so it must not be written to when it is NULL
+        // even if the passed in length claims there is enough space.
+        if ((Buffer != NULL) && (*ReturnedLength >= requiredSize))
+        {
+            Buffer->Relationship = RelationGroup;
+            Buffer->Size = requiredSize;
+            Buffer->Group.MaximumGroupCount = g_groupCount;
+            Buffer->Group.ActiveGroupCount = g_groupCount;
+            for (int i = 0; i < g_groupCount; i++)
+            {
+                Buffer->Group.GroupInfo[i].MaximumProcessorCount = MaxCpusPerGroup;
+                Buffer->Group.GroupInfo[i].ActiveProcessorCount = g_groupToCpuCount[i];
+                Buffer->Group.GroupInfo[i].ActiveProcessorMask = g_groupToCpuMask[i];
+            }
+
+            success = TRUE;
+        }
+        else
+        {
+            SetLastError(ERROR_INSUFFICIENT_BUFFER);
+        }
+
+        *ReturnedLength = requiredSize;
+    }
+    else
+    {
+        // We only support the group relationship
+        SetLastError(ERROR_INVALID_PARAMETER);
+    }
+
+    LOGEXIT("GetLogicalProcessorInformationEx returns BOOL %d\n", success);
+    PERF_EXIT(GetLogicalProcessorInformationEx);
+
+    return success;
+}
+
+/*++
+Function:
+    GetThreadGroupAffinityInternal
+
+Get the group affinity for the specified pthread
+
+Parameters:
+    thread        - pthread to query
+    GroupAffinity - receives the group and the mask of the CPUs in that group
+                    the thread is allowed to run on
+
+Return value:
+    TRUE on success. FALSE with the last error set to ERROR_GEN_FAILURE when
+    the affinity of the thread cannot be obtained.
+
+When thread affinity APIs are not available, group 0 with a mask covering
+g_cpuCount processors is reported.
+--*/
+BOOL
+GetThreadGroupAffinityInternal(
+    IN pthread_t thread,
+    OUT PGROUP_AFFINITY GroupAffinity
+)
+{
+    BOOL success = FALSE;
+
+#if HAVE_PTHREAD_GETAFFINITY_NP
+    cpu_set_t cpuSet;
+
+    int st = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &cpuSet);
+
+    if (st == 0)
+    {
+        WORD group = NO_GROUP;
+        KAFFINITY mask = 0;
+
+        for (int i = 0; i < g_cpuCount; i++)
+        {
+            if (CPU_ISSET(i, &cpuSet))
+            {
+                WORD g = g_cpuToAffinity[i].Group;
+
+                // A CPU that was never assigned to a group has no valid number within
+                // a group either (it reads as 0xff), so it cannot be represented in the
+                // mask. Using it would shift by more than the width of KAFFINITY.
+                if (g == NO_GROUP)
+                {
+                    continue;
+                }
+
+                // Unless the thread affinity was already set by SetThreadGroupAffinity, it is possible that
+                // the current thread has affinity with processors from multiple groups. So we report just the
+                // first group we find.
+                if (group == NO_GROUP || g == group)
+                {
+                    group = g;
+                    mask |= ((KAFFINITY)1) << g_cpuToAffinity[i].Number;
+                }
+            }
+        }
+
+        GroupAffinity->Group = group;
+        GroupAffinity->Mask = mask;
+        success = TRUE;
+    }
+    else
+    {
+        SetLastError(ERROR_GEN_FAILURE);
+    }
+#else // HAVE_PTHREAD_GETAFFINITY_NP
+    // There is no API to manage thread affinity, so let's return a group affinity
+    // with all the CPUs on the system.
+    GroupAffinity->Group = 0;
+    GroupAffinity->Mask = GetFullAffinityMask(g_cpuCount);
+    success = TRUE;
+#endif // HAVE_PTHREAD_GETAFFINITY_NP
+
+    return success;
+}
+
+/*++
+Function:
+    GetThreadGroupAffinity
+
+See MSDN doc.
+
+Note: hThread is only used for logging here. The affinity that is reported is
+always that of the calling thread.
+--*/
+BOOL
+PALAPI
+GetThreadGroupAffinity(
+    IN HANDLE hThread,
+    OUT PGROUP_AFFINITY GroupAffinity
+)
+{
+    PERF_ENTRY(GetThreadGroupAffinity);
+    ENTRY("GetThreadGroupAffinity(hThread=%p, GroupAffinity=%p)\n", hThread, GroupAffinity);
+
+    // Query the pthread that backs the calling PAL thread
+    pthread_t currentThread = InternalGetCurrentThread()->GetPThreadSelf();
+    BOOL success = GetThreadGroupAffinityInternal(currentThread, GroupAffinity);
+
+    LOGEXIT("GetThreadGroupAffinity returns BOOL %d\n", success);
+    PERF_EXIT(GetThreadGroupAffinity);
+
+    return success;
+}
+
+
+/*++
+Function:
+    SetThreadGroupAffinity
+
+See MSDN doc.
+
+Note: hThread is only used for logging here. The affinity is always applied
+to the calling thread.
+
+Failures set the last error as follows:
+    ERROR_INVALID_PARAMETER - the group does not exist or the mask contains no
+                              processor the thread is allowed to run on
+    ERROR_ACCESS_DENIED     - the affinity change was not permitted
+    ERROR_GEN_FAILURE       - any other failure
+
+When thread affinity APIs are not available, the request is ignored and the
+function returns TRUE.
+--*/
+BOOL
+PALAPI
+SetThreadGroupAffinity(
+    IN HANDLE hThread,
+    IN const GROUP_AFFINITY *GroupAffinity,
+    OUT OPTIONAL PGROUP_AFFINITY PreviousGroupAffinity
+)
+{
+    PERF_ENTRY(SetThreadGroupAffinity);
+    ENTRY("SetThreadGroupAffinity(hThread=%p, GroupAffinity=%p, PreviousGroupAffinity=%p)\n", hThread, GroupAffinity, PreviousGroupAffinity);
+
+    CPalThread *palThread = InternalGetCurrentThread();
+
+    pthread_t thread = palThread->GetPThreadSelf();
+
+    if (PreviousGroupAffinity != NULL)
+    {
+        GetThreadGroupAffinityInternal(thread, PreviousGroupAffinity);
+    }
+
+    BOOL success = FALSE;
+
+#if HAVE_PTHREAD_GETAFFINITY_NP
+    if (GroupAffinity->Group >= g_groupCount)
+    {
+        // The lookup below is indexed by the group, so a group that does not exist
+        // has to be rejected before it is used.
+        SetLastError(ERROR_INVALID_PARAMETER);
+    }
+    else
+    {
+        int groupStartIndex = GroupAffinity->Group * MaxCpusPerGroup;
+        KAFFINITY mask = 1;
+        cpu_set_t cpuSet;
+        CPU_ZERO(&cpuSet);
+
+        // Translate each bit set in the group relative mask to the global CPU index
+        for (int i = 0; i < MaxCpusPerGroup; i++, mask <<= 1)
+        {
+            if (GroupAffinity->Mask & mask)
+            {
+                int cpu = g_groupAndIndexToCpu[groupStartIndex + i];
+                if (cpu != -1)
+                {
+                    CPU_SET(cpu, &cpuSet);
+                }
+            }
+        }
+
+        // pthread_setaffinity_np reports a failure by returning the error number,
+        // it does not return -1 and set errno.
+        int st = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuSet);
+
+        if (st != 0)
+        {
+            switch (st)
+            {
+                case EINVAL:
+                    // There is no processor in the mask that is allowed to execute the process
+                    SetLastError(ERROR_INVALID_PARAMETER);
+                    break;
+                case EPERM:
+                    SetLastError(ERROR_ACCESS_DENIED);
+                    break;
+                default:
+                    SetLastError(ERROR_GEN_FAILURE);
+                    break;
+            }
+        }
+
+        success = (st == 0);
+    }
+#else // HAVE_PTHREAD_GETAFFINITY_NP
+    // There is no API to manage thread affinity, so let's ignore the request
+    success = TRUE;
+#endif // HAVE_PTHREAD_GETAFFINITY_NP
+
+    LOGEXIT("SetThreadGroupAffinity returns BOOL %d\n", success);
+    PERF_EXIT(SetThreadGroupAffinity);
+
+    return success;
+}
+
+/*++
+Function:
+    GetCurrentProcessorNumberEx
+
+See MSDN doc.
+
+Fills ProcNumber with the group and the number within the group of the
+processor reported by GetCurrentProcessorNumber. The Reserved member is set
+to zero so that the result is accepted by GetNumaProcessorNodeEx, which
+rejects processor descriptors with a non-zero Reserved member.
+--*/
+VOID
+PALAPI
+GetCurrentProcessorNumberEx(
+    OUT PPROCESSOR_NUMBER ProcNumber
+)
+{
+    PERF_ENTRY(GetCurrentProcessorNumberEx);
+    ENTRY("GetCurrentProcessorNumberEx(ProcNumber=%p\n", ProcNumber);
+
+    DWORD cpu = GetCurrentProcessorNumber();
+    _ASSERTE(cpu < (DWORD)g_cpuCount);
+    ProcNumber->Group = g_cpuToAffinity[cpu].Group;
+    ProcNumber->Number = g_cpuToAffinity[cpu].Number;
+    ProcNumber->Reserved = 0;
+
+    LOGEXIT("GetCurrentProcessorNumberEx\n");
+    PERF_EXIT(GetCurrentProcessorNumberEx);
+}
+
+/*++
+Function:
+    GetProcessAffinityMask
+
+See MSDN doc.
+
+Only the current process is supported; any other handle fails with
+ERROR_INVALID_PARAMETER.
+
+The system mask covers g_cpuCount processors. The process mask has a bit set
+for each processor the process is allowed to run on, relative to the group
+those processors belong to. When the process has affinity with processors
+from more than one group, both masks are set to zero and the call succeeds.
+
+When process affinity APIs are not available, both masks are set to the
+system mask.
+--*/
+BOOL
+PALAPI
+GetProcessAffinityMask(
+    IN HANDLE hProcess,
+    OUT PDWORD_PTR lpProcessAffinityMask,
+    OUT PDWORD_PTR lpSystemAffinityMask
+)
+{
+    PERF_ENTRY(GetProcessAffinityMask);
+    ENTRY("GetProcessAffinityMask(hProcess=%p, lpProcessAffinityMask=%p, lpSystemAffinityMask=%p\n", hProcess, lpProcessAffinityMask, lpSystemAffinityMask);
+
+    BOOL success = FALSE;
+
+    if (hProcess == GetCurrentProcess())
+    {
+        DWORD_PTR systemMask = GetFullAffinityMask(g_cpuCount);
+
+#if HAVE_SCHED_GETAFFINITY
+        int pid = getpid();
+        cpu_set_t cpuSet;
+        int st = sched_getaffinity(pid, sizeof(cpu_set_t), &cpuSet);
+        if (st == 0)
+        {
+            WORD group = NO_GROUP;
+            DWORD_PTR processMask = 0;
+
+            for (int i = 0; i < g_cpuCount; i++)
+            {
+                if (CPU_ISSET(i, &cpuSet))
+                {
+                    WORD g = g_cpuToAffinity[i].Group;
+
+                    // A CPU that was never assigned to a group has no valid number within
+                    // a group either (it reads as 0xff), so it cannot be represented in the
+                    // mask. Using it would shift by more than the width of the mask.
+                    if (g == NO_GROUP)
+                    {
+                        continue;
+                    }
+
+                    if (group == NO_GROUP || g == group)
+                    {
+                        group = g;
+                        processMask |= ((DWORD_PTR)1) << g_cpuToAffinity[i].Number;
+                    }
+                    else
+                    {
+                        // The process has affinity in more than one group, in such case
+                        // the function needs to return zero in both masks.
+                        processMask = 0;
+                        systemMask = 0;
+                        group = NO_GROUP;
+                        break;
+                    }
+                }
+            }
+
+            success = TRUE;
+
+            *lpProcessAffinityMask = processMask;
+            *lpSystemAffinityMask = systemMask;
+        }
+        else
+        {
+            // We should not get any of the errors that the sched_getaffinity can return since none
+            // of them applies for the current thread, so this is an unexpected kind of failure.
+            SetLastError(ERROR_GEN_FAILURE);
+        }
+#else // HAVE_SCHED_GETAFFINITY
+        // There is no API to manage thread affinity, so let's return both affinity masks
+        // with all the CPUs on the system set.
+        *lpSystemAffinityMask = systemMask;
+        *lpProcessAffinityMask = systemMask;
+
+        success = TRUE;
+#endif // HAVE_SCHED_GETAFFINITY
+    }
+    else
+    {
+        // PAL supports getting affinity mask for the current process only
+        SetLastError(ERROR_INVALID_PARAMETER);
+    }
+
+    LOGEXIT("GetProcessAffinityMask returns BOOL %d\n", success);
+    PERF_EXIT(GetProcessAffinityMask);
+
+    return success;
+}
+
+/*++
+Function:
+    VirtualAllocExNuma
+
+See MSDN doc.
+
+Only the current process is supported and nndPreferred must not be larger
+than the highest NUMA node; otherwise the call fails with
+ERROR_INVALID_PARAMETER and returns NULL.
+
+The memory is allocated by VirtualAlloc. When NUMA support is compiled in,
+the preferred node is then requested for the allocated range using mbind.
+The preferred node is just a hint, so the allocated memory is returned even
+if the hint could not be applied.
+--*/
+LPVOID
+PALAPI
+VirtualAllocExNuma(
+    IN HANDLE hProcess,
+    IN OPTIONAL LPVOID lpAddress,
+    IN SIZE_T dwSize,
+    IN DWORD flAllocationType,
+    IN DWORD flProtect,
+    IN DWORD nndPreferred
+)
+{
+    PERF_ENTRY(VirtualAllocExNuma);
+    ENTRY("VirtualAllocExNuma(hProcess=%p, lpAddress=%p, dwSize=%u, flAllocationType=%#x, flProtect=%#x, nndPreferred=%d\n",
+        hProcess, lpAddress, dwSize, flAllocationType, flProtect, nndPreferred);
+
+    LPVOID result = NULL;
+
+    if (hProcess == GetCurrentProcess())
+    {
+        if (nndPreferred <= g_highestNumaNode)
+        {
+            result = VirtualAlloc(lpAddress, dwSize, flAllocationType, flProtect);
+#if HAVE_NUMA_H
+            if (result != NULL)
+            {
+                // The node mask is a bit array made of unsigned long words, so the word
+                // index and the bit position are derived from the number of bits in
+                // a word, not from its size in bytes.
+                const size_t bitsPerWord = 8 * sizeof(unsigned long);
+
+                // One bit for each of the nodes 0..g_highestNumaNode plus one spare bit.
+                // NOTE(review): the spare bit is there because Linux is known to consider
+                // one bit less than the maxnode passed to mbind - confirm against the
+                // mbind documentation. The spare bit is always zero, so it is harmless.
+                const size_t usedNodeMaskBits = (size_t)g_highestNumaNode + 2;
+                const size_t nodeMaskLength = (usedNodeMaskBits + bitsPerWord - 1) / bitsPerWord;
+
+                // calloc returns the whole mask zeroed and pairs with the free below
+                unsigned long *nodeMask = (unsigned long*)calloc(nodeMaskLength, sizeof(unsigned long));
+
+                if (nodeMask != NULL)
+                {
+                    nodeMask[nndPreferred / bitsPerWord] = ((unsigned long)1) << (nndPreferred % bitsPerWord);
+
+                    int st = mbind(result, dwSize, MPOL_PREFERRED, nodeMask, usedNodeMaskBits, 0);
+                    free(nodeMask);
+                    _ASSERTE(st == 0);
+                    // If the mbind fails, we still return the allocated memory since the nndPreferred is just a hint
+                }
+                // If the mask cannot be allocated, the hint is skipped for the same reason
+            }
+#endif // HAVE_NUMA_H
+        }
+        else
+        {
+            // The specified node number is larger than the maximum available one
+            SetLastError(ERROR_INVALID_PARAMETER);
+        }
+    }
+    else
+    {
+        // PAL supports allocating from the current process virtual space only
+        SetLastError(ERROR_INVALID_PARAMETER);
+    }
+
+    LOGEXIT("VirtualAllocExNuma returns %p\n", result);
+    PERF_EXIT(VirtualAllocExNuma);
+
+    return result;
+}