diff options
Diffstat (limited to 'src/pal/src/numa/numa.cpp')
-rw-r--r-- | src/pal/src/numa/numa.cpp | 692 |
1 files changed, 692 insertions, 0 deletions
diff --git a/src/pal/src/numa/numa.cpp b/src/pal/src/numa/numa.cpp new file mode 100644 index 0000000000..549c10a71f --- /dev/null +++ b/src/pal/src/numa/numa.cpp @@ -0,0 +1,692 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +/*++ + + + +Module Name: + + numa.cpp + +Abstract: + + Implementation of NUMA related APIs + +--*/ + +#include "pal/dbgmsg.h" +SET_DEFAULT_DEBUG_CHANNEL(NUMA); + +#include "pal/palinternal.h" +#include "pal/dbgmsg.h" +#include "pal/numa.h" +#include "pal/corunix.hpp" +#include "pal/thread.hpp" + +#if HAVE_NUMA_H +#include <numa.h> +#include <numaif.h> +#endif + +#if HAVE_PTHREAD_NP_H +#include <pthread_np.h> +#endif + +#include <pthread.h> + +using namespace CorUnix; + +#if HAVE_CPUSET_T +typedef cpuset_t cpu_set_t; +#endif + +int GetNumberOfProcessors(); + +// CPU affinity descriptor +struct CpuAffinity +{ + // NUMA node + BYTE Node; + // CPU number relative to the group the CPU is in + BYTE Number; + // CPU group + WORD Group; +}; + +// Array mapping global CPU index to its affinity +CpuAffinity *g_cpuToAffinity = NULL; + +// Array mapping CPU group and index in the group to the global CPU index +short *g_groupAndIndexToCpu = NULL; +// Array mapping CPU group to the corresponding affinity mask of the CPUs in the group +KAFFINITY *g_groupToCpuMask = NULL; +// Array mapping CPU group to the number of processors in the group +BYTE *g_groupToCpuCount = NULL; + +// Total number of processors in the system +int g_cpuCount = 0; +// Total number of CPU groups +int g_groupCount = 0; +// The highest NUMA node available +int g_highestNumaNode = 0; + +static const int MaxCpusPerGroup = 8 * sizeof(KAFFINITY); +static const WORD NO_GROUP = 0xffff; + +/*++ +Function: + AllocateLookupArrays + +Allocate CPU and group lookup arrays +--*/ +VOID +AllocateLookupArrays() +{ + g_groupAndIndexToCpu = (short*)malloc(g_groupCount * MaxCpusPerGroup * sizeof(short)); + g_cpuToAffinity = (CpuAffinity*)malloc(g_cpuCount * sizeof(CpuAffinity)); + g_groupToCpuMask = (KAFFINITY*)malloc(g_groupCount * sizeof(KAFFINITY)); + g_groupToCpuCount = (BYTE*)malloc(g_groupCount * sizeof(BYTE)); + + memset(g_groupAndIndexToCpu, 0xff, g_groupCount * MaxCpusPerGroup * sizeof(short)); + memset(g_cpuToAffinity, 0xff, g_cpuCount * sizeof(CpuAffinity)); + memset(g_groupToCpuMask, 0, g_groupCount * sizeof(KAFFINITY)); + memset(g_groupToCpuCount, 0, g_groupCount * sizeof(BYTE)); +} + +/*++ +Function: + FreeLookupArrays + +Free CPU and group lookup arrays +--*/ +VOID +FreeLookupArrays() +{ + free(g_groupAndIndexToCpu); + free(g_cpuToAffinity); + free(g_groupToCpuMask); + free(g_groupToCpuCount); + + g_groupAndIndexToCpu = NULL; + g_cpuToAffinity = NULL; + g_groupToCpuMask = NULL; + g_groupToCpuCount = NULL; +} + +/*++ +Function: + GetFullAffinityMask + +Get affinity mask for the specified number of processors with all +the processors enabled. +--*/ +KAFFINITY GetFullAffinityMask(int cpuCount) +{ + return ((KAFFINITY)1 << (cpuCount)) - 1; +} + +/*++ +Function: + NUMASupportInitialize + +Initialize data structures for getting and setting thread affinities to processors and +querying NUMA related processor information. +On systems with no NUMA support, it behaves as if there was a single NUMA node with +a single group of processors. +--*/ +BOOL +NUMASupportInitialize() +{ +#if HAVE_NUMA_H + if (numa_available() != -1) + { + struct bitmask *mask = numa_allocate_cpumask(); + int numaNodesCount = numa_max_node() + 1; + + g_cpuCount = numa_num_possible_cpus(); + g_groupCount = 0; + + for (int i = 0; i < numaNodesCount; i++) + { + int st = numa_node_to_cpus(i, mask); + // The only failure that can happen is that the mask is not large enough + // but that cannot happen since the mask was allocated by numa_allocate_cpumask + _ASSERTE(st == 0); + unsigned int nodeCpuCount = numa_bitmask_weight(mask); + unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup; + g_groupCount += nodeGroupCount; + } + + AllocateLookupArrays(); + + WORD currentGroup = 0; + int currentGroupCpus = 0; + + for (int i = 0; i < numaNodesCount; i++) + { + int st = numa_node_to_cpus(i, mask); + // The only failure that can happen is that the mask is not large enough + // but that cannot happen since the mask was allocated by numa_allocate_cpumask + _ASSERTE(st == 0); + unsigned int nodeCpuCount = numa_bitmask_weight(mask); + unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup; + for (int j = 0; j < g_cpuCount; j++) + { + if (numa_bitmask_isbitset(mask, j)) + { + if (currentGroupCpus == MaxCpusPerGroup) + { + g_groupToCpuCount[currentGroup] = MaxCpusPerGroup; + g_groupToCpuMask[currentGroup] = GetFullAffinityMask(MaxCpusPerGroup); + currentGroupCpus = 0; + currentGroup++; + } + g_cpuToAffinity[j].Node = i; + g_cpuToAffinity[j].Group = currentGroup; + g_cpuToAffinity[j].Number = currentGroupCpus; + g_groupAndIndexToCpu[currentGroup * MaxCpusPerGroup + currentGroupCpus] = j; + currentGroupCpus++; + } + } + + if (currentGroupCpus != 0) + { + g_groupToCpuCount[currentGroup] = currentGroupCpus; + g_groupToCpuMask[currentGroup] = GetFullAffinityMask(currentGroupCpus); + currentGroupCpus = 0; + currentGroup++; + } + } + + numa_free_cpumask(mask); + + g_highestNumaNode = numa_max_node(); + } + else +#endif // HAVE_NUMA_H + { + // No NUMA + g_cpuCount = GetNumberOfProcessors(); + g_groupCount = 1; + g_highestNumaNode = 0; + + AllocateLookupArrays(); + } + + return TRUE; +} + +/*++ +Function: + NUMASupportCleanup + +Cleanup of the NUMA support data structures +--*/ +VOID +NUMASupportCleanup() +{ + FreeLookupArrays(); +} + +/*++ +Function: + GetNumaHighestNodeNumber + +See MSDN doc. +--*/ +BOOL +PALAPI +GetNumaHighestNodeNumber( + OUT PULONG HighestNodeNumber +) +{ + PERF_ENTRY(GetNumaHighestNodeNumber); + ENTRY("GetNumaHighestNodeNumber(HighestNodeNumber=%p)\n", HighestNodeNumber); + *HighestNodeNumber = (ULONG)g_highestNumaNode; + + BOOL success = TRUE; + + LOGEXIT("GetNumaHighestNodeNumber returns BOOL %d\n", success); + PERF_EXIT(GetNumaHighestNodeNumber); + + return success; +} + +/*++ +Function: + GetNumaProcessorNodeEx + +See MSDN doc. +--*/ +BOOL +PALAPI +GetNumaProcessorNodeEx( + IN PPROCESSOR_NUMBER Processor, + OUT PUSHORT NodeNumber +) +{ + PERF_ENTRY(GetNumaProcessorNodeEx); + ENTRY("GetNumaProcessorNodeEx(Processor=%p, NodeNumber=%p)\n", Processor, NodeNumber); + + BOOL success = FALSE; + + if ((Processor->Group < g_groupCount) && + (Processor->Number < MaxCpusPerGroup) && + (Processor->Reserved == 0)) + { + short cpu = g_groupAndIndexToCpu[Processor->Group * MaxCpusPerGroup + Processor->Number]; + if (cpu != -1) + { + *NodeNumber = g_cpuToAffinity[cpu].Node; + success = TRUE; + } + } + + if (!success) + { + *NodeNumber = 0xffff; + SetLastError(ERROR_INVALID_PARAMETER); + } + + LOGEXIT("GetNumaProcessorNodeEx returns BOOL %d\n", success); + PERF_EXIT(GetNumaProcessorNodeEx); + + return success; +} + +/*++ +Function: + GetLogicalProcessorInformationEx + +See MSDN doc. +--*/ +BOOL +PALAPI +GetLogicalProcessorInformationEx( + IN LOGICAL_PROCESSOR_RELATIONSHIP RelationshipType, + OUT OPTIONAL PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer, + IN OUT PDWORD ReturnedLength +) +{ + PERF_ENTRY(GetLogicalProcessorInformationEx); + ENTRY("GetLogicalProcessorInformationEx(RelationshipType=%d, Buffer=%p, ReturnedLength=%p)\n", RelationshipType, Buffer, ReturnedLength); + + BOOL success = FALSE; + + if (RelationshipType == RelationGroup) + { + size_t requiredSize = __builtin_offsetof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, Group); + requiredSize += __builtin_offsetof(GROUP_RELATIONSHIP, GroupInfo); + requiredSize += g_groupCount * sizeof(PROCESSOR_GROUP_INFO); + + if (*ReturnedLength >= requiredSize) + { + Buffer->Relationship = RelationGroup; + Buffer->Size = requiredSize; + Buffer->Group.MaximumGroupCount = g_groupCount; + Buffer->Group.ActiveGroupCount = g_groupCount; + for (int i = 0; i < g_groupCount; i++) + { + Buffer->Group.GroupInfo[i].MaximumProcessorCount = MaxCpusPerGroup; + Buffer->Group.GroupInfo[i].ActiveProcessorCount = g_groupToCpuCount[i]; + Buffer->Group.GroupInfo[i].ActiveProcessorMask = g_groupToCpuMask[i]; + } + + success = TRUE; + } + else + { + SetLastError(ERROR_INSUFFICIENT_BUFFER); + } + + *ReturnedLength = requiredSize; + } + else + { + // We only support the group relationship + SetLastError(ERROR_INVALID_PARAMETER); + } + + LOGEXIT("GetLogicalProcessorInformationEx returns BOOL %d\n", success); + PERF_EXIT(GetLogicalProcessorInformationEx); + + return success; +} + +/*++ +Function: + GetThreadGroupAffinityInternal + +Get the group affinity for the specified pthread +--*/ +BOOL +GetThreadGroupAffinityInternal( + IN pthread_t thread, + OUT PGROUP_AFFINITY GroupAffinity +) +{ + BOOL success = FALSE; + +#if HAVE_PTHREAD_GETAFFINITY_NP + cpu_set_t cpuSet; + + int st = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &cpuSet); + + if (st == 0) + { + WORD group = NO_GROUP; + KAFFINITY mask = 0; + + for (int i = 0; i < g_cpuCount; i++) + { + if (CPU_ISSET(i, &cpuSet)) + { + WORD g = g_cpuToAffinity[i].Group; + // Unless the thread affinity was already set by SetThreadGroupAffinity, it is possible that + // the current thread has affinity with processors from multiple groups. So we report just the + // first group we find. + if (group == NO_GROUP || g == group) + { + group = g; + mask |= ((KAFFINITY)1) << g_cpuToAffinity[i].Number; + } + } + } + + GroupAffinity->Group = group; + GroupAffinity->Mask = mask; + success = TRUE; + } + else + { + SetLastError(ERROR_GEN_FAILURE); + } +#else // HAVE_PTHREAD_GETAFFINITY_NP + // There is no API to manage thread affinity, so let's return a group affinity + // with all the CPUs on the system. + GroupAffinity->Group = 0; + GroupAffinity->Mask = GetFullAffinityMask(g_cpuCount); + success = TRUE; +#endif // HAVE_PTHREAD_GETAFFINITY_NP + + return success; +} + +/*++ +Function: + GetThreadGroupAffinity + +See MSDN doc. +--*/ +BOOL +PALAPI +GetThreadGroupAffinity( + IN HANDLE hThread, + OUT PGROUP_AFFINITY GroupAffinity +) +{ + PERF_ENTRY(GetThreadGroupAffinity); + ENTRY("GetThreadGroupAffinity(hThread=%p, GroupAffinity=%p)\n", hThread, GroupAffinity); + + CPalThread *palThread = InternalGetCurrentThread(); + + BOOL success = GetThreadGroupAffinityInternal(palThread->GetPThreadSelf(), GroupAffinity); + + LOGEXIT("GetThreadGroupAffinity returns BOOL %d\n", success); + PERF_EXIT(GetThreadGroupAffinity); + + return success; +} + + +/*++ +Function: + SetThreadGroupAffinity + +See MSDN doc. +--*/ +BOOL +PALAPI +SetThreadGroupAffinity( + IN HANDLE hThread, + IN const GROUP_AFFINITY *GroupAffinity, + OUT OPTIONAL PGROUP_AFFINITY PreviousGroupAffinity +) +{ + PERF_ENTRY(SetThreadGroupAffinity); + ENTRY("SetThreadGroupAffinity(hThread=%p, GroupAffinity=%p, PreviousGroupAffinity=%p)\n", hThread, GroupAffinity, PreviousGroupAffinity); + + CPalThread *palThread = InternalGetCurrentThread(); + + pthread_t thread = palThread->GetPThreadSelf(); + + if (PreviousGroupAffinity != NULL) + { + GetThreadGroupAffinityInternal(thread, PreviousGroupAffinity); + } + +#if HAVE_PTHREAD_GETAFFINITY_NP + int groupStartIndex = GroupAffinity->Group * MaxCpusPerGroup; + KAFFINITY mask = 1; + cpu_set_t cpuSet; + CPU_ZERO(&cpuSet); + + for (int i = 0; i < MaxCpusPerGroup; i++, mask <<= 1) + { + if (GroupAffinity->Mask & mask) + { + int cpu = g_groupAndIndexToCpu[groupStartIndex + i]; + if (cpu != -1) + { + CPU_SET(cpu, &cpuSet); + } + } + } + + int st = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuSet); + + if (st == -1) + { + switch (errno) + { + case EINVAL: + // There is no processor in the mask that is allowed to execute the process + SetLastError(ERROR_INVALID_PARAMETER); + break; + case EPERM: + SetLastError(ERROR_ACCESS_DENIED); + break; + default: + SetLastError(ERROR_GEN_FAILURE); + break; + } + } + + BOOL success = (st == 0); +#else // HAVE_PTHREAD_GETAFFINITY_NP + // There is no API to manage thread affinity, so let's ignore the request + BOOL success = TRUE; +#endif // HAVE_PTHREAD_GETAFFINITY_NP + + LOGEXIT("SetThreadGroupAffinity returns BOOL %d\n", success); + PERF_EXIT(SetThreadGroupAffinity); + + return success; +} + +/*++ +Function: + GetCurrentProcessorNumberEx + +See MSDN doc. +--*/ +VOID +PALAPI +GetCurrentProcessorNumberEx( + OUT PPROCESSOR_NUMBER ProcNumber +) +{ + PERF_ENTRY(GetCurrentProcessorNumberEx); + ENTRY("GetCurrentProcessorNumberEx(ProcNumber=%p\n", ProcNumber); + + DWORD cpu = GetCurrentProcessorNumber(); + _ASSERTE(cpu < g_cpuCount); + ProcNumber->Group = g_cpuToAffinity[cpu].Group; + ProcNumber->Number = g_cpuToAffinity[cpu].Number; + + LOGEXIT("GetCurrentProcessorNumberEx\n"); + PERF_EXIT(GetCurrentProcessorNumberEx); +} + +/*++ +Function: + GetProcessAffinityMask + +See MSDN doc. +--*/ +BOOL +PALAPI +GetProcessAffinityMask( + IN HANDLE hProcess, + OUT PDWORD_PTR lpProcessAffinityMask, + OUT PDWORD_PTR lpSystemAffinityMask +) +{ + PERF_ENTRY(GetProcessAffinityMask); + ENTRY("GetProcessAffinityMask(hProcess=%p, lpProcessAffinityMask=%p, lpSystemAffinityMask=%p\n", hProcess, lpProcessAffinityMask, lpSystemAffinityMask); + + BOOL success = FALSE; + + if (hProcess == GetCurrentProcess()) + { + DWORD_PTR systemMask = GetFullAffinityMask(g_cpuCount); + +#if HAVE_SCHED_GETAFFINITY + int pid = getpid(); + cpu_set_t cpuSet; + int st = sched_getaffinity(pid, sizeof(cpu_set_t), &cpuSet); + if (st == 0) + { + WORD group = NO_GROUP; + DWORD_PTR processMask = 0; + + for (int i = 0; i < g_cpuCount; i++) + { + if (CPU_ISSET(i, &cpuSet)) + { + WORD g = g_cpuToAffinity[i].Group; + if (group == NO_GROUP || g == group) + { + group = g; + processMask |= ((DWORD_PTR)1) << g_cpuToAffinity[i].Number; + } + else + { + // The process has affinity in more than one group, in such case + // the function needs to return zero in both masks. + processMask = 0; + systemMask = 0; + group = NO_GROUP; + break; + } + } + } + + success = TRUE; + + *lpProcessAffinityMask = processMask; + *lpSystemAffinityMask = systemMask; + } + else + { + // We should not get any of the errors that the sched_getaffinity can return since none + // of them applies for the current thread, so this is an unexpected kind of failure. + SetLastError(ERROR_GEN_FAILURE); + } +#else // HAVE_SCHED_GETAFFINITY + // There is no API to manage thread affinity, so let's return both affinity masks + // with all the CPUs on the system set. + *lpSystemAffinityMask = systemMask; + *lpProcessAffinityMask = systemMask; + + success = TRUE; +#endif // HAVE_SCHED_GETAFFINITY + } + else + { + // PAL supports getting affinity mask for the current process only + SetLastError(ERROR_INVALID_PARAMETER); + } + + LOGEXIT("GetProcessAffinityMask returns BOOL %d\n", success); + PERF_EXIT(GetProcessAffinityMask); + + return success; +} + +/*++ +Function: + VirtualAllocExNuma + +See MSDN doc. +--*/ +LPVOID +PALAPI +VirtualAllocExNuma( + IN HANDLE hProcess, + IN OPTIONAL LPVOID lpAddress, + IN SIZE_T dwSize, + IN DWORD flAllocationType, + IN DWORD flProtect, + IN DWORD nndPreferred +) +{ + PERF_ENTRY(VirtualAllocExNuma); + ENTRY("VirtualAllocExNuma(hProcess=%p, lpAddress=%p, dwSize=%u, flAllocationType=%#x, flProtect=%#x, nndPreferred=%d\n", + hProcess, lpAddress, dwSize, flAllocationType, flProtect, nndPreferred); + + LPVOID result = NULL; + + if (hProcess == GetCurrentProcess()) + { + if (nndPreferred <= g_highestNumaNode) + { + result = VirtualAlloc(lpAddress, dwSize, flAllocationType, flProtect); +#if HAVE_NUMA_H + if (result != NULL) + { + int nodeMaskLength = (g_highestNumaNode + 1 + sizeof(unsigned long) - 1) / sizeof(unsigned long); + unsigned long *nodeMask = new unsigned long[nodeMaskLength]; + + memset(nodeMask, 0, nodeMaskLength); + + int index = nndPreferred / sizeof(unsigned long); + int mask = ((unsigned long)1) << (nndPreferred & (sizeof(unsigned long) - 1)); + nodeMask[index] = mask; + + int st = mbind(result, dwSize, MPOL_PREFERRED, nodeMask, g_highestNumaNode, 0); + free(nodeMask); + _ASSERTE(st == 0); + // If the mbind fails, we still return the allocated memory since the nndPreferred is just a hint + } +#endif // HAVE_NUMA_H + } + else + { + // The specified node number is larger than the maximum available one + SetLastError(ERROR_INVALID_PARAMETER); + } + } + else + { + // PAL supports allocating from the current process virtual space only + SetLastError(ERROR_INVALID_PARAMETER); + } + + LOGEXIT("VirtualAllocExNuma returns %p\n", result); + PERF_EXIT(VirtualAllocExNuma); + + return result; +} |