diff options
author | Jan Vorlicek <janvorli@microsoft.com> | 2019-04-05 02:53:32 +0200 |
---|---|---|
committer | Jan Vorlicek <janvorli@microsoft.com> | 2019-04-09 01:26:55 +0200 |
commit | 060990945a0117cb824b9d1c1e6c3a336533a3ee (patch) | |
tree | e762ccb63b9843ccf75d6f87b6ab14cfe352d40c /src/pal | |
parent | 187410154a323024938a3281fb5a49f5e59ec350 (diff) | |
download | coreclr-060990945a0117cb824b9d1c1e6c3a336533a3ee.tar.gz coreclr-060990945a0117cb824b9d1c1e6c3a336533a3ee.tar.bz2 coreclr-060990945a0117cb824b9d1c1e6c3a336533a3ee.zip |
Remove Unix CPU groups emulation
This change removes CPU groups emulation from Unix PAL and modifies the
GC and thread pool code accordingly.
Diffstat (limited to 'src/pal')
-rw-r--r-- | src/pal/inc/pal.h | 140 | ||||
-rw-r--r-- | src/pal/src/include/pal/palinternal.h | 3 | ||||
-rw-r--r-- | src/pal/src/misc/sysinfo.cpp | 42 | ||||
-rw-r--r-- | src/pal/src/numa/numa.cpp | 799 | ||||
-rw-r--r-- | src/pal/src/numa/numashim.h | 16 | ||||
-rw-r--r-- | src/pal/src/thread/thread.cpp | 93 |
6 files changed, 145 insertions, 948 deletions
diff --git a/src/pal/inc/pal.h b/src/pal/inc/pal.h index 0c9b5a70b1..79bc677830 100644 --- a/src/pal/inc/pal.h +++ b/src/pal/inc/pal.h @@ -3996,88 +3996,6 @@ CreatePipe( // NUMA related APIs // -typedef enum _PROCESSOR_CACHE_TYPE { - CacheUnified, - CacheInstruction, - CacheData, - CacheTrace -} PROCESSOR_CACHE_TYPE; - -typedef struct _PROCESSOR_NUMBER { - WORD Group; - BYTE Number; - BYTE Reserved; -} PROCESSOR_NUMBER, *PPROCESSOR_NUMBER; - -typedef enum _LOGICAL_PROCESSOR_RELATIONSHIP { - RelationProcessorCore, - RelationNumaNode, - RelationCache, - RelationProcessorPackage, - RelationGroup, - RelationAll = 0xffff -} LOGICAL_PROCESSOR_RELATIONSHIP; - -typedef ULONG_PTR KAFFINITY; - -#define ANYSIZE_ARRAY 1 - -typedef struct _GROUP_AFFINITY { - KAFFINITY Mask; - WORD Group; - WORD Reserved[3]; -} GROUP_AFFINITY, *PGROUP_AFFINITY; - -typedef struct _PROCESSOR_GROUP_INFO { - BYTE MaximumProcessorCount; - BYTE ActiveProcessorCount; - BYTE Reserved[38]; - KAFFINITY ActiveProcessorMask; -} PROCESSOR_GROUP_INFO, *PPROCESSOR_GROUP_INFO; - -typedef struct _PROCESSOR_RELATIONSHIP { - BYTE Flags; - BYTE EfficiencyClass; - BYTE Reserved[21]; - WORD GroupCount; - GROUP_AFFINITY GroupMask[ANYSIZE_ARRAY]; -} PROCESSOR_RELATIONSHIP, *PPROCESSOR_RELATIONSHIP; - -typedef struct _GROUP_RELATIONSHIP { - WORD MaximumGroupCount; - WORD ActiveGroupCount; - BYTE Reserved[20]; - PROCESSOR_GROUP_INFO GroupInfo[ANYSIZE_ARRAY]; -} GROUP_RELATIONSHIP, *PGROUP_RELATIONSHIP; - -typedef struct _NUMA_NODE_RELATIONSHIP { - DWORD NodeNumber; - BYTE Reserved[20]; - GROUP_AFFINITY GroupMask; -} NUMA_NODE_RELATIONSHIP, *PNUMA_NODE_RELATIONSHIP; - -typedef struct _CACHE_RELATIONSHIP { - BYTE Level; - BYTE Associativity; - WORD LineSize; - DWORD CacheSize; - PROCESSOR_CACHE_TYPE Type; - BYTE Reserved[20]; - GROUP_AFFINITY GroupMask; -} CACHE_RELATIONSHIP, *PCACHE_RELATIONSHIP; - -typedef struct _SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX { - LOGICAL_PROCESSOR_RELATIONSHIP Relationship; - DWORD Size; - 
union { - PROCESSOR_RELATIONSHIP Processor; - NUMA_NODE_RELATIONSHIP NumaNode; - CACHE_RELATIONSHIP Cache; - GROUP_RELATIONSHIP Group; - }; -} SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, *PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX; - - PALIMPORT BOOL PALAPI @@ -4088,10 +4006,7 @@ GetNumaHighestNodeNumber( PALIMPORT BOOL PALAPI -GetNumaProcessorNodeEx( - IN PPROCESSOR_NUMBER Processor, - OUT PUSHORT NodeNumber -); +PAL_GetNumaProcessorNode(WORD procNo, WORD* node); PALIMPORT LPVOID @@ -4108,61 +4023,12 @@ VirtualAllocExNuma( PALIMPORT BOOL PALAPI -GetLogicalProcessorInformationEx( - IN LOGICAL_PROCESSOR_RELATIONSHIP RelationshipType, - OUT OPTIONAL PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer, - IN OUT PDWORD ReturnedLength -); - -PALIMPORT -DWORD_PTR -PALAPI -SetThreadAffinityMask( - IN HANDLE hThread, - IN DWORD_PTR dwThreadAffinityMask -); - -PALIMPORT -BOOL -PALAPI -SetThreadGroupAffinity( - IN HANDLE hThread, - IN const GROUP_AFFINITY *GroupAffinity, - OUT OPTIONAL PGROUP_AFFINITY PreviousGroupAffinity -); +PAL_SetCurrentThreadAffinity(WORD procNo); PALIMPORT BOOL PALAPI -GetThreadGroupAffinity( - IN HANDLE hThread, - OUT PGROUP_AFFINITY GroupAffinity -); - -PALIMPORT -VOID -PALAPI -GetCurrentProcessorNumberEx( - OUT PPROCESSOR_NUMBER ProcNumber -); - -PALIMPORT -BOOL -PALAPI -GetProcessAffinityMask( - IN HANDLE hProcess, - OUT PDWORD_PTR lpProcessAffinityMask, - OUT PDWORD_PTR lpSystemAffinityMask -); - -PALIMPORT -BOOL -PALAPI -SetThreadIdealProcessorEx( - IN HANDLE hThread, - IN PPROCESSOR_NUMBER lpIdealProcessor, - OUT PPROCESSOR_NUMBER lpPreviousIdealProcessor -); +PAL_GetCurrentThreadAffinitySet(SIZE_T size, UINT_PTR* data); // // The types of events that can be logged. 
diff --git a/src/pal/src/include/pal/palinternal.h b/src/pal/src/include/pal/palinternal.h index 67236aaa49..6f64208e05 100644 --- a/src/pal/src/include/pal/palinternal.h +++ b/src/pal/src/include/pal/palinternal.h @@ -679,6 +679,9 @@ typedef enum _TimeConversionConstants bool ReadMemoryValueFromFile(const char* filename, size_t* val); +DWORD +GetTotalCpuCount(); + #ifdef __APPLE__ bool GetApplicationContainerFolder(PathCharString& buffer, const char *applicationGroupId, int applicationGroupIdLength); diff --git a/src/pal/src/misc/sysinfo.cpp b/src/pal/src/misc/sysinfo.cpp index 2c14949b95..419c3f6708 100644 --- a/src/pal/src/misc/sysinfo.cpp +++ b/src/pal/src/misc/sysinfo.cpp @@ -95,24 +95,11 @@ SET_DEFAULT_DEBUG_CHANNEL(MISC); #endif #endif // __APPLE__ - -DWORD -PALAPI -PAL_GetLogicalCpuCountFromOS() +DWORD GetTotalCpuCount() { int nrcpus = 0; -#if HAVE_SCHED_GETAFFINITY - - cpu_set_t cpuSet; - int st = sched_getaffinity(0, sizeof(cpu_set_t), &cpuSet); - if (st != 0) - { - ASSERT("sched_getaffinity failed (%d)\n", errno); - } - - nrcpus = CPU_COUNT(&cpuSet); -#elif HAVE_SYSCONF +#if HAVE_SYSCONF #if defined(_ARM_) || defined(_ARM64_) #define SYSCONF_GET_NUMPROCS _SC_NPROCESSORS_CONF @@ -139,11 +126,36 @@ PAL_GetLogicalCpuCountFromOS() { ASSERT("sysctl failed for HW_NCPU (%d)\n", errno); } +#else // HAVE_SYSCONF +#error "Don't know how to get total CPU count on this platform" #endif // HAVE_SYSCONF return nrcpus; } +DWORD +PALAPI +PAL_GetLogicalCpuCountFromOS() +{ + int nrcpus = 0; + +#if HAVE_SCHED_GETAFFINITY + + cpu_set_t cpuSet; + int st = sched_getaffinity(0, sizeof(cpu_set_t), &cpuSet); + if (st != 0) + { + ASSERT("sched_getaffinity failed (%d)\n", errno); + } + + nrcpus = CPU_COUNT(&cpuSet); +#else // HAVE_SCHED_GETAFFINITY + nrcpus = GetTotalCpuCount(); +#endif // HAVE_SCHED_GETAFFINITY + + return nrcpus; +} + /*++ Function: GetSystemInfo diff --git a/src/pal/src/numa/numa.cpp b/src/pal/src/numa/numa.cpp index 9283a044da..0c9d4090a5 100644 --- 
a/src/pal/src/numa/numa.cpp +++ b/src/pal/src/numa/numa.cpp @@ -47,33 +47,6 @@ using namespace CorUnix; typedef cpuset_t cpu_set_t; #endif -// CPU affinity descriptor -struct CpuAffinity -{ - // NUMA node - BYTE Node; - // CPU number relative to the group the CPU is in - BYTE Number; - // CPU group - WORD Group; -}; - -// Array mapping global CPU index to its affinity -CpuAffinity *g_cpuToAffinity = NULL; - -// Array mapping CPU group and index in the group to the global CPU index -short *g_groupAndIndexToCpu = NULL; -// Array mapping CPU group to the corresponding affinity mask of the CPUs in the group -KAFFINITY *g_groupToCpuMask = NULL; -// Array mapping CPU group to the number of processors in the group -BYTE *g_groupToCpuCount = NULL; - -// Total number of processors in the system -int g_cpuCount = 0; -// Total number of possible processors in the system -int g_possibleCpuCount = 0; -// Total number of CPU groups -int g_groupCount = 0; // The highest NUMA node available int g_highestNumaNode = 0; // Is numa available @@ -87,92 +60,6 @@ FOR_ALL_NUMA_FUNCTIONS #undef PER_FUNCTION_BLOCK #endif // HAVE_NUMA_H -static const int MaxCpusPerGroup = 8 * sizeof(KAFFINITY); -static const WORD NO_GROUP = 0xffff; - -/*++ -Function: - FreeLookupArrays - -Free CPU and group lookup arrays ---*/ -VOID -FreeLookupArrays() -{ - free(g_groupAndIndexToCpu); - free(g_cpuToAffinity); - free(g_groupToCpuMask); - free(g_groupToCpuCount); - - g_groupAndIndexToCpu = NULL; - g_cpuToAffinity = NULL; - g_groupToCpuMask = NULL; - g_groupToCpuCount = NULL; -} - -/*++ -Function: - AllocateLookupArrays - -Allocate CPU and group lookup arrays -Return TRUE if the allocation succeeded ---*/ -BOOL -AllocateLookupArrays() -{ - g_groupAndIndexToCpu = (short*)malloc(g_groupCount * MaxCpusPerGroup * sizeof(short)); - if (g_groupAndIndexToCpu == NULL) - { - goto FAILED; - } - - g_cpuToAffinity = (CpuAffinity*)malloc(g_possibleCpuCount * sizeof(CpuAffinity)); - if (g_cpuToAffinity == NULL) - { - goto 
FAILED; - } - - g_groupToCpuMask = (KAFFINITY*)malloc(g_groupCount * sizeof(KAFFINITY)); - if (g_groupToCpuMask == NULL) - { - goto FAILED; - } - - g_groupToCpuCount = (BYTE*)malloc(g_groupCount * sizeof(BYTE)); - if (g_groupToCpuCount == NULL) - { - goto FAILED; - } - - memset(g_groupAndIndexToCpu, 0xff, g_groupCount * MaxCpusPerGroup * sizeof(short)); - memset(g_cpuToAffinity, 0xff, g_possibleCpuCount * sizeof(CpuAffinity)); - memset(g_groupToCpuMask, 0, g_groupCount * sizeof(KAFFINITY)); - memset(g_groupToCpuCount, 0, g_groupCount * sizeof(BYTE)); - - return TRUE; - -FAILED: - FreeLookupArrays(); - - return FALSE; -} - -/*++ -Function: - GetFullAffinityMask - -Get affinity mask for the specified number of processors with all -the processors enabled. ---*/ -KAFFINITY GetFullAffinityMask(int cpuCount) -{ - if ((size_t)cpuCount < sizeof(KAFFINITY) * 8) - { - return ((KAFFINITY)1 << (cpuCount)) - 1; - } - - return ~(KAFFINITY)0; -} /*++ Function: @@ -208,73 +95,6 @@ FOR_ALL_NUMA_FUNCTIONS else { g_numaAvailable = true; - - struct bitmask *mask = numa_allocate_cpumask(); - int numaNodesCount = numa_max_node() + 1; - - g_possibleCpuCount = numa_num_possible_cpus(); - g_cpuCount = 0; - g_groupCount = 0; - - for (int i = 0; i < numaNodesCount; i++) - { - int st = numa_node_to_cpus(i, mask); - // The only failure that can happen is that the mask is not large enough - // but that cannot happen since the mask was allocated by numa_allocate_cpumask - _ASSERTE(st == 0); - unsigned int nodeCpuCount = numa_bitmask_weight(mask); - g_cpuCount += nodeCpuCount; - unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup; - g_groupCount += nodeGroupCount; - } - - if (!AllocateLookupArrays()) - { - dlclose(numaHandle); - return FALSE; - } - - WORD currentGroup = 0; - int currentGroupCpus = 0; - - for (int i = 0; i < numaNodesCount; i++) - { - int st = numa_node_to_cpus(i, mask); - // The only failure that can happen is that the mask is not large enough - 
// but that cannot happen since the mask was allocated by numa_allocate_cpumask - _ASSERTE(st == 0); - unsigned int nodeCpuCount = numa_bitmask_weight(mask); - unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup; - for (int j = 0; j < g_possibleCpuCount; j++) - { - if (numa_bitmask_isbitset(mask, j)) - { - if (currentGroupCpus == MaxCpusPerGroup) - { - g_groupToCpuCount[currentGroup] = MaxCpusPerGroup; - g_groupToCpuMask[currentGroup] = GetFullAffinityMask(MaxCpusPerGroup); - currentGroupCpus = 0; - currentGroup++; - } - g_cpuToAffinity[j].Node = i; - g_cpuToAffinity[j].Group = currentGroup; - g_cpuToAffinity[j].Number = currentGroupCpus; - g_groupAndIndexToCpu[currentGroup * MaxCpusPerGroup + currentGroupCpus] = j; - currentGroupCpus++; - } - } - - if (currentGroupCpus != 0) - { - g_groupToCpuCount[currentGroup] = currentGroupCpus; - g_groupToCpuMask[currentGroup] = GetFullAffinityMask(currentGroupCpus); - currentGroupCpus = 0; - currentGroup++; - } - } - - numa_free_cpumask(mask); - g_highestNumaNode = numa_max_node(); } } @@ -282,21 +102,7 @@ FOR_ALL_NUMA_FUNCTIONS if (!g_numaAvailable) { // No NUMA - g_possibleCpuCount = PAL_GetLogicalCpuCountFromOS(); - g_cpuCount = PAL_GetLogicalCpuCountFromOS(); - g_groupCount = 1; g_highestNumaNode = 0; - - if (!AllocateLookupArrays()) - { - return FALSE; - } - - for (int i = 0; i < g_possibleCpuCount; i++) - { - g_cpuToAffinity[i].Number = i; - g_cpuToAffinity[i].Group = 0; - } } return TRUE; @@ -311,7 +117,6 @@ Cleanup of the NUMA support data structures VOID NUMASupportCleanup() { - FreeLookupArrays(); #if HAVE_NUMA_H if (g_numaAvailable) { @@ -346,493 +151,35 @@ GetNumaHighestNodeNumber( /*++ Function: - GetNumaProcessorNodeEx - -See MSDN doc. 
---*/ -BOOL -PALAPI -GetNumaProcessorNodeEx( - IN PPROCESSOR_NUMBER Processor, - OUT PUSHORT NodeNumber -) -{ - PERF_ENTRY(GetNumaProcessorNodeEx); - ENTRY("GetNumaProcessorNodeEx(Processor=%p, NodeNumber=%p)\n", Processor, NodeNumber); + PAL_GetNumaProcessorNode - BOOL success = FALSE; +Abstract + Get NUMA node of a processor - if ((Processor->Group < g_groupCount) && - (Processor->Number < MaxCpusPerGroup) && - (Processor->Reserved == 0)) - { - short cpu = g_groupAndIndexToCpu[Processor->Group * MaxCpusPerGroup + Processor->Number]; - if (cpu != -1) - { - *NodeNumber = g_cpuToAffinity[cpu].Node; - success = TRUE; - } - } - - if (!success) - { - *NodeNumber = 0xffff; - SetLastError(ERROR_INVALID_PARAMETER); - } +Parameters: + procNo - number of the processor to get the NUMA node for + node - the resulting NUMA node - LOGEXIT("GetNumaProcessorNodeEx returns BOOL %d\n", success); - PERF_EXIT(GetNumaProcessorNodeEx); - - return success; -} - -/*++ -Function: - GetLogicalProcessorInformationEx - -See MSDN doc. 
---*/ -BOOL -PALAPI -GetLogicalProcessorInformationEx( - IN LOGICAL_PROCESSOR_RELATIONSHIP RelationshipType, - OUT OPTIONAL PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer, - IN OUT PDWORD ReturnedLength -) -{ - PERF_ENTRY(GetLogicalProcessorInformationEx); - ENTRY("GetLogicalProcessorInformationEx(RelationshipType=%d, Buffer=%p, ReturnedLength=%p)\n", RelationshipType, Buffer, ReturnedLength); - - BOOL success = FALSE; - - if (RelationshipType == RelationGroup) - { - size_t requiredSize = __builtin_offsetof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, Group); - requiredSize += __builtin_offsetof(GROUP_RELATIONSHIP, GroupInfo); - requiredSize += g_groupCount * sizeof(PROCESSOR_GROUP_INFO); - - if (*ReturnedLength >= requiredSize) - { - Buffer->Relationship = RelationGroup; - Buffer->Size = requiredSize; - Buffer->Group.MaximumGroupCount = g_groupCount; - Buffer->Group.ActiveGroupCount = g_groupCount; - for (int i = 0; i < g_groupCount; i++) - { - Buffer->Group.GroupInfo[i].MaximumProcessorCount = MaxCpusPerGroup; - Buffer->Group.GroupInfo[i].ActiveProcessorCount = g_groupToCpuCount[i]; - Buffer->Group.GroupInfo[i].ActiveProcessorMask = g_groupToCpuMask[i]; - } - - success = TRUE; - } - else - { - SetLastError(ERROR_INSUFFICIENT_BUFFER); - } - - *ReturnedLength = requiredSize; - } - else - { - // We only support the group relationship - SetLastError(ERROR_INVALID_PARAMETER); - } - - LOGEXIT("GetLogicalProcessorInformationEx returns BOOL %d\n", success); - PERF_EXIT(GetLogicalProcessorInformationEx); - - return success; -} - -/*++ -Function: - GetThreadGroupAffinityInternal - -Get the group affinity for the specified pthread ---*/ -BOOL -GetThreadGroupAffinityInternal( - IN pthread_t thread, - OUT PGROUP_AFFINITY GroupAffinity -) -{ - BOOL success = FALSE; - -#if HAVE_PTHREAD_GETAFFINITY_NP - cpu_set_t cpuSet; - - int st = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &cpuSet); - - if (st == 0) - { - WORD group = NO_GROUP; - KAFFINITY mask = 0; - - for (int i = 0; i 
< g_possibleCpuCount; i++) - { - if (CPU_ISSET(i, &cpuSet)) - { - WORD g = g_cpuToAffinity[i].Group; - // Unless the thread affinity was already set by SetThreadGroupAffinity, it is possible that - // the current thread has affinity with processors from multiple groups. So we report just the - // first group we find. - if (group == NO_GROUP || g == group) - { - group = g; - mask |= ((KAFFINITY)1) << g_cpuToAffinity[i].Number; - } - } - } - - GroupAffinity->Group = group; - GroupAffinity->Mask = mask; - success = TRUE; - } - else - { - SetLastError(ERROR_GEN_FAILURE); - } -#else // HAVE_PTHREAD_GETAFFINITY_NP - // There is no API to manage thread affinity, so let's return a group affinity - // with all the CPUs on the system. - GroupAffinity->Group = 0; - GroupAffinity->Mask = GetFullAffinityMask(g_possibleCpuCount); - success = TRUE; -#endif // HAVE_PTHREAD_GETAFFINITY_NP - - return success; -} - -/*++ -Function: - GetThreadGroupAffinity - -See MSDN doc. ---*/ -BOOL -PALAPI -GetThreadGroupAffinity( - IN HANDLE hThread, - OUT PGROUP_AFFINITY GroupAffinity -) -{ - PERF_ENTRY(GetThreadGroupAffinity); - ENTRY("GetThreadGroupAffinity(hThread=%p, GroupAffinity=%p)\n", hThread, GroupAffinity); - CPalThread *pCurrentThread = InternalGetCurrentThread(); - CPalThread *pTargetThread = NULL; - IPalObject *pTargetThreadObject = NULL; - - PAL_ERROR palErr = - InternalGetThreadDataFromHandle(pCurrentThread, hThread, - 0, // THREAD_SET_CONTEXT - &pTargetThread, &pTargetThreadObject); - - if (NO_ERROR != palErr) - { - ERROR("Unable to obtain thread data for handle %p (error %x)!\n", hThread, - palErr); - return FALSE; - } - - BOOL success = GetThreadGroupAffinityInternal( - pTargetThread->GetPThreadSelf(), GroupAffinity); - LOGEXIT("GetThreadGroupAffinity returns BOOL %d\n", success); - PERF_EXIT(GetThreadGroupAffinity); - - return success; -} - - -/*++ -Function: - SetThreadGroupAffinity - -See MSDN doc. 
+Return value: + TRUE if the function was able to get the NUMA node, FALSE if it has failed. --*/ BOOL PALAPI -SetThreadGroupAffinity( - IN HANDLE hThread, - IN const GROUP_AFFINITY *GroupAffinity, - OUT OPTIONAL PGROUP_AFFINITY PreviousGroupAffinity -) +PAL_GetNumaProcessorNode(WORD procNo, WORD* node) { - PERF_ENTRY(SetThreadGroupAffinity); - ENTRY("SetThreadGroupAffinity(hThread=%p, GroupAffinity=%p, PreviousGroupAffinity=%p)\n", hThread, GroupAffinity, PreviousGroupAffinity); - - CPalThread *pCurrentThread = InternalGetCurrentThread(); - CPalThread *pTargetThread = NULL; - IPalObject *pTargetThreadObject = NULL; - - PAL_ERROR palErr = - InternalGetThreadDataFromHandle(pCurrentThread, hThread, - 0, // THREAD_SET_CONTEXT - &pTargetThread, &pTargetThreadObject); - - if (NO_ERROR != palErr) - { - ERROR("Unable to obtain thread data for handle %p (error %x)!\n", hThread, - palErr); - return FALSE; - } - - pthread_t thread = pTargetThread->GetPThreadSelf(); - - if (PreviousGroupAffinity != NULL) - { - GetThreadGroupAffinityInternal(thread, PreviousGroupAffinity); - } - -#if HAVE_PTHREAD_GETAFFINITY_NP - int groupStartIndex = GroupAffinity->Group * MaxCpusPerGroup; - KAFFINITY mask = 1; - cpu_set_t cpuSet; - CPU_ZERO(&cpuSet); - - for (int i = 0; i < MaxCpusPerGroup; i++, mask <<= 1) - { - if (GroupAffinity->Mask & mask) - { - int cpu = g_groupAndIndexToCpu[groupStartIndex + i]; - if (cpu != -1) - { - CPU_SET(cpu, &cpuSet); - } - } - } - - int st = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuSet); - - if (st != 0) - { - switch (st) - { - case EINVAL: - // There is no processor in the mask that is allowed to execute the process - SetLastError(ERROR_INVALID_PARAMETER); - break; - case ESRCH: - SetLastError(ERROR_INVALID_HANDLE); - break; - default: - SetLastError(ERROR_GEN_FAILURE); - break; - } - } - - BOOL success = (st == 0); -#else // HAVE_PTHREAD_GETAFFINITY_NP - // There is no API to manage thread affinity, so let's ignore the request - BOOL success = 
TRUE; -#endif // HAVE_PTHREAD_GETAFFINITY_NP - - LOGEXIT("SetThreadGroupAffinity returns BOOL %d\n", success); - PERF_EXIT(SetThreadGroupAffinity); - - return success; -} - -/*++ -Function: - SetThreadAffinityMask - -See MSDN doc. ---*/ -DWORD_PTR -PALAPI -SetThreadAffinityMask( - IN HANDLE hThread, - IN DWORD_PTR dwThreadAffinityMask -) -{ - PERF_ENTRY(SetThreadAffinityMask); - ENTRY("SetThreadAffinityMask(hThread=%p, dwThreadAffinityMask=%p)\n", hThread, dwThreadAffinityMask); - - CPalThread *pCurrentThread = InternalGetCurrentThread(); - CPalThread *pTargetThread = NULL; - IPalObject *pTargetThreadObject = NULL; - - PAL_ERROR palErr = - InternalGetThreadDataFromHandle(pCurrentThread, hThread, - 0, // THREAD_SET_CONTEXT - &pTargetThread, &pTargetThreadObject); - - if (NO_ERROR != palErr) - { - ERROR("Unable to obtain thread data for handle %p (error %x)!\n", hThread, - palErr); - return 0; - } - - pthread_t thread = pTargetThread->GetPThreadSelf(); - -#if HAVE_PTHREAD_GETAFFINITY_NP - cpu_set_t prevCpuSet; - CPU_ZERO(&prevCpuSet); - KAFFINITY prevMask = 0; - - int st = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &prevCpuSet); - - if (st == 0) - { - for (int i = 0; i < std::min(8 * (int)sizeof(KAFFINITY), g_possibleCpuCount); i++) - { - if (CPU_ISSET(i, &prevCpuSet)) - { - prevMask |= ((KAFFINITY)1) << i; - } - } - } - - cpu_set_t cpuSet; - CPU_ZERO(&cpuSet); - - int cpu = 0; - while (dwThreadAffinityMask) - { - if (dwThreadAffinityMask & 1) - { - CPU_SET(cpu, &cpuSet); - } - cpu++; - dwThreadAffinityMask >>= 1; - } - - st = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuSet); - - if (st != 0) - { - switch (st) - { - case EINVAL: - // There is no processor in the mask that is allowed to execute the - // process - SetLastError(ERROR_INVALID_PARAMETER); - break; - case ESRCH: - SetLastError(ERROR_INVALID_HANDLE); - break; - default: - SetLastError(ERROR_GEN_FAILURE); - break; - } - } - - DWORD_PTR ret = (st == 0) ? 
prevMask : 0; -#else // HAVE_PTHREAD_GETAFFINITY_NP - // There is no API to manage thread affinity, so let's ignore the request - DWORD_PTR ret = 0; -#endif // HAVE_PTHREAD_GETAFFINITY_NP - LOGEXIT("SetThreadAffinityMask returns %lu\n", ret); - PERF_EXIT(SetThreadAffinityMask); - - return ret; -} - -/*++ -Function: - GetCurrentProcessorNumberEx - -See MSDN doc. ---*/ -VOID -PALAPI -GetCurrentProcessorNumberEx( - OUT PPROCESSOR_NUMBER ProcNumber -) -{ - PERF_ENTRY(GetCurrentProcessorNumberEx); - ENTRY("GetCurrentProcessorNumberEx(ProcNumber=%p\n", ProcNumber); - - DWORD cpu = GetCurrentProcessorNumber(); - _ASSERTE((int)cpu < g_possibleCpuCount); - ProcNumber->Group = g_cpuToAffinity[cpu].Group; - ProcNumber->Number = g_cpuToAffinity[cpu].Number; - - LOGEXIT("GetCurrentProcessorNumberEx\n"); - PERF_EXIT(GetCurrentProcessorNumberEx); -} - -/*++ -Function: - GetProcessAffinityMask - -See MSDN doc. ---*/ -BOOL -PALAPI -GetProcessAffinityMask( - IN HANDLE hProcess, - OUT PDWORD_PTR lpProcessAffinityMask, - OUT PDWORD_PTR lpSystemAffinityMask -) -{ - PERF_ENTRY(GetProcessAffinityMask); - ENTRY("GetProcessAffinityMask(hProcess=%p, lpProcessAffinityMask=%p, lpSystemAffinityMask=%p\n", hProcess, lpProcessAffinityMask, lpSystemAffinityMask); - - BOOL success = FALSE; - - if (hProcess == GetCurrentProcess()) +#if HAVE_NUMA_H + if (g_numaAvailable) { - int cpuCountInMask = (g_cpuCount > 64) ? 
64 : g_cpuCount; - - DWORD_PTR systemMask = GetFullAffinityMask(cpuCountInMask); - -#if HAVE_SCHED_GETAFFINITY - int pid = getpid(); - cpu_set_t cpuSet; - int st = sched_getaffinity(pid, sizeof(cpu_set_t), &cpuSet); - if (st == 0) + int result = numa_node_of_cpu(procNo); + if (result >= 0) { - DWORD_PTR processMask = 0; - - for (int i = 0; i < cpuCountInMask; i++) - { - if (CPU_ISSET(i, &cpuSet)) - { - processMask |= ((DWORD_PTR)1) << i; - } - } - - success = TRUE; - - *lpProcessAffinityMask = processMask; - *lpSystemAffinityMask = systemMask; - } - else if (errno == EINVAL) - { - // There are more processors than can fit in a cpu_set_t - // return all bits set for all processors (upto 64) for both masks. - *lpProcessAffinityMask = systemMask; - *lpSystemAffinityMask = systemMask; - success = TRUE; - } - else - { - // We should not get any of the errors that the sched_getaffinity can return since none - // of them applies for the current thread, so this is an unexpected kind of failure. - SetLastError(ERROR_GEN_FAILURE); + *node = (WORD)result; + return TRUE; } -#else // HAVE_SCHED_GETAFFINITY - // There is no API to manage thread affinity, so let's return both affinity masks - // with all the CPUs on the system set. - *lpSystemAffinityMask = systemMask; - *lpProcessAffinityMask = systemMask; - - success = TRUE; -#endif // HAVE_SCHED_GETAFFINITY - } - else - { - // PAL supports getting affinity mask for the current process only - SetLastError(ERROR_INVALID_PARAMETER); } +#endif // HAVE_NUMA_H - LOGEXIT("GetProcessAffinityMask returns BOOL %d\n", success); - PERF_EXIT(GetProcessAffinityMask); - - return success; + return FALSE; } /*++ @@ -898,115 +245,3 @@ VirtualAllocExNuma( return result; } - -/*++ -Function: - SetThreadIdealProcessorEx - -See MSDN doc. 
---*/ -BOOL -PALAPI -SetThreadIdealProcessorEx( - IN HANDLE hThread, - IN PPROCESSOR_NUMBER lpIdealProcessor, - OUT PPROCESSOR_NUMBER lpPreviousIdealProcessor) -{ - PERF_ENTRY(SetThreadIdealProcessorEx); - ENTRY("SetThreadIdealProcessorEx(hThread=%p, lpIdealProcessor=%p)\n", hThread, lpIdealProcessor); - - CPalThread *pCurrentThread = InternalGetCurrentThread(); - CPalThread *pTargetThread = NULL; - IPalObject *pTargetThreadObject = NULL; - - PAL_ERROR palErr = - InternalGetThreadDataFromHandle(pCurrentThread, hThread, - 0, // THREAD_SET_CONTEXT - &pTargetThread, &pTargetThreadObject); - - if (NO_ERROR != palErr) - { - ERROR("Unable to obtain thread data for handle %p (error %x)!\n", hThread, - palErr); - return 0; - } - - pthread_t thread = pTargetThread->GetPThreadSelf(); - -#if HAVE_PTHREAD_GETAFFINITY_NP - int cpu = -1; - if ((lpIdealProcessor->Group < g_groupCount) && - (lpIdealProcessor->Number < MaxCpusPerGroup) && - (lpIdealProcessor->Reserved == 0)) - { - cpu = g_groupAndIndexToCpu[lpIdealProcessor->Group * MaxCpusPerGroup + lpIdealProcessor->Number]; - } - - if (cpu == -1) - { - SetLastError(ERROR_INVALID_PARAMETER); - return FALSE; - } - - if (lpPreviousIdealProcessor != NULL) - { - cpu_set_t prevCpuSet; - CPU_ZERO(&prevCpuSet); - DWORD prevCpu = GetCurrentProcessorNumber(); - - int st = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &prevCpuSet); - - if (st == 0) - { - for (int i = 0; i < g_possibleCpuCount; i++) - { - if (CPU_ISSET(i, &prevCpuSet)) - { - prevCpu = i; - break; - } - } - } - - _ASSERTE((int)prevCpu < g_possibleCpuCount); - lpPreviousIdealProcessor->Group = g_cpuToAffinity[prevCpu].Group; - lpPreviousIdealProcessor->Number = g_cpuToAffinity[prevCpu].Number; - lpPreviousIdealProcessor->Reserved = 0; - } - - cpu_set_t cpuSet; - CPU_ZERO(&cpuSet); - CPU_SET(cpu, &cpuSet); - - int st = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuSet); - - if (st != 0) - { - switch (st) - { - case EINVAL: - // There is no processor in the mask 
that is allowed to execute the - // process - SetLastError(ERROR_INVALID_PARAMETER); - break; - case ESRCH: - SetLastError(ERROR_INVALID_HANDLE); - break; - default: - SetLastError(ERROR_GEN_FAILURE); - break; - } - } - - BOOL success = (st == 0); - -#else // HAVE_PTHREAD_GETAFFINITY_NP - // There is no API to manage thread affinity, so let's ignore the request - BOOL success = FALSE; -#endif // HAVE_PTHREAD_GETAFFINITY_NP - - LOGEXIT("SetThreadIdealProcessorEx returns BOOL %d\n", success); - PERF_EXIT(SetThreadIdealProcessorEx); - - return success; -} diff --git a/src/pal/src/numa/numashim.h b/src/pal/src/numa/numashim.h index dd7f58d6de..e56cfab9d1 100644 --- a/src/pal/src/numa/numashim.h +++ b/src/pal/src/numa/numashim.h @@ -13,19 +13,12 @@ #include <numa.h> #include <numaif.h> -#define numa_free_cpumask numa_bitmask_free - // List of all functions from the numa library that are used #define FOR_ALL_NUMA_FUNCTIONS \ PER_FUNCTION_BLOCK(numa_available) \ PER_FUNCTION_BLOCK(mbind) \ - PER_FUNCTION_BLOCK(numa_num_possible_cpus) \ PER_FUNCTION_BLOCK(numa_max_node) \ - PER_FUNCTION_BLOCK(numa_allocate_cpumask) \ - PER_FUNCTION_BLOCK(numa_node_to_cpus) \ - PER_FUNCTION_BLOCK(numa_bitmask_weight) \ - PER_FUNCTION_BLOCK(numa_bitmask_isbitset) \ - PER_FUNCTION_BLOCK(numa_bitmask_free) + PER_FUNCTION_BLOCK(numa_node_of_cpu) // Declare pointers to all the used numa functions #define PER_FUNCTION_BLOCK(fn) extern decltype(fn)* fn##_ptr; @@ -36,13 +29,8 @@ FOR_ALL_NUMA_FUNCTIONS // to the functions of libnuma in the initialization. #define numa_available() numa_available_ptr() #define mbind(...) mbind_ptr(__VA_ARGS__) -#define numa_num_possible_cpus() numa_num_possible_cpus_ptr() #define numa_max_node() numa_max_node_ptr() -#define numa_allocate_cpumask() numa_allocate_cpumask_ptr() -#define numa_node_to_cpus(...) numa_node_to_cpus_ptr(__VA_ARGS__) -#define numa_bitmask_weight(...) numa_bitmask_weight_ptr(__VA_ARGS__) -#define numa_bitmask_isbitset(...) 
numa_bitmask_isbitset_ptr(__VA_ARGS__) -#define numa_bitmask_free(...) numa_bitmask_free_ptr(__VA_ARGS__) +#define numa_node_of_cpu(...) numa_node_of_cpu_ptr(__VA_ARGS__) #endif // HAVE_NUMA_H diff --git a/src/pal/src/thread/thread.cpp b/src/pal/src/thread/thread.cpp index 86a08639c7..122e86014c 100644 --- a/src/pal/src/thread/thread.cpp +++ b/src/pal/src/thread/thread.cpp @@ -64,6 +64,7 @@ SET_DEFAULT_DEBUG_CHANNEL(THREAD); // some headers have code with asserts, so do #include "pal/fakepoll.h" #endif // HAVE_POLL #include <limits.h> +#include <algorithm> #if HAVE_SYS_LWP_H #include <sys/lwp.h> @@ -2921,3 +2922,95 @@ int CorUnix::CThreadMachExceptionHandlers::GetIndexOfHandler(exception_mask_t bm } #endif // HAVE_MACH_EXCEPTIONS + +/*++ +Function: + PAL_SetCurrentThreadAffinity + +Abstract + Set affinity of the current thread to the specified processor. + +Parameters: + procNo - number of the processor to affinitize the current thread to + +Return value: + TRUE if the function was able to set the affinity, FALSE if it has failed. +--*/ +BOOL +PALAPI +PAL_SetCurrentThreadAffinity(WORD procNo) +{ +#if HAVE_PTHREAD_GETAFFINITY_NP + cpu_set_t cpuSet; + CPU_ZERO(&cpuSet); + + int st = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuSet); + + if (st == 0) + { + CPU_SET(procNo, &cpuSet); + st = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuSet); + } + + return st == 0; +#else // HAVE_PTHREAD_GETAFFINITY_NP + // There is no API to manage thread affinity, so let's ignore the request + return FALSE; +#endif // HAVE_PTHREAD_GETAFFINITY_NP +} + +/*++ +Function: + PAL_GetCurrentThreadAffinitySet + +Abstract + Get affinity set of the current thread. The set is represented by an array of "size" entries of UINT_PTR type.
+ +Parameters: + size - number of entries in the "data" array + data - pointer to the data of the resulting set, the LSB of the first entry in the array represents processor 0 + +Return value: + TRUE if the function was able to get the affinity set, FALSE if it has failed. +--*/ +BOOL +PALAPI +PAL_GetCurrentThreadAffinitySet(SIZE_T size, UINT_PTR* data) +{ + cpu_set_t cpuSet; + CPU_ZERO(&cpuSet); + +#if HAVE_PTHREAD_GETAFFINITY_NP + int st = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuSet); + + if (st == 0) + { + const SIZE_T BitsPerBitsetEntry = 8 * sizeof(UINT_PTR); + int nrcpus = GetTotalCpuCount(); + + // Get info for as many processors as can fit into the resulting set + SIZE_T remainingCount = std::min(size * BitsPerBitsetEntry, (SIZE_T)nrcpus); + SIZE_T i = 0; + while (remainingCount != 0) + { + UINT_PTR entry = 0; + SIZE_T bitsToCopy = std::min(remainingCount, BitsPerBitsetEntry); + SIZE_T cpuSetOffset = i * BitsPerBitsetEntry; + for (SIZE_T j = 0; j < bitsToCopy; j++) + { + if (CPU_ISSET(cpuSetOffset + j, &cpuSet)) + { + entry |= (UINT_PTR)1 << j; + } + } + remainingCount -= bitsToCopy; + data[i++] = entry; + } + } + + return st == 0; +#else // HAVE_PTHREAD_GETAFFINITY_NP + // There is no API to query thread affinity, so report failure + return FALSE; +#endif // HAVE_PTHREAD_GETAFFINITY_NP +} |