-rw-r--r--  src/classlibnative/bcltype/system.cpp      3
-rw-r--r--  src/gc/env/gcenv.os.h                      10
-rw-r--r--  src/gc/unix/gcenv.unix.cpp                 137
-rw-r--r--  src/gc/windows/gcenv.windows.cpp           95
-rw-r--r--  src/inc/utilcode.h                         19
-rw-r--r--  src/pal/inc/pal.h                          140
-rw-r--r--  src/pal/src/include/pal/palinternal.h      3
-rw-r--r--  src/pal/src/misc/sysinfo.cpp               42
-rw-r--r--  src/pal/src/numa/numa.cpp                  799
-rw-r--r--  src/pal/src/numa/numashim.h                16
-rw-r--r--  src/pal/src/thread/thread.cpp              93
-rw-r--r--  src/utilcode/util.cpp                      32
-rw-r--r--  src/vm/ceemain.cpp                         3
-rw-r--r--  src/vm/eeconfig.cpp                        9
-rw-r--r--  src/vm/gcenv.os.cpp                        158
-rw-r--r--  src/vm/threads.cpp                         8
-rw-r--r--  src/vm/threads.h                           3
-rw-r--r--  src/vm/win32threadpool.cpp                 20
-rw-r--r--  src/vm/win32threadpool.h                   14
19 files changed, 502 insertions, 1102 deletions
diff --git a/src/classlibnative/bcltype/system.cpp b/src/classlibnative/bcltype/system.cpp
index 12397a330e..38e5bba9ad 100644
--- a/src/classlibnative/bcltype/system.cpp
+++ b/src/classlibnative/bcltype/system.cpp
@@ -325,13 +325,14 @@ INT32 QCALLTYPE SystemNative::GetProcessorCount()
BEGIN_QCALL;
+#ifndef FEATURE_PAL
CPUGroupInfo::EnsureInitialized();
if(CPUGroupInfo::CanEnableThreadUseAllCpuGroups())
{
processorCount = CPUGroupInfo::GetNumActiveProcessors();
}
-
+#endif // !FEATURE_PAL
// Processor count will be 0 if CPU groups are disabled/not supported
if(processorCount == 0)
{
diff --git a/src/gc/env/gcenv.os.h b/src/gc/env/gcenv.os.h
index 05dccf7a6d..7fa1ba7062 100644
--- a/src/gc/env/gcenv.os.h
+++ b/src/gc/env/gcenv.os.h
@@ -167,11 +167,18 @@ class AffinitySet
public:
+ static const size_t BitsetDataSize = MAX_SUPPORTED_CPUS / BitsPerBitsetEntry;
+
AffinitySet()
{
memset(m_bitset, 0, sizeof(m_bitset));
}
+ uintptr_t* GetBitsetData()
+ {
+ return m_bitset;
+ }
+
// Check if the set contains a processor
bool Contains(size_t cpuIndex) const
{
@@ -477,9 +484,6 @@ public:
// Is NUMA support available
static bool CanEnableGCNumaAware();
- // Gets the NUMA node for the processor
- static bool GetNumaProcessorNode(uint16_t proc_no, uint16_t *node_no);
-
// Get processor number and optionally its NUMA node number for the specified heap number
// Parameters:
// heap_number - heap number to get the result for
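The hunk above exposes the raw bitset behind AffinitySet (BitsetDataSize and GetBitsetData) so the Unix PAL can fill it directly. Below is a minimal, hedged sketch of that layout; the MAX_SUPPORTED_CPUS value is an assumption, and the class name is illustrative rather than the one in gcenv.os.h.

// Sketch only, not part of the patch: how a word-per-entry bitset maps a
// processor index to (entry, bit), mirroring AffinitySet::Add/Contains and
// showing why GetBitsetData() can be handed straight to
// PAL_GetCurrentThreadAffinitySet.
#include <stdint.h>
#include <string.h>

static const size_t MaxCpus = 512;                          // assumed value of MAX_SUPPORTED_CPUS
static const size_t BitsPerBitsetEntry = 8 * sizeof(uintptr_t);

struct BitsetSketch
{
    uintptr_t m_bitset[MaxCpus / BitsPerBitsetEntry];

    BitsetSketch() { memset(m_bitset, 0, sizeof(m_bitset)); }

    void Add(size_t cpu)
    {
        m_bitset[cpu / BitsPerBitsetEntry] |= (uintptr_t)1 << (cpu & (BitsPerBitsetEntry - 1));
    }

    bool Contains(size_t cpu) const
    {
        return (m_bitset[cpu / BitsPerBitsetEntry] >> (cpu & (BitsPerBitsetEntry - 1))) & 1;
    }
};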
diff --git a/src/gc/unix/gcenv.unix.cpp b/src/gc/unix/gcenv.unix.cpp
index c71d211e01..a6d56f2433 100644
--- a/src/gc/unix/gcenv.unix.cpp
+++ b/src/gc/unix/gcenv.unix.cpp
@@ -55,6 +55,33 @@
#include "globals.h"
#include "cgroup.h"
+#if HAVE_NUMA_H
+
+#include <numa.h>
+#include <numaif.h>
+#include <dlfcn.h>
+
+// List of all functions from the numa library that are used
+#define FOR_ALL_NUMA_FUNCTIONS \
+ PER_FUNCTION_BLOCK(mbind) \
+ PER_FUNCTION_BLOCK(numa_available) \
+ PER_FUNCTION_BLOCK(numa_max_node) \
+ PER_FUNCTION_BLOCK(numa_node_of_cpu)
+
+// Declare pointers to all the used numa functions
+#define PER_FUNCTION_BLOCK(fn) extern decltype(fn)* fn##_ptr;
+FOR_ALL_NUMA_FUNCTIONS
+#undef PER_FUNCTION_BLOCK
+
+// Redefine all calls to numa functions as calls through pointers that are set
+// to the functions of libnuma in the initialization.
+#define mbind(...) mbind_ptr(__VA_ARGS__)
+#define numa_available() numa_available_ptr()
+#define numa_max_node() numa_max_node_ptr()
+#define numa_node_of_cpu(...) numa_node_of_cpu_ptr(__VA_ARGS__)
+
+#endif // HAVE_NUMA_H
+
#if defined(_ARM_) || defined(_ARM64_)
#define SYSCONF_GET_NUMPROCS _SC_NPROCESSORS_CONF
#else
@@ -109,6 +136,74 @@ uint32_t g_pageSizeUnixInl = 0;
AffinitySet g_processAffinitySet;
+#if HAVE_CPUSET_T
+typedef cpuset_t cpu_set_t;
+#endif
+
+// The highest NUMA node available
+int g_highestNumaNode = 0;
+// Is numa available
+bool g_numaAvailable = false;
+
+void* g_numaHandle = nullptr;
+
+#if HAVE_NUMA_H
+#define PER_FUNCTION_BLOCK(fn) decltype(fn)* fn##_ptr;
+FOR_ALL_NUMA_FUNCTIONS
+#undef PER_FUNCTION_BLOCK
+#endif // HAVE_NUMA_H
+
+
+// Initialize data structures for getting and setting thread affinities to processors and
+// querying NUMA related processor information.
+// On systems with no NUMA support, it behaves as if there was a single NUMA node with
+// a single group of processors.
+void NUMASupportInitialize()
+{
+#if HAVE_NUMA_H
+ g_numaHandle = dlopen("libnuma.so", RTLD_LAZY);
+ if (g_numaHandle == 0)
+ {
+ g_numaHandle = dlopen("libnuma.so.1", RTLD_LAZY);
+ }
+ if (g_numaHandle != 0)
+ {
+ dlsym(g_numaHandle, "numa_allocate_cpumask");
+#define PER_FUNCTION_BLOCK(fn) \
+ fn##_ptr = (decltype(fn)*)dlsym(g_numaHandle, #fn); \
+ if (fn##_ptr == NULL) { fprintf(stderr, "Cannot get symbol " #fn " from libnuma\n"); abort(); }
+FOR_ALL_NUMA_FUNCTIONS
+#undef PER_FUNCTION_BLOCK
+
+ if (numa_available() == -1)
+ {
+ dlclose(g_numaHandle);
+ }
+ else
+ {
+ g_numaAvailable = true;
+ g_highestNumaNode = numa_max_node();
+ }
+ }
+#endif // HAVE_NUMA_H
+ if (!g_numaAvailable)
+ {
+ // No NUMA
+ g_highestNumaNode = 0;
+ }
+}
+
+// Cleanup of the NUMA support data structures
+void NUMASupportCleanup()
+{
+#if HAVE_NUMA_H
+ if (g_numaAvailable)
+ {
+ dlclose(g_numaHandle);
+ }
+#endif // HAVE_NUMA_H
+}
+
// Initialize the interface implementation
// Return:
// true if it has succeeded, false if it has failed
@@ -221,6 +316,8 @@ bool GCToOSInterface::Initialize()
#endif // HAVE_SCHED_GETAFFINITY
+ NUMASupportInitialize();
+
return true;
}
@@ -235,6 +332,7 @@ void GCToOSInterface::Shutdown()
munmap(g_helperPage, OS_PAGE_SIZE);
CleanupCGroup();
+ NUMASupportCleanup();
}
// Get numeric id of the current thread if possible on the
@@ -468,8 +566,29 @@ void* GCToOSInterface::VirtualReserveAndCommitLargePages(size_t size)
// true if it has succeeded, false if it has failed
bool GCToOSInterface::VirtualCommit(void* address, size_t size, uint16_t node)
{
- assert(node == NUMA_NODE_UNDEFINED && "Numa allocation is not ported to local GC on unix yet");
- return mprotect(address, size, PROT_WRITE | PROT_READ) == 0;
+ bool success = mprotect(address, size, PROT_WRITE | PROT_READ) == 0;
+
+#if HAVE_NUMA_H
+ if (success && g_numaAvailable && (node != NUMA_NODE_UNDEFINED))
+ {
+ if ((int)node <= g_highestNumaNode)
+ {
+ int nodeMaskLength = (g_highestNumaNode + 1 + sizeof(unsigned long) - 1) / sizeof(unsigned long);
+ unsigned long *nodeMask = (unsigned long*)alloca(nodeMaskLength * sizeof(unsigned long));
+ memset(nodeMask, 0, nodeMaskLength);
+
+ int index = node / sizeof(unsigned long);
+ int mask = ((unsigned long)1) << (node & (sizeof(unsigned long) - 1));
+ nodeMask[index] = mask;
+
+ int st = mbind(address, size, MPOL_PREFERRED, nodeMask, g_highestNumaNode, 0);
+ assert(st == 0);
+ // If the mbind fails, we still return the allocated memory since the node is just a hint
+ }
+ }
+#endif // HAVE_NUMA_H
+
+ return success;
}
// Decomit virtual memory range.
@@ -775,13 +894,7 @@ uint32_t GCToOSInterface::GetTotalProcessorCount()
bool GCToOSInterface::CanEnableGCNumaAware()
{
- return false;
-}
-
-bool GCToOSInterface::GetNumaProcessorNode(uint16_t proc_no, uint16_t *node_no)
-{
- assert(!"Numa has not been ported to local GC for unix");
- return false;
+ return g_numaAvailable;
}
// Get processor number and optionally its NUMA node number for the specified heap number
@@ -806,10 +919,8 @@ bool GCToOSInterface::GetProcessorForHeap(uint16_t heap_number, uint16_t* proc_n
if (GCToOSInterface::CanEnableGCNumaAware())
{
- if (!GCToOSInterface::GetNumaProcessorNode(procNumber, node_no))
- {
- *node_no = NUMA_NODE_UNDEFINED;
- }
+ int result = numa_node_of_cpu(procNumber);
+ *node_no = (result >= 0) ? (uint16_t)result : NUMA_NODE_UNDEFINED;
}
else
{
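For readers unfamiliar with the PER_FUNCTION_BLOCK pattern used above: libnuma is loaded lazily so the GC never takes a hard link-time dependency on it, and every call site goes through a pointer resolved with dlsym at startup. A stripped-down sketch of the same pattern follows; the library and symbol names match the hunk, while the surrounding scaffolding is illustrative.

// Sketch only, not part of the patch: lazy-binding a single libnuma entry
// point through a function pointer, i.e. the expanded form of one
// PER_FUNCTION_BLOCK line plus the dlopen/numa_available checks.
#include <dlfcn.h>
#include <stdio.h>

typedef int (*numa_available_fn)(void);
static numa_available_fn numa_available_ptr = nullptr;
static void* g_numaHandle = nullptr;

bool LoadNuma()
{
    g_numaHandle = dlopen("libnuma.so.1", RTLD_LAZY);
    if (g_numaHandle == nullptr)
        return false;

    numa_available_ptr = (numa_available_fn)dlsym(g_numaHandle, "numa_available");
    if (numa_available_ptr == nullptr)
    {
        fprintf(stderr, "Cannot get symbol numa_available from libnuma\n");
        dlclose(g_numaHandle);
        return false;
    }

    // numa_available() returns -1 when the NUMA API is unusable on this machine.
    if (numa_available_ptr() == -1)
    {
        dlclose(g_numaHandle);
        return false;
    }

    return true;
}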
diff --git a/src/gc/windows/gcenv.windows.cpp b/src/gc/windows/gcenv.windows.cpp
index 86bd7038c0..d2bcde899a 100644
--- a/src/gc/windows/gcenv.windows.cpp
+++ b/src/gc/windows/gcenv.windows.cpp
@@ -1286,19 +1286,6 @@ bool GCToOSInterface::CanEnableGCNumaAware()
return g_fEnableGCNumaAware;
}
-bool GCToOSInterface::GetNumaProcessorNode(uint16_t proc_no, uint16_t *node_no)
-{
- GroupProcNo groupProcNo(proc_no);
-
- PROCESSOR_NUMBER procNumber;
- procNumber.Group = groupProcNo.GetGroup();
- procNumber.Number = (BYTE)groupProcNo.GetProcIndex();
- procNumber.Reserved = 0;
-
- assert(g_fEnableGCNumaAware);
- return ::GetNumaProcessorNodeEx(&procNumber, node_no) != FALSE;
-}
-
// Get processor number and optionally its NUMA node number for the specified heap number
// Parameters:
// heap_number - heap number to get the result for
@@ -1310,53 +1297,67 @@ bool GCToOSInterface::GetProcessorForHeap(uint16_t heap_number, uint16_t* proc_n
{
bool success = false;
- if (CanEnableGCCPUGroups())
+ // Locate heap_number-th available processor
+ uint16_t procNumber;
+ size_t cnt = heap_number;
+ for (uint16_t i = 0; i < GCToOSInterface::GetTotalProcessorCount(); i++)
{
- uint16_t gn, gpn;
- GetGroupForProcessor((uint16_t)heap_number, &gn, &gpn);
-
- *proc_no = GroupProcNo(gn, gpn).GetCombinedValue();
-
- if (GCToOSInterface::CanEnableGCNumaAware())
+ if (g_processAffinitySet.Contains(i))
{
- if (!GCToOSInterface::GetNumaProcessorNode(*proc_no, node_no))
+ if (cnt == 0)
{
- *node_no = NUMA_NODE_UNDEFINED;
+ procNumber = i;
+ success = true;
+ break;
}
+
+ cnt--;
+ }
+ }
+
+ if (success)
+ {
+ WORD gn, gpn;
+
+ if (CanEnableGCCPUGroups())
+ {
+ GetGroupForProcessor(procNumber, &gn, &gpn);
}
else
- { // no numa setting, each cpu group is treated as a node
- *node_no = gn;
+ {
+ gn = GroupProcNo::NoGroup;
+ gpn = procNumber;
}
- success = true;
- }
- else
- {
- int bit_number = 0;
- uint8_t proc_number = 0;
- for (uintptr_t mask = 1; mask != 0; mask <<= 1)
+ GroupProcNo groupProcNo(gn, gpn);
+ *proc_no = groupProcNo.GetCombinedValue();
+
+ if (GCToOSInterface::CanEnableGCNumaAware())
{
- if (g_processAffinitySet.Contains(proc_number))
+ PROCESSOR_NUMBER procNumber;
+
+ if (CanEnableGCCPUGroups())
{
- if (bit_number == heap_number)
- {
- *proc_no = GroupProcNo(GroupProcNo::NoGroup, proc_number).GetCombinedValue();
+ procNumber.Group = gn;
+ }
+ else
+ {
+ // Get the current processor group
+ PROCESSOR_NUMBER procNumber;
+ GetCurrentProcessorNumberEx(&procNumber);
+ }
- if (GCToOSInterface::CanEnableGCNumaAware())
- {
- if (!GCToOSInterface::GetNumaProcessorNode(proc_number, node_no))
- {
- *node_no = NUMA_NODE_UNDEFINED;
- }
- }
+ procNumber.Number = (BYTE)gpn;
+ procNumber.Reserved = 0;
- success = true;
- break;
- }
- bit_number++;
+ if (GetNumaProcessorNodeEx(&procNumber, node_no))
+ {
+ *node_no = NUMA_NODE_UNDEFINED;
}
- proc_number++;
+ }
+ else
+ { // no numa setting, each cpu group is treated as a node
+ *node_no = groupProcNo.GetGroup();
}
}
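Both the Windows rewrite above and the Unix/vm rewrite later in this commit share the same first step: walk the processor indices and pick the heap_number-th one that is present in the process affinity set. A self-contained sketch of that selection is below; the callback stands in for g_processAffinitySet.Contains and the helper name is hypothetical.

// Sketch only, not part of the patch: locate the heap_number-th available
// processor, i.e. the heap_number-th set bit of the process affinity set.
#include <stdint.h>
#include <stddef.h>

bool FindProcessorForHeap(uint16_t heap_number, uint16_t totalProcs,
                          bool (*isInAffinitySet)(uint16_t), uint16_t* procNumber)
{
    size_t cnt = heap_number;
    for (uint16_t i = 0; i < totalProcs; i++)
    {
        if (isInAffinitySet(i))
        {
            if (cnt == 0)
            {
                *procNumber = i;
                return true;
            }
            cnt--;
        }
    }
    // Heaps beyond the number of affinitized processors get no processor.
    return false;
}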
diff --git a/src/inc/utilcode.h b/src/inc/utilcode.h
index 5b222b1ab9..cf4b8ddf12 100644
--- a/src/inc/utilcode.h
+++ b/src/inc/utilcode.h
@@ -1331,10 +1331,7 @@ BYTE * ClrVirtualAllocWithinRange(const BYTE *pMinAddr,
// Allocate free memory with specific alignment
//
LPVOID ClrVirtualAllocAligned(LPVOID lpAddress, SIZE_T dwSize, DWORD flAllocationType, DWORD flProtect, SIZE_T alignment);
-
-//******************************************************************************
-// Returns the number of processors that a process has been configured to run on
-//******************************************************************************
+
class NumaNodeInfo
{
private:
@@ -1350,10 +1347,16 @@ public: // functions
static LPVOID VirtualAllocExNuma(HANDLE hProc, LPVOID lpAddr, SIZE_T size,
DWORD allocType, DWORD prot, DWORD node);
+#ifndef FEATURE_PAL
static BOOL GetNumaProcessorNodeEx(PPROCESSOR_NUMBER proc_no, PUSHORT node_no);
+#else // !FEATURE_PAL
+ static BOOL GetNumaProcessorNodeEx(USHORT proc_no, PUSHORT node_no);
+#endif // !FEATURE_PAL
#endif
};
+#ifndef FEATURE_PAL
+
struct CPU_Group_Info
{
WORD nr_active; // at most 64
@@ -1413,9 +1416,15 @@ public:
}
};
-int GetCurrentProcessCpuCount();
DWORD_PTR GetCurrentProcessCpuMask();
+#endif // !FEATURE_PAL
+
+//******************************************************************************
+// Returns the number of processors that a process has been configured to run on
+//******************************************************************************
+int GetCurrentProcessCpuCount();
+
uint32_t GetOsPageSize();
diff --git a/src/pal/inc/pal.h b/src/pal/inc/pal.h
index 0c9b5a70b1..79bc677830 100644
--- a/src/pal/inc/pal.h
+++ b/src/pal/inc/pal.h
@@ -3996,88 +3996,6 @@ CreatePipe(
// NUMA related APIs
//
-typedef enum _PROCESSOR_CACHE_TYPE {
- CacheUnified,
- CacheInstruction,
- CacheData,
- CacheTrace
-} PROCESSOR_CACHE_TYPE;
-
-typedef struct _PROCESSOR_NUMBER {
- WORD Group;
- BYTE Number;
- BYTE Reserved;
-} PROCESSOR_NUMBER, *PPROCESSOR_NUMBER;
-
-typedef enum _LOGICAL_PROCESSOR_RELATIONSHIP {
- RelationProcessorCore,
- RelationNumaNode,
- RelationCache,
- RelationProcessorPackage,
- RelationGroup,
- RelationAll = 0xffff
-} LOGICAL_PROCESSOR_RELATIONSHIP;
-
-typedef ULONG_PTR KAFFINITY;
-
-#define ANYSIZE_ARRAY 1
-
-typedef struct _GROUP_AFFINITY {
- KAFFINITY Mask;
- WORD Group;
- WORD Reserved[3];
-} GROUP_AFFINITY, *PGROUP_AFFINITY;
-
-typedef struct _PROCESSOR_GROUP_INFO {
- BYTE MaximumProcessorCount;
- BYTE ActiveProcessorCount;
- BYTE Reserved[38];
- KAFFINITY ActiveProcessorMask;
-} PROCESSOR_GROUP_INFO, *PPROCESSOR_GROUP_INFO;
-
-typedef struct _PROCESSOR_RELATIONSHIP {
- BYTE Flags;
- BYTE EfficiencyClass;
- BYTE Reserved[21];
- WORD GroupCount;
- GROUP_AFFINITY GroupMask[ANYSIZE_ARRAY];
-} PROCESSOR_RELATIONSHIP, *PPROCESSOR_RELATIONSHIP;
-
-typedef struct _GROUP_RELATIONSHIP {
- WORD MaximumGroupCount;
- WORD ActiveGroupCount;
- BYTE Reserved[20];
- PROCESSOR_GROUP_INFO GroupInfo[ANYSIZE_ARRAY];
-} GROUP_RELATIONSHIP, *PGROUP_RELATIONSHIP;
-
-typedef struct _NUMA_NODE_RELATIONSHIP {
- DWORD NodeNumber;
- BYTE Reserved[20];
- GROUP_AFFINITY GroupMask;
-} NUMA_NODE_RELATIONSHIP, *PNUMA_NODE_RELATIONSHIP;
-
-typedef struct _CACHE_RELATIONSHIP {
- BYTE Level;
- BYTE Associativity;
- WORD LineSize;
- DWORD CacheSize;
- PROCESSOR_CACHE_TYPE Type;
- BYTE Reserved[20];
- GROUP_AFFINITY GroupMask;
-} CACHE_RELATIONSHIP, *PCACHE_RELATIONSHIP;
-
-typedef struct _SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX {
- LOGICAL_PROCESSOR_RELATIONSHIP Relationship;
- DWORD Size;
- union {
- PROCESSOR_RELATIONSHIP Processor;
- NUMA_NODE_RELATIONSHIP NumaNode;
- CACHE_RELATIONSHIP Cache;
- GROUP_RELATIONSHIP Group;
- };
-} SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, *PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX;
-
-
PALIMPORT
BOOL
PALAPI
@@ -4088,10 +4006,7 @@ GetNumaHighestNodeNumber(
PALIMPORT
BOOL
PALAPI
-GetNumaProcessorNodeEx(
- IN PPROCESSOR_NUMBER Processor,
- OUT PUSHORT NodeNumber
-);
+PAL_GetNumaProcessorNode(WORD procNo, WORD* node);
PALIMPORT
LPVOID
@@ -4108,61 +4023,12 @@ VirtualAllocExNuma(
PALIMPORT
BOOL
PALAPI
-GetLogicalProcessorInformationEx(
- IN LOGICAL_PROCESSOR_RELATIONSHIP RelationshipType,
- OUT OPTIONAL PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer,
- IN OUT PDWORD ReturnedLength
-);
-
-PALIMPORT
-DWORD_PTR
-PALAPI
-SetThreadAffinityMask(
- IN HANDLE hThread,
- IN DWORD_PTR dwThreadAffinityMask
-);
-
-PALIMPORT
-BOOL
-PALAPI
-SetThreadGroupAffinity(
- IN HANDLE hThread,
- IN const GROUP_AFFINITY *GroupAffinity,
- OUT OPTIONAL PGROUP_AFFINITY PreviousGroupAffinity
-);
+PAL_SetCurrentThreadAffinity(WORD procNo);
PALIMPORT
BOOL
PALAPI
-GetThreadGroupAffinity(
- IN HANDLE hThread,
- OUT PGROUP_AFFINITY GroupAffinity
-);
-
-PALIMPORT
-VOID
-PALAPI
-GetCurrentProcessorNumberEx(
- OUT PPROCESSOR_NUMBER ProcNumber
-);
-
-PALIMPORT
-BOOL
-PALAPI
-GetProcessAffinityMask(
- IN HANDLE hProcess,
- OUT PDWORD_PTR lpProcessAffinityMask,
- OUT PDWORD_PTR lpSystemAffinityMask
-);
-
-PALIMPORT
-BOOL
-PALAPI
-SetThreadIdealProcessorEx(
- IN HANDLE hThread,
- IN PPROCESSOR_NUMBER lpIdealProcessor,
- OUT PPROCESSOR_NUMBER lpPreviousIdealProcessor
-);
+PAL_GetCurrentThreadAffinitySet(SIZE_T size, UINT_PTR* data);
//
// The types of events that can be logged.
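Taken together, the three new exports replace the removed Win32-style group-affinity emulation with a flat processor-number model. A hedged usage sketch of the declarations above follows; it assumes pal.h is included, the buffer size is an arbitrary assumption, and error handling is minimal.

// Sketch only, not part of the patch: enumerate the current thread's affinity
// set, pin the thread to the first allowed processor, and query its NUMA node,
// using only the PAL_* exports declared above.
#include <stdio.h>

void AffinitizeToFirstAllowedProcessor()
{
    const SIZE_T entries = 8;                 // assumption: room for 512 CPUs on a 64-bit build
    UINT_PTR affinitySet[entries] = {};

    if (!PAL_GetCurrentThreadAffinitySet(entries, affinitySet))
        return;                               // no affinity API on this OS

    const SIZE_T bitsPerEntry = 8 * sizeof(UINT_PTR);
    for (SIZE_T i = 0; i < entries * bitsPerEntry; i++)
    {
        if (affinitySet[i / bitsPerEntry] & ((UINT_PTR)1 << (i % bitsPerEntry)))
        {
            WORD procNo = (WORD)i;
            if (PAL_SetCurrentThreadAffinity(procNo))
            {
                WORD node;
                if (PAL_GetNumaProcessorNode(procNo, &node))
                    printf("Affinitized to CPU %u on NUMA node %u\n", (unsigned)procNo, (unsigned)node);
            }
            break;
        }
    }
}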
diff --git a/src/pal/src/include/pal/palinternal.h b/src/pal/src/include/pal/palinternal.h
index 67236aaa49..6f64208e05 100644
--- a/src/pal/src/include/pal/palinternal.h
+++ b/src/pal/src/include/pal/palinternal.h
@@ -679,6 +679,9 @@ typedef enum _TimeConversionConstants
bool
ReadMemoryValueFromFile(const char* filename, size_t* val);
+DWORD
+GetTotalCpuCount();
+
#ifdef __APPLE__
bool
GetApplicationContainerFolder(PathCharString& buffer, const char *applicationGroupId, int applicationGroupIdLength);
diff --git a/src/pal/src/misc/sysinfo.cpp b/src/pal/src/misc/sysinfo.cpp
index 2c14949b95..419c3f6708 100644
--- a/src/pal/src/misc/sysinfo.cpp
+++ b/src/pal/src/misc/sysinfo.cpp
@@ -95,24 +95,11 @@ SET_DEFAULT_DEBUG_CHANNEL(MISC);
#endif
#endif // __APPLE__
-
-DWORD
-PALAPI
-PAL_GetLogicalCpuCountFromOS()
+DWORD GetTotalCpuCount()
{
int nrcpus = 0;
-#if HAVE_SCHED_GETAFFINITY
-
- cpu_set_t cpuSet;
- int st = sched_getaffinity(0, sizeof(cpu_set_t), &cpuSet);
- if (st != 0)
- {
- ASSERT("sched_getaffinity failed (%d)\n", errno);
- }
-
- nrcpus = CPU_COUNT(&cpuSet);
-#elif HAVE_SYSCONF
+#if HAVE_SYSCONF
#if defined(_ARM_) || defined(_ARM64_)
#define SYSCONF_GET_NUMPROCS _SC_NPROCESSORS_CONF
@@ -139,11 +126,36 @@ PAL_GetLogicalCpuCountFromOS()
{
ASSERT("sysctl failed for HW_NCPU (%d)\n", errno);
}
+#else // HAVE_SYSCONF
+#error "Don't know how to get total CPU count on this platform"
#endif // HAVE_SYSCONF
return nrcpus;
}
+DWORD
+PALAPI
+PAL_GetLogicalCpuCountFromOS()
+{
+ int nrcpus = 0;
+
+#if HAVE_SCHED_GETAFFINITY
+
+ cpu_set_t cpuSet;
+ int st = sched_getaffinity(0, sizeof(cpu_set_t), &cpuSet);
+ if (st != 0)
+ {
+ ASSERT("sched_getaffinity failed (%d)\n", errno);
+ }
+
+ nrcpus = CPU_COUNT(&cpuSet);
+#else // HAVE_SCHED_GETAFFINITY
+ nrcpus = GetTotalCpuCount();
+#endif // HAVE_SCHED_GETAFFINITY
+
+ return nrcpus;
+}
+
/*++
Function:
GetSystemInfo
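The split above separates two different questions: GetTotalCpuCount asks how many logical CPUs the machine has (sysconf), while PAL_GetLogicalCpuCountFromOS asks how many the process may actually run on (sched_getaffinity when available). A minimal Linux sketch of the distinction, using _SC_NPROCESSORS_ONLN rather than the ARM-specific _SC_NPROCESSORS_CONF branch:

// Sketch only, not part of the patch: total CPUs vs. CPUs usable by this
// process. On a 16-CPU box started under `taskset -c 0-3`, the first value
// is 16 and the second is 4.
#define _GNU_SOURCE
#include <unistd.h>
#include <sched.h>
#include <stdio.h>

int main()
{
    long total = sysconf(_SC_NPROCESSORS_ONLN);

    cpu_set_t cpuSet;
    int usable = -1;
    if (sched_getaffinity(0, sizeof(cpu_set_t), &cpuSet) == 0)
    {
        usable = CPU_COUNT(&cpuSet);
    }

    printf("total=%ld usable=%d\n", total, usable);
    return 0;
}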
diff --git a/src/pal/src/numa/numa.cpp b/src/pal/src/numa/numa.cpp
index 9283a044da..0c9d4090a5 100644
--- a/src/pal/src/numa/numa.cpp
+++ b/src/pal/src/numa/numa.cpp
@@ -47,33 +47,6 @@ using namespace CorUnix;
typedef cpuset_t cpu_set_t;
#endif
-// CPU affinity descriptor
-struct CpuAffinity
-{
- // NUMA node
- BYTE Node;
- // CPU number relative to the group the CPU is in
- BYTE Number;
- // CPU group
- WORD Group;
-};
-
-// Array mapping global CPU index to its affinity
-CpuAffinity *g_cpuToAffinity = NULL;
-
-// Array mapping CPU group and index in the group to the global CPU index
-short *g_groupAndIndexToCpu = NULL;
-// Array mapping CPU group to the corresponding affinity mask of the CPUs in the group
-KAFFINITY *g_groupToCpuMask = NULL;
-// Array mapping CPU group to the number of processors in the group
-BYTE *g_groupToCpuCount = NULL;
-
-// Total number of processors in the system
-int g_cpuCount = 0;
-// Total number of possible processors in the system
-int g_possibleCpuCount = 0;
-// Total number of CPU groups
-int g_groupCount = 0;
// The highest NUMA node available
int g_highestNumaNode = 0;
// Is numa available
@@ -87,92 +60,6 @@ FOR_ALL_NUMA_FUNCTIONS
#undef PER_FUNCTION_BLOCK
#endif // HAVE_NUMA_H
-static const int MaxCpusPerGroup = 8 * sizeof(KAFFINITY);
-static const WORD NO_GROUP = 0xffff;
-
-/*++
-Function:
- FreeLookupArrays
-
-Free CPU and group lookup arrays
---*/
-VOID
-FreeLookupArrays()
-{
- free(g_groupAndIndexToCpu);
- free(g_cpuToAffinity);
- free(g_groupToCpuMask);
- free(g_groupToCpuCount);
-
- g_groupAndIndexToCpu = NULL;
- g_cpuToAffinity = NULL;
- g_groupToCpuMask = NULL;
- g_groupToCpuCount = NULL;
-}
-
-/*++
-Function:
- AllocateLookupArrays
-
-Allocate CPU and group lookup arrays
-Return TRUE if the allocation succeeded
---*/
-BOOL
-AllocateLookupArrays()
-{
- g_groupAndIndexToCpu = (short*)malloc(g_groupCount * MaxCpusPerGroup * sizeof(short));
- if (g_groupAndIndexToCpu == NULL)
- {
- goto FAILED;
- }
-
- g_cpuToAffinity = (CpuAffinity*)malloc(g_possibleCpuCount * sizeof(CpuAffinity));
- if (g_cpuToAffinity == NULL)
- {
- goto FAILED;
- }
-
- g_groupToCpuMask = (KAFFINITY*)malloc(g_groupCount * sizeof(KAFFINITY));
- if (g_groupToCpuMask == NULL)
- {
- goto FAILED;
- }
-
- g_groupToCpuCount = (BYTE*)malloc(g_groupCount * sizeof(BYTE));
- if (g_groupToCpuCount == NULL)
- {
- goto FAILED;
- }
-
- memset(g_groupAndIndexToCpu, 0xff, g_groupCount * MaxCpusPerGroup * sizeof(short));
- memset(g_cpuToAffinity, 0xff, g_possibleCpuCount * sizeof(CpuAffinity));
- memset(g_groupToCpuMask, 0, g_groupCount * sizeof(KAFFINITY));
- memset(g_groupToCpuCount, 0, g_groupCount * sizeof(BYTE));
-
- return TRUE;
-
-FAILED:
- FreeLookupArrays();
-
- return FALSE;
-}
-
-/*++
-Function:
- GetFullAffinityMask
-
-Get affinity mask for the specified number of processors with all
-the processors enabled.
---*/
-KAFFINITY GetFullAffinityMask(int cpuCount)
-{
- if ((size_t)cpuCount < sizeof(KAFFINITY) * 8)
- {
- return ((KAFFINITY)1 << (cpuCount)) - 1;
- }
-
- return ~(KAFFINITY)0;
-}
/*++
Function:
@@ -208,73 +95,6 @@ FOR_ALL_NUMA_FUNCTIONS
else
{
g_numaAvailable = true;
-
- struct bitmask *mask = numa_allocate_cpumask();
- int numaNodesCount = numa_max_node() + 1;
-
- g_possibleCpuCount = numa_num_possible_cpus();
- g_cpuCount = 0;
- g_groupCount = 0;
-
- for (int i = 0; i < numaNodesCount; i++)
- {
- int st = numa_node_to_cpus(i, mask);
- // The only failure that can happen is that the mask is not large enough
- // but that cannot happen since the mask was allocated by numa_allocate_cpumask
- _ASSERTE(st == 0);
- unsigned int nodeCpuCount = numa_bitmask_weight(mask);
- g_cpuCount += nodeCpuCount;
- unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup;
- g_groupCount += nodeGroupCount;
- }
-
- if (!AllocateLookupArrays())
- {
- dlclose(numaHandle);
- return FALSE;
- }
-
- WORD currentGroup = 0;
- int currentGroupCpus = 0;
-
- for (int i = 0; i < numaNodesCount; i++)
- {
- int st = numa_node_to_cpus(i, mask);
- // The only failure that can happen is that the mask is not large enough
- // but that cannot happen since the mask was allocated by numa_allocate_cpumask
- _ASSERTE(st == 0);
- unsigned int nodeCpuCount = numa_bitmask_weight(mask);
- unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup;
- for (int j = 0; j < g_possibleCpuCount; j++)
- {
- if (numa_bitmask_isbitset(mask, j))
- {
- if (currentGroupCpus == MaxCpusPerGroup)
- {
- g_groupToCpuCount[currentGroup] = MaxCpusPerGroup;
- g_groupToCpuMask[currentGroup] = GetFullAffinityMask(MaxCpusPerGroup);
- currentGroupCpus = 0;
- currentGroup++;
- }
- g_cpuToAffinity[j].Node = i;
- g_cpuToAffinity[j].Group = currentGroup;
- g_cpuToAffinity[j].Number = currentGroupCpus;
- g_groupAndIndexToCpu[currentGroup * MaxCpusPerGroup + currentGroupCpus] = j;
- currentGroupCpus++;
- }
- }
-
- if (currentGroupCpus != 0)
- {
- g_groupToCpuCount[currentGroup] = currentGroupCpus;
- g_groupToCpuMask[currentGroup] = GetFullAffinityMask(currentGroupCpus);
- currentGroupCpus = 0;
- currentGroup++;
- }
- }
-
- numa_free_cpumask(mask);
-
g_highestNumaNode = numa_max_node();
}
}
@@ -282,21 +102,7 @@ FOR_ALL_NUMA_FUNCTIONS
if (!g_numaAvailable)
{
// No NUMA
- g_possibleCpuCount = PAL_GetLogicalCpuCountFromOS();
- g_cpuCount = PAL_GetLogicalCpuCountFromOS();
- g_groupCount = 1;
g_highestNumaNode = 0;
-
- if (!AllocateLookupArrays())
- {
- return FALSE;
- }
-
- for (int i = 0; i < g_possibleCpuCount; i++)
- {
- g_cpuToAffinity[i].Number = i;
- g_cpuToAffinity[i].Group = 0;
- }
}
return TRUE;
@@ -311,7 +117,6 @@ Cleanup of the NUMA support data structures
VOID
NUMASupportCleanup()
{
- FreeLookupArrays();
#if HAVE_NUMA_H
if (g_numaAvailable)
{
@@ -346,493 +151,35 @@ GetNumaHighestNodeNumber(
/*++
Function:
- GetNumaProcessorNodeEx
-
-See MSDN doc.
---*/
-BOOL
-PALAPI
-GetNumaProcessorNodeEx(
- IN PPROCESSOR_NUMBER Processor,
- OUT PUSHORT NodeNumber
-)
-{
- PERF_ENTRY(GetNumaProcessorNodeEx);
- ENTRY("GetNumaProcessorNodeEx(Processor=%p, NodeNumber=%p)\n", Processor, NodeNumber);
+ PAL_GetNumaProcessorNode
- BOOL success = FALSE;
+Abstract
+ Get NUMA node of a processor
- if ((Processor->Group < g_groupCount) &&
- (Processor->Number < MaxCpusPerGroup) &&
- (Processor->Reserved == 0))
- {
- short cpu = g_groupAndIndexToCpu[Processor->Group * MaxCpusPerGroup + Processor->Number];
- if (cpu != -1)
- {
- *NodeNumber = g_cpuToAffinity[cpu].Node;
- success = TRUE;
- }
- }
-
- if (!success)
- {
- *NodeNumber = 0xffff;
- SetLastError(ERROR_INVALID_PARAMETER);
- }
+Parameters:
+ procNo - number of the processor to get the NUMA node for
+ node - the resulting NUMA node
- LOGEXIT("GetNumaProcessorNodeEx returns BOOL %d\n", success);
- PERF_EXIT(GetNumaProcessorNodeEx);
-
- return success;
-}
-
-/*++
-Function:
- GetLogicalProcessorInformationEx
-
-See MSDN doc.
---*/
-BOOL
-PALAPI
-GetLogicalProcessorInformationEx(
- IN LOGICAL_PROCESSOR_RELATIONSHIP RelationshipType,
- OUT OPTIONAL PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer,
- IN OUT PDWORD ReturnedLength
-)
-{
- PERF_ENTRY(GetLogicalProcessorInformationEx);
- ENTRY("GetLogicalProcessorInformationEx(RelationshipType=%d, Buffer=%p, ReturnedLength=%p)\n", RelationshipType, Buffer, ReturnedLength);
-
- BOOL success = FALSE;
-
- if (RelationshipType == RelationGroup)
- {
- size_t requiredSize = __builtin_offsetof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, Group);
- requiredSize += __builtin_offsetof(GROUP_RELATIONSHIP, GroupInfo);
- requiredSize += g_groupCount * sizeof(PROCESSOR_GROUP_INFO);
-
- if (*ReturnedLength >= requiredSize)
- {
- Buffer->Relationship = RelationGroup;
- Buffer->Size = requiredSize;
- Buffer->Group.MaximumGroupCount = g_groupCount;
- Buffer->Group.ActiveGroupCount = g_groupCount;
- for (int i = 0; i < g_groupCount; i++)
- {
- Buffer->Group.GroupInfo[i].MaximumProcessorCount = MaxCpusPerGroup;
- Buffer->Group.GroupInfo[i].ActiveProcessorCount = g_groupToCpuCount[i];
- Buffer->Group.GroupInfo[i].ActiveProcessorMask = g_groupToCpuMask[i];
- }
-
- success = TRUE;
- }
- else
- {
- SetLastError(ERROR_INSUFFICIENT_BUFFER);
- }
-
- *ReturnedLength = requiredSize;
- }
- else
- {
- // We only support the group relationship
- SetLastError(ERROR_INVALID_PARAMETER);
- }
-
- LOGEXIT("GetLogicalProcessorInformationEx returns BOOL %d\n", success);
- PERF_EXIT(GetLogicalProcessorInformationEx);
-
- return success;
-}
-
-/*++
-Function:
- GetThreadGroupAffinityInternal
-
-Get the group affinity for the specified pthread
---*/
-BOOL
-GetThreadGroupAffinityInternal(
- IN pthread_t thread,
- OUT PGROUP_AFFINITY GroupAffinity
-)
-{
- BOOL success = FALSE;
-
-#if HAVE_PTHREAD_GETAFFINITY_NP
- cpu_set_t cpuSet;
-
- int st = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &cpuSet);
-
- if (st == 0)
- {
- WORD group = NO_GROUP;
- KAFFINITY mask = 0;
-
- for (int i = 0; i < g_possibleCpuCount; i++)
- {
- if (CPU_ISSET(i, &cpuSet))
- {
- WORD g = g_cpuToAffinity[i].Group;
- // Unless the thread affinity was already set by SetThreadGroupAffinity, it is possible that
- // the current thread has affinity with processors from multiple groups. So we report just the
- // first group we find.
- if (group == NO_GROUP || g == group)
- {
- group = g;
- mask |= ((KAFFINITY)1) << g_cpuToAffinity[i].Number;
- }
- }
- }
-
- GroupAffinity->Group = group;
- GroupAffinity->Mask = mask;
- success = TRUE;
- }
- else
- {
- SetLastError(ERROR_GEN_FAILURE);
- }
-#else // HAVE_PTHREAD_GETAFFINITY_NP
- // There is no API to manage thread affinity, so let's return a group affinity
- // with all the CPUs on the system.
- GroupAffinity->Group = 0;
- GroupAffinity->Mask = GetFullAffinityMask(g_possibleCpuCount);
- success = TRUE;
-#endif // HAVE_PTHREAD_GETAFFINITY_NP
-
- return success;
-}
-
-/*++
-Function:
- GetThreadGroupAffinity
-
-See MSDN doc.
---*/
-BOOL
-PALAPI
-GetThreadGroupAffinity(
- IN HANDLE hThread,
- OUT PGROUP_AFFINITY GroupAffinity
-)
-{
- PERF_ENTRY(GetThreadGroupAffinity);
- ENTRY("GetThreadGroupAffinity(hThread=%p, GroupAffinity=%p)\n", hThread, GroupAffinity);
- CPalThread *pCurrentThread = InternalGetCurrentThread();
- CPalThread *pTargetThread = NULL;
- IPalObject *pTargetThreadObject = NULL;
-
- PAL_ERROR palErr =
- InternalGetThreadDataFromHandle(pCurrentThread, hThread,
- 0, // THREAD_SET_CONTEXT
- &pTargetThread, &pTargetThreadObject);
-
- if (NO_ERROR != palErr)
- {
- ERROR("Unable to obtain thread data for handle %p (error %x)!\n", hThread,
- palErr);
- return FALSE;
- }
-
- BOOL success = GetThreadGroupAffinityInternal(
- pTargetThread->GetPThreadSelf(), GroupAffinity);
- LOGEXIT("GetThreadGroupAffinity returns BOOL %d\n", success);
- PERF_EXIT(GetThreadGroupAffinity);
-
- return success;
-}
-
-
-/*++
-Function:
- SetThreadGroupAffinity
-
-See MSDN doc.
+Return value:
+ TRUE if the function was able to get the NUMA node, FALSE if it has failed.
--*/
BOOL
PALAPI
-SetThreadGroupAffinity(
- IN HANDLE hThread,
- IN const GROUP_AFFINITY *GroupAffinity,
- OUT OPTIONAL PGROUP_AFFINITY PreviousGroupAffinity
-)
+PAL_GetNumaProcessorNode(WORD procNo, WORD* node)
{
- PERF_ENTRY(SetThreadGroupAffinity);
- ENTRY("SetThreadGroupAffinity(hThread=%p, GroupAffinity=%p, PreviousGroupAffinity=%p)\n", hThread, GroupAffinity, PreviousGroupAffinity);
-
- CPalThread *pCurrentThread = InternalGetCurrentThread();
- CPalThread *pTargetThread = NULL;
- IPalObject *pTargetThreadObject = NULL;
-
- PAL_ERROR palErr =
- InternalGetThreadDataFromHandle(pCurrentThread, hThread,
- 0, // THREAD_SET_CONTEXT
- &pTargetThread, &pTargetThreadObject);
-
- if (NO_ERROR != palErr)
- {
- ERROR("Unable to obtain thread data for handle %p (error %x)!\n", hThread,
- palErr);
- return FALSE;
- }
-
- pthread_t thread = pTargetThread->GetPThreadSelf();
-
- if (PreviousGroupAffinity != NULL)
- {
- GetThreadGroupAffinityInternal(thread, PreviousGroupAffinity);
- }
-
-#if HAVE_PTHREAD_GETAFFINITY_NP
- int groupStartIndex = GroupAffinity->Group * MaxCpusPerGroup;
- KAFFINITY mask = 1;
- cpu_set_t cpuSet;
- CPU_ZERO(&cpuSet);
-
- for (int i = 0; i < MaxCpusPerGroup; i++, mask <<= 1)
- {
- if (GroupAffinity->Mask & mask)
- {
- int cpu = g_groupAndIndexToCpu[groupStartIndex + i];
- if (cpu != -1)
- {
- CPU_SET(cpu, &cpuSet);
- }
- }
- }
-
- int st = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuSet);
-
- if (st != 0)
- {
- switch (st)
- {
- case EINVAL:
- // There is no processor in the mask that is allowed to execute the process
- SetLastError(ERROR_INVALID_PARAMETER);
- break;
- case ESRCH:
- SetLastError(ERROR_INVALID_HANDLE);
- break;
- default:
- SetLastError(ERROR_GEN_FAILURE);
- break;
- }
- }
-
- BOOL success = (st == 0);
-#else // HAVE_PTHREAD_GETAFFINITY_NP
- // There is no API to manage thread affinity, so let's ignore the request
- BOOL success = TRUE;
-#endif // HAVE_PTHREAD_GETAFFINITY_NP
-
- LOGEXIT("SetThreadGroupAffinity returns BOOL %d\n", success);
- PERF_EXIT(SetThreadGroupAffinity);
-
- return success;
-}
-
-/*++
-Function:
- SetThreadAffinityMask
-
-See MSDN doc.
---*/
-DWORD_PTR
-PALAPI
-SetThreadAffinityMask(
- IN HANDLE hThread,
- IN DWORD_PTR dwThreadAffinityMask
-)
-{
- PERF_ENTRY(SetThreadAffinityMask);
- ENTRY("SetThreadAffinityMask(hThread=%p, dwThreadAffinityMask=%p)\n", hThread, dwThreadAffinityMask);
-
- CPalThread *pCurrentThread = InternalGetCurrentThread();
- CPalThread *pTargetThread = NULL;
- IPalObject *pTargetThreadObject = NULL;
-
- PAL_ERROR palErr =
- InternalGetThreadDataFromHandle(pCurrentThread, hThread,
- 0, // THREAD_SET_CONTEXT
- &pTargetThread, &pTargetThreadObject);
-
- if (NO_ERROR != palErr)
- {
- ERROR("Unable to obtain thread data for handle %p (error %x)!\n", hThread,
- palErr);
- return 0;
- }
-
- pthread_t thread = pTargetThread->GetPThreadSelf();
-
-#if HAVE_PTHREAD_GETAFFINITY_NP
- cpu_set_t prevCpuSet;
- CPU_ZERO(&prevCpuSet);
- KAFFINITY prevMask = 0;
-
- int st = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &prevCpuSet);
-
- if (st == 0)
- {
- for (int i = 0; i < std::min(8 * (int)sizeof(KAFFINITY), g_possibleCpuCount); i++)
- {
- if (CPU_ISSET(i, &prevCpuSet))
- {
- prevMask |= ((KAFFINITY)1) << i;
- }
- }
- }
-
- cpu_set_t cpuSet;
- CPU_ZERO(&cpuSet);
-
- int cpu = 0;
- while (dwThreadAffinityMask)
- {
- if (dwThreadAffinityMask & 1)
- {
- CPU_SET(cpu, &cpuSet);
- }
- cpu++;
- dwThreadAffinityMask >>= 1;
- }
-
- st = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuSet);
-
- if (st != 0)
- {
- switch (st)
- {
- case EINVAL:
- // There is no processor in the mask that is allowed to execute the
- // process
- SetLastError(ERROR_INVALID_PARAMETER);
- break;
- case ESRCH:
- SetLastError(ERROR_INVALID_HANDLE);
- break;
- default:
- SetLastError(ERROR_GEN_FAILURE);
- break;
- }
- }
-
- DWORD_PTR ret = (st == 0) ? prevMask : 0;
-#else // HAVE_PTHREAD_GETAFFINITY_NP
- // There is no API to manage thread affinity, so let's ignore the request
- DWORD_PTR ret = 0;
-#endif // HAVE_PTHREAD_GETAFFINITY_NP
- LOGEXIT("SetThreadAffinityMask returns %lu\n", ret);
- PERF_EXIT(SetThreadAffinityMask);
-
- return ret;
-}
-
-/*++
-Function:
- GetCurrentProcessorNumberEx
-
-See MSDN doc.
---*/
-VOID
-PALAPI
-GetCurrentProcessorNumberEx(
- OUT PPROCESSOR_NUMBER ProcNumber
-)
-{
- PERF_ENTRY(GetCurrentProcessorNumberEx);
- ENTRY("GetCurrentProcessorNumberEx(ProcNumber=%p\n", ProcNumber);
-
- DWORD cpu = GetCurrentProcessorNumber();
- _ASSERTE((int)cpu < g_possibleCpuCount);
- ProcNumber->Group = g_cpuToAffinity[cpu].Group;
- ProcNumber->Number = g_cpuToAffinity[cpu].Number;
-
- LOGEXIT("GetCurrentProcessorNumberEx\n");
- PERF_EXIT(GetCurrentProcessorNumberEx);
-}
-
-/*++
-Function:
- GetProcessAffinityMask
-
-See MSDN doc.
---*/
-BOOL
-PALAPI
-GetProcessAffinityMask(
- IN HANDLE hProcess,
- OUT PDWORD_PTR lpProcessAffinityMask,
- OUT PDWORD_PTR lpSystemAffinityMask
-)
-{
- PERF_ENTRY(GetProcessAffinityMask);
- ENTRY("GetProcessAffinityMask(hProcess=%p, lpProcessAffinityMask=%p, lpSystemAffinityMask=%p\n", hProcess, lpProcessAffinityMask, lpSystemAffinityMask);
-
- BOOL success = FALSE;
-
- if (hProcess == GetCurrentProcess())
+#if HAVE_NUMA_H
+ if (g_numaAvailable)
{
- int cpuCountInMask = (g_cpuCount > 64) ? 64 : g_cpuCount;
-
- DWORD_PTR systemMask = GetFullAffinityMask(cpuCountInMask);
-
-#if HAVE_SCHED_GETAFFINITY
- int pid = getpid();
- cpu_set_t cpuSet;
- int st = sched_getaffinity(pid, sizeof(cpu_set_t), &cpuSet);
- if (st == 0)
+ int result = numa_node_of_cpu(procNo);
+ if (result >= 0)
{
- DWORD_PTR processMask = 0;
-
- for (int i = 0; i < cpuCountInMask; i++)
- {
- if (CPU_ISSET(i, &cpuSet))
- {
- processMask |= ((DWORD_PTR)1) << i;
- }
- }
-
- success = TRUE;
-
- *lpProcessAffinityMask = processMask;
- *lpSystemAffinityMask = systemMask;
- }
- else if (errno == EINVAL)
- {
- // There are more processors than can fit in a cpu_set_t
- // return all bits set for all processors (upto 64) for both masks.
- *lpProcessAffinityMask = systemMask;
- *lpSystemAffinityMask = systemMask;
- success = TRUE;
- }
- else
- {
- // We should not get any of the errors that the sched_getaffinity can return since none
- // of them applies for the current thread, so this is an unexpected kind of failure.
- SetLastError(ERROR_GEN_FAILURE);
+ *node = (WORD)result;
+ return TRUE;
}
-#else // HAVE_SCHED_GETAFFINITY
- // There is no API to manage thread affinity, so let's return both affinity masks
- // with all the CPUs on the system set.
- *lpSystemAffinityMask = systemMask;
- *lpProcessAffinityMask = systemMask;
-
- success = TRUE;
-#endif // HAVE_SCHED_GETAFFINITY
- }
- else
- {
- // PAL supports getting affinity mask for the current process only
- SetLastError(ERROR_INVALID_PARAMETER);
}
+#endif // HAVE_NUMA_H
- LOGEXIT("GetProcessAffinityMask returns BOOL %d\n", success);
- PERF_EXIT(GetProcessAffinityMask);
-
- return success;
+ return FALSE;
}
/*++
@@ -898,115 +245,3 @@ VirtualAllocExNuma(
return result;
}
-
-/*++
-Function:
- SetThreadIdealProcessorEx
-
-See MSDN doc.
---*/
-BOOL
-PALAPI
-SetThreadIdealProcessorEx(
- IN HANDLE hThread,
- IN PPROCESSOR_NUMBER lpIdealProcessor,
- OUT PPROCESSOR_NUMBER lpPreviousIdealProcessor)
-{
- PERF_ENTRY(SetThreadIdealProcessorEx);
- ENTRY("SetThreadIdealProcessorEx(hThread=%p, lpIdealProcessor=%p)\n", hThread, lpIdealProcessor);
-
- CPalThread *pCurrentThread = InternalGetCurrentThread();
- CPalThread *pTargetThread = NULL;
- IPalObject *pTargetThreadObject = NULL;
-
- PAL_ERROR palErr =
- InternalGetThreadDataFromHandle(pCurrentThread, hThread,
- 0, // THREAD_SET_CONTEXT
- &pTargetThread, &pTargetThreadObject);
-
- if (NO_ERROR != palErr)
- {
- ERROR("Unable to obtain thread data for handle %p (error %x)!\n", hThread,
- palErr);
- return 0;
- }
-
- pthread_t thread = pTargetThread->GetPThreadSelf();
-
-#if HAVE_PTHREAD_GETAFFINITY_NP
- int cpu = -1;
- if ((lpIdealProcessor->Group < g_groupCount) &&
- (lpIdealProcessor->Number < MaxCpusPerGroup) &&
- (lpIdealProcessor->Reserved == 0))
- {
- cpu = g_groupAndIndexToCpu[lpIdealProcessor->Group * MaxCpusPerGroup + lpIdealProcessor->Number];
- }
-
- if (cpu == -1)
- {
- SetLastError(ERROR_INVALID_PARAMETER);
- return FALSE;
- }
-
- if (lpPreviousIdealProcessor != NULL)
- {
- cpu_set_t prevCpuSet;
- CPU_ZERO(&prevCpuSet);
- DWORD prevCpu = GetCurrentProcessorNumber();
-
- int st = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &prevCpuSet);
-
- if (st == 0)
- {
- for (int i = 0; i < g_possibleCpuCount; i++)
- {
- if (CPU_ISSET(i, &prevCpuSet))
- {
- prevCpu = i;
- break;
- }
- }
- }
-
- _ASSERTE((int)prevCpu < g_possibleCpuCount);
- lpPreviousIdealProcessor->Group = g_cpuToAffinity[prevCpu].Group;
- lpPreviousIdealProcessor->Number = g_cpuToAffinity[prevCpu].Number;
- lpPreviousIdealProcessor->Reserved = 0;
- }
-
- cpu_set_t cpuSet;
- CPU_ZERO(&cpuSet);
- CPU_SET(cpu, &cpuSet);
-
- int st = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuSet);
-
- if (st != 0)
- {
- switch (st)
- {
- case EINVAL:
- // There is no processor in the mask that is allowed to execute the
- // process
- SetLastError(ERROR_INVALID_PARAMETER);
- break;
- case ESRCH:
- SetLastError(ERROR_INVALID_HANDLE);
- break;
- default:
- SetLastError(ERROR_GEN_FAILURE);
- break;
- }
- }
-
- BOOL success = (st == 0);
-
-#else // HAVE_PTHREAD_GETAFFINITY_NP
- // There is no API to manage thread affinity, so let's ignore the request
- BOOL success = FALSE;
-#endif // HAVE_PTHREAD_GETAFFINITY_NP
-
- LOGEXIT("SetThreadIdealProcessorEx returns BOOL %d\n", success);
- PERF_EXIT(SetThreadIdealProcessorEx);
-
- return success;
-}
diff --git a/src/pal/src/numa/numashim.h b/src/pal/src/numa/numashim.h
index dd7f58d6de..e56cfab9d1 100644
--- a/src/pal/src/numa/numashim.h
+++ b/src/pal/src/numa/numashim.h
@@ -13,19 +13,12 @@
#include <numa.h>
#include <numaif.h>
-#define numa_free_cpumask numa_bitmask_free
-
// List of all functions from the numa library that are used
#define FOR_ALL_NUMA_FUNCTIONS \
PER_FUNCTION_BLOCK(numa_available) \
PER_FUNCTION_BLOCK(mbind) \
- PER_FUNCTION_BLOCK(numa_num_possible_cpus) \
PER_FUNCTION_BLOCK(numa_max_node) \
- PER_FUNCTION_BLOCK(numa_allocate_cpumask) \
- PER_FUNCTION_BLOCK(numa_node_to_cpus) \
- PER_FUNCTION_BLOCK(numa_bitmask_weight) \
- PER_FUNCTION_BLOCK(numa_bitmask_isbitset) \
- PER_FUNCTION_BLOCK(numa_bitmask_free)
+ PER_FUNCTION_BLOCK(numa_node_of_cpu)
// Declare pointers to all the used numa functions
#define PER_FUNCTION_BLOCK(fn) extern decltype(fn)* fn##_ptr;
@@ -36,13 +29,8 @@ FOR_ALL_NUMA_FUNCTIONS
// to the functions of libnuma in the initialization.
#define numa_available() numa_available_ptr()
#define mbind(...) mbind_ptr(__VA_ARGS__)
-#define numa_num_possible_cpus() numa_num_possible_cpus_ptr()
#define numa_max_node() numa_max_node_ptr()
-#define numa_allocate_cpumask() numa_allocate_cpumask_ptr()
-#define numa_node_to_cpus(...) numa_node_to_cpus_ptr(__VA_ARGS__)
-#define numa_bitmask_weight(...) numa_bitmask_weight_ptr(__VA_ARGS__)
-#define numa_bitmask_isbitset(...) numa_bitmask_isbitset_ptr(__VA_ARGS__)
-#define numa_bitmask_free(...) numa_bitmask_free_ptr(__VA_ARGS__)
+#define numa_node_of_cpu(...) numa_node_of_cpu_ptr(__VA_ARGS__)
#endif // HAVE_NUMA_H
diff --git a/src/pal/src/thread/thread.cpp b/src/pal/src/thread/thread.cpp
index 86a08639c7..122e86014c 100644
--- a/src/pal/src/thread/thread.cpp
+++ b/src/pal/src/thread/thread.cpp
@@ -64,6 +64,7 @@ SET_DEFAULT_DEBUG_CHANNEL(THREAD); // some headers have code with asserts, so do
#include "pal/fakepoll.h"
#endif // HAVE_POLL
#include <limits.h>
+#include <algorithm>
#if HAVE_SYS_LWP_H
#include <sys/lwp.h>
@@ -2921,3 +2922,95 @@ int CorUnix::CThreadMachExceptionHandlers::GetIndexOfHandler(exception_mask_t bm
}
#endif // HAVE_MACH_EXCEPTIONS
+
+/*++
+Function:
+ PAL_SetCurrentThreadAffinity
+
+Abstract
+ Set affinity of the current thread to the specified processor.
+
+Parameters:
+ procNo - number of the processor to affinitize the current thread to
+
+Return value:
+ TRUE if the function was able to set the affinity, FALSE if it has failed.
+--*/
+BOOL
+PALAPI
+PAL_SetCurrentThreadAffinity(WORD procNo)
+{
+#if HAVE_PTHREAD_GETAFFINITY_NP
+ cpu_set_t cpuSet;
+ CPU_ZERO(&cpuSet);
+
+ int st = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuSet);
+
+ if (st == 0)
+ {
+ CPU_SET(procNo, &cpuSet);
+ st = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuSet);
+ }
+
+ return st == 0;
+#else // HAVE_PTHREAD_GETAFFINITY_NP
+ // There is no API to manage thread affinity, so let's ignore the request
+ return FALSE;
+#endif // HAVE_PTHREAD_GETAFFINITY_NP
+}
+
+/*++
+Function:
+ PAL_GetCurrentThreadAffinitySet
+
+Abstract
+ Get affinity set of the current thread. The set is represented by an array of "size" entries of UINT_PTR type.
+
+Parameters:
+ size - number of entries in the "data" array
+ data - pointer to the data of the resulting set, the LSB of the first entry in the array represents processor 0
+
+Return value:
+ TRUE if the function was able to get the affinity set, FALSE if it has failed.
+--*/
+BOOL
+PALAPI
+PAL_GetCurrentThreadAffinitySet(SIZE_T size, UINT_PTR* data)
+{
+ cpu_set_t cpuSet;
+ CPU_ZERO(&cpuSet);
+
+#if HAVE_PTHREAD_GETAFFINITY_NP
+ int st = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuSet);
+
+ if (st == 0)
+ {
+ const SIZE_T BitsPerBitsetEntry = 8 * sizeof(UINT_PTR);
+ int nrcpus = GetTotalCpuCount();
+
+ // Get info for as many processors as can fit into the resulting set
+ SIZE_T remainingCount = std::min(size * BitsPerBitsetEntry, (SIZE_T)nrcpus);
+ SIZE_T i = 0;
+ while (remainingCount != 0)
+ {
+ UINT_PTR entry = 0;
+ SIZE_T bitsToCopy = std::min(remainingCount, BitsPerBitsetEntry);
+ SIZE_T cpuSetOffset = i * BitsPerBitsetEntry;
+ for (SIZE_T j = 0; j < bitsToCopy; j++)
+ {
+ if (CPU_ISSET(cpuSetOffset + j, &cpuSet))
+ {
+ entry |= (UINT_PTR)1 << j;
+ }
+ }
+ remainingCount -= bitsToCopy;
+ data[i++] = entry;
+ }
+ }
+
+ return st == 0;
+#else // HAVE_PTHREAD_GETAFFINITY_NP
+ // There is no API to manage thread affinity, so let's ignore the request
+ return FALSE;
+#endif // HAVE_PTHREAD_GETAFFINITY_NP
+}
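PAL_GetCurrentThreadAffinitySet packs the cpu_set_t into an array of UINT_PTR words, with the LSB of the first entry representing processor 0. A sketch of the inverse operation a caller would perform follows; the word-size constant mirrors the one in the function above and the helper name is hypothetical.

// Sketch only, not part of the patch: walk a bitset produced by
// PAL_GetCurrentThreadAffinitySet and invoke a callback for every processor
// the current thread is allowed to run on.
#include <stddef.h>
#include <stdint.h>

void ForEachAllowedProcessor(size_t size, const uintptr_t* data,
                             void (*callback)(size_t procNo))
{
    const size_t bitsPerEntry = 8 * sizeof(uintptr_t);

    for (size_t i = 0; i < size; i++)
    {
        uintptr_t entry = data[i];
        for (size_t j = 0; entry != 0; j++, entry >>= 1)
        {
            if (entry & 1)
            {
                callback(i * bitsPerEntry + j);
            }
        }
    }
}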
diff --git a/src/utilcode/util.cpp b/src/utilcode/util.cpp
index 61f41d7a22..e7091604af 100644
--- a/src/utilcode/util.cpp
+++ b/src/utilcode/util.cpp
@@ -733,10 +733,17 @@ BYTE * ClrVirtualAllocWithinRange(const BYTE *pMinAddr,
return ::VirtualAllocExNuma(hProc, lpAddr, dwSize, allocType, prot, node);
}
+#ifndef FEATURE_PAL
/*static*/ BOOL NumaNodeInfo::GetNumaProcessorNodeEx(PPROCESSOR_NUMBER proc_no, PUSHORT node_no)
{
return ::GetNumaProcessorNodeEx(proc_no, node_no);
}
+#else // !FEATURE_PAL
+/*static*/ BOOL NumaNodeInfo::GetNumaProcessorNodeEx(USHORT proc_no, PUSHORT node_no)
+{
+ return PAL_GetNumaProcessorNode(proc_no, node_no);
+}
+#endif // !FEATURE_PAL
#endif
/*static*/ BOOL NumaNodeInfo::m_enableGCNumaAware = FALSE;
@@ -749,15 +756,6 @@ BYTE * ClrVirtualAllocWithinRange(const BYTE *pMinAddr,
if (CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_GCNumaAware) == 0)
return FALSE;
-#ifndef FEATURE_PAL
- // check if required APIs are supported
- HMODULE hMod = GetModuleHandleW(WINDOWS_KERNEL32_DLLNAME_W);
-#else
- HMODULE hMod = GetCLRModule();
-#endif
- if (hMod == NULL)
- return FALSE;
-
// fail to get the highest numa node number
if (!::GetNumaHighestNodeNumber(&highest) || (highest == 0))
return FALSE;
@@ -778,8 +776,10 @@ BYTE * ClrVirtualAllocWithinRange(const BYTE *pMinAddr,
m_enableGCNumaAware = InitNumaNodeInfoAPI();
}
+#ifndef FEATURE_PAL
+
//******************************************************************************
-// NumaNodeInfo
+// CPUGroupInfo
//******************************************************************************
#if !defined(FEATURE_REDHAWK)
/*static*/ //CPUGroupInfo::PNTQSIEx CPUGroupInfo::m_pNtQuerySystemInformationEx = NULL;
@@ -1187,6 +1187,7 @@ BOOL CPUGroupInfo::GetCPUGroupRange(WORD group_number, WORD* group_begin, WORD*
LIMITED_METHOD_CONTRACT;
return m_threadUseAllCpuGroups;
}
+#endif // !FEATURE_PAL
//******************************************************************************
// Returns the number of processors that a process has been configured to run on
@@ -1206,6 +1207,8 @@ int GetCurrentProcessCpuCount()
return cCPUs;
unsigned int count = 0;
+
+#ifndef FEATURE_PAL
DWORD_PTR pmask, smask;
if (!GetProcessAffinityMask(GetCurrentProcess(), &pmask, &smask))
@@ -1233,18 +1236,20 @@ int GetCurrentProcessCpuCount()
count = 64;
}
-#ifdef FEATURE_PAL
- uint32_t cpuLimit;
+#else // !FEATURE_PAL
+ count = PAL_GetLogicalCpuCountFromOS();
+ uint32_t cpuLimit;
if (PAL_GetCpuLimit(&cpuLimit) && cpuLimit < count)
count = cpuLimit;
-#endif
+#endif // !FEATURE_PAL
cCPUs = count;
return count;
}
+#ifndef FEATURE_PAL
DWORD_PTR GetCurrentProcessCpuMask()
{
CONTRACTL
@@ -1266,6 +1271,7 @@ DWORD_PTR GetCurrentProcessCpuMask()
return 0;
#endif
}
+#endif // !FEATURE_PAL
uint32_t GetOsPageSizeUncached()
{
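On the PAL build the affinity-mask path in GetCurrentProcessCpuCount is gone entirely; the count is whatever the OS reports, optionally capped by the container CPU limit. The hunk above condensed into a hedged sketch (both PAL functions are the ones named in the diff; pal.h is assumed to be included):

// Sketch only, not part of the patch: the FEATURE_PAL branch of
// GetCurrentProcessCpuCount in one piece - OS-visible CPU count, capped by a
// container/cgroup CPU limit when one is configured.
#include <stdint.h>

unsigned int PalProcessCpuCount()
{
    unsigned int count = PAL_GetLogicalCpuCountFromOS();

    uint32_t cpuLimit;
    if (PAL_GetCpuLimit(&cpuLimit) && cpuLimit < count)
    {
        count = cpuLimit;
    }

    return count;
}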
diff --git a/src/vm/ceemain.cpp b/src/vm/ceemain.cpp
index 1b85649d67..9ac0cc6a71 100644
--- a/src/vm/ceemain.cpp
+++ b/src/vm/ceemain.cpp
@@ -654,8 +654,9 @@ void EEStartupHelper(COINITIEE fFlags)
// Need to do this as early as possible. Used by creating object handle
// table inside Ref_Initialization() before GC is initialized.
NumaNodeInfo::InitNumaNodeInfo();
+#ifndef FEATURE_PAL
CPUGroupInfo::EnsureInitialized();
-
+#endif // !FEATURE_PAL
// Initialize global configuration settings based on startup flags
// This needs to be done before the EE has started
diff --git a/src/vm/eeconfig.cpp b/src/vm/eeconfig.cpp
index e59a85e1e5..6bd0eddf2b 100644
--- a/src/vm/eeconfig.cpp
+++ b/src/vm/eeconfig.cpp
@@ -1226,7 +1226,14 @@ HRESULT EEConfig::sync()
tieredCompilation_StartupTier_CallCountingDelayMs =
CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TC_StartupTier_CallCountingDelayMs);
- if (CPUGroupInfo::HadSingleProcessorAtStartup())
+
+#ifndef FEATURE_PAL
+ bool hadSingleProcessorAtStartup = g_SystemInfo.dwNumberOfProcessors == 1;//CPUGroupInfo::HadSingleProcessorAtStartup();
+#else // !FEATURE_PAL
+ bool hadSingleProcessorAtStartup = g_SystemInfo.dwNumberOfProcessors == 1;
+#endif // !FEATURE_PAL
+
+ if (hadSingleProcessorAtStartup)
{
DWORD delayMultiplier =
CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TC_StartupTier_DelaySingleProcMultiplier);
diff --git a/src/vm/gcenv.os.cpp b/src/vm/gcenv.os.cpp
index a56215a207..8f9e1ba1bb 100644
--- a/src/vm/gcenv.os.cpp
+++ b/src/vm/gcenv.os.cpp
@@ -32,6 +32,10 @@ uint32_t g_pageSizeUnixInl = 0;
static AffinitySet g_processAffinitySet;
+#ifdef FEATURE_PAL
+static uint32_t g_currentProcessCpuCount;
+#endif // FEATURE_PAL
+
class GroupProcNo
{
uint16_t m_groupProc;
@@ -106,8 +110,21 @@ bool GCToOSInterface::Initialize()
#ifdef FEATURE_PAL
g_pageSizeUnixInl = GetOsPageSize();
-#endif
+ g_currentProcessCpuCount = PAL_GetLogicalCpuCountFromOS();
+ if (PAL_GetCurrentThreadAffinitySet(AffinitySet::BitsetDataSize, g_processAffinitySet.GetBitsetData()))
+ {
+ assert(g_currentProcessCpuCount == g_processAffinitySet.Count());
+ }
+ else
+ {
+ // There is no way to get affinity on the current OS, set the affinity set to reflect all processors
+ for (size_t i = 0; i < g_currentProcessCpuCount; i++)
+ {
+ g_processAffinitySet.Add(i);
+ }
+ }
+#else // FEATURE_PAL
if (CPUGroupInfo::CanEnableGCCPUGroups())
{
// When CPU groups are enabled, then the process is not bound by the process affinity set at process launch.
@@ -135,6 +152,7 @@ bool GCToOSInterface::Initialize()
}
}
}
+#endif // FEATURE_PAL
return true;
}
@@ -175,7 +193,7 @@ bool GCToOSInterface::SetCurrentThreadIdealAffinity(uint16_t srcProcNo, uint16_t
LIMITED_METHOD_CONTRACT;
bool success = true;
-
+#ifndef FEATURE_PAL
GroupProcNo srcGroupProcNo(srcProcNo);
GroupProcNo dstGroupProcNo(dstProcNo);
@@ -202,7 +220,6 @@ bool GCToOSInterface::SetCurrentThreadIdealAffinity(uint16_t srcProcNo, uint16_t
success = !!SetThreadIdealProcessorEx(GetCurrentThread(), &proc, NULL);
}
-#if !defined(FEATURE_PAL)
else
{
if (GetThreadIdealProcessorEx(GetCurrentThread(), &proc))
@@ -211,10 +228,13 @@ bool GCToOSInterface::SetCurrentThreadIdealAffinity(uint16_t srcProcNo, uint16_t
success = !!SetThreadIdealProcessorEx(GetCurrentThread(), &proc, &proc);
}
}
-#endif // !defined(FEATURE_PAL)
-#endif
-
+#endif // !FEATURE_CORESYSTEM
return success;
+
+#else // !FEATURE_PAL
+ return GCToOSInterface::SetThreadAffinity(dstProcNo);
+
+#endif // !FEATURE_PAL
}
// Get the number of the current processor
@@ -472,7 +492,7 @@ size_t GCToOSInterface::GetCacheSizePerLogicalCpu(bool trueSize)
bool GCToOSInterface::SetThreadAffinity(uint16_t procNo)
{
LIMITED_METHOD_CONTRACT;
-
+#ifndef FEATURE_PAL
GroupProcNo groupProcNo(procNo);
if (groupProcNo.GetGroup() != GroupProcNo::NoGroup)
@@ -489,6 +509,9 @@ bool GCToOSInterface::SetThreadAffinity(uint16_t procNo)
{
return !!SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR)1 << groupProcNo.GetProcIndex());
}
+#else // FEATURE_PAL
+ return PAL_SetCurrentThreadAffinity(procNo);
+#endif // FEATURE_PAL
}
// Boosts the calling thread's thread priority to a level higher than the default
@@ -510,7 +533,9 @@ bool GCToOSInterface::BoostThreadPriority()
// set of enabled processors
const AffinitySet* GCToOSInterface::SetGCThreadsAffinitySet(uintptr_t configAffinityMask, const AffinitySet* configAffinitySet)
{
+#ifndef FEATURE_PAL
if (CPUGroupInfo::CanEnableGCCPUGroups())
+#endif // !FEATURE_PAL
{
if (!configAffinitySet->IsEmpty())
{
@@ -524,6 +549,7 @@ const AffinitySet* GCToOSInterface::SetGCThreadsAffinitySet(uintptr_t configAffi
}
}
}
+#ifndef FEATURE_PAL
else
{
if (configAffinityMask != 0)
@@ -538,6 +564,7 @@ const AffinitySet* GCToOSInterface::SetGCThreadsAffinitySet(uintptr_t configAffi
}
}
}
+#endif // !FEATURE_PAL
return &g_processAffinitySet;
}
@@ -549,10 +576,14 @@ uint32_t GCToOSInterface::GetCurrentProcessCpuCount()
{
LIMITED_METHOD_CONTRACT;
+#ifndef FEATURE_PAL
// GetCurrentProcessCpuCount only returns up to 64 procs.
return CPUGroupInfo::CanEnableGCCPUGroups() ?
GCToOSInterface::GetTotalProcessorCount():
::GetCurrentProcessCpuCount();
+#else // !FEATURE_PAL
+ return g_currentProcessCpuCount;
+#endif // !FEATURE_PAL
}
// Return the size of the user-mode portion of the virtual address space of this process.
@@ -887,6 +918,7 @@ uint32_t GCToOSInterface::GetTotalProcessorCount()
{
LIMITED_METHOD_CONTRACT;
+#ifndef FEATURE_PAL
if (CPUGroupInfo::CanEnableGCCPUGroups())
{
return CPUGroupInfo::GetNumActiveProcessors();
@@ -895,6 +927,9 @@ uint32_t GCToOSInterface::GetTotalProcessorCount()
{
return g_SystemInfo.dwNumberOfProcessors;
}
+#else // !FEATURE_PAL
+ return g_currentProcessCpuCount;
+#endif // !FEATURE_PAL
}
bool GCToOSInterface::CanEnableGCNumaAware()
@@ -904,20 +939,6 @@ bool GCToOSInterface::CanEnableGCNumaAware()
return NumaNodeInfo::CanEnableGCNumaAware() != FALSE;
}
-bool GCToOSInterface::GetNumaProcessorNode(uint16_t proc_no, uint16_t *node_no)
-{
- LIMITED_METHOD_CONTRACT;
-
- GroupProcNo groupProcNo(proc_no);
-
- PROCESSOR_NUMBER procNumber;
- procNumber.Group = groupProcNo.GetGroup();
- procNumber.Number = (BYTE)groupProcNo.GetProcIndex();
- procNumber.Reserved = 0;
-
- return NumaNodeInfo::GetNumaProcessorNodeEx(&procNumber, node_no) != FALSE;
-}
-
// Get processor number and optionally its NUMA node number for the specified heap number
// Parameters:
// heap_number - heap number to get the result for
@@ -929,53 +950,76 @@ bool GCToOSInterface::GetProcessorForHeap(uint16_t heap_number, uint16_t* proc_n
{
bool success = false;
- if (CPUGroupInfo::CanEnableGCCPUGroups())
+ // Locate heap_number-th available processor
+ uint16_t procNumber;
+ size_t cnt = heap_number;
+ for (uint16_t i = 0; i < GCToOSInterface::GetTotalProcessorCount(); i++)
{
- uint16_t gn, gpn;
- CPUGroupInfo::GetGroupForProcessor((uint16_t)heap_number, &gn, &gpn);
+ if (g_processAffinitySet.Contains(i))
+ {
+ if (cnt == 0)
+ {
+ procNumber = i;
+ success = true;
+ break;
+ }
+
+ cnt--;
+ }
+ }
+
+ if (success)
+ {
+#ifndef FEATURE_PAL
+ WORD gn, gpn;
+
+ if (CPUGroupInfo::CanEnableGCCPUGroups())
+ {
+ CPUGroupInfo::GetGroupForProcessor(procNumber, &gn, &gpn);
+ }
+ else
+ {
+ gn = GroupProcNo::NoGroup;
+ gpn = procNumber;
+ }
+
+ GroupProcNo groupProcNo(gn, gpn);
+ *proc_no = groupProcNo.GetCombinedValue();
- *proc_no = GroupProcNo(gn, gpn).GetCombinedValue();
if (GCToOSInterface::CanEnableGCNumaAware())
{
- if (!GCToOSInterface::GetNumaProcessorNode(*proc_no, node_no))
+ PROCESSOR_NUMBER procNumber;
+
+ if (CPUGroupInfo::CanEnableGCCPUGroups())
+ {
+ procNumber.Group = gn;
+ }
+ else
+ {
+ // Get the current processor group
+ PROCESSOR_NUMBER procNumber;
+ GetCurrentProcessorNumberEx(&procNumber);
+ }
+
+ procNumber.Number = (BYTE)gpn;
+ procNumber.Reserved = 0;
+
+ if (NumaNodeInfo::GetNumaProcessorNodeEx(&procNumber, node_no))
{
*node_no = NUMA_NODE_UNDEFINED;
}
}
else
{ // no numa setting, each cpu group is treated as a node
- *node_no = gn;
+ *node_no = groupProcNo.GetGroup();
}
-
- success = true;
- }
- else
- {
- int bit_number = 0;
- uint8_t proc_number = 0;
- for (uintptr_t mask = 1; mask != 0; mask <<= 1)
+#else // !FEATURE_PAL
+ *proc_no = procNumber;
+ if (!GCToOSInterface::CanEnableGCNumaAware() || !NumaNodeInfo::GetNumaProcessorNodeEx(procNumber, (WORD*)node_no))
{
- if (g_processAffinitySet.Contains(proc_number))
- {
- if (bit_number == heap_number)
- {
- *proc_no = GroupProcNo(GroupProcNo::NoGroup, proc_number).GetCombinedValue();
-
- if (GCToOSInterface::CanEnableGCNumaAware())
- {
- if (!GCToOSInterface::GetNumaProcessorNode(proc_number, node_no))
- {
- *node_no = NUMA_NODE_UNDEFINED;
- }
- }
-
- success = true;
- break;
- }
- bit_number++;
- }
- proc_number++;
+ *node_no = NUMA_NODE_UNDEFINED;
}
+#endif // !FEATURE_PAL
}
return success;
@@ -993,6 +1037,7 @@ bool GCToOSInterface::ParseGCHeapAffinitizeRangesEntry(const char** config_strin
size_t index_offset = 0;
char* number_end;
+#ifndef FEATURE_PAL
size_t group_number = strtoul(*config_string, &number_end, 10);
if ((number_end == *config_string) || (*number_end != ':'))
@@ -1011,6 +1056,7 @@ bool GCToOSInterface::ParseGCHeapAffinitizeRangesEntry(const char** config_strin
index_offset = group_begin;
*config_string = number_end + 1;
+#endif // !FEATURE_PAL
size_t start, end;
if (!ParseIndexOrRange(config_string, &start, &end))
@@ -1018,11 +1064,13 @@ bool GCToOSInterface::ParseGCHeapAffinitizeRangesEntry(const char** config_strin
return false;
}
+#ifndef FEATURE_PAL
if ((start >= group_size) || (end >= group_size))
{
// Invalid CPU index values or range
return false;
}
+#endif // !FEATURE_PAL
*start_index = index_offset + start;
*end_index = index_offset + end;
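The #ifndef FEATURE_PAL guards above mean a GCHeapAffinitizeRanges entry keeps its "<group>:<range>" prefix only on Windows; on the PAL build the entry is just a processor index or range. A hedged, self-contained sketch of the two shapes follows; the helper and its name are hypothetical, and ParseIndexOrRange itself is not reproduced here.

// Sketch only, not part of the patch: parse one GCHeapAffinitizeRanges entry,
// with or without the Windows-only "<group>:" prefix.
#include <cstdlib>

bool ParseEntry(const char* s, bool hasGroups, size_t* start, size_t* end)
{
    char* p;

    if (hasGroups)
    {
        (void)strtoul(s, &p, 10);        // group number; range validation happens elsewhere
        if (p == s || *p != ':')
            return false;
        s = p + 1;
    }

    *start = strtoul(s, &p, 10);
    if (p == s)
        return false;

    *end = (*p == '-') ? strtoul(p + 1, &p, 10) : *start;
    return true;
}

// e.g. ParseEntry("0:1-4", true, &s, &e)  -> s=1, e=4 on Windows
//      ParseEntry("1-4",   false, &s, &e) -> s=1, e=4 on the PAL build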
diff --git a/src/vm/threads.cpp b/src/vm/threads.cpp
index 6890290caa..43976a91ff 100644
--- a/src/vm/threads.cpp
+++ b/src/vm/threads.cpp
@@ -451,7 +451,7 @@ void Thread::ChooseThreadCPUGroupAffinity()
GC_TRIGGERS;
}
CONTRACTL_END;
-
+#ifndef FEATURE_PAL
if (!CPUGroupInfo::CanEnableGCCPUGroups() || !CPUGroupInfo::CanEnableThreadUseAllCpuGroups())
return;
@@ -471,6 +471,7 @@ void Thread::ChooseThreadCPUGroupAffinity()
CPUGroupInfo::SetThreadGroupAffinity(GetThreadHandle(), &groupAffinity, NULL);
m_wCPUGroup = groupAffinity.Group;
m_pAffinityMask = groupAffinity.Mask;
+#endif // !FEATURE_PAL
}
void Thread::ClearThreadCPUGroupAffinity()
@@ -481,7 +482,7 @@ void Thread::ClearThreadCPUGroupAffinity()
GC_NOTRIGGER;
}
CONTRACTL_END;
-
+#ifndef FEATURE_PAL
if (!CPUGroupInfo::CanEnableGCCPUGroups() || !CPUGroupInfo::CanEnableThreadUseAllCpuGroups())
return;
@@ -499,6 +500,7 @@ void Thread::ClearThreadCPUGroupAffinity()
m_wCPUGroup = 0;
m_pAffinityMask = 0;
+#endif // !FEATURE_PAL
}
DWORD Thread::StartThread()
@@ -1561,8 +1563,10 @@ Thread::Thread()
m_fGCSpecial = FALSE;
+#ifndef FEATURE_PAL
m_wCPUGroup = 0;
m_pAffinityMask = 0;
+#endif // !FEATURE_PAL
m_pAllLoggedTypes = NULL;
diff --git a/src/vm/threads.h b/src/vm/threads.h
index 94ce275604..e5307d9a5d 100644
--- a/src/vm/threads.h
+++ b/src/vm/threads.h
@@ -4824,9 +4824,10 @@ public:
void SetGCSpecial(bool fGCSpecial);
private:
+#ifndef FEATURE_PAL
WORD m_wCPUGroup;
DWORD_PTR m_pAffinityMask;
-
+#endif // !FEATURE_PAL
public:
void ChooseThreadCPUGroupAffinity();
void ClearThreadCPUGroupAffinity();
diff --git a/src/vm/win32threadpool.cpp b/src/vm/win32threadpool.cpp
index 29c1d21c99..09a3a07745 100644
--- a/src/vm/win32threadpool.cpp
+++ b/src/vm/win32threadpool.cpp
@@ -345,12 +345,16 @@ BOOL ThreadpoolMgr::Initialize()
UnManagedPerAppDomainTPCount* pADTPCount;
pADTPCount = PerAppDomainTPCountList::GetUnmanagedTPCount();
+#ifndef FEATURE_PAL
//ThreadPool_CPUGroup
CPUGroupInfo::EnsureInitialized();
if (CPUGroupInfo::CanEnableGCCPUGroups() && CPUGroupInfo::CanEnableThreadUseAllCpuGroups())
NumberOfProcessors = CPUGroupInfo::GetNumActiveProcessors();
else
NumberOfProcessors = GetCurrentProcessCpuCount();
+#else // !FEATURE_PAL
+ NumberOfProcessors = GetCurrentProcessCpuCount();
+#endif // !FEATURE_PAL
InitPlatformVariables();
EX_TRY
@@ -380,20 +384,15 @@ BOOL ThreadpoolMgr::Initialize()
RetiredWorkerSemaphore = new CLRLifoSemaphore();
RetiredWorkerSemaphore->Create(0, ThreadCounter::MaxPossibleCount);
+#ifndef FEATURE_PAL
//ThreadPool_CPUGroup
if (CPUGroupInfo::CanEnableGCCPUGroups() && CPUGroupInfo::CanEnableThreadUseAllCpuGroups())
RecycledLists.Initialize( CPUGroupInfo::GetNumActiveProcessors() );
else
RecycledLists.Initialize( g_SystemInfo.dwNumberOfProcessors );
- /*
- {
- SYSTEM_INFO sysInfo;
-
- ::GetSystemInfo( &sysInfo );
-
- RecycledLists.Initialize( sysInfo.dwNumberOfProcessors );
- }
- */
+#else // !FEATURE_PAL
+ RecycledLists.Initialize( g_SystemInfo.dwNumberOfProcessors );
+#endif // !FEATURE_PAL
}
EX_CATCH
{
@@ -4095,9 +4094,10 @@ DWORD WINAPI ThreadpoolMgr::GateThreadStart(LPVOID lpArgs)
return 0;
}
+#ifndef FEATURE_PAL
//GateThread can start before EESetup, so ensure CPU group information is initialized;
CPUGroupInfo::EnsureInitialized();
-
+#endif // !FEATURE_PAL
// initialize CPU usage information structure;
prevCPUInfo.idleTime.QuadPart = 0;
prevCPUInfo.kernelTime.QuadPart = 0;
diff --git a/src/vm/win32threadpool.h b/src/vm/win32threadpool.h
index bb6ebc0613..55f321c37f 100644
--- a/src/vm/win32threadpool.h
+++ b/src/vm/win32threadpool.h
@@ -735,12 +735,22 @@ public:
{
LIMITED_METHOD_CONTRACT;
+ DWORD processorNumber = 0;
+
+#ifndef FEATURE_PAL
if (CPUGroupInfo::CanEnableGCCPUGroups() && CPUGroupInfo::CanEnableThreadUseAllCpuGroups())
- return pRecycledListPerProcessor[CPUGroupInfo::CalculateCurrentProcessorNumber()][memType];
+ processorNumber = CPUGroupInfo::CalculateCurrentProcessorNumber();
else
// Turns out GetCurrentProcessorNumber can return a value greater than the number of processors reported by
// GetSystemInfo, if we're running in WOW64 on a machine with >32 processors.
- return pRecycledListPerProcessor[GetCurrentProcessorNumber()%NumberOfProcessors][memType];
+ processorNumber = GetCurrentProcessorNumber()%NumberOfProcessors;
+#else // !FEATURE_PAL
+ if (PAL_HasGetCurrentProcessorNumber())
+ {
+ processorNumber = GetCurrentProcessorNumber();
+ }
+#endif // !FEATURE_PAL
+ return pRecycledListPerProcessor[processorNumber][memType];
}
};