Remove Unix CPU groups emulation

This change removes CPU groups emulation from Unix PAL and modifies the GC and thread pool code accordingly.
author: Jan Vorlicek <janvorli@microsoft.com> 2019-04-05 02:53:32 +0200
committer: Jan Vorlicek <janvorli@microsoft.com> 2019-04-09 01:26:55 +0200
commit: 060990945a0117cb824b9d1c1e6c3a336533a3ee (patch)
tree: e762ccb63b9843ccf75d6f87b6ab14cfe352d40c /src/gc/unix
parent: 187410154a323024938a3281fb5a49f5e59ec350 (diff)
download: coreclr-060990945a0117cb824b9d1c1e6c3a336533a3ee.tar.gz
coreclr-060990945a0117cb824b9d1c1e6c3a336533a3ee.tar.bz2
coreclr-060990945a0117cb824b9d1c1e6c3a336533a3ee.zip
1 files changed, 124 insertions, 13 deletions
diff --git a/src/gc/unix/gcenv.unix.cpp b/src/gc/unix/gcenv.unix.cpp
index c71d211e01..a6d56f2433 100644
--- a/src/gc/unix/gcenv.unix.cpp
+++ b/src/gc/unix/gcenv.unix.cpp
@@ -55,6 +55,33 @@
 #include "globals.h"
 #include "cgroup.h"
 
+#if HAVE_NUMA_H
+
+#include <numa.h>
+#include <numaif.h>
+#include <dlfcn.h>
+
+// X-macro list of every numa library function used here; expands PER_FUNCTION_BLOCK once per name
+#define FOR_ALL_NUMA_FUNCTIONS \
+    PER_FUNCTION_BLOCK(mbind) \
+    PER_FUNCTION_BLOCK(numa_available) \
+    PER_FUNCTION_BLOCK(numa_max_node) \
+    PER_FUNCTION_BLOCK(numa_node_of_cpu)
+
+// Declare an extern pointer <fn>_ptr, typed via decltype(fn), for each function in the list
+#define PER_FUNCTION_BLOCK(fn) extern decltype(fn)* fn##_ptr;
+FOR_ALL_NUMA_FUNCTIONS
+#undef PER_FUNCTION_BLOCK
+
+// Route calls to these functions through the pointers that NUMASupportInitialize fills in.
+// The macros are function-like, so a bare name (e.g. inside decltype(fn)) is left untouched.
+#define mbind(...) mbind_ptr(__VA_ARGS__)
+#define numa_available() numa_available_ptr()
+#define numa_max_node() numa_max_node_ptr()
+#define numa_node_of_cpu(...) numa_node_of_cpu_ptr(__VA_ARGS__)
+
+#endif // HAVE_NUMA_H
+
 #if defined(_ARM_) || defined(_ARM64_)
 #define SYSCONF_GET_NUMPROCS _SC_NPROCESSORS_CONF
 #else
@@ -109,6 +136,74 @@ uint32_t g_pageSizeUnixInl = 0;
 
 AffinitySet g_processAffinitySet;
 
+#if HAVE_CPUSET_T
+typedef cpuset_t cpu_set_t;
+#endif // HAVE_CPUSET_T
+
+// The highest NUMA node available (stays 0 when NUMA is not available)
+int g_highestNumaNode = 0;
+// Is numa available (libnuma was loaded and numa_available() did not return -1)
+bool g_numaAvailable = false;
+// Handle returned by dlopen for libnuma; stays nullptr when the library could not be loaded
+void* g_numaHandle = nullptr;
+// Definitions of the <fn>_ptr pointers, one per entry of FOR_ALL_NUMA_FUNCTIONS
+#if HAVE_NUMA_H
+#define PER_FUNCTION_BLOCK(fn) decltype(fn)* fn##_ptr;
+FOR_ALL_NUMA_FUNCTIONS
+#undef PER_FUNCTION_BLOCK
+#endif // HAVE_NUMA_H
+
+
+// Load libnuma dynamically, resolve the functions listed in FOR_ALL_NUMA_FUNCTIONS and record
+// whether NUMA is available (g_numaAvailable) and the highest node (g_highestNumaNode).
+// Aborts the process when libnuma is loaded but lacks one of the functions. Without NUMA
+// support, g_numaAvailable stays false and g_highestNumaNode is 0, i.e. a single node.
+void NUMASupportInitialize()
+{
+#if HAVE_NUMA_H
+    g_numaHandle = dlopen("libnuma.so", RTLD_LAZY);
+    if (g_numaHandle == nullptr)
+    {
+        g_numaHandle = dlopen("libnuma.so.1", RTLD_LAZY); // fall back to the versioned name
+    }
+    if (g_numaHandle != nullptr)
+    {
+#define PER_FUNCTION_BLOCK(fn) \
+    fn##_ptr = (decltype(fn)*)dlsym(g_numaHandle, #fn); \
+    if (fn##_ptr == NULL) { fprintf(stderr, "Cannot get symbol " #fn " from libnuma\n"); abort(); }
+FOR_ALL_NUMA_FUNCTIONS
+#undef PER_FUNCTION_BLOCK
+
+        if (numa_available() == -1)
+        {
+            dlclose(g_numaHandle);
+            g_numaHandle = nullptr; // do not keep a dangling handle around
+        }
+        else
+        {
+            g_numaAvailable = true;
+            g_highestNumaNode = numa_max_node();
+        }
+    }
+#endif // HAVE_NUMA_H
+    if (!g_numaAvailable)
+    {
+        // No NUMA
+        g_highestNumaNode = 0;
+    }
+}
+
+// Cleanup of the NUMA support data structures: closes the libnuma handle, which is only
+// still open when NUMA was found to be available.
+void NUMASupportCleanup()
+{
+#if HAVE_NUMA_H
+    // Otherwise the handle was never opened or NUMASupportInitialize closed it already
+    const bool libnumaStillOpen = g_numaAvailable;
+    if (libnumaStillOpen) { dlclose(g_numaHandle); }
+#endif // HAVE_NUMA_H
+}
+
 // Initialize the interface implementation
 // Return:
 //  true if it has succeeded, false if it has failed
@@ -221,6 +316,8 @@ bool GCToOSInterface::Initialize()
 
 #endif // HAVE_SCHED_GETAFFINITY
 
+    NUMASupportInitialize();
+
     return true;
 }
 
@@ -235,6 +332,7 @@ void GCToOSInterface::Shutdown()
     munmap(g_helperPage, OS_PAGE_SIZE);
 
     CleanupCGroup();
+    NUMASupportCleanup();
 }
 
 // Get numeric id of the current thread if possible on the
@@ -468,8 +566,29 @@ void* GCToOSInterface::VirtualReserveAndCommitLargePages(size_t size)
 //  true if it has succeeded, false if it has failed
 bool GCToOSInterface::VirtualCommit(void* address, size_t size, uint16_t node)
 {
-    assert(node == NUMA_NODE_UNDEFINED && "Numa allocation is not ported to local GC on unix yet");
-    return mprotect(address, size, PROT_WRITE | PROT_READ) == 0;
+    bool success = mprotect(address, size, PROT_WRITE | PROT_READ) == 0;
+
+#if HAVE_NUMA_H
+    // The node is only a placement hint, so a failing mbind does not fail the commit.
+    if (success && g_numaAvailable && (node != NUMA_NODE_UNDEFINED) && ((int)node <= g_highestNumaNode))
+    {
+        // The node mask is a bitset with one bit per NUMA node, packed into unsigned longs.
+        const size_t bitsPerEntry = 8 * sizeof(unsigned long);
+        // Linux decrements maxnode before reading the mask (libnuma passes size + 1 for that
+        // reason), so describe one bit more than the nodes 0..g_highestNumaNode occupy.
+        const size_t maskBits = (size_t)g_highestNumaNode + 2;
+        const size_t maskEntries = (maskBits + bitsPerEntry - 1) / bitsPerEntry;
+        const size_t maskBytes = maskEntries * sizeof(unsigned long);
+        unsigned long *nodeMask = (unsigned long*)alloca(maskBytes);
+        memset(nodeMask, 0, maskBytes);
+        nodeMask[node / bitsPerEntry] = ((unsigned long)1) << (node % bitsPerEntry);
+
+        int st = mbind(address, size, MPOL_PREFERRED, nodeMask, maskBits, 0);
+        assert(st == 0);
+    }
+#endif // HAVE_NUMA_H
+
+    return success;
 }
 
 // Decomit virtual memory range.
@@ -775,13 +894,7 @@ uint32_t GCToOSInterface::GetTotalProcessorCount()
 
 bool GCToOSInterface::CanEnableGCNumaAware()
 {
-    return false;
-}
-
-bool GCToOSInterface::GetNumaProcessorNode(uint16_t proc_no, uint16_t *node_no)
-{
-    assert(!"Numa has not been ported to local GC for unix");
-    return false;
+    return g_numaAvailable; // set by NUMASupportInitialize once libnuma is loaded and numa_available() succeeds
 }
 
 // Get processor number and optionally its NUMA node number for the specified heap number
@@ -806,10 +919,8 @@ bool GCToOSInterface::GetProcessorForHeap(uint16_t heap_number, uint16_t* proc_n
 
                 if (GCToOSInterface::CanEnableGCNumaAware())
                 {
-                    if (!GCToOSInterface::GetNumaProcessorNode(procNumber, node_no))
-                    {
-                        *node_no = NUMA_NODE_UNDEFINED;
-                    }
+                    int result = numa_node_of_cpu(procNumber);
+                    *node_no = (result >= 0) ? (uint16_t)result : NUMA_NODE_UNDEFINED;
                 }
                 else
                 {
author	Jan Vorlicek <janvorli@microsoft.com>	2019-04-05 02:53:32 +0200
committer	Jan Vorlicek <janvorli@microsoft.com>	2019-04-09 01:26:55 +0200
commit	060990945a0117cb824b9d1c1e6c3a336533a3ee (patch)
tree	e762ccb63b9843ccf75d6f87b6ab14cfe352d40c /src/gc/unix
parent	187410154a323024938a3281fb5a49f5e59ec350 (diff)
download	coreclr-060990945a0117cb824b9d1c1e6c3a336533a3ee.tar.gz coreclr-060990945a0117cb824b9d1c1e6c3a336533a3ee.tar.bz2 coreclr-060990945a0117cb824b9d1c1e6c3a336533a3ee.zip