Remove CPU groups handling from gc.cpp

This change removes all explicit manipulation and handling of CPU groups from gc.cpp and hides it behind the GCToOSInterface. This is a step to prepare for removing the CPU groups emulation on Unix. In fact, I've already updated the standalone Unix GC to be able to affinitize GC threads to any subset of CPUs and added the previously missing support for the affinity setting itself. NUMA support still remains missing there. The change also adds a new way to specify GC thread affinitization that is not limited to 64 threads. Instead of an affinity mask stored in a 64-bit integer, we now use a bitset that can contain as many processors as the GC can support. There is also a new GC config setting to specify the affinity in the form of a range list.
author: Jan Vorlicek <janvorli@microsoft.com> 2019-03-25 20:52:07 +0100
committer: Jan Vorlicek <janvorli@microsoft.com> 2019-04-03 21:12:02 +0200
commit: 67f1116cea421ebbe516fafc963e213cc6f0e117 (patch)
tree: 671983515e49105c60ba376a79bbd28020ce766c /src/gc
parent: 17add60f708fa55a5447a67851783ab98e01f0cf (diff)
download: coreclr-67f1116cea421ebbe516fafc963e213cc6f0e117.tar.gz
coreclr-67f1116cea421ebbe516fafc963e213cc6f0e117.tar.bz2
coreclr-67f1116cea421ebbe516fafc963e213cc6f0e117.zip
8 files changed, 532 insertions, 443 deletions
diff --git a/src/gc/env/gcenv.base.h b/src/gc/env/gcenv.base.h
index e99149eb6f..35a8a0b796 100644
--- a/src/gc/env/gcenv.base.h
+++ b/src/gc/env/gcenv.base.h
@@ -71,6 +71,7 @@ inline HRESULT HRESULT_FROM_WIN32(unsigned long x)
 #define E_FAIL                  0x80004005
 #define E_OUTOFMEMORY           0x8007000E
 #define COR_E_EXECUTIONENGINE   0x80131506
+#define CLR_E_GC_BAD_AFFINITY_CONFIG 0x8013200A
 
 #define NOERROR                 0x0
 #define ERROR_TIMEOUT           1460
diff --git a/src/gc/env/gcenv.os.h b/src/gc/env/gcenv.os.h
index 3224f71d9d..f4dddb86f5 100644
--- a/src/gc/env/gcenv.os.h
+++ b/src/gc/env/gcenv.os.h
@@ -24,7 +24,7 @@
 #define YieldProcessor System_YieldProcessor
 #endif
 
-#define NUMA_NODE_UNDEFINED UINT32_MAX
+#define NUMA_NODE_UNDEFINED UINT16_MAX
 
 // Critical section used by the GC
 class CLRCriticalSection
@@ -55,17 +55,6 @@ struct VirtualReserveFlags
     };
 };
 
-// Affinity of a GC thread
-struct GCThreadAffinity
-{
-    static const int None = -1;
-
-    // Processor group index, None if no group is specified
-    int Group;
-    // Processor index, None if no affinity is specified
-    int Processor;
-};
-
 // An event is a synchronization object whose state can be set and reset
 // indicating that an event has occured. It is used pervasively throughout
 // the GC.
@@ -149,6 +138,86 @@ public:
 // GC thread function prototype
 typedef void (*GCThreadFunction)(void* param);
 
+#ifdef BIT64
+// Right now we support maximum 1024 procs - meaning that we will create at most
+// that many GC threads and GC heaps.
+#define MAX_SUPPORTED_CPUS 1024
+#else
+#define MAX_SUPPORTED_CPUS 64
+#endif // BIT64
+
+// Set of processor indices used to store affinity.
+class AffinitySet
+{
+    static const size_t BitsPerBitsetEntry = 8 * sizeof(uintptr_t);
+
+    uintptr_t m_bitset[MAX_SUPPORTED_CPUS / BitsPerBitsetEntry];
+
+    uintptr_t GetBitsetEntryMask(size_t cpuIndex)
+    {
+        return (uintptr_t)1 << (cpuIndex & (BitsPerBitsetEntry - 1));
+    }
+
+    size_t GetBitsetEntryIndex(size_t cpuIndex)
+    {
+        return cpuIndex / BitsPerBitsetEntry;
+    }
+
+public:
+
+    AffinitySet()
+    {
+        memset(m_bitset, 0, sizeof(m_bitset));
+    }
+
+    // Check if the set contains a processor
+    bool Contains(size_t cpuIndex)
+    {
+        return (m_bitset[GetBitsetEntryIndex(cpuIndex)] & GetBitsetEntryMask(cpuIndex)) != 0;
+    }
+
+    // Add a processor to the set
+    void Add(size_t cpuIndex)
+    {
+        m_bitset[GetBitsetEntryIndex(cpuIndex)] |= GetBitsetEntryMask(cpuIndex);
+    }
+
+    // Remove a processor from the set
+    void Remove(size_t cpuIndex)
+    {
+        m_bitset[GetBitsetEntryIndex(cpuIndex)] &= ~GetBitsetEntryMask(cpuIndex);
+    }
+
+    // Check if the set is empty
+    bool IsEmpty()
+    {
+        for (size_t i = 0; i < MAX_SUPPORTED_CPUS / BitsPerBitsetEntry; i++)
+        {
+            if (m_bitset[i] != 0)
+            {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    // Return number of processors in the affinity set
+    size_t Count()
+    {
+        size_t count = 0;
+        for (size_t i = 0; i < MAX_SUPPORTED_CPUS; i++)
+        {
+            if (Contains(i))
+            {
+                count++;
+            }
+        }
+
+        return count;
+    }
+};
+
 // Interface that the GC uses to invoke OS specific functionality
 class GCToOSInterface
 {
@@ -202,7 +271,7 @@ public:
     //  size    - size of the virtual memory range
     // Return:
     //  true if it has succeeded, false if it has failed
-    static bool VirtualCommit(void *address, size_t size, uint32_t node = NUMA_NODE_UNDEFINED);
+    static bool VirtualCommit(void *address, size_t size, uint16_t node = NUMA_NODE_UNDEFINED);
 
     // Decomit virtual memory range.
     // Parameters:
@@ -267,13 +336,13 @@ public:
     // Check if the OS supports getting current processor number
     static bool CanGetCurrentProcessorNumber();
 
-    // Set ideal processor for the current thread
+    // Set ideal processor for the current thread
     // Parameters:
-    //  processorIndex - index of the processor in the group
-    //  affinity - ideal processor affinity for the thread
+    //  srcProcNo - processor number the thread currently runs on
+    //  dstProcNo - processor number the thread should be migrated to
     // Return:
     //  true if it has succeeded, false if it has failed
-    static bool SetCurrentThreadIdealAffinity(GCThreadAffinity* affinity);
+    static bool MigrateThread(uint16_t srcProcNo, uint16_t dstProcNo);
 
     // Get numeric id of the current thread if possible on the
     // current platform. It is indended for logging purposes only.
@@ -303,14 +372,13 @@ public:
     //  The number of processors
     static uint32_t GetCurrentProcessCpuCount();
 
-    // Sets the calling thread's affinity to only run on the processor specified
-    // in the GCThreadAffinity structure.
+    // Sets the calling thread's affinity to only run on the processor specified.
     // Parameters:
-    //  affinity - The requested affinity for the calling thread. At most one processor
-    //             can be provided.
+    //  procNo - The requested affinity for the calling thread.
+    //
     // Return:
     //  true if setting the affinity was successful, false otherwise.
-    static bool SetThreadAffinity(GCThreadAffinity* affinity);
+    static bool SetThreadAffinity(uint16_t procNo);
 
     // Boosts the calling thread's thread priority to a level higher than the default
     // for new threads.
@@ -320,20 +388,10 @@ public:
     //  true if the priority boost was successful, false otherwise.
     static bool BoostThreadPriority();
 
-    // Get affinity mask of the current process
-    // Parameters:
-    //  processMask - affinity mask for the specified process
-    //  systemMask  - affinity mask for the system
+    // Get set of processors enabled for GC for the current process
     // Return:
-    //  true if it has succeeded, false if it has failed
-    // Remarks:
-    //  A process affinity mask is a bit vector in which each bit represents the processors that
-    //  a process is allowed to run on. A system affinity mask is a bit vector in which each bit
-    //  represents the processors that are configured into a system.
-    //  A process affinity mask is a subset of the system affinity mask. A process is only allowed
-    //  to run on the processors configured into a system. Therefore, the process affinity mask cannot
-    //  specify a 1 bit for a processor when the system affinity mask specifies a 0 bit for that processor.
-    static bool GetCurrentProcessAffinityMask(uintptr_t *processMask, uintptr_t *systemMask);
+    //  set of enabled processors
+    static AffinitySet* GetCurrentProcessAffinitySet();
 
     //
     // Global memory info
@@ -408,13 +466,16 @@ public:
     static bool CanEnableGCNumaAware();
 
     // Gets the NUMA node for the processor
-    static bool GetNumaProcessorNode(PPROCESSOR_NUMBER proc_no, uint16_t *node_no);
-
-    // Are CPU groups enabled
-    static bool CanEnableGCCPUGroups();
+    static bool GetNumaProcessorNode(uint16_t proc_no, uint16_t *node_no);
 
-    // Get the CPU group for the specified processor
-    static void GetGroupForProcessor(uint16_t processor_number, uint16_t* group_number, uint16_t* group_processor_number);
+    // Get processor number and optionally its NUMA node number for the specified heap number
+    // Parameters:
+    //  heap_number - heap number to get the result for
+    //  proc_no     - set to the selected processor number
+    //  node_no     - set to the NUMA node of the selected processor or to NUMA_NODE_UNDEFINED
+    // Return:
+    //  true if it succeeded
+    static bool GetProcessorForHeap(uint16_t heap_number, uint16_t* proc_no, uint16_t* node_no);
 
 };
 
diff --git a/src/gc/gc.cpp b/src/gc/gc.cpp
index 2e5ea1b26b..222f144cf4 100644
--- a/src/gc/gc.cpp
+++ b/src/gc/gc.cpp
@@ -75,14 +75,6 @@ BOOL bgc_heap_walk_for_etw_p = FALSE;
 #define LOH_PIN_QUEUE_LENGTH 100
 #define LOH_PIN_DECAY 10
 
-#ifdef BIT64
-// Right now we support maximum 1024 procs - meaning that we will create at most
-// that many GC threads and GC heaps. 
-#define MAX_SUPPORTED_CPUS 1024
-#else
-#define MAX_SUPPORTED_CPUS 64
-#endif // BIT64
-
 uint32_t yp_spin_count_unit = 0;
 size_t loh_size_threshold = LARGE_OBJECT_SIZE;
 
@@ -5062,8 +5054,6 @@ class heap_select
     static uint16_t proc_no_to_heap_no[MAX_SUPPORTED_CPUS];
     static uint16_t heap_no_to_proc_no[MAX_SUPPORTED_CPUS];
     static uint16_t heap_no_to_numa_node[MAX_SUPPORTED_CPUS];
-    static uint16_t heap_no_to_cpu_group[MAX_SUPPORTED_CPUS];
-    static uint16_t heap_no_to_group_proc[MAX_SUPPORTED_CPUS];
     static uint16_t numa_node_to_heap_map[MAX_SUPPORTED_CPUS+4];
 
     static int access_time(uint8_t *sniff_buffer, int heap_number, unsigned sniff_index, unsigned n_sniff_buffers)
@@ -5196,26 +5186,6 @@ public:
         heap_no_to_numa_node[heap_number] = numa_node;
     }
 
-    static uint16_t find_cpu_group_from_heap_no(int heap_number)
-    {
-        return heap_no_to_cpu_group[heap_number];
-    }
-
-    static void set_cpu_group_for_heap(int heap_number, uint16_t group_number)
-    {
-        heap_no_to_cpu_group[heap_number] = group_number;
-    }
-
-    static uint16_t find_group_proc_from_heap_no(int heap_number)
-    {
-        return heap_no_to_group_proc[heap_number];
-    }
-
-    static void set_group_proc_for_heap(int heap_number, uint16_t group_proc)
-    {
-        heap_no_to_group_proc[heap_number] = group_proc;
-    }
-
     static void init_numa_node_to_heap_map(int nheaps)
     {   // called right after GCHeap::Init() for each heap is finished
         // when numa is not enabled, heap_no_to_numa_node[] are all filled
@@ -5245,8 +5215,6 @@ unsigned heap_select::cur_sniff_index;
 uint16_t heap_select::proc_no_to_heap_no[MAX_SUPPORTED_CPUS];
 uint16_t heap_select::heap_no_to_proc_no[MAX_SUPPORTED_CPUS];
 uint16_t heap_select::heap_no_to_numa_node[MAX_SUPPORTED_CPUS];
-uint16_t heap_select::heap_no_to_cpu_group[MAX_SUPPORTED_CPUS];
-uint16_t heap_select::heap_no_to_group_proc[MAX_SUPPORTED_CPUS];
 uint16_t heap_select::numa_node_to_heap_map[MAX_SUPPORTED_CPUS+4];
 
 BOOL gc_heap::create_thread_support (unsigned number_of_heaps)
@@ -5289,79 +5257,22 @@ void gc_heap::destroy_thread_support ()
     }
 }
 
-void set_thread_group_affinity_for_heap(int heap_number, GCThreadAffinity* affinity)
+void set_thread_affinity_for_heap(int heap_number)
 {
-    affinity->Group = GCThreadAffinity::None;
-    affinity->Processor = GCThreadAffinity::None;
+    uint16_t proc_no;
+    uint16_t node_no;
 
-    uint16_t gn, gpn;
-    GCToOSInterface::GetGroupForProcessor((uint16_t)heap_number, &gn, &gpn);
-
-    int bit_number = 0;
-    for (uintptr_t mask = 1; mask !=0; mask <<=1) 
+    if (GCToOSInterface::GetProcessorForHeap(heap_number, &proc_no, &node_no))
     {
-        if (bit_number == gpn)
+        heap_select::set_proc_no_for_heap(heap_number, proc_no);
+        if (node_no != NUMA_NODE_UNDEFINED)
         {
-            dprintf(3, ("using processor group %d, mask %Ix for heap %d\n", gn, mask, heap_number));
-            affinity->Processor = gpn;
-            affinity->Group = gn;
-            heap_select::set_cpu_group_for_heap(heap_number, gn);
-            heap_select::set_group_proc_for_heap(heap_number, gpn);
-            if (GCToOSInterface::CanEnableGCNumaAware())
-            {  
-                PROCESSOR_NUMBER proc_no;
-                proc_no.Group    = gn;
-                proc_no.Number   = (uint8_t)gpn;
-                proc_no.Reserved = 0;
-
-                uint16_t node_no = 0;
-                if (GCToOSInterface::GetNumaProcessorNode(&proc_no, &node_no))
-                    heap_select::set_numa_node_for_heap(heap_number, node_no);
-            }
-            else
-            {   // no numa setting, each cpu group is treated as a node
-                heap_select::set_numa_node_for_heap(heap_number, gn);
-            }
-            return;
+            heap_select::set_numa_node_for_heap(heap_number, node_no);
         }
-        bit_number++;
-    }
-}
-
-void set_thread_affinity_mask_for_heap(int heap_number, GCThreadAffinity* affinity)
-{
-    affinity->Group = GCThreadAffinity::None;
-    affinity->Processor = GCThreadAffinity::None;
-
-    uintptr_t pmask = process_mask;
-    int bit_number = 0; 
-    uint8_t proc_number = 0;
-    for (uintptr_t mask = 1; mask != 0; mask <<= 1)
-    {
-        if ((mask & pmask) != 0)
+        if (!GCToOSInterface::SetThreadAffinity(proc_no))
         {
-            if (bit_number == heap_number)
-            {
-                dprintf (3, ("Using processor %d for heap %d", proc_number, heap_number));
-                affinity->Processor = proc_number;
-                heap_select::set_proc_no_for_heap(heap_number, proc_number);
-                if (GCToOSInterface::CanEnableGCNumaAware())
-                {
-                    uint16_t node_no = 0;
-                    PROCESSOR_NUMBER proc_no;
-                    proc_no.Group = 0;
-                    proc_no.Number = (uint8_t)proc_number;
-                    proc_no.Reserved = 0;
-                    if (GCToOSInterface::GetNumaProcessorNode(&proc_no, &node_no))
-                    {
-                        heap_select::set_numa_node_for_heap(heap_number, node_no);
-                    }
-                }
-                return;
-            }
-            bit_number++;
+            dprintf(1, ("Failed to set thread affinity for server GC thread"));
         }
-        proc_number++;
     }
 }
 
@@ -5501,7 +5412,7 @@ bool gc_heap::virtual_alloc_commit_for_heap (void* addr, size_t size, int h_numb
     {
         if (GCToOSInterface::CanEnableGCNumaAware())
         {
-            uint32_t numa_node = heap_select::find_numa_node_from_heap_no(h_number);
+            uint16_t numa_node = heap_select::find_numa_node_from_heap_no(h_number);
             if (GCToOSInterface::VirtualCommit(addr, size, numa_node))
                 return true;
         }
@@ -13626,43 +13537,18 @@ try_again:
                     acontext->set_alloc_heap(GCHeap::GetHeap(max_hp->heap_number));
                     if (!gc_thread_no_affinitize_p)
                     {
-                        if (GCToOSInterface::CanEnableGCCPUGroups())
-                        {   //only set ideal processor when max_hp and org_hp are in the same cpu
-                            //group. DO NOT MOVE THREADS ACROSS CPU GROUPS
-                            uint16_t org_gn = heap_select::find_cpu_group_from_heap_no(org_hp->heap_number);
-                            uint16_t max_gn = heap_select::find_cpu_group_from_heap_no(max_hp->heap_number);
-                            if (org_gn == max_gn) //only set within CPU group, so SetThreadIdealProcessor is enough
-                            {   
-                                uint16_t group_proc_no = heap_select::find_group_proc_from_heap_no(max_hp->heap_number);
-
-                                GCThreadAffinity affinity;
-                                affinity.Processor = group_proc_no;
-                                affinity.Group = org_gn;
-                                if (!GCToOSInterface::SetCurrentThreadIdealAffinity(&affinity))
-                                {
-                                    dprintf (3, ("Failed to set the ideal processor and group for heap %d.",
-                                                org_hp->heap_number));
-                                }
-                            }
-                        }
-                        else 
-                        {
-                            uint16_t proc_no = heap_select::find_proc_no_from_heap_no(max_hp->heap_number);
-
-                            GCThreadAffinity affinity;
-                            affinity.Processor = proc_no;
-                            affinity.Group = GCThreadAffinity::None;
+                        uint16_t src_proc_no = heap_select::find_proc_no_from_heap_no(org_hp->heap_number);
+                        uint16_t dst_proc_no = heap_select::find_proc_no_from_heap_no(max_hp->heap_number);
 
-                            if (!GCToOSInterface::SetCurrentThreadIdealAffinity(&affinity))
-                            {
-                                dprintf (3, ("Failed to set the ideal processor for heap %d.",
-                                            org_hp->heap_number));
-                            }
+                        if (!GCToOSInterface::MigrateThread(src_proc_no, dst_proc_no))
+                        {
+                            dprintf (3, ("Failed to set the ideal processor for heap %d.",
+                                        org_hp->heap_number));
                         }
                     }
                     dprintf (3, ("Switching context %p (home heap %d) ", 
                                  acontext,
-                        acontext->get_home_heap()->pGenGCHeap->heap_number));
+                                 acontext->get_home_heap()->pGenGCHeap->heap_number));
                     dprintf (3, (" from heap %d (%Id free bytes, %d contexts) ", 
                                  org_hp->heap_number,
                                  org_size,
@@ -25449,22 +25335,10 @@ void gc_heap::gc_thread_stub (void* arg)
     gc_heap* heap = (gc_heap*)arg;
     if (!gc_thread_no_affinitize_p)
     {
-        GCThreadAffinity affinity;
-        affinity.Group = GCThreadAffinity::None;
-        affinity.Processor = GCThreadAffinity::None;
-
         // We are about to set affinity for GC threads. It is a good place to set up NUMA and
         // CPU groups because the process mask, processor number, and group number are all
         // readily available.
-        if (GCToOSInterface::CanEnableGCCPUGroups())
-            set_thread_group_affinity_for_heap(heap->heap_number, &affinity);
-        else
-            set_thread_affinity_mask_for_heap(heap->heap_number, &affinity);
-
-        if (!GCToOSInterface::SetThreadAffinity(&affinity))
-        {
-            dprintf(1, ("Failed to set thread affinity for server GC thread"));
-        }
+        set_thread_affinity_for_heap(heap->heap_number);
     }
 
     // server GC threads run at a higher priority than normal.
@@ -34211,12 +34085,91 @@ HRESULT GCHeap::Initialize()
     uint32_t nhp_from_config = 0;
 
 #ifdef MULTIPLE_HEAPS
+    AffinitySet config_affinity_set;
+
+    // Get the affinity set configured by the user
+    uintptr_t heap_affinity_mask = GCConfig::GetGCHeapAffinitizeMask();
+    if (heap_affinity_mask != 0)
+    {
+        for (size_t i = 0; i < 8 * sizeof(uintptr_t); i++)
+        {
+            if (heap_affinity_mask & ((uintptr_t)1 << i))
+            {
+                config_affinity_set.Add(i);
+            }
+        }
+    }
+    else
+    {
+        GCConfigStringHolder cpu_index_ranges_holder(GCConfig::GetGCHeapAffinitizeRanges());
+        const char* cpu_index_ranges = cpu_index_ranges_holder.Get();
+
+        // The cpu index ranges string is a comma-separated list of indices or ranges of indices (e.g. 1-5).
+        // Example: 1,3,5,7-9,12
+
+        if (cpu_index_ranges != NULL)
+        {
+            char* number_end;
+
+            do
+            {
+                size_t start_index = strtoul(cpu_index_ranges, &number_end, 10);
+
+                if (number_end == cpu_index_ranges)
+                {
+                    // No number found, invalid format
+                    break;
+                }
+
+                size_t end_index = start_index;
+
+                if (*number_end == '-')
+                {
+                    char* range_end_start = number_end + 1;
+                    end_index = strtoul(range_end_start, &number_end, 10);
+                    if (number_end == range_end_start)
+                    {
+                        // No number found, invalid format
+                        break;
+                    }
+                }
+
+                if ((start_index < MAX_SUPPORTED_CPUS) && end_index < (MAX_SUPPORTED_CPUS))
+                {
+                    for (size_t i = start_index; i <= end_index; i++)
+                    {
+                        config_affinity_set.Add(i);
+                    }
+                }
+
+                cpu_index_ranges = number_end + 1;
+            }
+            while (*number_end == ',');
+        }
+    }
+
+    AffinitySet* process_affinity_set = GCToOSInterface::GetCurrentProcessAffinitySet();
+
+    if (!config_affinity_set.IsEmpty())
+    {
+        // Update the process affinity set using the configured set
+        for (size_t i = 0; i < MAX_SUPPORTED_CPUS; i++)
+        {
+            if (process_affinity_set->Contains(i) && !config_affinity_set.Contains(i))
+            {
+                process_affinity_set->Remove(i);
+            }
+        }
+    }
+
+    if (process_affinity_set->IsEmpty())
+    {
+        return CLR_E_GC_BAD_AFFINITY_CONFIG;
+    }
+
     nhp_from_config = static_cast<uint32_t>(GCConfig::GetHeapCount());
     
-    // GetCurrentProcessCpuCount only returns up to 64 procs.
-    uint32_t nhp_from_process = GCToOSInterface::CanEnableGCCPUGroups() ?
-                                GCToOSInterface::GetTotalProcessorCount():
-                                GCToOSInterface::GetCurrentProcessCpuCount();
+    uint32_t nhp_from_process = GCToOSInterface::GetCurrentProcessCpuCount();
 
     if (nhp_from_config)
     {
@@ -34231,63 +34184,23 @@ HRESULT GCHeap::Initialize()
 #ifndef FEATURE_REDHAWK
     gc_heap::gc_thread_no_affinitize_p = (gc_heap::heap_hard_limit ? false : (GCConfig::GetNoAffinitize() != 0));
 
-    size_t gc_thread_affinity_mask = static_cast<size_t>(GCConfig::GetGCHeapAffinitizeMask());
-
     if (gc_heap::heap_hard_limit)
     {
-        gc_heap::gc_thread_no_affinitize_p = (gc_thread_affinity_mask == 0);
+        gc_heap::gc_thread_no_affinitize_p = (config_affinity_set.Count() == 0);
     }
 
     if (!(gc_heap::gc_thread_no_affinitize_p))
     {
-        if (!(GCToOSInterface::CanEnableGCCPUGroups()))
-        {
-            uintptr_t pmask, smask;
-            if (GCToOSInterface::GetCurrentProcessAffinityMask(&pmask, &smask))
-            {
-                pmask &= smask;
-
-#ifdef FEATURE_PAL
-                // GetCurrentProcessAffinityMask can return pmask=0 and smask=0 on
-                // systems with more than 1 NUMA node. The pmask decides the
-                // number of GC heaps to be used and the processors they are
-                // affinitized with. So pmask is now set to reflect that 64
-                // processors are available to begin with. The actual processors in
-                // the system may be lower and are taken into account before
-                // finalizing the number of heaps.
-                if (!pmask)
-                {
-                    pmask = SIZE_T_MAX;
-                }
-#endif // FEATURE_PAL
-
-                if (gc_thread_affinity_mask)
-                {
-                    pmask &= gc_thread_affinity_mask;
-                }
-
-                process_mask = pmask;
-
-                unsigned int set_bits_in_pmask = 0;
-                while (pmask)
-                {
-                    if (pmask & 1)
-                        set_bits_in_pmask++;
-                    pmask >>= 1;
-                }
-
-                nhp = min (nhp, set_bits_in_pmask);
+        uint32_t num_affinitized_processors = (uint32_t)process_affinity_set->Count();
 
+        if (num_affinitized_processors != 0)
+        {
+            nhp = min(nhp, num_affinitized_processors);
+        }
 #ifdef FEATURE_PAL
-                // Limit the GC heaps to the number of processors available in the system.
-                nhp = min (nhp, GCToOSInterface::GetTotalProcessorCount());
+        // Limit the GC heaps to the number of processors available in the system.
+        nhp = min (nhp, GCToOSInterface::GetTotalProcessorCount());
 #endif // FEATURE_PAL
-            }
-            else
-            {
-                gc_heap::gc_thread_no_affinitize_p = true;
-            }
-        }
     }
 #endif //!FEATURE_REDHAWK
 #endif //MULTIPLE_HEAPS
diff --git a/src/gc/gcconfig.h b/src/gc/gcconfig.h
index 2a4afb8c29..c3c0d5d66d 100644
--- a/src/gc/gcconfig.h
+++ b/src/gc/gcconfig.h
@@ -95,6 +95,8 @@ public:
       "Specifies the ratio compacting GCs vs sweeping")                                        \
   INT_CONFIG(GCHeapAffinitizeMask, "GCHeapAffinitizeMask", 0,                                  \
       "Specifies processor mask for Server GC threads")                                        \
+  STRING_CONFIG(GCHeapAffinitizeRanges, "GCHeapAffinitizeRanges",                              \
+      "Specifies list of processors for Server GC threads")                                    \
   INT_CONFIG(GCHighMemPercent, "GCHighMemPercent", 0,                                          \
       "The percent for GC to consider as high memory")                                         \
   INT_CONFIG(GCProvModeStress, "GCProvModeStress", 0,                                          \
diff --git a/src/gc/unix/config.h.in b/src/gc/unix/config.h.in
index a4a59b663e..f43709a393 100644
--- a/src/gc/unix/config.h.in
+++ b/src/gc/unix/config.h.in
@@ -15,5 +15,6 @@
 #cmakedefine01 HAVE_PTHREAD_CONDATTR_SETCLOCK
 #cmakedefine01 HAVE_MACH_ABSOLUTE_TIME
 #cmakedefine01 HAVE_SCHED_GETAFFINITY
+#cmakedefine01 HAVE_PTHREAD_GETAFFINITY_NP
 
 #endif // __CONFIG_H__
diff --git a/src/gc/unix/configure.cmake b/src/gc/unix/configure.cmake
index c2d6afe483..7eb9053bca 100644
--- a/src/gc/unix/configure.cmake
+++ b/src/gc/unix/configure.cmake
@@ -54,6 +54,16 @@ check_cxx_source_runs("
     }
     " HAVE_MACH_ABSOLUTE_TIME)
 
+
 check_library_exists(c sched_getaffinity "" HAVE_SCHED_GETAFFINITY)
+check_library_exists(pthread pthread_create "" HAVE_LIBPTHREAD)
+
+if (HAVE_LIBPTHREAD)
+  set(PTHREAD_LIBRARY pthread)
+elseif (HAVE_PTHREAD_IN_LIBC)
+  set(PTHREAD_LIBRARY c)
+endif()
+
+check_library_exists(${PTHREAD_LIBRARY} pthread_getaffinity_np "" HAVE_PTHREAD_GETAFFINITY_NP)
 
 configure_file(${CMAKE_CURRENT_LIST_DIR}/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h)
diff --git a/src/gc/unix/gcenv.unix.cpp b/src/gc/unix/gcenv.unix.cpp
index 572fc3b12f..4ab9e09260 100644
--- a/src/gc/unix/gcenv.unix.cpp
+++ b/src/gc/unix/gcenv.unix.cpp
@@ -47,9 +47,12 @@
 #define SYSCONF_GET_NUMPROCS _SC_NPROCESSORS_ONLN
 #endif
 
-// The cachced number of logical CPUs observed.
+// The cached number of logical CPUs observed.
 static uint32_t g_logicalCpuCount = 0;
 
+// The cached number of CPUs available for the current process.
+static uint32_t g_currentProcessCpuCount = 0;
+
 // Helper memory page used by the FlushProcessWriteBuffers
 static uint8_t* g_helperPage = 0;
 
@@ -64,6 +67,8 @@ static size_t g_RestrictedPhysicalMemoryLimit = 0;
 
 uint32_t g_pageSizeUnixInl = 0;
 
+AffinitySet g_processAffinitySet;
+
 // Initialize the interface implementation
 // Return:
 //  true if it has succeeded, false if it has failed
@@ -121,6 +126,42 @@ bool GCToOSInterface::Initialize()
 
     InitializeCGroup();
 
+#if HAVE_SCHED_GETAFFINITY
+
+    g_currentProcessCpuCount = 0;
+
+    cpu_set_t cpuSet;
+    int st = sched_getaffinity(0, sizeof(cpu_set_t), &cpuSet);
+
+    if (st == 0)
+    {
+        for (size_t i = 0; i < g_logicalCpuCount; i++)
+        {
+            if (CPU_ISSET(i, &cpuSet))
+            {
+                g_currentProcessCpuCount++;
+                g_processAffinitySet.Add(i);
+            }
+        }
+    }
+    else
+    {
+        // We should not get any of the errors that sched_getaffinity can return, since none
+        // of them applies to the current thread, so this is an unexpected kind of failure.
+        assert(false);
+    }
+
+#else // HAVE_SCHED_GETAFFINITY
+
+    g_currentProcessCpuCount = g_logicalCpuCount;
+
+    for (size_t i = 0; i < g_logicalCpuCount; i++)
+    {
+        g_processAffinitySet.Add(i);
+    }
+
+#endif // HAVE_SCHED_GETAFFINITY
+
     return true;
 }
 
@@ -163,15 +204,15 @@ uint32_t GCToOSInterface::GetCurrentProcessId()
     return getpid();
 }
 
-// Set ideal affinity for the current thread
+// Set ideal processor for the current thread
 // Parameters:
-//  affinity - ideal processor affinity for the thread
+//  srcProcNo - processor number the thread currently runs on
+//  dstProcNo - processor number the thread should be migrated to
 // Return:
 //  true if it has succeeded, false if it has failed
-bool GCToOSInterface::SetCurrentThreadIdealAffinity(GCThreadAffinity* affinity)
+bool GCToOSInterface::MigrateThread(uint16_t srcProcNo, uint16_t dstProcNo)
 {
-    // TODO(segilles)
-    return false;
+    return GCToOSInterface::SetThreadAffinity(dstProcNo);
 }
 
 // Get the number of the current processor
@@ -324,7 +365,7 @@ bool GCToOSInterface::VirtualRelease(void* address, size_t size)
 //  size    - size of the virtual memory range
 // Return:
 //  true if it has succeeded, false if it has failed
-bool GCToOSInterface::VirtualCommit(void* address, size_t size, uint32_t node)
+bool GCToOSInterface::VirtualCommit(void* address, size_t size, uint16_t node)
 {
     assert(node == NUMA_NODE_UNDEFINED && "Numa allocation is not ported to local GC on unix yet");
     return mprotect(address, size, PROT_WRITE | PROT_READ) == 0;
@@ -416,16 +457,25 @@ size_t GCToOSInterface::GetCacheSizePerLogicalCpu(bool trueSize)
 }
 
 // Sets the calling thread's affinity to only run on the processor specified
-// in the GCThreadAffinity structure.
 // Parameters:
-//  affinity - The requested affinity for the calling thread. At most one processor
-//             can be provided.
+//  procNo - The requested processor for the calling thread.
 // Return:
 //  true if setting the affinity was successful, false otherwise.
-bool GCToOSInterface::SetThreadAffinity(GCThreadAffinity* affinity)
+bool GCToOSInterface::SetThreadAffinity(uint16_t procNo)
 {
-    // [LOCALGC TODO] Thread affinity for unix
+#if HAVE_PTHREAD_GETAFFINITY_NP
+    cpu_set_t cpuSet;
+    CPU_ZERO(&cpuSet);
+    CPU_SET((int)procNo, &cpuSet);
+
+    int st = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuSet);
+
+    return (st == 0);
+
+#else  // HAVE_PTHREAD_GETAFFINITY_NP
+    // There is no API to manage thread affinity, so let's ignore the request
     return false;
+#endif // HAVE_PTHREAD_GETAFFINITY_NP
 }
 
 // Boosts the calling thread's thread priority to a level higher than the default
@@ -440,87 +490,12 @@ bool GCToOSInterface::BoostThreadPriority()
     return false;
 }
 
-/*++
-Function:
-  GetFullAffinityMask
-
-Get affinity mask for the specified number of processors with all
-the processors enabled.
---*/
-static uintptr_t GetFullAffinityMask(int cpuCount)
-{
-    if ((size_t)cpuCount < sizeof(uintptr_t) * 8)
-    {
-        return ((uintptr_t)1 << cpuCount) - 1;
-    }
-
-    return ~(uintptr_t)0;
-}
-
-// Get affinity mask of the current process
-// Parameters:
-//  processMask - affinity mask for the specified process
-//  systemMask  - affinity mask for the system
+// Get set of processors enabled for GC for the current process
 // Return:
-//  true if it has succeeded, false if it has failed
-// Remarks:
-//  A process affinity mask is a bit vector in which each bit represents the processors that
-//  a process is allowed to run on. A system affinity mask is a bit vector in which each bit
-//  represents the processors that are configured into a system.
-//  A process affinity mask is a subset of the system affinity mask. A process is only allowed
-//  to run on the processors configured into a system. Therefore, the process affinity mask cannot
-//  specify a 1 bit for a processor when the system affinity mask specifies a 0 bit for that processor.
-bool GCToOSInterface::GetCurrentProcessAffinityMask(uintptr_t* processAffinityMask, uintptr_t* systemAffinityMask)
+//  set of enabled processors
+AffinitySet* GCToOSInterface::GetCurrentProcessAffinitySet()
 {
-    unsigned int cpuCountInMask = (g_logicalCpuCount > 64) ? 64 : g_logicalCpuCount;
-
-    uintptr_t systemMask = GetFullAffinityMask(cpuCountInMask);
-
-#if HAVE_SCHED_GETAFFINITY
-
-    int pid = getpid();
-    cpu_set_t cpuSet;
-    int st = sched_getaffinity(pid, sizeof(cpu_set_t), &cpuSet);
-    if (st == 0)
-    {
-        uintptr_t processMask = 0;
-
-        for (unsigned int i = 0; i < cpuCountInMask; i++)
-        {
-            if (CPU_ISSET(i, &cpuSet))
-            {
-                processMask |= ((uintptr_t)1) << i;
-            }
-        }
-
-        *processAffinityMask = processMask;
-        *systemAffinityMask = systemMask;
-        return true;
-    }
-    else if (errno == EINVAL)
-    {
-        // There are more processors than can fit in a cpu_set_t
-        // return all bits set for all processors (upto 64) for both masks
-        *processAffinityMask = systemMask;
-        *systemAffinityMask = systemMask;
-        return true;
-    }
-    else
-    {
-        // We should not get any of the errors that the sched_getaffinity can return since none
-        // of them applies for the current thread, so this is an unexpected kind of failure.
-        return false;
-    }
-
-#else // HAVE_SCHED_GETAFFINITY
-
-    // There is no API to manage thread affinity, so let's return both affinity masks
-    // with all the CPUs on the system set.
-    *systemAffinityMask = systemMask;
-    *processAffinityMask = systemMask;
-    return true;
-
-#endif // HAVE_SCHED_GETAFFINITY
+    return &g_processAffinitySet;
 }
 
 // Get number of processors assigned to the current process
@@ -528,35 +503,7 @@ bool GCToOSInterface::GetCurrentProcessAffinityMask(uintptr_t* processAffinityMa
 //  The number of processors
 uint32_t GCToOSInterface::GetCurrentProcessCpuCount()
 {
-    uintptr_t pmask, smask;
-    uint32_t cpuLimit;
-
-    if (!GetCurrentProcessAffinityMask(&pmask, &smask))
-        return 1;
-
-    pmask &= smask;
-
-    unsigned int count = 0;
-    while (pmask)
-    {
-        pmask &= (pmask - 1);
-        count++;
-    }
-
-    // GetProcessAffinityMask can return pmask=0 and smask=0 on systems with more
-    // than 64 processors, which would leave us with a count of 0.  Since the GC
-    // expects there to be at least one processor to run on (and thus at least one
-    // heap), we'll return 64 here if count is 0, since there are likely a ton of
-    // processors available in that case.  The GC also cannot (currently) handle
-    // the case where there are more than 64 processors, so we will return a
-    // maximum of 64 here.
-    if (count == 0 || count > 64)
-        count = 64;
-
-    if (GetCpuLimit(&cpuLimit) && cpuLimit < count)
-        count = cpuLimit;
-
-    return count;
+    return g_currentProcessCpuCount;
 }
 
 // Return the size of the user-mode portion of the virtual address space of this process.
@@ -715,20 +662,52 @@ bool GCToOSInterface::CanEnableGCNumaAware()
     return false;
 }
 
-bool GCToOSInterface::GetNumaProcessorNode(PPROCESSOR_NUMBER proc_no, uint16_t *node_no)
+bool GCToOSInterface::GetNumaProcessorNode(uint16_t proc_no, uint16_t *node_no)
 {
     assert(!"Numa has not been ported to local GC for unix");
     return false;
 }
 
-bool GCToOSInterface::CanEnableGCCPUGroups()
+// Get processor number and optionally its NUMA node number for the specified heap number
+// Parameters:
+//  heap_number - heap number to get the result for
+//  proc_no     - set to the selected processor number
+//  node_no     - set to the NUMA node of the selected processor or to NUMA_NODE_UNDEFINED
+// Return:
+//  true if it succeeded
+bool GCToOSInterface::GetProcessorForHeap(uint16_t heap_number, uint16_t* proc_no, uint16_t* node_no)
 {
-    return false;
-}
+    bool success = false;
 
-void GCToOSInterface::GetGroupForProcessor(uint16_t processor_number, uint16_t* group_number, uint16_t* group_processor_number)
-{
-    assert(!"CpuGroup has not been ported to local GC for unix");
+    uint16_t availableProcNumber = 0;
+    for (size_t procNumber = 0; procNumber < g_logicalCpuCount; procNumber++)
+    {
+        if (g_processAffinitySet.Contains(procNumber))
+        {
+            if (availableProcNumber == heap_number)
+            {
+                *proc_no = procNumber;
+
+                if (GCToOSInterface::CanEnableGCNumaAware())
+                {
+                    if (!GCToOSInterface::GetNumaProcessorNode(procNumber, node_no))
+                    {
+                        *node_no = NUMA_NODE_UNDEFINED;
+                    }
+                }
+                else
+                {
+                    *node_no = NUMA_NODE_UNDEFINED;
+                }
+
+                success = true;
+                break;
+            }
+            availableProcNumber++;
+        }
+    }
+
+    return success;
 }
 
 // Initialize the critical section
diff --git a/src/gc/windows/gcenv.windows.cpp b/src/gc/windows/gcenv.windows.cpp
index f7d069c874..100b2183d2 100644
--- a/src/gc/windows/gcenv.windows.cpp
+++ b/src/gc/windows/gcenv.windows.cpp
@@ -27,6 +27,8 @@ static size_t g_RestrictedPhysicalMemoryLimit = (size_t)UINTPTR_MAX;
 // memory on the machine/in the container, we need to restrict by the VM.
 static bool g_UseRestrictedVirtualMemory = false;
 
+static AffinitySet g_processAffinitySet;
+
 typedef BOOL (WINAPI *PIS_PROCESS_IN_JOB)(HANDLE processHandle, HANDLE jobHandle, BOOL* result);
 typedef BOOL (WINAPI *PQUERY_INFORMATION_JOB_OBJECT)(HANDLE jobHandle, JOBOBJECTINFOCLASS jobObjectInfoClass, void* lpJobObjectInfo, DWORD cbJobObjectInfoLength, LPDWORD lpReturnLength);
 
@@ -34,6 +36,29 @@ namespace {
 
 static bool g_fEnableGCNumaAware;
 
+class GroupProcNo
+{
+    uint16_t m_groupProc;
+
+public:
+
+    static const uint16_t NoGroup = 0x3ff;
+
+    GroupProcNo(uint16_t groupProc) : m_groupProc(groupProc)
+    {
+    }
+
+    GroupProcNo(uint16_t group, uint16_t procIndex) : m_groupProc((group << 6) | procIndex)
+    {
+        assert(group <= 0x3ff);
+        assert(procIndex <= 0x3f);
+    }
+
+    uint16_t GetGroup() { return m_groupProc >> 6; }
+    uint16_t GetProcIndex() { return m_groupProc & 0x3f; }
+    uint16_t GetCombinedValue() { return m_groupProc; }
+};
+
 struct CPU_Group_Info 
 {
     WORD    nr_active;  // at most 64
@@ -443,6 +468,37 @@ Exit:
     return cache_size;
 }
 
+bool CanEnableGCCPUGroups()
+{
+    return g_fEnableGCCPUGroups;
+}
+
+// Get the CPU group for the specified processor
+void GetGroupForProcessor(uint16_t processor_number, uint16_t* group_number, uint16_t* group_processor_number)
+{
+    assert(g_fEnableGCCPUGroups);
+
+#if !defined(FEATURE_REDHAWK) && (defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_))
+    WORD bTemp = 0;
+    WORD bDiff = processor_number - bTemp;
+
+    for (WORD i=0; i < g_nGroups; i++)
+    {
+        bTemp += g_CPUGroupInfoArray[i].nr_active;
+        if (bTemp > processor_number)
+        {
+            *group_number = i;
+            *group_processor_number = bDiff;
+            break;
+        }
+        bDiff = processor_number - bTemp;
+    }
+#else
+    *group_number = 0;
+    *group_processor_number = 0;
+#endif
+}
+
 } // anonymous namespace
 
 // Initialize the interface implementation
@@ -462,6 +518,20 @@ bool GCToOSInterface::Initialize()
     InitNumaNodeInfo();
     InitCPUGroupInfo();
 
+    uintptr_t pmask, smask;
+    if (!!::GetProcessAffinityMask(::GetCurrentProcess(), (PDWORD_PTR)&pmask, (PDWORD_PTR)&smask))
+    {
+        pmask &= smask;
+
+        for (size_t i = 0; i < 8 * sizeof(uintptr_t); i++)
+        {
+            if ((pmask & ((uintptr_t)1 << i)) != 0)
+            {
+                g_processAffinitySet.Add(i);
+            }
+        }
+    }
+
     return true;
 }
 
@@ -486,24 +556,40 @@ uint32_t GCToOSInterface::GetCurrentProcessId()
     return ::GetCurrentThreadId();
 }
 
-// Set ideal affinity for the current thread
+// Set ideal processor for the current thread
 // Parameters:
-//  affinity - ideal processor affinity for the thread
+//  srcProcNo - processor number the thread currently runs on
+//  dstProcNo - processor number the thread should be migrated to
 // Return:
 //  true if it has succeeded, false if it has failed
-bool GCToOSInterface::SetCurrentThreadIdealAffinity(GCThreadAffinity* affinity)
+bool GCToOSInterface::MigrateThread(uint16_t srcProcNo, uint16_t dstProcNo)
 {
+    LIMITED_METHOD_CONTRACT;
+
     bool success = true;
 
+    GroupProcNo srcGroupProcNo(srcProcNo);
+    GroupProcNo dstGroupProcNo(dstProcNo);
+
+    if (CanEnableGCCPUGroups())
+    {
+        if (srcGroupProcNo.GetGroup() != dstGroupProcNo.GetGroup())
+        {
+            //only set ideal processor when srcProcNo and dstProcNo are in the same cpu
+            //group. DO NOT MOVE THREADS ACROSS CPU GROUPS
+            return true;
+        }
+    }
+
 #if !defined(FEATURE_CORESYSTEM)
-    SetThreadIdealProcessor(GetCurrentThread(), (DWORD)affinity->Processor);
+    SetThreadIdealProcessor(GetCurrentThread(), (DWORD)dstGroupProcNo.GetProcIndex());
 #else
     PROCESSOR_NUMBER proc;
 
-    if (affinity->Group != -1)
+    if (dstGroupProcNo.GetGroup() != GroupProcNo::NoGroup)
     {
-        proc.Group = (WORD)affinity->Group;
-        proc.Number = (BYTE)affinity->Processor;
+        proc.Group = (WORD)dstGroupProcNo.GetGroup();
+        proc.Number = (BYTE)dstGroupProcNo.GetProcIndex();
         proc.Reserved = 0;
 
         success = !!SetThreadIdealProcessorEx(GetCurrentThread(), &proc, NULL);
@@ -512,8 +598,8 @@ bool GCToOSInterface::SetCurrentThreadIdealAffinity(GCThreadAffinity* affinity)
     {
         if (GetThreadIdealProcessorEx(GetCurrentThread(), &proc))
         {
-            proc.Number = affinity->Processor;
-            success = !!SetThreadIdealProcessorEx(GetCurrentThread(), &proc, NULL);
+            proc.Number = (BYTE)dstGroupProcNo.GetProcIndex();
+            success = !!SetThreadIdealProcessorEx(GetCurrentThread(), &proc, &proc);
         }
     }
 #endif
@@ -605,7 +691,7 @@ bool GCToOSInterface::VirtualRelease(void* address, size_t size)
 //  size    - size of the virtual memory range
 // Return:
 //  true if it has succeeded, false if it has failed
-bool GCToOSInterface::VirtualCommit(void* address, size_t size, uint32_t node)
+bool GCToOSInterface::VirtualCommit(void* address, size_t size, uint16_t node)
 {
     if (node == NUMA_NODE_UNDEFINED)
     {
@@ -832,34 +918,28 @@ size_t GCToOSInterface::GetCacheSizePerLogicalCpu(bool trueSize)
 }
 
 // Sets the calling thread's affinity to only run on the processor specified
-// in the GCThreadAffinity structure.
 // Parameters:
-//  affinity - The requested affinity for the calling thread. At most one processor
-//             can be provided.
+//  procNo - The requested processor for the calling thread.
 // Return:
 //  true if setting the affinity was successful, false otherwise.
-bool GCToOSInterface::SetThreadAffinity(GCThreadAffinity* affinity)
+bool GCToOSInterface::SetThreadAffinity(uint16_t procNo)
 {
-    assert(affinity != nullptr);
-    if (affinity->Group != GCThreadAffinity::None)
-    {
-        assert(affinity->Processor != GCThreadAffinity::None);
+    GroupProcNo groupProcNo(procNo);
 
+    if (groupProcNo.GetGroup() != GroupProcNo::NoGroup)
+    {
         GROUP_AFFINITY ga;
-        ga.Group = (WORD)affinity->Group;
+        ga.Group = (WORD)groupProcNo.GetGroup();
         ga.Reserved[0] = 0; // reserve must be filled with zero
         ga.Reserved[1] = 0; // otherwise call may fail
         ga.Reserved[2] = 0;
-        ga.Mask = (size_t)1 << affinity->Processor;
+        ga.Mask = (size_t)1 << groupProcNo.GetProcIndex();
         return !!SetThreadGroupAffinity(GetCurrentThread(), &ga, nullptr);
     }
-    else if (affinity->Processor != GCThreadAffinity::None)
+    else
     {
-        return !!SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR)1 << affinity->Processor);
+        return !!SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR)1 << groupProcNo.GetProcIndex());
     }
-
-    // Given affinity must specify at least one processor to use.
-    return false;
 }
 
 // Boosts the calling thread's thread priority to a level higher than the default
@@ -873,22 +953,12 @@ bool GCToOSInterface::BoostThreadPriority()
     return !!SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST);
 }
 
-// Get affinity mask of the current process
-// Parameters:
-//  processMask - affinity mask for the specified process
-//  systemMask  - affinity mask for the system
+// Get set of processors enabled for GC for the current process
 // Return:
-//  true if it has succeeded, false if it has failed
-// Remarks:
-//  A process affinity mask is a bit vector in which each bit represents the processors that
-//  a process is allowed to run on. A system affinity mask is a bit vector in which each bit
-//  represents the processors that are configured into a system.
-//  A process affinity mask is a subset of the system affinity mask. A process is only allowed
-//  to run on the processors configured into a system. Therefore, the process affinity mask cannot
-//  specify a 1 bit for a processor when the system affinity mask specifies a 0 bit for that processor.
-bool GCToOSInterface::GetCurrentProcessAffinityMask(uintptr_t* processMask, uintptr_t* systemMask)
-{
-    return !!::GetProcessAffinityMask(::GetCurrentProcess(), (PDWORD_PTR)processMask, (PDWORD_PTR)systemMask);
+//  set of enabled processors
+AffinitySet* GCToOSInterface::GetCurrentProcessAffinitySet()
+{
+    return &g_processAffinitySet;
 }
 
 // Get number of processors assigned to the current process
@@ -901,32 +971,41 @@ uint32_t GCToOSInterface::GetCurrentProcessCpuCount()
     if (cCPUs != 0)
         return cCPUs;
 
-    int count = 0;
-    DWORD_PTR pmask, smask;
+    int count;
 
-    if (!GetProcessAffinityMask(GetCurrentProcess(), &pmask, &smask))
+    if (CanEnableGCCPUGroups())
     {
-        count = 1;
+        count = GCToOSInterface::GetTotalProcessorCount();
     }
     else
     {
-        pmask &= smask;
+        DWORD_PTR pmask, smask;
 
-        while (pmask)
+        if (!GetProcessAffinityMask(GetCurrentProcess(), &pmask, &smask))
         {
-            pmask &= (pmask - 1);
-            count++;
+            count = 1;
         }
+        else
+        {
+            count = 0;
+            pmask &= smask;
 
-        // GetProcessAffinityMask can return pmask=0 and smask=0 on systems with more
-        // than 64 processors, which would leave us with a count of 0.  Since the GC
-        // expects there to be at least one processor to run on (and thus at least one
-        // heap), we'll return 64 here if count is 0, since there are likely a ton of
-        // processors available in that case.  The GC also cannot (currently) handle
-        // the case where there are more than 64 processors, so we will return a
-        // maximum of 64 here.
-        if (count == 0 || count > 64)
-            count = 64;
+            while (pmask)
+            {
+                pmask &= (pmask - 1);
+                count++;
+            }
+
+            // GetProcessAffinityMask can return pmask=0 and smask=0 on systems with more
+            // than 64 processors, which would leave us with a count of 0.  Since the GC
+            // expects there to be at least one processor to run on (and thus at least one
+            // heap), we'll return 64 here if count is 0, since there are likely a ton of
+            // processors available in that case.  The GC also cannot (currently) handle
+            // the case where there are more than 64 processors, so we will return a
+            // maximum of 64 here.
+            if (count == 0 || count > 64)
+                count = 64;
+        }
     }
 
     cCPUs = count;
@@ -1097,40 +1176,83 @@ bool GCToOSInterface::CanEnableGCNumaAware()
     return g_fEnableGCNumaAware;
 }
 
-bool GCToOSInterface::GetNumaProcessorNode(PPROCESSOR_NUMBER proc_no, uint16_t *node_no)
+bool GCToOSInterface::GetNumaProcessorNode(uint16_t proc_no, uint16_t *node_no)
 {
+    GroupProcNo groupProcNo(proc_no);
+
+    PROCESSOR_NUMBER procNumber;
+    procNumber.Group    = groupProcNo.GetGroup();
+    procNumber.Number   = (BYTE)groupProcNo.GetProcIndex();
+    procNumber.Reserved = 0;
+
     assert(g_fEnableGCNumaAware);
-    return ::GetNumaProcessorNodeEx(proc_no, node_no) != FALSE;
+    return ::GetNumaProcessorNodeEx(&procNumber, node_no) != FALSE;
 }
 
-bool GCToOSInterface::CanEnableGCCPUGroups()
+// Get processor number and optionally its NUMA node number for the specified heap number
+// Parameters:
+//  heap_number - heap number to get the result for
+//  proc_no     - set to the selected processor number
+//  node_no     - set to the NUMA node of the processor, to its CPU group if NUMA is off, or to NUMA_NODE_UNDEFINED
+// Return:
+//  true if it succeeded
+bool GCToOSInterface::GetProcessorForHeap(uint16_t heap_number, uint16_t* proc_no, uint16_t* node_no)
 {
-    return g_fEnableGCCPUGroups;
-}
+    bool success = false;
 
-void GCToOSInterface::GetGroupForProcessor(uint16_t processor_number, uint16_t* group_number, uint16_t* group_processor_number)
-{
-    assert(g_fEnableGCCPUGroups);
+    if (CanEnableGCCPUGroups())
+    {
+        uint16_t gn, gpn;
+        GetGroupForProcessor((uint16_t)heap_number, &gn, &gpn);
 
-#if !defined(FEATURE_REDHAWK) && (defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_))
-    WORD bTemp = 0;
-    WORD bDiff = processor_number - bTemp;
+//      dprintf(3, ("using processor group %d, mask %Ix for heap %d\n", gn, ((uintptr_t)1 << gpn), heap_number));
+        *proc_no = GroupProcNo(gn, gpn).GetCombinedValue();
 
-    for (WORD i=0; i < g_nGroups; i++)
+        if (GCToOSInterface::CanEnableGCNumaAware())
+        {
+            if (!GCToOSInterface::GetNumaProcessorNode(*proc_no, node_no))
+            {
+                *node_no = NUMA_NODE_UNDEFINED;
+            }
+        }
+        else
+        {   // no numa setting, each cpu group is treated as a node
+            *node_no = gn;
+        }
+
+        success = true;
+    }
+    else
     {
-        bTemp += g_CPUGroupInfoArray[i].nr_active;
-        if (bTemp > processor_number)
+        int bit_number = 0;
+        uint8_t proc_number = 0;
+        for (uintptr_t mask = 1; mask != 0; mask <<= 1)
         {
-            *group_number = i;
-            *group_processor_number = bDiff;
-            break;
+            if (g_processAffinitySet.Contains(proc_number))
+            {
+                if (bit_number == heap_number)
+                {
+                    //dprintf (3, ("Using processor %d for heap %d", proc_number, heap_number));
+                    *proc_no = GroupProcNo(GroupProcNo::NoGroup, proc_number).GetCombinedValue();
+
+                    // Always define *node_no, also when NUMA awareness is disabled
+                    // or the NUMA node lookup fails.
+                    if (!GCToOSInterface::CanEnableGCNumaAware() ||
+                        !GCToOSInterface::GetNumaProcessorNode(proc_number, node_no))
+                    {
+                        *node_no = NUMA_NODE_UNDEFINED;
+                    }
+
+                    success = true;
+                    break;
+                }
+                bit_number++;
+            }
+            proc_number++;
         }
-        bDiff = processor_number - bTemp;
     }
-#else
-    *group_number = 0;
-    *group_processor_number = 0;
-#endif
+
+    return success;
 }
 
 // Parameters of the GC thread stub
author	Jan Vorlicek <janvorli@microsoft.com>	2019-03-25 20:52:07 +0100
committer	Jan Vorlicek <janvorli@microsoft.com>	2019-04-03 21:12:02 +0200
commit	67f1116cea421ebbe516fafc963e213cc6f0e117 (patch)
tree	671983515e49105c60ba376a79bbd28020ce766c /src/gc
parent	17add60f708fa55a5447a67851783ab98e01f0cf (diff)
download	coreclr-67f1116cea421ebbe516fafc963e213cc6f0e117.tar.gz coreclr-67f1116cea421ebbe516fafc963e213cc6f0e117.tar.bz2 coreclr-67f1116cea421ebbe516fafc963e213cc6f0e117.zip