diff options
author | Ludovic Henry <luhenry@microsoft.com> | 2019-05-13 13:15:05 -0700 |
---|---|---|
committer | Vladimir Sadov <vsadov@microsoft.com> | 2019-05-13 13:15:04 -0700 |
commit | 934d73fea95b4c479b67fee0ff53caea4a325ee5 (patch) | |
tree | 35d0d77780d31625887625ef74f614327a9c3f00 | |
parent | 24cdacaeb1e42a92051b32a6c4051b7a12ff2770 (diff) | |
download | coreclr-934d73fea95b4c479b67fee0ff53caea4a325ee5.tar.gz coreclr-934d73fea95b4c479b67fee0ff53caea4a325ee5.tar.bz2 coreclr-934d73fea95b4c479b67fee0ff53caea4a325ee5.zip |
Implement GC.GetTotalAllocatedBytes (#23852)
* keep what's allocated so far on each heap
* Implement GC.GetTotalAllocatedBytes
It is based on https://github.com/dotnet/corefx/issues/34631 and https://github.com/dotnet/corefx/issues/30644
* Fixing races related to dead_threads_non_alloc_bytes
* separated per-heap SOH and LOH counters. Different locks imply that we need different counters.
* allow/ignore torn 64bit reads on 32bit in imprecise mode.
* PR feedback
* simplified the test a little to avoid OOM on ARM
-rw-r--r-- | src/System.Private.CoreLib/src/System/GC.cs | 13 | ||||
-rw-r--r-- | src/gc/gc.cpp | 36 | ||||
-rw-r--r-- | src/gc/gcimpl.h | 2 | ||||
-rw-r--r-- | src/gc/gcinterface.h | 2 | ||||
-rw-r--r-- | src/gc/gcpriv.h | 8 | ||||
-rw-r--r-- | src/vm/comutilnative.cpp | 41 | ||||
-rw-r--r-- | src/vm/comutilnative.h | 3 | ||||
-rw-r--r-- | src/vm/ecalllist.h | 3 | ||||
-rw-r--r-- | src/vm/threads.cpp | 6 | ||||
-rw-r--r-- | src/vm/threads.h | 3 | ||||
-rw-r--r-- | tests/src/GC/API/GC/GetTotalAllocatedBytes.cs | 167 | ||||
-rw-r--r-- | tests/src/GC/API/GC/GetTotalAllocatedBytes.csproj | 39 |
12 files changed, 312 insertions, 11 deletions
diff --git a/src/System.Private.CoreLib/src/System/GC.cs b/src/System.Private.CoreLib/src/System/GC.cs index c12048c20e..501178f826 100644 --- a/src/System.Private.CoreLib/src/System/GC.cs +++ b/src/System.Private.CoreLib/src/System/GC.cs @@ -345,12 +345,15 @@ namespace System private static extern IntPtr _UnregisterFrozenSegment(IntPtr segmentHandle); [MethodImplAttribute(MethodImplOptions.InternalCall)] - private static extern long _GetAllocatedBytesForCurrentThread(); + public static extern long GetAllocatedBytesForCurrentThread(); - public static long GetAllocatedBytesForCurrentThread() - { - return _GetAllocatedBytesForCurrentThread(); - } + + /// <summary> + /// Get a count of the bytes allocated over the lifetime of the process. + /// <param name="precise">If true, gather a precise number, otherwise gather an approximate count. Gathering a precise value incurs a significant performance penalty.</param> + /// </summary> + [MethodImplAttribute(MethodImplOptions.InternalCall)] + public static extern long GetTotalAllocatedBytes(bool precise = false); [MethodImplAttribute(MethodImplOptions.InternalCall)] private static extern bool _RegisterForFullGCNotification(int maxGenerationPercentage, int largeObjectHeapPercentage); diff --git a/src/gc/gc.cpp b/src/gc/gc.cpp index f240e73650..e8f2400c1b 100644 --- a/src/gc/gc.cpp +++ b/src/gc/gc.cpp @@ -2634,6 +2634,10 @@ gen_to_condemn_tuning gc_heap::gen_to_condemn_reasons; size_t gc_heap::etw_allocation_running_amount[2]; +uint64_t gc_heap::total_alloc_bytes_soh = 0; + +uint64_t gc_heap::total_alloc_bytes_loh = 0; + int gc_heap::gc_policy = 0; size_t gc_heap::allocation_running_time; @@ -5950,6 +5954,8 @@ void gc_heap::fix_allocation_context (alloc_context* acontext, BOOL for_gc_p, { // We need to update the alloc_bytes to reflect the portion that we have not used acontext->alloc_bytes -= (acontext->alloc_limit - acontext->alloc_ptr); + total_alloc_bytes_soh -= (acontext->alloc_limit - acontext->alloc_ptr); + 
acontext->alloc_ptr = 0; acontext->alloc_limit = acontext->alloc_ptr; } @@ -10666,6 +10672,8 @@ gc_heap::init_gc_heap (int h_number) etw_allocation_running_amount[0] = 0; etw_allocation_running_amount[1] = 0; + total_alloc_bytes_soh = 0; + total_alloc_bytes_loh = 0; //needs to be done after the dynamic data has been initialized #ifndef MULTIPLE_HEAPS @@ -11464,6 +11472,7 @@ void gc_heap::adjust_limit_clr (uint8_t* start, size_t limit_size, size_t size, { bool loh_p = (gen_number > 0); GCSpinLock* msl = loh_p ? &more_space_lock_loh : &more_space_lock_soh; + uint64_t& total_alloc_bytes = loh_p ? total_alloc_bytes_loh : total_alloc_bytes_soh; size_t aligned_min_obj_size = Align(min_obj_size, align_const); @@ -11496,6 +11505,7 @@ void gc_heap::adjust_limit_clr (uint8_t* start, size_t limit_size, size_t size, // when we are finishing an allocation from a free list // we know that the free area was Align(min_obj_size) larger acontext->alloc_bytes -= ac_size; + total_alloc_bytes -= ac_size; size_t free_obj_size = ac_size + aligned_min_obj_size; make_unused_array (hole, free_obj_size); generation_free_obj_space (generation_of (gen_number)) += free_obj_size; @@ -11514,7 +11524,9 @@ void gc_heap::adjust_limit_clr (uint8_t* start, size_t limit_size, size_t size, } } acontext->alloc_limit = (start + limit_size - aligned_min_obj_size); - acontext->alloc_bytes += limit_size - ((gen_number < max_generation + 1) ? aligned_min_obj_size : 0); + size_t added_bytes = limit_size - ((gen_number < max_generation + 1) ? 
aligned_min_obj_size : 0); + acontext->alloc_bytes += added_bytes; + total_alloc_bytes += added_bytes; #ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING if (g_fEnableAppDomainMonitoring) @@ -11523,6 +11535,7 @@ void gc_heap::adjust_limit_clr (uint8_t* start, size_t limit_size, size_t size, } #endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING + uint8_t* saved_used = 0; if (seg) @@ -12124,7 +12137,9 @@ void gc_heap::bgc_loh_alloc_clr (uint8_t* alloc_start, } } #endif //VERIFY_HEAP - + + total_alloc_bytes_loh += size - Align (min_obj_size, align_const); + dprintf (SPINLOCK_LOG, ("[%d]Lmsl to clear large obj", heap_number)); add_saved_spinlock_info (true, me_release, mt_clr_large_mem); leave_spin_lock (&more_space_lock_loh); @@ -36039,6 +36054,23 @@ size_t GCHeap::GetTotalBytesInUse () #endif //MULTIPLE_HEAPS } +// Get the total allocated bytes +uint64_t GCHeap::GetTotalAllocatedBytes() +{ +#ifdef MULTIPLE_HEAPS + uint64_t total_alloc_bytes = 0; + for (int i = 0; i < gc_heap::n_heaps; i++) + { + gc_heap* hp = gc_heap::g_heaps[i]; + total_alloc_bytes += hp->total_alloc_bytes_soh; + total_alloc_bytes += hp->total_alloc_bytes_loh; + } + return total_alloc_bytes; +#else + return (pGenGCHeap->total_alloc_bytes_soh + pGenGCHeap->total_alloc_bytes_loh); +#endif //MULTIPLE_HEAPS +} + int GCHeap::CollectionCount (int generation, int get_bgc_fgc_count) { if (get_bgc_fgc_count != 0) diff --git a/src/gc/gcimpl.h b/src/gc/gcimpl.h index bdf3a5cc36..711ecb5fdc 100644 --- a/src/gc/gcimpl.h +++ b/src/gc/gcimpl.h @@ -76,6 +76,8 @@ public: // Gets the amount of bytes objects currently occupy on the GC heap. size_t GetCurrentObjSize(); + uint64_t GetTotalAllocatedBytes(); + size_t GetLastGCStartTime(int generation); size_t GetLastGCDuration(int generation); size_t GetNow(); diff --git a/src/gc/gcinterface.h b/src/gc/gcinterface.h index c710d6d321..92e7987cbf 100644 --- a/src/gc/gcinterface.h +++ b/src/gc/gcinterface.h @@ -676,6 +676,8 @@ public: // Gets the total number of bytes in use. 
virtual size_t GetTotalBytesInUse() = 0; + virtual uint64_t GetTotalAllocatedBytes() = 0; + // Forces a garbage collection of the given generation. Also used extensively // throughout the VM. virtual HRESULT GarbageCollect(int generation = -1, bool low_memory_p = false, int mode = collection_blocking) = 0; diff --git a/src/gc/gcpriv.h b/src/gc/gcpriv.h index 2288ffee25..a4dd54f066 100644 --- a/src/gc/gcpriv.h +++ b/src/gc/gcpriv.h @@ -3006,12 +3006,18 @@ public: size_t etw_allocation_running_amount[2]; PER_HEAP + uint64_t total_alloc_bytes_soh; + + PER_HEAP + uint64_t total_alloc_bytes_loh; + + PER_HEAP int gc_policy; //sweep, compact, expand #ifdef MULTIPLE_HEAPS PER_HEAP_ISOLATED bool gc_thread_no_affinitize_p; - + PER_HEAP_ISOLATED GCEvent gc_start_event; diff --git a/src/vm/comutilnative.cpp b/src/vm/comutilnative.cpp index 46cdbad829..d745c9717f 100644 --- a/src/vm/comutilnative.cpp +++ b/src/vm/comutilnative.cpp @@ -34,6 +34,7 @@ #include "typestring.h" #include "sha1.h" #include "finalizerthread.h" +#include "threadsuspend.h" #ifdef FEATURE_COMINTEROP #include "comcallablewrapper.h" @@ -1288,6 +1289,46 @@ FCIMPL3(Object*, GCInterface::AllocateNewArray, void* arrayTypeHandle, INT32 len } FCIMPLEND + +FCIMPL1(INT64, GCInterface::GetTotalAllocatedBytes, CLR_BOOL precise) +{ + FCALL_CONTRACT; + + if (!precise) + { + // NOTE: we do not want to make imprecise flavor too slow. + // As it could be noticed we read 64bit values that may be concurrently updated. + // Such reads are not guaranteed to be atomic on 32bit and in rare cases we may see torn values resulting in outlier results. + // That would be extremely rare and in a context of imprecise helper is not worth additional synchronization. 
+ uint64_t unused_bytes = Thread::dead_threads_non_alloc_bytes; + return GCHeapUtilities::GetGCHeap()->GetTotalAllocatedBytes() - unused_bytes; + } + + INT64 allocated; + + HELPER_METHOD_FRAME_BEGIN_RET_0(); + + // We need to suspend/restart the EE to get each thread's + // non-allocated memory from their allocation contexts + + ThreadSuspend::SuspendEE(ThreadSuspend::SUSPEND_OTHER); + + allocated = GCHeapUtilities::GetGCHeap()->GetTotalAllocatedBytes() - Thread::dead_threads_non_alloc_bytes; + + for (Thread *pThread = ThreadStore::GetThreadList(NULL); pThread; pThread = ThreadStore::GetThreadList(pThread)) + { + gc_alloc_context* ac = pThread->GetAllocContext(); + allocated -= ac->alloc_limit - ac->alloc_ptr; + } + + ThreadSuspend::RestartEE(FALSE, TRUE); + + HELPER_METHOD_FRAME_END(); + + return allocated; +} +FCIMPLEND; + #ifdef FEATURE_BASICFREEZE /*===============================RegisterFrozenSegment=============================== diff --git a/src/vm/comutilnative.h b/src/vm/comutilnative.h index 24cb85e323..46ea7ff06f 100644 --- a/src/vm/comutilnative.h +++ b/src/vm/comutilnative.h @@ -138,7 +138,8 @@ public: static FCDECL1(void, ReRegisterForFinalize, Object *obj); static FCDECL2(int, CollectionCount, INT32 generation, INT32 getSpecialGCCount); - static FCDECL0(INT64, GetAllocatedBytesForCurrentThread); + static FCDECL0(INT64, GetAllocatedBytesForCurrentThread); + static FCDECL1(INT64, GetTotalAllocatedBytes, CLR_BOOL precise); static FCDECL3(Object*, AllocateNewArray, void* elementTypeHandle, INT32 length, CLR_BOOL zeroingOptional); diff --git a/src/vm/ecalllist.h b/src/vm/ecalllist.h index 87bb8bd612..89c05a5c7e 100644 --- a/src/vm/ecalllist.h +++ b/src/vm/ecalllist.h @@ -779,7 +779,8 @@ FCFuncStart(gGCInterfaceFuncs) FCFuncElement("_SuppressFinalize", GCInterface::SuppressFinalize) FCFuncElement("_ReRegisterForFinalize", GCInterface::ReRegisterForFinalize) - FCFuncElement("_GetAllocatedBytesForCurrentThread", 
GCInterface::GetAllocatedBytesForCurrentThread) + FCFuncElement("GetAllocatedBytesForCurrentThread", GCInterface::GetAllocatedBytesForCurrentThread) + FCFuncElement("GetTotalAllocatedBytes", GCInterface::GetTotalAllocatedBytes) FCFuncElement("AllocateNewArray", GCInterface::AllocateNewArray) diff --git a/src/vm/threads.cpp b/src/vm/threads.cpp index daadda8944..715b2223b9 100644 --- a/src/vm/threads.cpp +++ b/src/vm/threads.cpp @@ -54,7 +54,7 @@ #include "eventpipebuffermanager.h" #endif // FEATURE_PERFTRACING - +uint64_t Thread::dead_threads_non_alloc_bytes = 0; SPTR_IMPL(ThreadStore, ThreadStore, s_pThreadStore); CONTEXT *ThreadStore::s_pOSContext = NULL; @@ -2903,6 +2903,9 @@ void Thread::OnThreadTerminate(BOOL holdingLock) if (ThisThreadID == CurrentThreadID) { GCX_COOP(); + // GetTotalAllocatedBytes reads dead_threads_non_alloc_bytes, but will suspend EE, being in COOP mode we cannot race with that + // however, there could be other threads terminating and doing the same Add. + FastInterlockExchangeAddLong((LONG64*)&dead_threads_non_alloc_bytes, m_alloc_context.alloc_limit - m_alloc_context.alloc_ptr); GCHeapUtilities::GetGCHeap()->FixAllocContext(&m_alloc_context, NULL, NULL); m_alloc_context.init(); } @@ -2960,6 +2963,7 @@ void Thread::OnThreadTerminate(BOOL holdingLock) { // We must be holding the ThreadStore lock in order to clean up alloc context. // We should never call FixAllocContext during GC. 
+ dead_threads_non_alloc_bytes += m_alloc_context.alloc_limit - m_alloc_context.alloc_ptr; GCHeapUtilities::GetGCHeap()->FixAllocContext(&m_alloc_context, NULL, NULL); m_alloc_context.init(); } diff --git a/src/vm/threads.h b/src/vm/threads.h index 66d0672d56..68a66c3284 100644 --- a/src/vm/threads.h +++ b/src/vm/threads.h @@ -5010,6 +5010,9 @@ public: private: OBJECTHANDLE m_DeserializationTracker; + +public: + static uint64_t dead_threads_non_alloc_bytes; }; // End of class Thread diff --git a/tests/src/GC/API/GC/GetTotalAllocatedBytes.cs b/tests/src/GC/API/GC/GetTotalAllocatedBytes.cs new file mode 100644 index 0000000000..e35bb41ca0 --- /dev/null +++ b/tests/src/GC/API/GC/GetTotalAllocatedBytes.cs @@ -0,0 +1,167 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// Tests GC.GetTotalAllocatedBytes() + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Reflection; +using System.Threading; +using System.Threading.Tasks; + +public class Test +{ + static Random Rand = new Random(); + static volatile object s_stash; // static volatile variable to keep the jit from eliding allocations or anything. + + delegate long GetTotalAllocatedBytesDelegate(bool precise); + static GetTotalAllocatedBytesDelegate GetTotalAllocatedBytes = Get_GetTotalAllocatedBytesDelegate(); + + private static GetTotalAllocatedBytesDelegate Get_GetTotalAllocatedBytesDelegate() + { + const string name = "GetTotalAllocatedBytes"; + var typeInfo = typeof(GC).GetTypeInfo(); + var method = typeInfo.GetMethod(name, BindingFlags.Static | BindingFlags.Public | BindingFlags.NonPublic); + GetTotalAllocatedBytesDelegate del = (GetTotalAllocatedBytesDelegate)method.CreateDelegate(typeof(GetTotalAllocatedBytesDelegate)); + // Prime the delegate to ensure it's been called some. 
+ del(true); + del(false); + + return del; + } + + private static long CallGetTotalAllocatedBytes(long previous, out long differenceBetweenPreciseAndImprecise) + { + long precise = GetTotalAllocatedBytes(true); + long imprecise = GetTotalAllocatedBytes(false); + + if (precise <= 0) + { + throw new Exception($"Bytes allocated is not positive, this is unlikely. precise = {precise}"); + } + + if (imprecise < precise) + { + throw new Exception($"Imprecise total bytes allocated less than precise, imprecise is required to be a conservative estimate (that estimates high). imprecise = {imprecise}, precise = {precise}"); + } + + if (previous > precise) + { + throw new Exception($"Expected more memory to be allocated. previous = {previous}, precise = {precise}, difference = {previous - precise}"); + } + + differenceBetweenPreciseAndImprecise = imprecise - precise; + return precise; + } + + private static long CallGetTotalAllocatedBytes(long previous) + { + long differenceBetweenPreciseAndImprecise; + previous = CallGetTotalAllocatedBytes(previous, out differenceBetweenPreciseAndImprecise); + s_stash = new byte[differenceBetweenPreciseAndImprecise]; + previous = CallGetTotalAllocatedBytes(previous, out differenceBetweenPreciseAndImprecise); + return previous; + } + + public static void TestSingleThreaded() + { + long previous = 0; + for (int i = 0; i < 1000; ++i) + { + s_stash = new byte[1234]; + previous = CallGetTotalAllocatedBytes(previous); + } + } + + public static void TestSingleThreadedLOH() + { + long previous = 0; + for (int i = 0; i < 1000; ++i) + { + s_stash = new byte[123456]; + previous = CallGetTotalAllocatedBytes(previous); + } + } + + public static void TestAnotherThread() + { + bool running = true; + Task tsk = null; + + try + { + object lck = new object(); + + tsk = Task.Run(() => { + while (running) + { + Thread thd = new Thread(() => { + lock (lck) + { + s_stash = new byte[1234]; + } + }); + + thd.Start(); + thd.Join(); + } + }); + + long previous = 0; + 
for (int i = 0; i < 1000; ++i) + { + lock (lck) + { + previous = CallGetTotalAllocatedBytes(previous); + } + + Thread.Sleep(1); + } + } + finally + { + running = false; + tsk?.Wait(1000); + } + } + + public static void TestLohSohConcurrently() + { + List<Thread> threads = new List<Thread>(); + ManualResetEventSlim me = new ManualResetEventSlim(); + int threadNum = Environment.ProcessorCount + Environment.ProcessorCount / 2; + for (int i = 0; i < threadNum; i++) + { + Thread thr = new Thread(() => + { + me.Wait(); + long previous = 0; + for (int i = 0; i < 2; ++i) + { + s_stash = new byte[123456]; + previous = CallGetTotalAllocatedBytes(previous); + s_stash = new byte[1234]; + previous = CallGetTotalAllocatedBytes(previous); + } + }); + + thr.Start(); + threads.Add(thr); + } + + me.Set(); + + foreach (var thr in threads) + thr.Join(); + } + + public static int Main() + { + TestSingleThreaded(); + TestSingleThreadedLOH(); + TestAnotherThread(); + TestLohSohConcurrently(); + return 100; + } +} diff --git a/tests/src/GC/API/GC/GetTotalAllocatedBytes.csproj b/tests/src/GC/API/GC/GetTotalAllocatedBytes.csproj new file mode 100644 index 0000000000..dd349ba306 --- /dev/null +++ b/tests/src/GC/API/GC/GetTotalAllocatedBytes.csproj @@ -0,0 +1,39 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" /> + <PropertyGroup> + <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> + <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> + <AssemblyName>$(MSBuildProjectName)</AssemblyName> + <SchemaVersion>2.0</SchemaVersion> + <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid> + <OutputType>Exe</OutputType> + <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids> + 
<SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir> + <HeapVerifyIncompatible Condition="'$(Platform)' == 'arm'">true</HeapVerifyIncompatible> + + </PropertyGroup> + <!-- Default configurations to help VS understand the configurations --> + <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup> + <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' "></PropertyGroup> + <ItemGroup> + <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies"> + <Visible>False</Visible> + </CodeAnalysisDependentAssemblyPaths> + </ItemGroup> + <PropertyGroup> + <!-- Set to 'Full' if the Debug? column is marked in the spreadsheet. Leave blank otherwise. --> + <DebugType>PdbOnly</DebugType> + <NoLogo>True</NoLogo> + <DefineConstants>$(DefineConstants);DESKTOP</DefineConstants> + <GCStressIncompatible>true</GCStressIncompatible> + </PropertyGroup> + <ItemGroup> + <Compile Include="GetTotalAllocatedBytes.cs" /> + </ItemGroup> + <ItemGroup> + <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" /> + </ItemGroup> + <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" /> + <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup> +</Project> |