diff options
-rw-r--r-- | CMakeLists.txt | 3 | ||||
-rw-r--r-- | src/gc/env/gcenv.base.h | 6 | ||||
-rw-r--r-- | src/gc/gc.cpp | 346 | ||||
-rw-r--r-- | src/gc/gcpriv.h | 8 | ||||
-rw-r--r-- | src/gc/sample/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/gc/sample/gcenv.ee.cpp | 12 | ||||
-rw-r--r-- | src/gc/sample/gcenv.h | 6 | ||||
-rw-r--r-- | src/gc/softwarewritewatch.cpp | 243 | ||||
-rw-r--r-- | src/gc/softwarewritewatch.h | 339 | ||||
-rw-r--r-- | src/inc/stdmacros.h | 11 | ||||
-rw-r--r-- | src/vm/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/vm/amd64/JitHelpers_Fast.asm | 84 | ||||
-rw-r--r-- | src/vm/amd64/JitHelpers_FastWriteBarriers.asm | 228 | ||||
-rw-r--r-- | src/vm/amd64/JitHelpers_Slow.asm | 18 | ||||
-rw-r--r-- | src/vm/amd64/jithelpers_fast.S | 160 | ||||
-rw-r--r-- | src/vm/amd64/jithelpers_fastwritebarriers.S | 247 | ||||
-rw-r--r-- | src/vm/amd64/jithelpers_slow.S | 15 | ||||
-rw-r--r-- | src/vm/amd64/jitinterfaceamd64.cpp | 455 | ||||
-rw-r--r-- | src/vm/arm/stubs.cpp | 11 | ||||
-rw-r--r-- | src/vm/arm64/stubs.cpp | 4 | ||||
-rw-r--r-- | src/vm/gcenv.h | 2 | ||||
-rw-r--r-- | src/vm/gchelpers.cpp | 31 | ||||
-rw-r--r-- | src/vm/gchelpers.h | 4 | ||||
-rw-r--r-- | src/vm/i386/jitinterfacex86.cpp | 21 | ||||
-rw-r--r-- | src/vm/jitinterface.h | 43 | ||||
-rwxr-xr-x | tests/runtest.sh | 20 |
26 files changed, 1814 insertions, 505 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 9c65bfa625..02694a3cfe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -859,6 +859,9 @@ if(CLR_CMAKE_PLATFORM_UNIX_TARGET_AMD64) add_definitions(-DFEATURE_UNIX_AMD64_STRUCT_PASSING_ITF) endif (CLR_CMAKE_PLATFORM_UNIX_TARGET_AMD64) add_definitions(-DFEATURE_USE_ASM_GC_WRITE_BARRIERS) +if(CLR_CMAKE_PLATFORM_ARCH_AMD64 AND NOT WIN32) + add_definitions(-DFEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP) +endif(CLR_CMAKE_PLATFORM_ARCH_AMD64 AND NOT WIN32) add_definitions(-DFEATURE_VERSIONING) if(WIN32) add_definitions(-DFEATURE_VERSIONING_LOG) diff --git a/src/gc/env/gcenv.base.h b/src/gc/env/gcenv.base.h index f3317fc79e..96d6917696 100644 --- a/src/gc/env/gcenv.base.h +++ b/src/gc/env/gcenv.base.h @@ -244,6 +244,8 @@ typedef uintptr_t TADDR; extern type var #define GVAL_IMPL(type, var) \ type var +#define GVAL_IMPL_INIT(type, var, init) \ + type var = init #define GPTR_DECL(type, var) \ extern type* var @@ -543,8 +545,8 @@ void LogSpewAlways(const char *fmt, ...); // ----------------------------------------------------------------------------------------------------------- -void StompWriteBarrierEphemeral(); -void StompWriteBarrierResize(bool bReqUpperBoundsCheck); +void StompWriteBarrierEphemeral(bool isRuntimeSuspended); +void StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck); class CLRConfig { diff --git a/src/gc/gc.cpp b/src/gc/gc.cpp index d039947518..a98fa953f2 100644 --- a/src/gc/gc.cpp +++ b/src/gc/gc.cpp @@ -78,6 +78,10 @@ BOOL bgc_heap_walk_for_etw_p = FALSE; int compact_ratio = 0; #endif //GC_CONFIG_DRIVEN +#if defined(FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP) && defined(NO_WRITE_BARRIER) +#error Software write watch requires write barriers. +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP && NO_WRITE_BARRIER + // See comments in reset_memory. 
BOOL reset_mm_p = TRUE; @@ -597,7 +601,8 @@ enum gc_join_stage gc_join_after_commit_soh_no_gc = 35, gc_join_expand_loh_no_gc = 36, gc_join_final_no_gc = 37, - gc_join_max = 38 + gc_join_disable_software_write_watch = 38, + gc_join_max = 39 }; enum gc_join_flavor @@ -1435,19 +1440,21 @@ void reset_memory (uint8_t* o, size_t sizeo); #ifdef WRITE_WATCH -static bool virtual_alloc_write_watch = false; +#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +static bool virtual_alloc_hardware_write_watch = false; +#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP -static bool write_watch_capability = false; +static bool hardware_write_watch_capability = false; #ifndef DACCESS_COMPILE //check if the write watch APIs are supported. -void write_watch_api_supported() +void hardware_write_watch_api_supported() { if (GCToOSInterface::SupportsWriteWatch()) { - write_watch_capability = true; + hardware_write_watch_capability = true; dprintf (2, ("WriteWatch supported")); } else @@ -1458,9 +1465,23 @@ void write_watch_api_supported() #endif //!DACCESS_COMPILE -inline bool can_use_write_watch() +inline bool can_use_hardware_write_watch() +{ + return hardware_write_watch_capability; +} + +inline bool can_use_write_watch_for_gc_heap() +{ +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + return true; +#else // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + return can_use_hardware_write_watch(); +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +} + +inline bool can_use_write_watch_for_card_table() { - return write_watch_capability; + return can_use_hardware_write_watch(); } #else @@ -4255,7 +4276,13 @@ void* virtual_alloc (size_t size) } } - uint32_t flags = virtual_alloc_write_watch ? 
VirtualReserveFlags::WriteWatch : VirtualReserveFlags::None; + uint32_t flags = VirtualReserveFlags::None; +#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + if (virtual_alloc_hardware_write_watch) + { + flags = VirtualReserveFlags::WriteWatch; + } +#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP void* prgmem = GCToOSInterface::VirtualReserve (0, requested_size, card_size * card_word_width, flags); void *aligned_mem = prgmem; @@ -6490,7 +6517,7 @@ uint32_t* translate_card_bundle_table (uint32_t* cb) void gc_heap::enable_card_bundles () { - if (can_use_write_watch() && (!card_bundles_enabled())) + if (can_use_write_watch_for_card_table() && (!card_bundles_enabled())) { dprintf (3, ("Enabling card bundles")); //set all of the card bundles @@ -6826,13 +6853,21 @@ void release_card_table (uint32_t* c_table) destroy_card_table (c_table); // sever the link from the parent if (&g_card_table[card_word (gcard_of(g_lowest_address))] == c_table) + { g_card_table = 0; - uint32_t* p_table = &g_card_table[card_word (gcard_of(g_lowest_address))]; - if (p_table) +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + SoftwareWriteWatch::StaticClose(); +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + } + else { - while (p_table && (card_table_next (p_table) != c_table)) - p_table = card_table_next (p_table); - card_table_next (p_table) = 0; + uint32_t* p_table = &g_card_table[card_word (gcard_of(g_lowest_address))]; + if (p_table) + { + while (p_table && (card_table_next (p_table) != c_table)) + p_table = card_table_next (p_table); + card_table_next (p_table) = 0; + } } } } @@ -6866,13 +6901,24 @@ uint32_t* gc_heap::make_card_table (uint8_t* start, uint8_t* end) size_t cb = 0; #ifdef CARD_BUNDLE - if (can_use_write_watch()) + if (can_use_write_watch_for_card_table()) { virtual_reserve_flags |= VirtualReserveFlags::WriteWatch; cb = size_card_bundle_of (g_lowest_address, g_highest_address); } #endif //CARD_BUNDLE + size_t wws = 0; +#ifdef 
FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + size_t sw_ww_table_offset = 0; + if (gc_can_use_concurrent) + { + size_t sw_ww_size_before_table = sizeof(card_table_info) + cs + bs + cb; + sw_ww_table_offset = SoftwareWriteWatch::GetTableStartByteOffset(sw_ww_size_before_table); + wws = sw_ww_table_offset - sw_ww_size_before_table + SoftwareWriteWatch::GetTableByteSize(start, end); + } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + #ifdef GROWABLE_SEG_MAPPING_TABLE size_t st = size_seg_mapping_table_of (g_lowest_address, g_highest_address); #else //GROWABLE_SEG_MAPPING_TABLE @@ -6881,29 +6927,29 @@ uint32_t* gc_heap::make_card_table (uint8_t* start, uint8_t* end) // it is impossible for alloc_size to overflow due bounds on each of // its components. - size_t alloc_size = sizeof (uint8_t)*(bs + cs + cb + ms + st + sizeof (card_table_info)); + size_t alloc_size = sizeof (uint8_t)*(sizeof(card_table_info) + cs + bs + cb + wws + st + ms); size_t alloc_size_aligned = Align (alloc_size, g_SystemInfo.dwAllocationGranularity-1); - uint32_t* ct = (uint32_t*)GCToOSInterface::VirtualReserve (0, alloc_size_aligned, 0, virtual_reserve_flags); + uint8_t* mem = (uint8_t*)GCToOSInterface::VirtualReserve (0, alloc_size_aligned, 0, virtual_reserve_flags); - if (!ct) + if (!mem) return 0; - dprintf (2, ("init - table alloc for %Id bytes: [%Ix, %Ix[", - alloc_size, (size_t)ct, (size_t)((uint8_t*)ct+alloc_size))); + dprintf (2, ("Init - Card table alloc for %Id bytes: [%Ix, %Ix[", + alloc_size, (size_t)mem, (size_t)(mem+alloc_size))); // mark array will be committed separately (per segment). 
size_t commit_size = alloc_size - ms; - if (!GCToOSInterface::VirtualCommit ((uint8_t*)ct, commit_size)) + if (!GCToOSInterface::VirtualCommit (mem, commit_size)) { - dprintf (2, ("Table commit failed")); - GCToOSInterface::VirtualRelease ((uint8_t*)ct, alloc_size_aligned); + dprintf (2, ("Card table commit failed")); + GCToOSInterface::VirtualRelease (mem, alloc_size_aligned); return 0; } // initialize the ref count - ct = (uint32_t*)((uint8_t*)ct+sizeof (card_table_info)); + uint32_t* ct = (uint32_t*)(mem+sizeof (card_table_info)); card_table_refcount (ct) = 0; card_table_lowest_address (ct) = start; card_table_highest_address (ct) = end; @@ -6915,15 +6961,22 @@ uint32_t* gc_heap::make_card_table (uint8_t* start, uint8_t* end) card_table_card_bundle_table (ct) = (uint32_t*)((uint8_t*)card_table_brick_table (ct) + bs); #endif //CARD_BUNDLE +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + if (gc_can_use_concurrent) + { + SoftwareWriteWatch::InitializeUntranslatedTable(mem + sw_ww_table_offset, start); + } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + #ifdef GROWABLE_SEG_MAPPING_TABLE - seg_mapping_table = (seg_mapping*)((uint8_t*)card_table_brick_table (ct) + bs + cb); + seg_mapping_table = (seg_mapping*)((uint8_t*)card_table_brick_table (ct) + bs + cb + wws); seg_mapping_table = (seg_mapping*)((uint8_t*)seg_mapping_table - size_seg_mapping_table_of (0, (align_lower_segment (g_lowest_address)))); #endif //GROWABLE_SEG_MAPPING_TABLE #ifdef MARK_ARRAY if (gc_can_use_concurrent) - card_table_mark_array (ct) = (uint32_t*)((uint8_t*)card_table_brick_table (ct) + bs + cb + st); + card_table_mark_array (ct) = (uint32_t*)((uint8_t*)card_table_brick_table (ct) + bs + cb + wws + st); else card_table_mark_array (ct) = NULL; #endif //MARK_ARRAY @@ -7033,13 +7086,27 @@ int gc_heap::grow_brick_card_tables (uint8_t* start, size_t cb = 0; #ifdef CARD_BUNDLE - if (can_use_write_watch()) + if (can_use_write_watch_for_card_table()) { virtual_reserve_flags = 
VirtualReserveFlags::WriteWatch; cb = size_card_bundle_of (saved_g_lowest_address, saved_g_highest_address); } #endif //CARD_BUNDLE + size_t wws = 0; +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + size_t sw_ww_table_offset = 0; + if (gc_can_use_concurrent) + { + size_t sw_ww_size_before_table = sizeof(card_table_info) + cs + bs + cb; + sw_ww_table_offset = SoftwareWriteWatch::GetTableStartByteOffset(sw_ww_size_before_table); + wws = + sw_ww_table_offset - + sw_ww_size_before_table + + SoftwareWriteWatch::GetTableByteSize(saved_g_lowest_address, saved_g_highest_address); + } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + #ifdef GROWABLE_SEG_MAPPING_TABLE size_t st = size_seg_mapping_table_of (saved_g_lowest_address, saved_g_highest_address); #else //GROWABLE_SEG_MAPPING_TABLE @@ -7048,10 +7115,10 @@ int gc_heap::grow_brick_card_tables (uint8_t* start, // it is impossible for alloc_size to overflow due bounds on each of // its components. - size_t alloc_size = sizeof (uint8_t)*(bs + cs + cb + ms +st + sizeof (card_table_info)); + size_t alloc_size = sizeof (uint8_t)*(sizeof(card_table_info) + cs + bs + cb + wws + st + ms); size_t alloc_size_aligned = Align (alloc_size, g_SystemInfo.dwAllocationGranularity-1); - dprintf (GC_TABLE_LOG, ("brick table: %Id; card table: %Id; mark array: %Id, card bundle: %Id, seg table: %Id", - bs, cs, ms, cb, st)); + dprintf (GC_TABLE_LOG, ("card table: %Id; brick table: %Id; card bundle: %Id; sw ww table: %Id; seg table: %Id; mark array: %Id", + cs, bs, cb, wws, st, ms)); uint8_t* mem = (uint8_t*)GCToOSInterface::VirtualReserve (0, alloc_size_aligned, 0, virtual_reserve_flags); @@ -7104,7 +7171,7 @@ int gc_heap::grow_brick_card_tables (uint8_t* start, #ifdef GROWABLE_SEG_MAPPING_TABLE { - seg_mapping* new_seg_mapping_table = (seg_mapping*)((uint8_t*)card_table_brick_table (ct) + bs + cb); + seg_mapping* new_seg_mapping_table = (seg_mapping*)((uint8_t*)card_table_brick_table (ct) + bs + cb + wws); new_seg_mapping_table 
= (seg_mapping*)((uint8_t*)new_seg_mapping_table - size_seg_mapping_table_of (0, (align_lower_segment (saved_g_lowest_address)))); memcpy(&new_seg_mapping_table[seg_mapping_word_of(g_lowest_address)], @@ -7117,7 +7184,7 @@ int gc_heap::grow_brick_card_tables (uint8_t* start, #ifdef MARK_ARRAY if(gc_can_use_concurrent) - card_table_mark_array (ct) = (uint32_t*)((uint8_t*)card_table_brick_table (ct) + bs + cb + st); + card_table_mark_array (ct) = (uint32_t*)((uint8_t*)card_table_brick_table (ct) + bs + cb + wws + st); else card_table_mark_array (ct) = NULL; #endif //MARK_ARRAY @@ -7155,11 +7222,51 @@ int gc_heap::grow_brick_card_tables (uint8_t* start, } #endif //BACKGROUND_GC - // This passes a bool telling whether we need to switch to the post - // grow version of the write barrier. This test tells us if the new - // segment was allocated at a lower address than the old, requiring - // that we start doing an upper bounds check in the write barrier. - StompWriteBarrierResize(la != saved_g_lowest_address); + { + bool write_barrier_updated = false; +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + if (gc_can_use_concurrent) + { + // The current design of software write watch requires that the runtime is suspended during resize. Suspending + // on resize is preferred because it is a far less frequent operation than GetWriteWatch() / ResetWriteWatch(). + // Suspending here allows copying dirty state from the old table into the new table, and not have to merge old + // table info lazily as done for card tables. + + BOOL is_runtime_suspended = IsSuspendEEThread(); + if (!is_runtime_suspended) + { + suspend_EE(); + } + + SoftwareWriteWatch::SetResizedUntranslatedTable( + mem + sw_ww_table_offset, + saved_g_lowest_address, + saved_g_highest_address); + + // Since the runtime is already suspended, update the write barrier here as well. + // This passes a bool telling whether we need to switch to the post + // grow version of the write barrier. 
This test tells us if the new + // segment was allocated at a lower address than the old, requiring + // that we start doing an upper bounds check in the write barrier. + StompWriteBarrierResize(true, la != saved_g_lowest_address); + write_barrier_updated = true; + + if (!is_runtime_suspended) + { + restart_EE(); + } + } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + + if (!write_barrier_updated) + { + // This passes a bool telling whether we need to switch to the post + // grow version of the write barrier. This test tells us if the new + // segment was allocated at a lower address than the old, requiring + // that we start doing an upper bounds check in the write barrier. + StompWriteBarrierResize(!!IsSuspendEEThread(), la != saved_g_lowest_address); + } + } // We need to make sure that other threads executing checked write barriers // will see the g_card_table update before g_lowest/highest_address updates. @@ -7367,8 +7474,6 @@ void gc_heap::copy_brick_card_table() #else //GROWABLE_SEG_MAPPING_TABLE size_t st = 0; #endif //GROWABLE_SEG_MAPPING_TABLE - assert (!gc_can_use_concurrent || - (((uint8_t*)card_table_card_bundle_table (ct) + size_card_bundle_of (g_lowest_address, g_highest_address) + st) == (uint8_t*)card_table_mark_array (ct))); #endif //MARK_ARRAY && _DEBUG card_bundle_table = translate_card_bundle_table (card_table_card_bundle_table (ct)); assert (&card_bundle_table [card_bundle_word (cardw_card_bundle (card_word (card_of (g_lowest_address))))] == @@ -9279,6 +9384,27 @@ void gc_heap::update_card_table_bundle() } #endif //CARD_BUNDLE +// static +void gc_heap::reset_write_watch_for_gc_heap(void* base_address, size_t region_size) +{ +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + SoftwareWriteWatch::ClearDirty(base_address, region_size); +#else // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + GCToOSInterface::ResetWriteWatch(base_address, region_size); +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +} + +// static +void 
gc_heap::get_write_watch_for_gc_heap(bool reset, void *base_address, size_t region_size, void** dirty_pages, uintptr_t* dirty_page_count_ref, bool is_runtime_suspended) +{ +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + SoftwareWriteWatch::GetDirty(base_address, region_size, dirty_pages, dirty_page_count_ref, reset, is_runtime_suspended); +#else // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + bool success = GCToOSInterface::GetWriteWatch(reset, base_address, region_size, dirty_pages, dirty_page_count_ref); + assert(success); +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +} + const size_t ww_reset_quantum = 128*1024*1024; inline @@ -9302,7 +9428,7 @@ void gc_heap::reset_ww_by_chunk (uint8_t* start_address, size_t total_reset_size next_reset_size = ((remaining_reset_size >= ww_reset_quantum) ? ww_reset_quantum : remaining_reset_size); if (next_reset_size) { - GCToOSInterface::ResetWriteWatch (start_address, next_reset_size); + reset_write_watch_for_gc_heap(start_address, next_reset_size); reset_size += next_reset_size; switch_one_quantum(); @@ -9333,6 +9459,11 @@ void gc_heap::switch_on_reset (BOOL concurrent_p, size_t* current_total_reset_si void gc_heap::reset_write_watch (BOOL concurrent_p) { +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + // Software write watch currently requires the runtime to be suspended during reset. See SoftwareWriteWatch::ClearDirty(). 
+ assert(!concurrent_p); +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + heap_segment* seg = heap_segment_rw (generation_start_segment (generation_of (max_generation))); PREFIX_ASSUME(seg != NULL); @@ -9371,7 +9502,7 @@ void gc_heap::reset_write_watch (BOOL concurrent_p) #endif //TIME_WRITE_WATCH dprintf (3, ("h%d: soh ww: [%Ix(%Id)", heap_number, (size_t)base_address, region_size)); //reset_ww_by_chunk (base_address, region_size); - GCToOSInterface::ResetWriteWatch (base_address, region_size); + reset_write_watch_for_gc_heap(base_address, region_size); #ifdef TIME_WRITE_WATCH unsigned int time_stop = GetCycleCount32(); @@ -9414,7 +9545,7 @@ void gc_heap::reset_write_watch (BOOL concurrent_p) #endif //TIME_WRITE_WATCH dprintf (3, ("h%d: loh ww: [%Ix(%Id)", heap_number, (size_t)base_address, region_size)); //reset_ww_by_chunk (base_address, region_size); - GCToOSInterface::ResetWriteWatch (base_address, region_size); + reset_write_watch_for_gc_heap(base_address, region_size); #ifdef TIME_WRITE_WATCH unsigned int time_stop = GetCycleCount32(); @@ -9514,7 +9645,7 @@ void gc_heap::adjust_ephemeral_limits () (size_t)ephemeral_low, (size_t)ephemeral_high)) // This updates the write barrier helpers with the new info. 
- StompWriteBarrierEphemeral(); + StompWriteBarrierEphemeral(!!IsSuspendEEThread()); } #if defined(TRACE_GC) || defined(GC_CONFIG_DRIVEN) @@ -9636,12 +9767,14 @@ HRESULT gc_heap::initialize_gc (size_t segment_size, HRESULT hres = S_OK; #ifdef WRITE_WATCH - write_watch_api_supported(); + hardware_write_watch_api_supported(); #ifdef BACKGROUND_GC - if (can_use_write_watch () && g_pConfig->GetGCconcurrent()!=0) + if (can_use_write_watch_for_gc_heap() && g_pConfig->GetGCconcurrent() != 0) { gc_can_use_concurrent = true; - virtual_alloc_write_watch = true; +#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + virtual_alloc_hardware_write_watch = true; +#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP } else { @@ -9673,10 +9806,11 @@ HRESULT gc_heap::initialize_gc (size_t segment_size, uint64_t th = (uint64_t)SH_TH_CARD_BUNDLE; #endif //MULTIPLE_HEAPS - if ((can_use_write_watch() && reserved_memory >= th)) + if (can_use_write_watch_for_card_table() && reserved_memory >= th) { settings.card_bundles = TRUE; - } else + } + else { settings.card_bundles = FALSE; } @@ -18428,10 +18562,11 @@ void gc_heap::fix_card_table () continue; } else - { + { break; } } + uint8_t* base_address = align_lower_page (heap_segment_mem (seg)); uint8_t* high_address = align_on_page ( (seg != ephemeral_heap_segment) ? 
@@ -18451,15 +18586,14 @@ void gc_heap::fix_card_table () #ifdef TIME_WRITE_WATCH unsigned int time_start = GetCycleCount32(); #endif //TIME_WRITE_WATCH - bool success = GCToOSInterface::GetWriteWatch(reset_watch_state, base_address, region_size, - (void**)g_addresses, - &bcount); - assert (success); + get_write_watch_for_gc_heap(reset_watch_state, base_address, region_size, + (void**)g_addresses, + &bcount, true); #ifdef TIME_WRITE_WATCH unsigned int time_stop = GetCycleCount32(); tot_cycles += time_stop - time_start; - printf ("GetWriteWatch Duration: %d, total: %d\n", + printf ("get_write_watch_for_gc_heap Duration: %d, total: %d\n", time_stop - time_start, tot_cycles); #endif //TIME_WRITE_WATCH @@ -18483,6 +18617,7 @@ void gc_heap::fix_card_table () } while (bcount >= array_size); seg = heap_segment_next_rw (seg); } + #ifdef BACKGROUND_GC if (settings.concurrent) { @@ -18491,7 +18626,7 @@ void gc_heap::fix_card_table () align_on_page (generation_allocation_start (generation_of (0))); size_t region_size = heap_segment_allocated (ephemeral_heap_segment) - base_address; - GCToOSInterface::ResetWriteWatch (base_address, region_size); + reset_write_watch_for_gc_heap(base_address, region_size); } #endif //BACKGROUND_GC #endif //WRITE_WATCH @@ -24295,6 +24430,15 @@ void gc_heap::gcmemcopy (uint8_t* dest, uint8_t* src, size_t len, BOOL copy_car //dprintf(3,(" Memcopy [%Ix->%Ix, %Ix->%Ix[", (size_t)src, (size_t)dest, (size_t)src+len, (size_t)dest+len)); dprintf(3,(" mc: [%Ix->%Ix, %Ix->%Ix[", (size_t)src, (size_t)dest, (size_t)src+len, (size_t)dest+len)); memcopy (dest - plug_skew, src - plug_skew, (int)len); +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + if (SoftwareWriteWatch::IsEnabledForGCHeap()) + { + // The ranges [src - plug_skew .. src[ and [src + len - plug_skew .. src + len[ are ObjHeaders, which don't have GC + // references, and are not relevant for write watch.
The latter range actually corresponds to the ObjHeader for the + // object at (src + len), so it can be ignored anyway. + SoftwareWriteWatch::SetDirtyRegion(dest, len - plug_skew); + } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP copy_cards_range (dest, src, len, copy_cards_p); } } @@ -25570,6 +25714,28 @@ void gc_heap::background_mark_phase () if (bgc_t_join.joined()) #endif //MULTIPLE_HEAPS { +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + SoftwareWriteWatch::EnableForGCHeap(); + + // Resetting write watch for software write watch is pretty fast, much faster than for hardware write watch. Reset + // can be done while the runtime is suspended or after the runtime is restarted, the preference was to reset while + // the runtime is suspended. The reset for hardware write watch is done after the runtime is restarted below. +#ifdef WRITE_WATCH + concurrent_print_time_delta ("CRWW begin"); + +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + { + g_heaps[i]->reset_write_watch (FALSE); + } +#else + reset_write_watch (FALSE); +#endif //MULTIPLE_HEAPS + + concurrent_print_time_delta ("CRWW"); +#endif //WRITE_WATCH +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + num_sizedrefs = SystemDomain::System()->GetTotalNumSizedRefHandles(); // this c_write is not really necessary because restart_vm @@ -25597,12 +25763,16 @@ void gc_heap::background_mark_phase () { disable_preemptive (current_thread, TRUE); +#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + // When software write watch is enabled, resetting write watch is done while the runtime is suspended above. The + // post-reset call to revisit_written_pages is only necessary for concurrent reset_write_watch, to discard dirtied + // pages during the concurrent reset. 
+ #ifdef WRITE_WATCH concurrent_print_time_delta ("CRWW begin"); #ifdef MULTIPLE_HEAPS - int i; - for (i = 0; i < n_heaps; i++) + for (int i = 0; i < n_heaps; i++) { g_heaps[i]->reset_write_watch (TRUE); } @@ -25614,7 +25784,7 @@ void gc_heap::background_mark_phase () #endif //WRITE_WATCH #ifdef MULTIPLE_HEAPS - for (i = 0; i < n_heaps; i++) + for (int i = 0; i < n_heaps; i++) { g_heaps[i]->revisit_written_pages (TRUE, TRUE); } @@ -25623,9 +25793,10 @@ void gc_heap::background_mark_phase () #endif //MULTIPLE_HEAPS concurrent_print_time_delta ("CRW"); +#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP #ifdef MULTIPLE_HEAPS - for (i = 0; i < n_heaps; i++) + for (int i = 0; i < n_heaps; i++) { g_heaps[i]->current_bgc_state = bgc_mark_handles; } @@ -25818,6 +25989,23 @@ void gc_heap::background_mark_phase () //concurrent_print_time_delta ("nonconcurrent revisit dirtied pages on LOH"); concurrent_print_time_delta ("NRre LOH"); +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +#ifdef MULTIPLE_HEAPS + bgc_t_join.join(this, gc_join_disable_software_write_watch); + if (bgc_t_join.joined()) +#endif // MULTIPLE_HEAPS + { + // The runtime is suspended, and we will be doing a final query of dirty pages, so pause tracking written pages to + // avoid further perf penalty after the runtime is restarted + SoftwareWriteWatch::DisableForGCHeap(); + +#ifdef MULTIPLE_HEAPS + dprintf(3, ("Restarting BGC threads after disabling software write watch")); + bgc_t_join.restart(); +#endif // MULTIPLE_HEAPS + } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + dprintf (2, ("before NR 1st Hov count: %d", bgc_overflow_count)); bgc_overflow_count = 0; @@ -26223,6 +26411,7 @@ void gc_heap::revisit_written_pages (BOOL concurrent_p, BOOL reset_only_p) PREFIX_ASSUME(seg != NULL); bool reset_watch_state = !!concurrent_p; + bool is_runtime_suspended = !concurrent_p; BOOL small_object_segments = TRUE; int align_const = get_alignment_constant (small_object_segments); @@ -26327,18 
+26516,27 @@ void gc_heap::revisit_written_pages (BOOL concurrent_p, BOOL reset_only_p) ptrdiff_t region_size = high_address - base_address; dprintf (3, ("h%d: gw: [%Ix(%Id)", heap_number, (size_t)base_address, (size_t)region_size)); - bool success = GCToOSInterface::GetWriteWatch (reset_watch_state, base_address, region_size, - (void**)background_written_addresses, - &bcount); +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + // When the runtime is not suspended, it's possible for the table to be resized concurrently with the scan + // for dirty pages below. Prevent that by synchronizing with grow_brick_card_tables(). When the runtime is + // suspended, it's ok to scan for dirty pages concurrently from multiple background GC threads for disjoint + // memory regions. + if (!is_runtime_suspended) + { + enter_spin_lock(&gc_lock); + } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + + get_write_watch_for_gc_heap (reset_watch_state, base_address, region_size, + (void**)background_written_addresses, + &bcount, is_runtime_suspended); - //#ifdef _DEBUG - if (!success) +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + if (!is_runtime_suspended) { - printf ("GetWriteWatch Error "); - printf ("Probing pages [%Ix, %Ix[\n", (size_t)base_address, (size_t)high_address); + leave_spin_lock(&gc_lock); } - //#endif - assert (success); +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP if (bcount != 0) { @@ -33396,6 +33594,9 @@ HRESULT GCHeap::Shutdown () { destroy_card_table (ct); g_card_table = 0; +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + SoftwareWriteWatch::StaticClose(); +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP } //destroy all segments on the standby list @@ -33555,7 +33756,7 @@ HRESULT GCHeap::Initialize () WaitForGCEvent->CreateManualEvent(TRUE); - StompWriteBarrierResize(FALSE); + StompWriteBarrierResize(true, false); #ifndef FEATURE_REDHAWK // Redhawk forces relocation a different way #if defined (STRESS_HEAP) && !defined 
(MULTIPLE_HEAPS) @@ -35903,6 +36104,13 @@ GCHeap::SetCardsAfterBulkCopy( Object **StartPoint, size_t len ) updateGCShadow(&StartPoint[i], StartPoint[i]); #endif //WRITE_BARRIER_CHECK && !SERVER_GC +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + if (SoftwareWriteWatch::IsEnabledForGCHeap()) + { + SoftwareWriteWatch::SetDirtyRegion(StartPoint, len); + } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + // If destination is in Gen 0 don't bother if ( #ifdef BACKGROUND_GC diff --git a/src/gc/gcpriv.h b/src/gc/gcpriv.h index 5f41f5e9ce..bfb6f8146d 100644 --- a/src/gc/gcpriv.h +++ b/src/gc/gcpriv.h @@ -106,7 +106,7 @@ inline void FATAL_GC_ERROR() #define MARK_ARRAY //Mark bit in an array #endif //BACKGROUND_GC -#if defined(BACKGROUND_GC) || defined (CARD_BUNDLE) +#if defined(BACKGROUND_GC) || defined (CARD_BUNDLE) || defined(FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP) #define WRITE_WATCH //Write Watch feature #endif //BACKGROUND_GC || CARD_BUNDLE @@ -1648,6 +1648,12 @@ protected: void rearrange_large_heap_segments(); PER_HEAP void rearrange_heap_segments(BOOL compacting); + + PER_HEAP_ISOLATED + void reset_write_watch_for_gc_heap(void* base_address, size_t region_size); + PER_HEAP_ISOLATED + void get_write_watch_for_gc_heap(bool reset, void *base_address, size_t region_size, void** dirty_pages, uintptr_t* dirty_page_count_ref, bool is_runtime_suspended); + PER_HEAP void switch_one_quantum(); PER_HEAP diff --git a/src/gc/sample/CMakeLists.txt b/src/gc/sample/CMakeLists.txt index 45cdbb2b9d..f0ba28edc7 100644 --- a/src/gc/sample/CMakeLists.txt +++ b/src/gc/sample/CMakeLists.txt @@ -17,6 +17,7 @@ set(SOURCES ../handletablecore.cpp ../handletablescan.cpp ../objecthandle.cpp + ../softwarewritewatch.cpp ) if(WIN32) diff --git a/src/gc/sample/gcenv.ee.cpp b/src/gc/sample/gcenv.ee.cpp index 7180165f86..3205900c91 100644 --- a/src/gc/sample/gcenv.ee.cpp +++ b/src/gc/sample/gcenv.ee.cpp @@ -241,11 +241,19 @@ bool IsGCSpecialThread() return false; } -void 
StompWriteBarrierEphemeral() +void StompWriteBarrierEphemeral(bool /* isRuntimeSuspended */) { } -void StompWriteBarrierResize(bool /*bReqUpperBoundsCheck*/) +void StompWriteBarrierResize(bool /* isRuntimeSuspended */, bool /*bReqUpperBoundsCheck*/) +{ +} + +void SwitchToWriteWatchBarrier() +{ +} + +void SwitchToNonWriteWatchBarrier() { } diff --git a/src/gc/sample/gcenv.h b/src/gc/sample/gcenv.h index c4e8015392..1798cde7f6 100644 --- a/src/gc/sample/gcenv.h +++ b/src/gc/sample/gcenv.h @@ -79,6 +79,12 @@ public: Thread * GetThread(); +inline BOOL IsSuspendEEThread() +{ + // TODO: Implement + return false; +} + class ThreadStore { public: diff --git a/src/gc/softwarewritewatch.cpp b/src/gc/softwarewritewatch.cpp new file mode 100644 index 0000000000..bbd37ef94b --- /dev/null +++ b/src/gc/softwarewritewatch.cpp @@ -0,0 +1,243 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +#include "common.h" +#include "softwarewritewatch.h" + +#include "../inc/static_assert.h" +#include "gcenv.h" + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +#ifndef DACCESS_COMPILE + +static_assert_no_msg((static_cast<size_t>(1) << SOFTWARE_WRITE_WATCH_AddressToTableByteIndexShift) == OS_PAGE_SIZE); + +extern "C" +{ + uint8_t *g_sw_ww_table = nullptr; + bool g_sw_ww_enabled_for_gc_heap = false; +} + +void SoftwareWriteWatch::StaticClose() +{ + if (GetTable() == nullptr) + { + return; + } + + g_sw_ww_enabled_for_gc_heap = false; + g_sw_ww_table = nullptr; +} + +bool SoftwareWriteWatch::GetDirtyFromBlock( + uint8_t *block, + uint8_t *firstPageAddressInBlock, + size_t startByteIndex, + size_t endByteIndex, + void **dirtyPages, + size_t *dirtyPageIndexRef, + size_t dirtyPageCount, + bool clearDirty) +{ + assert(block != nullptr); + assert(ALIGN_DOWN(block, sizeof(size_t)) == block); + assert(firstPageAddressInBlock == reinterpret_cast<uint8_t *>(GetPageAddress(block - GetTable()))); + assert(startByteIndex < endByteIndex); + assert(endByteIndex <= sizeof(size_t)); + assert(dirtyPages != nullptr); + assert(dirtyPageIndexRef != nullptr); + + size_t &dirtyPageIndex = *dirtyPageIndexRef; + assert(dirtyPageIndex < dirtyPageCount); + + size_t dirtyBytes = *reinterpret_cast<size_t *>(block); + if (dirtyBytes == 0) + { + return true; + } + + if (startByteIndex != 0) + { + size_t numLowBitsToClear = startByteIndex * 8; + dirtyBytes >>= numLowBitsToClear; + dirtyBytes <<= numLowBitsToClear; + } + if (endByteIndex != sizeof(size_t)) + { + size_t numHighBitsToClear = (sizeof(size_t) - endByteIndex) * 8; + dirtyBytes <<= numHighBitsToClear; + dirtyBytes >>= numHighBitsToClear; + } + + while (dirtyBytes != 0) + { + DWORD bitIndex; + static_assert_no_msg(sizeof(size_t) <= 8); + if (sizeof(size_t) == 8) + { + BitScanForward64(&bitIndex, static_cast<DWORD64>(dirtyBytes)); + } + else + { + BitScanForward(&bitIndex, static_cast<DWORD>(dirtyBytes)); + } + + // Each byte is 
only ever set to 0 or 0xff + assert(bitIndex % 8 == 0); + size_t byteMask = static_cast<size_t>(0xff) << bitIndex; + assert((dirtyBytes & byteMask) == byteMask); + dirtyBytes ^= byteMask; + + DWORD byteIndex = bitIndex / 8; + if (clearDirty) + { + // Clear only the bytes for which pages are recorded as dirty + block[byteIndex] = 0; + } + + void *pageAddress = firstPageAddressInBlock + byteIndex * OS_PAGE_SIZE; + assert(pageAddress >= GetHeapStartAddress()); + assert(pageAddress < GetHeapEndAddress()); + assert(dirtyPageIndex < dirtyPageCount); + dirtyPages[dirtyPageIndex] = pageAddress; + ++dirtyPageIndex; + if (dirtyPageIndex == dirtyPageCount) + { + return false; + } + } + return true; +} + +void SoftwareWriteWatch::GetDirty( + void *baseAddress, + size_t regionByteSize, + void **dirtyPages, + size_t *dirtyPageCountRef, + bool clearDirty, + bool isRuntimeSuspended) +{ + VerifyCreated(); + VerifyMemoryRegion(baseAddress, regionByteSize); + assert(dirtyPages != nullptr); + assert(dirtyPageCountRef != nullptr); + + size_t dirtyPageCount = *dirtyPageCountRef; + if (dirtyPageCount == 0) + { + return; + } + + if (!isRuntimeSuspended) + { + // When a page is marked as dirty, a memory barrier is not issued after the write most of the time. Issue a memory + // barrier on all active threads of the process now to make recent changes to dirty state visible to this thread. 
+ GCToOSInterface::FlushProcessWriteBuffers(); + } + + uint8_t *tableRegionStart; + size_t tableRegionByteSize; + TranslateToTableRegion(baseAddress, regionByteSize, &tableRegionStart, &tableRegionByteSize); + uint8_t *tableRegionEnd = tableRegionStart + tableRegionByteSize; + + uint8_t *blockStart = ALIGN_DOWN(tableRegionStart, sizeof(size_t)); + assert(blockStart >= GetUntranslatedTable()); + uint8_t *blockEnd = ALIGN_UP(tableRegionEnd, sizeof(size_t)); + assert(blockEnd <= GetUntranslatedTableEnd()); + uint8_t *fullBlockEnd = ALIGN_DOWN(tableRegionEnd, sizeof(size_t)); + + size_t dirtyPageIndex = 0; + uint8_t *currentBlock = blockStart; + uint8_t *firstPageAddressInCurrentBlock = reinterpret_cast<uint8_t *>(GetPageAddress(currentBlock - GetTable())); + + do + { + if (blockStart == fullBlockEnd) + { + if (GetDirtyFromBlock( + currentBlock, + firstPageAddressInCurrentBlock, + tableRegionStart - blockStart, + tableRegionEnd - fullBlockEnd, + dirtyPages, + &dirtyPageIndex, + dirtyPageCount, + clearDirty)) + { + *dirtyPageCountRef = dirtyPageIndex; + } + break; + } + + if (tableRegionStart != blockStart) + { + if (!GetDirtyFromBlock( + currentBlock, + firstPageAddressInCurrentBlock, + tableRegionStart - blockStart, + sizeof(size_t), + dirtyPages, + &dirtyPageIndex, + dirtyPageCount, + clearDirty)) + { + break; + } + currentBlock += sizeof(size_t); + firstPageAddressInCurrentBlock += sizeof(size_t) * OS_PAGE_SIZE; + } + + while (currentBlock < fullBlockEnd) + { + if (!GetDirtyFromBlock( + currentBlock, + firstPageAddressInCurrentBlock, + 0, + sizeof(size_t), + dirtyPages, + &dirtyPageIndex, + dirtyPageCount, + clearDirty)) + { + break; + } + currentBlock += sizeof(size_t); + firstPageAddressInCurrentBlock += sizeof(size_t) * OS_PAGE_SIZE; + } + if (currentBlock < fullBlockEnd) + { + break; + } + + if (tableRegionEnd != fullBlockEnd && + !GetDirtyFromBlock( + currentBlock, + firstPageAddressInCurrentBlock, + 0, + tableRegionEnd - fullBlockEnd, + dirtyPages, + 
&dirtyPageIndex, + dirtyPageCount, + clearDirty)) + { + break; + } + + *dirtyPageCountRef = dirtyPageIndex; + } while (false); + + if (!isRuntimeSuspended && clearDirty && dirtyPageIndex != 0) + { + // When dirtying a page, the dirty state of the page is first checked to see if the page is already dirty. If already + // dirty, the write to mark it as dirty is skipped. So, when the dirty state of a page is cleared, we need to make sure + // the cleared state is visible to other threads that may dirty the page, before marking through objects in the page, so + // that the GC will not miss marking through dirtied objects in the page. Issue a memory barrier on all active threads + // of the process now. + MemoryBarrier(); // flush writes from this thread first to guarantee ordering + GCToOSInterface::FlushProcessWriteBuffers(); + } +} + +#endif // !DACCESS_COMPILE +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP diff --git a/src/gc/softwarewritewatch.h b/src/gc/softwarewritewatch.h new file mode 100644 index 0000000000..3c8491cecb --- /dev/null +++ b/src/gc/softwarewritewatch.h @@ -0,0 +1,339 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +#ifndef __SOFTWARE_WRITE_WATCH_H__ +#define __SOFTWARE_WRITE_WATCH_H__ + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +#ifndef DACCESS_COMPILE + +extern void SwitchToWriteWatchBarrier(bool isRuntimeSuspended); +extern void SwitchToNonWriteWatchBarrier(bool isRuntimeSuspended); + +#define SOFTWARE_WRITE_WATCH_AddressToTableByteIndexShift 0xc + +extern "C" +{ + // Table containing the dirty state. This table is translated to exclude the lowest address it represents, see + // TranslateTableToExcludeHeapStartAddress. + extern uint8_t *g_sw_ww_table; + + // Write watch may be disabled when it is not needed (between GCs for instance). 
This indicates whether it is enabled. + extern bool g_sw_ww_enabled_for_gc_heap; + + extern uint8_t *g_lowest_address; // start address of the GC heap + extern uint8_t *g_highest_address; // end address of the GC heap +} + +class SoftwareWriteWatch +{ +private: + // The granularity of dirty state in the table is one page. Dirtiness is tracked per byte of the table so that + // synchronization is not required when changing the dirty state. Shifting-right an address by the following value yields + // the byte index of the address into the write watch table. For instance, + // GetTable()[address >> AddressToTableByteIndexShift] is the byte that represents the region of memory for 'address'. + static const uint8_t AddressToTableByteIndexShift = SOFTWARE_WRITE_WATCH_AddressToTableByteIndexShift; + +private: + static void VerifyCreated(); + static void VerifyMemoryRegion(void *baseAddress, size_t regionByteSize); + static void VerifyMemoryRegion(void *baseAddress, size_t regionByteSize, void *heapStartAddress, void *heapEndAddress); + +public: + static uint8_t *GetTable(); +private: + static uint8_t *GetUntranslatedTable(); + static uint8_t *GetUntranslatedTable(uint8_t *table, void *heapStartAddress); + static uint8_t *GetUntranslatedTableEnd(); + static uint8_t *GetUntranslatedTableEnd(uint8_t *table, void *heapEndAddress); +public: + static void InitializeUntranslatedTable(uint8_t *untranslatedTable, void *heapStartAddress); +private: + static void SetUntranslatedTable(uint8_t *untranslatedTable, void *heapStartAddress); +public: + static void SetResizedUntranslatedTable(uint8_t *untranslatedTable, void *heapStartAddress, void *heapEndAddress); + static bool IsEnabledForGCHeap(); + static void EnableForGCHeap(); + static void DisableForGCHeap(); +private: + static void *GetHeapStartAddress(); + static void *GetHeapEndAddress(); + +public: + static void StaticClose(); + +private: + static size_t GetTableByteIndex(void *address); + static void *GetPageAddress(size_t 
tableByteIndex); +public: + static size_t GetTableByteSize(void *heapStartAddress, void *heapEndAddress); + static size_t GetTableStartByteOffset(size_t byteSizeBeforeTable); +private: + static uint8_t *TranslateTableToExcludeHeapStartAddress(uint8_t *table, void *heapStartAddress); + static void TranslateToTableRegion(void *baseAddress, size_t regionByteSize, uint8_t **tableBaseAddressRef, size_t *tableRegionByteSizeRef); + +public: + static void ClearDirty(void *baseAddress, size_t regionByteSize); + static void SetDirty(void *address, size_t writeByteSize); + static void SetDirtyRegion(void *baseAddress, size_t regionByteSize); +private: + static bool GetDirtyFromBlock(uint8_t *block, uint8_t *firstPageAddressInBlock, size_t startByteIndex, size_t endByteIndex, void **dirtyPages, size_t *dirtyPageIndexRef, size_t dirtyPageCount, bool clearDirty); +public: + static void GetDirty(void *baseAddress, size_t regionByteSize, void **dirtyPages, size_t *dirtyPageCountRef, bool clearDirty, bool isRuntimeSuspended); +}; + +inline void SoftwareWriteWatch::VerifyCreated() +{ + assert(GetTable() != nullptr); + assert(GetHeapStartAddress() != nullptr); + assert(GetHeapEndAddress() != nullptr); + assert(GetHeapStartAddress() < GetHeapEndAddress()); +} + +inline void SoftwareWriteWatch::VerifyMemoryRegion(void *baseAddress, size_t regionByteSize) +{ + VerifyMemoryRegion(baseAddress, regionByteSize, GetHeapStartAddress(), GetHeapEndAddress()); +} + +inline void SoftwareWriteWatch::VerifyMemoryRegion( + void *baseAddress, + size_t regionByteSize, + void *heapStartAddress, + void *heapEndAddress) +{ + VerifyCreated(); + assert(baseAddress != nullptr); + assert(heapStartAddress != nullptr); + assert(heapStartAddress >= GetHeapStartAddress()); + assert(heapEndAddress != nullptr); + assert(heapEndAddress <= GetHeapEndAddress()); + assert(baseAddress >= heapStartAddress); + assert(baseAddress < heapEndAddress); + assert(regionByteSize != 0); + assert(regionByteSize <= 
reinterpret_cast<size_t>(heapEndAddress) - reinterpret_cast<size_t>(baseAddress)); +} + +inline uint8_t *SoftwareWriteWatch::GetTable() +{ + return g_sw_ww_table; +} + +inline uint8_t *SoftwareWriteWatch::GetUntranslatedTable() +{ + VerifyCreated(); + return GetUntranslatedTable(GetTable(), GetHeapStartAddress()); +} + +inline uint8_t *SoftwareWriteWatch::GetUntranslatedTable(uint8_t *table, void *heapStartAddress) +{ + assert(table != nullptr); + assert(heapStartAddress != nullptr); + assert(heapStartAddress >= GetHeapStartAddress()); + + uint8_t *untranslatedTable = table + GetTableByteIndex(heapStartAddress); + assert(ALIGN_DOWN(untranslatedTable, sizeof(size_t)) == untranslatedTable); + return untranslatedTable; +} + +inline uint8_t *SoftwareWriteWatch::GetUntranslatedTableEnd() +{ + VerifyCreated(); + return GetUntranslatedTableEnd(GetTable(), GetHeapEndAddress()); +} + +inline uint8_t *SoftwareWriteWatch::GetUntranslatedTableEnd(uint8_t *table, void *heapEndAddress) +{ + assert(table != nullptr); + assert(heapEndAddress != nullptr); + assert(heapEndAddress <= GetHeapEndAddress()); + + return ALIGN_UP(&table[GetTableByteIndex(reinterpret_cast<uint8_t *>(heapEndAddress) - 1) + 1], sizeof(size_t)); +} + +inline void SoftwareWriteWatch::InitializeUntranslatedTable(uint8_t *untranslatedTable, void *heapStartAddress) +{ + assert(GetTable() == nullptr); + SetUntranslatedTable(untranslatedTable, heapStartAddress); +} + +inline void SoftwareWriteWatch::SetUntranslatedTable(uint8_t *untranslatedTable, void *heapStartAddress) +{ + assert(untranslatedTable != nullptr); + assert(ALIGN_DOWN(untranslatedTable, sizeof(size_t)) == untranslatedTable); + assert(heapStartAddress != nullptr); + + g_sw_ww_table = TranslateTableToExcludeHeapStartAddress(untranslatedTable, heapStartAddress); +} + +inline void SoftwareWriteWatch::SetResizedUntranslatedTable( + uint8_t *untranslatedTable, + void *heapStartAddress, + void *heapEndAddress) +{ + // The runtime needs to be suspended 
during this call, and background GC threads need to synchronize calls to ClearDirty() + // and GetDirty() such that they are not called concurrently with this function + + VerifyCreated(); + assert(untranslatedTable != nullptr); + assert(ALIGN_DOWN(untranslatedTable, sizeof(size_t)) == untranslatedTable); + assert(heapStartAddress != nullptr); + assert(heapEndAddress != nullptr); + assert(heapStartAddress <= GetHeapStartAddress()); + assert(heapEndAddress >= GetHeapEndAddress()); + assert(heapStartAddress < GetHeapStartAddress() || heapEndAddress > GetHeapEndAddress()); + + uint8_t *oldUntranslatedTable = GetUntranslatedTable(); + void *oldTableHeapStartAddress = GetHeapStartAddress(); + size_t oldTableByteSize = GetTableByteSize(oldTableHeapStartAddress, GetHeapEndAddress()); + SetUntranslatedTable(untranslatedTable, heapStartAddress); + + uint8_t *tableRegionStart = &GetTable()[GetTableByteIndex(oldTableHeapStartAddress)]; + memcpy(tableRegionStart, oldUntranslatedTable, oldTableByteSize); +} + +inline bool SoftwareWriteWatch::IsEnabledForGCHeap() +{ + return g_sw_ww_enabled_for_gc_heap; +} + +inline void SoftwareWriteWatch::EnableForGCHeap() +{ + // The runtime needs to be suspended during this call. This is how it currently guarantees that GC heap writes from other + // threads between calls to EnableForGCHeap() and DisableForGCHeap() will be tracked. + + VerifyCreated(); + assert(!IsEnabledForGCHeap()); + + g_sw_ww_enabled_for_gc_heap = true; + SwitchToWriteWatchBarrier(true); +} + +inline void SoftwareWriteWatch::DisableForGCHeap() +{ + // The runtime needs to be suspended during this call. This is how it currently guarantees that GC heap writes from other + // threads between calls to EnableForGCHeap() and DisableForGCHeap() will be tracked. 
+ + VerifyCreated(); + assert(IsEnabledForGCHeap()); + + g_sw_ww_enabled_for_gc_heap = false; + SwitchToNonWriteWatchBarrier(true); +} + +inline void *SoftwareWriteWatch::GetHeapStartAddress() +{ + return g_lowest_address; +} + +inline void *SoftwareWriteWatch::GetHeapEndAddress() +{ + return g_highest_address; +} + +inline size_t SoftwareWriteWatch::GetTableByteIndex(void *address) +{ + assert(address != nullptr); + + size_t tableByteIndex = reinterpret_cast<size_t>(address) >> AddressToTableByteIndexShift; + assert(tableByteIndex != 0); + return tableByteIndex; +} + +inline void *SoftwareWriteWatch::GetPageAddress(size_t tableByteIndex) +{ + assert(tableByteIndex != 0); + + void *pageAddress = reinterpret_cast<void *>(tableByteIndex << AddressToTableByteIndexShift); + assert(pageAddress >= GetHeapStartAddress()); + assert(pageAddress < GetHeapEndAddress()); + assert(ALIGN_DOWN(pageAddress, OS_PAGE_SIZE) == pageAddress); + return pageAddress; +} + +inline size_t SoftwareWriteWatch::GetTableByteSize(void *heapStartAddress, void *heapEndAddress) +{ + assert(heapStartAddress != nullptr); + assert(heapEndAddress != nullptr); + assert(heapStartAddress < heapEndAddress); + + size_t tableByteSize = + GetTableByteIndex(reinterpret_cast<uint8_t *>(heapEndAddress) - 1) - GetTableByteIndex(heapStartAddress) + 1; + tableByteSize = ALIGN_UP(tableByteSize, sizeof(size_t)); + return tableByteSize; +} + +inline size_t SoftwareWriteWatch::GetTableStartByteOffset(size_t byteSizeBeforeTable) +{ + return ALIGN_UP(byteSizeBeforeTable, sizeof(size_t)); // start of the table needs to be aligned to size_t +} + +inline uint8_t *SoftwareWriteWatch::TranslateTableToExcludeHeapStartAddress(uint8_t *table, void *heapStartAddress) +{ + assert(table != nullptr); + assert(heapStartAddress != nullptr); + + // Exclude the table byte index corresponding to the heap start address from the table pointer, so that each lookup in the + // table by address does not have to calculate (address - 
heapStartAddress) + return table - GetTableByteIndex(heapStartAddress); +} + +inline void SoftwareWriteWatch::TranslateToTableRegion( + void *baseAddress, + size_t regionByteSize, + uint8_t **tableBaseAddressRef, + size_t *tableRegionByteSizeRef) +{ + VerifyCreated(); + VerifyMemoryRegion(baseAddress, regionByteSize); + assert(tableBaseAddressRef != nullptr); + assert(tableRegionByteSizeRef != nullptr); + + size_t baseAddressTableByteIndex = GetTableByteIndex(baseAddress); + *tableBaseAddressRef = &GetTable()[baseAddressTableByteIndex]; + *tableRegionByteSizeRef = + GetTableByteIndex(reinterpret_cast<uint8_t *>(baseAddress) + (regionByteSize - 1)) - baseAddressTableByteIndex + 1; +} + +inline void SoftwareWriteWatch::ClearDirty(void *baseAddress, size_t regionByteSize) +{ + VerifyCreated(); + VerifyMemoryRegion(baseAddress, regionByteSize); + + uint8_t *tableBaseAddress; + size_t tableRegionByteSize; + TranslateToTableRegion(baseAddress, regionByteSize, &tableBaseAddress, &tableRegionByteSize); + memset(tableBaseAddress, 0, tableRegionByteSize); +} + +inline void SoftwareWriteWatch::SetDirty(void *address, size_t writeByteSize) +{ + VerifyCreated(); + VerifyMemoryRegion(address, writeByteSize); + assert(address != nullptr); + assert(writeByteSize <= sizeof(void *)); + + size_t tableByteIndex = GetTableByteIndex(address); + assert(GetTableByteIndex(reinterpret_cast<uint8_t *>(address) + (writeByteSize - 1)) == tableByteIndex); + + uint8_t *tableByteAddress = &GetTable()[tableByteIndex]; + if (*tableByteAddress == 0) + { + *tableByteAddress = 0xff; + } +} + +inline void SoftwareWriteWatch::SetDirtyRegion(void *baseAddress, size_t regionByteSize) +{ + VerifyCreated(); + VerifyMemoryRegion(baseAddress, regionByteSize); + + uint8_t *tableBaseAddress; + size_t tableRegionByteSize; + TranslateToTableRegion(baseAddress, regionByteSize, &tableBaseAddress, &tableRegionByteSize); + memset(tableBaseAddress, ~0, tableRegionByteSize); +} + +#endif // !DACCESS_COMPILE +#endif // 
FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +#endif // !__SOFTWARE_WRITE_WATCH_H__ diff --git a/src/inc/stdmacros.h b/src/inc/stdmacros.h index 6f27c211c5..ab77a2cd91 100644 --- a/src/inc/stdmacros.h +++ b/src/inc/stdmacros.h @@ -188,6 +188,12 @@ inline void* ALIGN_UP( void* val, size_t alignment ) return (void*) ALIGN_UP( (size_t)val, alignment ); } +inline uint8_t* ALIGN_UP( uint8_t* val, size_t alignment ) +{ + WRAPPER_NO_CONTRACT; + + return (uint8_t*) ALIGN_UP( (size_t)val, alignment ); +} inline size_t ALIGN_DOWN( size_t val, size_t alignment ) { @@ -203,6 +209,11 @@ inline void* ALIGN_DOWN( void* val, size_t alignment ) WRAPPER_NO_CONTRACT; return (void*) ALIGN_DOWN( (size_t)val, alignment ); } +inline uint8_t* ALIGN_DOWN( uint8_t* val, size_t alignment ) +{ + WRAPPER_NO_CONTRACT; + return (uint8_t*) ALIGN_DOWN( (size_t)val, alignment ); +} inline BOOL IS_ALIGNED( size_t val, size_t alignment ) { diff --git a/src/vm/CMakeLists.txt b/src/vm/CMakeLists.txt index a2676b5385..13106a4a68 100644 --- a/src/vm/CMakeLists.txt +++ b/src/vm/CMakeLists.txt @@ -122,6 +122,7 @@ set(VM_SOURCES_DAC_AND_WKS_COMMON securitydescriptorassembly.cpp sigformat.cpp siginfo.cpp + ../gc/softwarewritewatch.cpp stackwalk.cpp stublink.cpp stubmgr.cpp diff --git a/src/vm/amd64/JitHelpers_Fast.asm b/src/vm/amd64/JitHelpers_Fast.asm index 8e39a6d39f..90185205af 100644 --- a/src/vm/amd64/JitHelpers_Fast.asm +++ b/src/vm/amd64/JitHelpers_Fast.asm @@ -27,6 +27,11 @@ EXTERN g_lowest_address:QWORD EXTERN g_highest_address:QWORD EXTERN g_card_table:QWORD +ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +EXTERN g_sw_ww_table:QWORD +EXTERN g_sw_ww_enabled_for_gc_heap:BYTE +endif + ifdef WRITE_BARRIER_CHECK ; Those global variables are always defined, but should be 0 for Server GC g_GCShadow TEXTEQU <?g_GCShadow@@3PEAEEA> @@ -466,6 +471,67 @@ ifdef _DEBUG jmp JIT_WriteBarrier_Debug endif +ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + ; JIT_WriteBarrier_WriteWatch_PostGrow64 + + ; 
Regarding patchable constants: + ; - 64-bit constants have to be loaded into a register + ; - The constants have to be aligned to 8 bytes so that they can be patched easily + ; - The constant loads have been located to minimize NOP padding required to align the constants + ; - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special + ; non-volatile calling convention, this should be changed to use just one register. + + ; Do the move into the GC . It is correct to take an AV here, the EH code + ; figures out that this came from a WriteBarrier and correctly maps it back + ; to the managed method which called the WriteBarrier (see setup in + ; InitializeExceptionHandling, vm\exceptionhandling.cpp). + mov [rcx], rdx + + ; Update the write watch table if necessary + mov rax, rcx + mov r8, 0F0F0F0F0F0F0F0F0h + shr rax, 0Ch ; SoftwareWriteWatch::AddressToTableByteIndexShift + NOP_2_BYTE ; padding for alignment of constant + mov r9, 0F0F0F0F0F0F0F0F0h + add rax, r8 + cmp byte ptr [rax], 0h + jne CheckCardTable + mov byte ptr [rax], 0FFh + + NOP_3_BYTE ; padding for alignment of constant + + ; Check the lower and upper ephemeral region bounds + CheckCardTable: + cmp rdx, r9 + jb Exit + + NOP_3_BYTE ; padding for alignment of constant + + mov r8, 0F0F0F0F0F0F0F0F0h + + cmp rdx, r8 + jae Exit + + nop ; padding for alignment of constant + + mov rax, 0F0F0F0F0F0F0F0F0h + + ; Touch the card table entry, if not already dirty. + shr rcx, 0Bh + cmp byte ptr [rcx + rax], 0FFh + jne UpdateCardTable + REPRET + + UpdateCardTable: + mov byte ptr [rcx + rax], 0FFh + ret + + align 16 + Exit: + REPRET +else + ; JIT_WriteBarrier_PostGrow64 + ; Do the move into the GC . 
It is correct to take an AV here, the EH code ; figures out that this came from a WriteBarrier and correctly maps it back ; to the managed method which called the WriteBarrier (see setup in @@ -510,6 +576,8 @@ endif align 16 Exit: REPRET +endif + ; make sure this guy is bigger than any of the other guys align 16 nop @@ -577,7 +645,8 @@ LEAF_END JIT_PatchedCodeLast, _TEXT ; Entry: ; RDI - address of ref-field (assigned to) ; RSI - address of the data (source) -; RCX can be trashed +; RCX is trashed +; RAX is trashed when FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP is defined ; Exit: ; RDI, RSI are incremented by SIZEOF(LPVOID) LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT @@ -653,7 +722,20 @@ ifdef WRITE_BARRIER_CHECK pop r10 endif +ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + ; Update the write watch table if necessary + cmp byte ptr [g_sw_ww_enabled_for_gc_heap], 0h + je CheckCardTable + mov rax, rdi + shr rax, 0Ch ; SoftwareWriteWatch::AddressToTableByteIndexShift + add rax, qword ptr [g_sw_ww_table] + cmp byte ptr [rax], 0h + jne CheckCardTable + mov byte ptr [rax], 0FFh +endif + ; See if we can just quick out + CheckCardTable: cmp rcx, [g_ephemeral_low] jb Exit cmp rcx, [g_ephemeral_high] diff --git a/src/vm/amd64/JitHelpers_FastWriteBarriers.asm b/src/vm/amd64/JitHelpers_FastWriteBarriers.asm index 17730142ed..07e985f94f 100644 --- a/src/vm/amd64/JitHelpers_FastWriteBarriers.asm +++ b/src/vm/amd64/JitHelpers_FastWriteBarriers.asm @@ -41,39 +41,6 @@ include asmconstants.inc ; (card table, ephemeral region ranges, etc) are naturally aligned since ; there are codepaths that will overwrite these values while the EE is running. ; -LEAF_ENTRY JIT_WriteBarrier_PreGrow32, _TEXT - align 4 - ; Do the move into the GC . 
It is correct to take an AV here, the EH code - ; figures out that this came from a WriteBarrier and correctly maps it back - ; to the managed method which called the WriteBarrier (see setup in - ; InitializeExceptionHandling, vm\exceptionhandling.cpp). - mov [rcx], rdx - - NOP_2_BYTE ; padding for alignment of constant - -PATCH_LABEL JIT_WriteBarrier_PreGrow32_PatchLabel_Lower - cmp rdx, 0F0F0F0F0h - jb Exit - - shr rcx, 0Bh -PATCH_LABEL JIT_WriteBarrier_PreGrow32_PatchLabel_CardTable_Check - cmp byte ptr [rcx + 0F0F0F0F0h], 0FFh - jne UpdateCardTable - REPRET - - nop ; padding for alignment of constant - -PATCH_LABEL JIT_WriteBarrier_PreGrow32_PatchLabel_CardTable_Update - UpdateCardTable: - mov byte ptr [rcx + 0F0F0F0F0h], 0FFh - ret - - align 16 - Exit: - REPRET -LEAF_END_MARKED JIT_WriteBarrier_PreGrow32, _TEXT - - LEAF_ENTRY JIT_WriteBarrier_PreGrow64, _TEXT align 8 ; Do the move into the GC . It is correct to take an AV here, the EH code @@ -165,57 +132,107 @@ PATCH_LABEL JIT_WriteBarrier_PostGrow64_Patch_Label_CardTable REPRET LEAF_END_MARKED JIT_WriteBarrier_PostGrow64, _TEXT -LEAF_ENTRY JIT_WriteBarrier_PostGrow32, _TEXT - align 4 + +ifdef FEATURE_SVR_GC + +LEAF_ENTRY JIT_WriteBarrier_SVR64, _TEXT + align 8 + ; + ; SVR GC has multiple heaps, so it cannot provide one single + ; ephemeral region to bounds check against, so we just skip the + ; bounds checking all together and do our card table update + ; unconditionally. + ; + ; Do the move into the GC . It is correct to take an AV here, the EH code ; figures out that this came from a WriteBarrier and correctly maps it back ; to the managed method which called the WriteBarrier (see setup in ; InitializeExceptionHandling, vm\exceptionhandling.cpp). 
mov [rcx], rdx - NOP_2_BYTE ; padding for alignment of constant + NOP_3_BYTE ; padding for alignment of constant - ; Check the lower and upper ephemeral region bounds +PATCH_LABEL JIT_WriteBarrier_SVR64_PatchLabel_CardTable + mov rax, 0F0F0F0F0F0F0F0F0h -PATCH_LABEL JIT_WriteBarrier_PostGrow32_PatchLabel_Lower - cmp rdx, 0F0F0F0F0h - jb Exit + shr rcx, 0Bh - NOP_3_BYTE ; padding for alignment of constant + cmp byte ptr [rcx + rax], 0FFh + jne UpdateCardTable + REPRET -PATCH_LABEL JIT_WriteBarrier_PostGrow32_PatchLabel_Upper - cmp rdx, 0F0F0F0F0h - jae Exit + UpdateCardTable: + mov byte ptr [rcx + rax], 0FFh + ret +LEAF_END_MARKED JIT_WriteBarrier_SVR64, _TEXT + +endif + + +ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + +LEAF_ENTRY JIT_WriteBarrier_WriteWatch_PreGrow64, _TEXT + align 8 + + ; Regarding patchable constants: + ; - 64-bit constants have to be loaded into a register + ; - The constants have to be aligned to 8 bytes so that they can be patched easily + ; - The constant loads have been located to minimize NOP padding required to align the constants + ; - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special + ; non-volatile calling convention, this should be changed to use just one register. + + ; Do the move into the GC . It is correct to take an AV here, the EH code + ; figures out that this came from a WriteBarrier and correctly maps it back + ; to the managed method which called the WriteBarrier (see setup in + ; InitializeExceptionHandling, vm\exceptionhandling.cpp). 
+ mov [rcx], rdx + + ; Update the write watch table if necessary + mov rax, rcx +PATCH_LABEL JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_WriteWatchTable + mov r8, 0F0F0F0F0F0F0F0F0h + shr rax, 0Ch ; SoftwareWriteWatch::AddressToTableByteIndexShift + NOP_2_BYTE ; padding for alignment of constant +PATCH_LABEL JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_Lower + mov r9, 0F0F0F0F0F0F0F0F0h + add rax, r8 + cmp byte ptr [rax], 0h + jne CheckCardTable + mov byte ptr [rax], 0FFh + + ; Check the lower ephemeral region bound. + CheckCardTable: + cmp rdx, r9 + jb Exit ; Touch the card table entry, if not already dirty. shr rcx, 0Bh - -PATCH_LABEL JIT_WriteBarrier_PostGrow32_PatchLabel_CheckCardTable - cmp byte ptr [rcx + 0F0F0F0F0h], 0FFh + NOP_2_BYTE ; padding for alignment of constant +PATCH_LABEL JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_CardTable + mov rax, 0F0F0F0F0F0F0F0F0h + cmp byte ptr [rcx + rax], 0FFh jne UpdateCardTable REPRET - nop ; padding for alignment of constant - -PATCH_LABEL JIT_WriteBarrier_PostGrow32_PatchLabel_UpdateCardTable UpdateCardTable: - mov byte ptr [rcx + 0F0F0F0F0h], 0FFh + mov byte ptr [rcx + rax], 0FFh ret align 16 Exit: REPRET -LEAF_END_MARKED JIT_WriteBarrier_PostGrow32, _TEXT +LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_PreGrow64, _TEXT -LEAF_ENTRY JIT_WriteBarrier_SVR32, _TEXT - align 4 - ; - ; SVR GC has multiple heaps, so it cannot provide one single - ; ephemeral region to bounds check against, so we just skip the - ; bounds checking all together and do our card table update - ; unconditionally. 
- ; +LEAF_ENTRY JIT_WriteBarrier_WriteWatch_PostGrow64, _TEXT + align 8 + + ; Regarding patchable constants: + ; - 64-bit constants have to be loaded into a register + ; - The constants have to be aligned to 8 bytes so that they can be patched easily + ; - The constant loads have been located to minimize NOP padding required to align the constants + ; - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special + ; non-volatile calling convention, this should be changed to use just one register. ; Do the move into the GC . It is correct to take an AV here, the EH code ; figures out that this came from a WriteBarrier and correctly maps it back @@ -223,25 +240,67 @@ LEAF_ENTRY JIT_WriteBarrier_SVR32, _TEXT ; InitializeExceptionHandling, vm\exceptionhandling.cpp). mov [rcx], rdx - shr rcx, 0Bh + ; Update the write watch table if necessary + mov rax, rcx +PATCH_LABEL JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_WriteWatchTable + mov r8, 0F0F0F0F0F0F0F0F0h + shr rax, 0Ch ; SoftwareWriteWatch::AddressToTableByteIndexShift + NOP_2_BYTE ; padding for alignment of constant +PATCH_LABEL JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_Lower + mov r9, 0F0F0F0F0F0F0F0F0h + add rax, r8 + cmp byte ptr [rax], 0h + jne CheckCardTable + mov byte ptr [rax], 0FFh NOP_3_BYTE ; padding for alignment of constant -PATCH_LABEL JIT_WriteBarrier_SVR32_PatchLabel_CheckCardTable - cmp byte ptr [rcx + 0F0F0F0F0h], 0FFh - jne UpdateCardTable - REPRET + ; Check the lower and upper ephemeral region bounds + CheckCardTable: + cmp rdx, r9 + jb Exit + + NOP_3_BYTE ; padding for alignment of constant + +PATCH_LABEL JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_Upper + mov r8, 0F0F0F0F0F0F0F0F0h + + cmp rdx, r8 + jae Exit nop ; padding for alignment of constant -PATCH_LABEL JIT_WriteBarrier_SVR32_PatchLabel_UpdateCardTable +PATCH_LABEL JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_CardTable + mov rax, 0F0F0F0F0F0F0F0F0h + + ; 
Touch the card table entry, if not already dirty. + shr rcx, 0Bh + cmp byte ptr [rcx + rax], 0FFh + jne UpdateCardTable + REPRET + UpdateCardTable: - mov byte ptr [rcx + 0F0F0F0F0h], 0FFh + mov byte ptr [rcx + rax], 0FFh ret -LEAF_END_MARKED JIT_WriteBarrier_SVR32, _TEXT -LEAF_ENTRY JIT_WriteBarrier_SVR64, _TEXT + align 16 + Exit: + REPRET +LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_PostGrow64, _TEXT + + +ifdef FEATURE_SVR_GC + +LEAF_ENTRY JIT_WriteBarrier_WriteWatch_SVR64, _TEXT align 8 + + ; Regarding patchable constants: + ; - 64-bit constants have to be loaded into a register + ; - The constants have to be aligned to 8 bytes so that they can be patched easily + ; - The constant loads have been located to minimize NOP padding required to align the constants + ; - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special + ; non-volatile calling convention, this should be changed to use just one register. + ; ; SVR GC has multiple heaps, so it cannot provide one single ; ephemeral region to bounds check against, so we just skip the @@ -255,21 +314,32 @@ LEAF_ENTRY JIT_WriteBarrier_SVR64, _TEXT ; InitializeExceptionHandling, vm\exceptionhandling.cpp). 
mov [rcx], rdx - NOP_3_BYTE ; padding for alignment of constant - -PATCH_LABEL JIT_WriteBarrier_SVR64_PatchLabel_CardTable - mov rax, 0F0F0F0F0F0F0F0F0h - + ; Update the write watch table if necessary + mov rax, rcx +PATCH_LABEL JIT_WriteBarrier_WriteWatch_SVR64_PatchLabel_WriteWatchTable + mov r8, 0F0F0F0F0F0F0F0F0h + shr rax, 0Ch ; SoftwareWriteWatch::AddressToTableByteIndexShift + NOP_2_BYTE ; padding for alignment of constant +PATCH_LABEL JIT_WriteBarrier_WriteWatch_SVR64_PatchLabel_CardTable + mov r9, 0F0F0F0F0F0F0F0F0h + add rax, r8 + cmp byte ptr [rax], 0h + jne CheckCardTable + mov byte ptr [rax], 0FFh + + CheckCardTable: shr rcx, 0Bh - - cmp byte ptr [rcx + rax], 0FFh + cmp byte ptr [rcx + r9], 0FFh jne UpdateCardTable REPRET UpdateCardTable: - mov byte ptr [rcx + rax], 0FFh + mov byte ptr [rcx + r9], 0FFh ret -LEAF_END_MARKED JIT_WriteBarrier_SVR64, _TEXT +LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_SVR64, _TEXT - end +endif +endif + + end diff --git a/src/vm/amd64/JitHelpers_Slow.asm b/src/vm/amd64/JitHelpers_Slow.asm index 51829cad42..64b9a82e61 100644 --- a/src/vm/amd64/JitHelpers_Slow.asm +++ b/src/vm/amd64/JitHelpers_Slow.asm @@ -28,6 +28,11 @@ EXTERN g_lowest_address:QWORD EXTERN g_highest_address:QWORD EXTERN g_card_table:QWORD +ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +EXTERN g_sw_ww_table:QWORD +EXTERN g_sw_ww_enabled_for_gc_heap:BYTE +endif + ifdef WRITE_BARRIER_CHECK ; Those global variables are always defined, but should be 0 for Server GC g_GCShadow TEXTEQU <?g_GCShadow@@3PEAEEA> @@ -118,6 +123,19 @@ ifdef WRITE_BARRIER_CHECK DoneShadow: endif +ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + ; Update the write watch table if necessary + cmp byte ptr [g_sw_ww_enabled_for_gc_heap], 0h + je CheckCardTable + mov r10, rcx + shr r10, 0Ch ; SoftwareWriteWatch::AddressToTableByteIndexShift + add r10, qword ptr [g_sw_ww_table] + cmp byte ptr [r10], 0h + jne CheckCardTable + mov byte ptr [r10], 0FFh +endif + + CheckCardTable: ; See if we 
can just quick out cmp rax, [g_ephemeral_low] jb Exit diff --git a/src/vm/amd64/jithelpers_fast.S b/src/vm/amd64/jithelpers_fast.S index 22f21bb8de..a0650759f6 100644 --- a/src/vm/amd64/jithelpers_fast.S +++ b/src/vm/amd64/jithelpers_fast.S @@ -10,6 +10,45 @@ LEAF_ENTRY JIT_PatchedCodeStart, _TEXT ret LEAF_END JIT_PatchedCodeStart, _TEXT + +// There is an even more optimized version of these helpers possible which takes +// advantage of knowledge of which way the ephemeral heap is growing to only do 1/2 +// that check (this is more significant in the JIT_WriteBarrier case). +// +// Additionally we can look into providing helpers which will take the src/dest from +// specific registers (like x86) which _could_ (??) make for easier register allocation +// for the JIT64, however it might lead to having to have some nasty code that treats +// these guys really special like... :(. +// +// Version that does the move, checks whether or not it's in the GC and whether or not +// it needs to have it's card updated +// +// void JIT_CheckedWriteBarrier(Object** dst, Object* src) +LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT + + // When WRITE_BARRIER_CHECK is defined _NotInHeap will write the reference + // but if it isn't then it will just return. + // + // See if this is in GCHeap + PREPARE_EXTERNAL_VAR g_lowest_address, rax + cmp rdi, [rax] + // jb NotInHeap + .byte 0x72, 0x0e + PREPARE_EXTERNAL_VAR g_highest_address, rax + cmp rdi, [rax] + // jnb NotInHeap + .byte 0x73, 0x02 + + // call C_FUNC(JIT_WriteBarrier) + .byte 0xeb, 0x05 + + NotInHeap: + // See comment above about possible AV + mov [rdi], rsi + ret +LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT + + // This is used by the mechanism to hold either the JIT_WriteBarrier_PreGrow // or JIT_WriteBarrier_PostGrow code (depending on the state of the GC). It _WILL_ // change at runtime as the GC changes. 
Initially it should simply be a copy of the @@ -22,6 +61,71 @@ LEAF_ENTRY JIT_WriteBarrier, _TEXT jmp C_FUNC(JIT_WriteBarrier_Debug) #endif +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + // JIT_WriteBarrier_WriteWatch_PostGrow64 + + // Regarding patchable constants: + // - 64-bit constants have to be loaded into a register + // - The constants have to be aligned to 8 bytes so that they can be patched easily + // - The constant loads have been located to minimize NOP padding required to align the constants + // - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special + // non-volatile calling convention, this should be changed to use just one register. + + // Do the move into the GC . It is correct to take an AV here, the EH code + // figures out that this came from a WriteBarrier and correctly maps it back + // to the managed method which called the WriteBarrier (see setup in + // InitializeExceptionHandling, vm\exceptionhandling.cpp). + mov [rdi], rsi + + // Update the write watch table if necessary + mov rax, rdi + movabs r10, 0xF0F0F0F0F0F0F0F0 + shr rax, 0Ch // SoftwareWriteWatch::AddressToTableByteIndexShift + NOP_2_BYTE // padding for alignment of constant + movabs r11, 0xF0F0F0F0F0F0F0F0 + add rax, r10 + cmp byte ptr [rax], 0h + .byte 0x75, 0x06 + // jne CheckCardTable + mov byte ptr [rax], 0FFh + + NOP_3_BYTE // padding for alignment of constant + + // Check the lower and upper ephemeral region bounds + CheckCardTable: + cmp rsi, r11 + .byte 0x72,0x3D + // jb Exit + + NOP_3_BYTE // padding for alignment of constant + + movabs r10, 0xF0F0F0F0F0F0F0F0 + + cmp rsi, r10 + .byte 0x73,0x2B + // jae Exit + + nop // padding for alignment of constant + + movabs rax, 0xF0F0F0F0F0F0F0F0 + + // Touch the card table entry, if not already dirty. 
+ shr rdi, 0Bh + cmp byte ptr [rdi + rax], 0FFh + .byte 0x75, 0x02 + // jne UpdateCardTable + REPRET + + UpdateCardTable: + mov byte ptr [rdi + rax], 0FFh + ret + + .balign 16 + Exit: + REPRET +#else + // JIT_WriteBarrier_PostGrow64 + // Do the move into the GC . It is correct to take an AV here, the EH code // figures out that this came from a WriteBarrier and correctly maps it back // to the managed method which called the WriteBarrier (see setup in @@ -69,6 +173,8 @@ LEAF_ENTRY JIT_WriteBarrier, _TEXT .balign 16 Exit: REPRET +#endif + // make sure this guy is bigger than any of the other guys .balign 16 nop @@ -79,43 +185,6 @@ LEAF_ENTRY JIT_PatchedCodeLast, _TEXT ret LEAF_END JIT_PatchedCodeLast, _TEXT -// There is an even more optimized version of these helpers possible which takes -// advantage of knowledge of which way the ephemeral heap is growing to only do 1/2 -// that check (this is more significant in the JIT_WriteBarrier case). -// -// Additionally we can look into providing helpers which will take the src/dest from -// specific registers (like x86) which _could_ (??) make for easier register allocation -// for the JIT64, however it might lead to having to have some nasty code that treats -// these guys really special like... :(. -// -// Version that does the move, checks whether or not it's in the GC and whether or not -// it needs to have it's card updated -// -// void JIT_CheckedWriteBarrier(Object** dst, Object* src) -LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT - - // When WRITE_BARRIER_CHECK is defined _NotInHeap will write the reference - // but if it isn't then it will just return. 
- // - // See if this is in GCHeap - PREPARE_EXTERNAL_VAR g_lowest_address, rax - cmp rdi, [rax] - // jb NotInHeap - .byte 0x72, 0x0e - PREPARE_EXTERNAL_VAR g_highest_address, rax - cmp rdi, [rax] - // jnb NotInHeap - .byte 0x73, 0x02 - - // call C_FUNC(JIT_WriteBarrier) - .byte 0xeb, 0x84 - - NotInHeap: - // See comment above about possible AV - mov [rdi], rsi - ret -LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT - // JIT_ByRefWriteBarrier has weird symantics, see usage in StubLinkerX86.cpp // // Entry: @@ -128,7 +197,7 @@ LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT // // RCX is trashed // RAX is trashed -// R10 is trashed on Debug build +// R10 is trashed // R11 is trashed on Debug build // Exit: // RDI, RSI are incremented by SIZEOF(LPVOID) @@ -202,6 +271,21 @@ LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT DoneShadow_ByRefWriteBarrier: #endif +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + // Update the write watch table if necessary + PREPARE_EXTERNAL_VAR g_sw_ww_enabled_for_gc_heap, rax + cmp byte ptr [rax], 0h + je CheckCardTable_ByRefWriteBarrier + mov rax, rdi + shr rax, 0Ch // SoftwareWriteWatch::AddressToTableByteIndexShift + PREPARE_EXTERNAL_VAR g_sw_ww_table, r10 + add rax, qword ptr [r10] + cmp byte ptr [rax], 0h + jne CheckCardTable_ByRefWriteBarrier + mov byte ptr [rax], 0FFh +#endif + + CheckCardTable_ByRefWriteBarrier: // See if we can just quick out PREPARE_EXTERNAL_VAR g_ephemeral_low, rax cmp rcx, [rax] diff --git a/src/vm/amd64/jithelpers_fastwritebarriers.S b/src/vm/amd64/jithelpers_fastwritebarriers.S index f8d41cb88d..085f85bc8b 100644 --- a/src/vm/amd64/jithelpers_fastwritebarriers.S +++ b/src/vm/amd64/jithelpers_fastwritebarriers.S @@ -5,39 +5,6 @@ .intel_syntax noprefix #include "unixasmmacros.inc" - .balign 4 -LEAF_ENTRY JIT_WriteBarrier_PreGrow32, _TEXT - // Do the move into the GC . 
It is correct to take an AV here, the EH code - // figures out that this came from a WriteBarrier and correctly maps it back - // to the managed method which called the WriteBarrier (see setup in - // InitializeExceptionHandling, vm\exceptionhandling.cpp). - mov [rdi], rsi - - NOP_2_BYTE // padding for alignment of constant - -PATCH_LABEL JIT_WriteBarrier_PreGrow32_PatchLabel_Lower - cmp rsi, -0F0F0F10h // 0F0F0F0F0h - .byte 0x72, 0x22 - // jb Exit_PreGrow32 - - shr rdi, 0Bh -PATCH_LABEL JIT_WriteBarrier_PreGrow32_PatchLabel_CardTable_Check - cmp byte ptr [rdi + 0F0F0F0F0h], 0FFh - .byte 0x75, 0x03 - // jne UpdateCardTable_PreGrow32 - REPRET - - nop // padding for alignment of constant - -PATCH_LABEL JIT_WriteBarrier_PreGrow32_PatchLabel_CardTable_Update - UpdateCardTable_PreGrow32: - mov byte ptr [rdi + 0F0F0F0F0h], 0FFh - ret - - .balign 16 - Exit_PreGrow32: - REPRET -LEAF_END_MARKED JIT_WriteBarrier_PreGrow32, _TEXT .balign 8 LEAF_ENTRY JIT_WriteBarrier_PreGrow64, _TEXT @@ -80,6 +47,7 @@ PATCH_LABEL JIT_WriteBarrier_PreGrow64_Patch_Label_CardTable REPRET LEAF_END_MARKED JIT_WriteBarrier_PreGrow64, _TEXT + .balign 8 // See comments for JIT_WriteBarrier_PreGrow (above). LEAF_ENTRY JIT_WriteBarrier_PostGrow64, _TEXT @@ -134,60 +102,109 @@ PATCH_LABEL JIT_WriteBarrier_PostGrow64_Patch_Label_CardTable REPRET LEAF_END_MARKED JIT_WriteBarrier_PostGrow64, _TEXT - .balign 4 -LEAF_ENTRY JIT_WriteBarrier_PostGrow32, _TEXT + +#ifdef FEATURE_SVR_GC + + .balign 8 +LEAF_ENTRY JIT_WriteBarrier_SVR64, _TEXT + // + // SVR GC has multiple heaps, so it cannot provide one single + // ephemeral region to bounds check against, so we just skip the + // bounds checking all together and do our card table update + // unconditionally. + // + // Do the move into the GC . 
It is correct to take an AV here, the EH code // figures out that this came from a WriteBarrier and correctly maps it back // to the managed method which called the WriteBarrier (see setup in // InitializeExceptionHandling, vm\exceptionhandling.cpp). mov [rdi], rsi - NOP_2_BYTE // padding for alignment of constant + NOP_3_BYTE // padding for alignment of constant - // Check the lower and upper ephemeral region bounds +PATCH_LABEL JIT_WriteBarrier_SVR64_PatchLabel_CardTable + movabs rax, 0xF0F0F0F0F0F0F0F0 -PATCH_LABEL JIT_WriteBarrier_PostGrow32_PatchLabel_Lower - cmp rsi, -0F0F0F10h // 0F0F0F0F0h - .byte 0x72, 0x2e - // jb Exit_PostGrow32 + shr rdi, 0Bh - NOP_3_BYTE // padding for alignment of constant + cmp byte ptr [rdi + rax], 0FFh + .byte 0x75, 0x02 + // jne UpdateCardTable_SVR64 + REPRET -PATCH_LABEL JIT_WriteBarrier_PostGrow32_PatchLabel_Upper - cmp rsi, -0F0F0F10h // 0F0F0F0F0h - .byte 0x73, 0x22 - // jae Exit_PostGrow32 + UpdateCardTable_SVR64: + mov byte ptr [rdi + rax], 0FFh + ret +LEAF_END_MARKED JIT_WriteBarrier_SVR64, _TEXT - // Touch the card table entry, if not already dirty. - shr rdi, 0Bh +#endif + + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + + .balign 8 +LEAF_ENTRY JIT_WriteBarrier_WriteWatch_PreGrow64, _TEXT + // Regarding patchable constants: + // - 64-bit constants have to be loaded into a register + // - The constants have to be aligned to 8 bytes so that they can be patched easily + // - The constant loads have been located to minimize NOP padding required to align the constants + // - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special + // non-volatile calling convention, this should be changed to use just one register. + + // Do the move into the GC . 
It is correct to take an AV here, the EH code + // figures out that this came from a WriteBarrier and correctly maps it back + // to the managed method which called the WriteBarrier (see setup in + // InitializeExceptionHandling, vm\exceptionhandling.cpp). + mov [rdi], rsi -PATCH_LABEL JIT_WriteBarrier_PostGrow32_PatchLabel_CheckCardTable - cmp byte ptr [rdi + 0F0F0F0F0h], 0FFh + // Update the write watch table if necessary + mov rax, rdi +PATCH_LABEL JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_WriteWatchTable + movabs r10, 0xF0F0F0F0F0F0F0F0 + shr rax, 0Ch // SoftwareWriteWatch::AddressToTableByteIndexShift + NOP_2_BYTE // padding for alignment of constant +PATCH_LABEL JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_Lower + movabs r11, 0xF0F0F0F0F0F0F0F0 + add rax, r10 + cmp byte ptr [rax], 0h .byte 0x75, 0x03 - // jne UpdateCardTable_PostGrow32 - REPRET + // jne CheckCardTable_WriteWatch_PreGrow64 + mov byte ptr [rax], 0FFh - nop // padding for alignment of constant + CheckCardTable_WriteWatch_PreGrow64: + // Check the lower ephemeral region bound. + cmp rsi, r11 + .byte 0x72, 0x20 + // jb Exit_WriteWatch_PreGrow64 -PATCH_LABEL JIT_WriteBarrier_PostGrow32_PatchLabel_UpdateCardTable - UpdateCardTable_PostGrow32: - mov byte ptr [rdi + 0F0F0F0F0h], 0FFh + // Touch the card table entry, if not already dirty. 
+ shr rdi, 0Bh + NOP_2_BYTE // padding for alignment of constant +PATCH_LABEL JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_CardTable + movabs rax, 0xF0F0F0F0F0F0F0F0 + cmp byte ptr [rdi + rax], 0FFh + .byte 0x75, 0x02 + // jne UpdateCardTable_WriteWatch_PreGrow64 + REPRET + + UpdateCardTable_WriteWatch_PreGrow64: + mov byte ptr [rdi + rax], 0FFh ret .balign 16 - Exit_PostGrow32: + Exit_WriteWatch_PreGrow64: REPRET -LEAF_END_MARKED JIT_WriteBarrier_PostGrow32, _TEXT +LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_PreGrow64, _TEXT - .balign 4 -LEAF_ENTRY JIT_WriteBarrier_SVR32, _TEXT - // - // SVR GC has multiple heaps, so it cannot provide one single - // ephemeral region to bounds check against, so we just skip the - // bounds checking all together and do our card table update - // unconditionally. - // + .balign 8 +LEAF_ENTRY JIT_WriteBarrier_WriteWatch_PostGrow64, _TEXT + // Regarding patchable constants: + // - 64-bit constants have to be loaded into a register + // - The constants have to be aligned to 8 bytes so that they can be patched easily + // - The constant loads have been located to minimize NOP padding required to align the constants + // - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special + // non-volatile calling convention, this should be changed to use just one register. // Do the move into the GC . It is correct to take an AV here, the EH code // figures out that this came from a WriteBarrier and correctly maps it back @@ -195,26 +212,70 @@ LEAF_ENTRY JIT_WriteBarrier_SVR32, _TEXT // InitializeExceptionHandling, vm\exceptionhandling.cpp). 
mov [rdi], rsi - shr rdi, 0Bh + // Update the write watch table if necessary + mov rax, rdi +PATCH_LABEL JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_WriteWatchTable + movabs r10, 0xF0F0F0F0F0F0F0F0 + shr rax, 0Ch // SoftwareWriteWatch::AddressToTableByteIndexShift + NOP_2_BYTE // padding for alignment of constant +PATCH_LABEL JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_Lower + movabs r11, 0xF0F0F0F0F0F0F0F0 + add rax, r10 + cmp byte ptr [rax], 0h + .byte 0x75, 0x06 + // jne CheckCardTable_WriteWatch_PostGrow64 + mov byte ptr [rax], 0FFh NOP_3_BYTE // padding for alignment of constant -PATCH_LABEL JIT_WriteBarrier_SVR32_PatchLabel_CheckCardTable - cmp byte ptr [rdi + 0F0F0F0F0h], 0FFh - .byte 0x75, 0x03 - // jne UpdateCardTable_SVR32 - REPRET + // Check the lower and upper ephemeral region bounds + CheckCardTable_WriteWatch_PostGrow64: + cmp rsi, r11 + .byte 0x72, 0x3d + // jb Exit_WriteWatch_PostGrow64 + + NOP_3_BYTE // padding for alignment of constant + +PATCH_LABEL JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_Upper + movabs r10, 0xF0F0F0F0F0F0F0F0 + + cmp rsi, r10 + .byte 0x73, 0x2b + // jae Exit_WriteWatch_PostGrow64 nop // padding for alignment of constant -PATCH_LABEL JIT_WriteBarrier_SVR32_PatchLabel_UpdateCardTable - UpdateCardTable_SVR32: - mov byte ptr [rdi + 0F0F0F0F0h], 0FFh +PATCH_LABEL JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_CardTable + movabs rax, 0xF0F0F0F0F0F0F0F0 + + // Touch the card table entry, if not already dirty. 
+ shr rdi, 0Bh + cmp byte ptr [rdi + rax], 0FFh + .byte 0x75, 0x02 + // jne UpdateCardTable_WriteWatch_PostGrow64 + REPRET + + UpdateCardTable_WriteWatch_PostGrow64: + mov byte ptr [rdi + rax], 0FFh ret -LEAF_END_MARKED JIT_WriteBarrier_SVR32, _TEXT + + .balign 16 + Exit_WriteWatch_PostGrow64: + REPRET +LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_PostGrow64, _TEXT + + +#ifdef FEATURE_SVR_GC .balign 8 -LEAF_ENTRY JIT_WriteBarrier_SVR64, _TEXT +LEAF_ENTRY JIT_WriteBarrier_WriteWatch_SVR64, _TEXT + // Regarding patchable constants: + // - 64-bit constants have to be loaded into a register + // - The constants have to be aligned to 8 bytes so that they can be patched easily + // - The constant loads have been located to minimize NOP padding required to align the constants + // - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special + // non-volatile calling convention, this should be changed to use just one register. + // // SVR GC has multiple heaps, so it cannot provide one single // ephemeral region to bounds check against, so we just skip the @@ -228,19 +289,31 @@ LEAF_ENTRY JIT_WriteBarrier_SVR64, _TEXT // InitializeExceptionHandling, vm\exceptionhandling.cpp). 
mov [rdi], rsi - NOP_3_BYTE // padding for alignment of constant - -PATCH_LABEL JIT_WriteBarrier_SVR64_PatchLabel_CardTable - movabs rax, 0xF0F0F0F0F0F0F0F0 + // Update the write watch table if necessary + mov rax, rdi +PATCH_LABEL JIT_WriteBarrier_WriteWatch_SVR64_PatchLabel_WriteWatchTable + movabs r10, 0xF0F0F0F0F0F0F0F0 + shr rax, 0Ch // SoftwareWriteWatch::AddressToTableByteIndexShift + NOP_2_BYTE // padding for alignment of constant +PATCH_LABEL JIT_WriteBarrier_WriteWatch_SVR64_PatchLabel_CardTable + movabs r11, 0xF0F0F0F0F0F0F0F0 + add rax, r10 + cmp byte ptr [rax], 0h + .byte 0x75, 0x03 + // jne CheckCardTable_WriteWatch_SVR64 + mov byte ptr [rax], 0FFh + CheckCardTable_WriteWatch_SVR64: shr rdi, 0Bh - - cmp byte ptr [rdi + rax], 0FFh + cmp byte ptr [rdi + r11], 0FFh .byte 0x75, 0x02 - // jne UpdateCardTable_SVR64 + // jne UpdateCardTable_WriteWatch_SVR64 REPRET - UpdateCardTable_SVR64: - mov byte ptr [rdi + rax], 0FFh + UpdateCardTable_WriteWatch_SVR64: + mov byte ptr [rdi + r11], 0FFh ret -LEAF_END_MARKED JIT_WriteBarrier_SVR64, _TEXT +LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_SVR64, _TEXT + +#endif +#endif diff --git a/src/vm/amd64/jithelpers_slow.S b/src/vm/amd64/jithelpers_slow.S index 4d18e4356c..6c8d9077b8 100644 --- a/src/vm/amd64/jithelpers_slow.S +++ b/src/vm/amd64/jithelpers_slow.S @@ -68,6 +68,21 @@ LEAF_ENTRY JIT_WriteBarrier_Debug, _TEXT DoneShadow: #endif +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + // Update the write watch table if necessary + PREPARE_EXTERNAL_VAR g_sw_ww_enabled_for_gc_heap, r10 + cmp byte ptr [r10], 0h + je CheckCardTable_Debug + mov r10, rdi + shr r10, 0Ch // SoftwareWriteWatch::AddressToTableByteIndexShift + PREPARE_EXTERNAL_VAR g_sw_ww_table, r11 + add r10, qword ptr [r11] + cmp byte ptr [r10], 0h + jne CheckCardTable_Debug + mov byte ptr [r10], 0FFh +#endif + + CheckCardTable_Debug: // See if we can just quick out PREPARE_EXTERNAL_VAR g_ephemeral_low, r10 cmp rax, [r10] diff --git 
a/src/vm/amd64/jitinterfaceamd64.cpp b/src/vm/amd64/jitinterfaceamd64.cpp index cfcca1d372..39c2e05c2f 100644 --- a/src/vm/amd64/jitinterfaceamd64.cpp +++ b/src/vm/amd64/jitinterfaceamd64.cpp @@ -16,6 +16,7 @@ #include "eeconfig.h" #include "excep.h" #include "threadsuspend.h" +#include "../../gc/softwarewritewatch.h" extern uint8_t* g_ephemeral_low; extern uint8_t* g_ephemeral_high; @@ -24,24 +25,11 @@ extern uint32_t* g_card_table; // Patch Labels for the various write barriers EXTERN_C void JIT_WriteBarrier_End(); -EXTERN_C void JIT_WriteBarrier_PreGrow32(Object **dst, Object *ref); -EXTERN_C void JIT_WriteBarrier_PreGrow32_PatchLabel_Lower(); -EXTERN_C void JIT_WriteBarrier_PreGrow32_PatchLabel_CardTable_Check(); -EXTERN_C void JIT_WriteBarrier_PreGrow32_PatchLabel_CardTable_Update(); -EXTERN_C void JIT_WriteBarrier_PreGrow32_End(); - EXTERN_C void JIT_WriteBarrier_PreGrow64(Object **dst, Object *ref); EXTERN_C void JIT_WriteBarrier_PreGrow64_Patch_Label_Lower(); EXTERN_C void JIT_WriteBarrier_PreGrow64_Patch_Label_CardTable(); EXTERN_C void JIT_WriteBarrier_PreGrow64_End(); -EXTERN_C void JIT_WriteBarrier_PostGrow32(Object **dst, Object *ref); -EXTERN_C void JIT_WriteBarrier_PostGrow32_PatchLabel_Lower(); -EXTERN_C void JIT_WriteBarrier_PostGrow32_PatchLabel_Upper(); -EXTERN_C void JIT_WriteBarrier_PostGrow32_PatchLabel_CheckCardTable(); -EXTERN_C void JIT_WriteBarrier_PostGrow32_PatchLabel_UpdateCardTable(); -EXTERN_C void JIT_WriteBarrier_PostGrow32_End(); - EXTERN_C void JIT_WriteBarrier_PostGrow64(Object **dst, Object *ref); EXTERN_C void JIT_WriteBarrier_PostGrow64_Patch_Label_Lower(); EXTERN_C void JIT_WriteBarrier_PostGrow64_Patch_Label_Upper(); @@ -49,15 +37,32 @@ EXTERN_C void JIT_WriteBarrier_PostGrow64_Patch_Label_CardTable(); EXTERN_C void JIT_WriteBarrier_PostGrow64_End(); #ifdef FEATURE_SVR_GC -EXTERN_C void JIT_WriteBarrier_SVR32(Object **dst, Object *ref); -EXTERN_C void JIT_WriteBarrier_SVR32_PatchLabel_CheckCardTable(); -EXTERN_C void 
JIT_WriteBarrier_SVR32_PatchLabel_UpdateCardTable(); -EXTERN_C void JIT_WriteBarrier_SVR32_End(); - EXTERN_C void JIT_WriteBarrier_SVR64(Object **dst, Object *ref); EXTERN_C void JIT_WriteBarrier_SVR64_PatchLabel_CardTable(); EXTERN_C void JIT_WriteBarrier_SVR64_End(); -#endif +#endif // FEATURE_SVR_GC + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +EXTERN_C void JIT_WriteBarrier_WriteWatch_PreGrow64(Object **dst, Object *ref); +EXTERN_C void JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_WriteWatchTable(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_Lower(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_CardTable(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_PreGrow64_End(); + +EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64(Object **dst, Object *ref); +EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_WriteWatchTable(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_Lower(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_Upper(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_CardTable(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64_End(); + +#ifdef FEATURE_SVR_GC +EXTERN_C void JIT_WriteBarrier_WriteWatch_SVR64(Object **dst, Object *ref); +EXTERN_C void JIT_WriteBarrier_WriteWatch_SVR64_PatchLabel_WriteWatchTable(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_SVR64_PatchLabel_CardTable(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_SVR64_End(); +#endif // FEATURE_SVR_GC +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP WriteBarrierManager g_WriteBarrierManager; @@ -90,28 +95,13 @@ void WriteBarrierManager::Validate() // are places where these values are updated while the EE is running // NOTE: we can't call this from the ctor since our infrastructure isn't ready for assert dialogs - PBYTE pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow32, PatchLabel_Lower, 3); - PBYTE pCardTableImmediate = 
CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow32, PatchLabel_CardTable_Check, 2); - PBYTE pCardTableImmediate2 = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow32, PatchLabel_CardTable_Update, 2); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pLowerBoundImmediate) & 0x3) == 0); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate) & 0x3) == 0); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate2) & 0x3) == 0); + PBYTE pLowerBoundImmediate, pUpperBoundImmediate, pCardTableImmediate; pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow64, Patch_Label_Lower, 2); pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow64, Patch_Label_CardTable, 2); _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pLowerBoundImmediate) & 0x7) == 0); _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate) & 0x7) == 0); - PBYTE pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow32, PatchLabel_Upper, 3); - pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow32, PatchLabel_Lower, 3); - pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow32, PatchLabel_CheckCardTable, 2); - pCardTableImmediate2 = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow32, PatchLabel_UpdateCardTable, 2); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pUpperBoundImmediate) & 0x3) == 0); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pLowerBoundImmediate) & 0x3) == 0); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate) & 0x3) == 0); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate2) & 0x3) == 0); - - 
pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_Lower, 2); pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_Upper, 2); pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_CardTable, 2); @@ -120,14 +110,36 @@ void WriteBarrierManager::Validate() _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate) & 0x7) == 0); #ifdef FEATURE_SVR_GC - pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_SVR32, PatchLabel_CheckCardTable, 2); - pCardTableImmediate2 = CALC_PATCH_LOCATION(JIT_WriteBarrier_SVR32, PatchLabel_UpdateCardTable, 2); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate) & 0x3) == 0); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate2) & 0x3) == 0); - pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_SVR64, PatchLabel_CardTable, 2); _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate) & 0x7) == 0); -#endif +#endif // FEATURE_SVR_GC + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + PBYTE pWriteWatchTableImmediate; + + pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_WriteWatchTable, 2); + pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_Lower, 2); + pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_CardTable, 2); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pWriteWatchTableImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pLowerBoundImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate) & 0x7) 
== 0); + + pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_WriteWatchTable, 2); + pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_Lower, 2); + pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_Upper, 2); + pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_CardTable, 2); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pWriteWatchTableImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pLowerBoundImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pUpperBoundImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate) & 0x7) == 0); + +#ifdef FEATURE_SVR_GC + pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_SVR64, PatchLabel_WriteWatchTable, 2); + pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_SVR64, PatchLabel_CardTable, 2); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pWriteWatchTableImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate) & 0x7) == 0); +#endif // FEATURE_SVR_GC +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP } #endif // CODECOVERAGE @@ -139,20 +151,24 @@ PCODE WriteBarrierManager::GetCurrentWriteBarrierCode() switch (m_currentWriteBarrier) { - case WRITE_BARRIER_PREGROW32: - return GetEEFuncEntryPoint(JIT_WriteBarrier_PreGrow32); case WRITE_BARRIER_PREGROW64: return GetEEFuncEntryPoint(JIT_WriteBarrier_PreGrow64); - case WRITE_BARRIER_POSTGROW32: - return GetEEFuncEntryPoint(JIT_WriteBarrier_PostGrow32); case WRITE_BARRIER_POSTGROW64: return 
GetEEFuncEntryPoint(JIT_WriteBarrier_PostGrow64); #ifdef FEATURE_SVR_GC - case WRITE_BARRIER_SVR32: - return GetEEFuncEntryPoint(JIT_WriteBarrier_SVR32); case WRITE_BARRIER_SVR64: return GetEEFuncEntryPoint(JIT_WriteBarrier_SVR64); -#endif +#endif // FEATURE_SVR_GC +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + case WRITE_BARRIER_WRITE_WATCH_PREGROW64: + return GetEEFuncEntryPoint(JIT_WriteBarrier_WriteWatch_PreGrow64); + case WRITE_BARRIER_WRITE_WATCH_POSTGROW64: + return GetEEFuncEntryPoint(JIT_WriteBarrier_WriteWatch_PostGrow64); +#ifdef FEATURE_SVR_GC + case WRITE_BARRIER_WRITE_WATCH_SVR64: + return GetEEFuncEntryPoint(JIT_WriteBarrier_WriteWatch_SVR64); +#endif // FEATURE_SVR_GC +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP default: UNREACHABLE_MSG("unexpected m_currentWriteBarrier!"); }; @@ -167,20 +183,24 @@ size_t WriteBarrierManager::GetSpecificWriteBarrierSize(WriteBarrierType writeBa switch (writeBarrier) { - case WRITE_BARRIER_PREGROW32: - return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_PreGrow32); case WRITE_BARRIER_PREGROW64: return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_PreGrow64); - case WRITE_BARRIER_POSTGROW32: - return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_PostGrow32); case WRITE_BARRIER_POSTGROW64: return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_PostGrow64); #ifdef FEATURE_SVR_GC - case WRITE_BARRIER_SVR32: - return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_SVR32); case WRITE_BARRIER_SVR64: return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_SVR64); -#endif +#endif // FEATURE_SVR_GC +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + case WRITE_BARRIER_WRITE_WATCH_PREGROW64: + return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_WriteWatch_PreGrow64); + case WRITE_BARRIER_WRITE_WATCH_POSTGROW64: + return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_WriteWatch_PostGrow64); +#ifdef FEATURE_SVR_GC + case WRITE_BARRIER_WRITE_WATCH_SVR64: + return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_WriteWatch_SVR64); +#endif // FEATURE_SVR_GC +#endif // 
FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP case WRITE_BARRIER_BUFFER: return MARKED_FUNCTION_SIZE(JIT_WriteBarrier); default: @@ -202,38 +222,25 @@ PBYTE WriteBarrierManager::CalculatePatchLocation(LPVOID base, LPVOID label, int return ((LPBYTE)GetEEFuncEntryPoint(JIT_WriteBarrier) + ((LPBYTE)GetEEFuncEntryPoint(label) - (LPBYTE)GetEEFuncEntryPoint(base) + offset)); } -void WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier) -{ - GCX_MAYBE_COOP_NO_THREAD_BROKEN((GetThread() != NULL)); - BOOL bEESuspended = FALSE; - if(m_currentWriteBarrier != WRITE_BARRIER_UNINITIALIZED && !IsGCThread()) +void WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier, bool isRuntimeSuspended) +{ + GCX_MAYBE_COOP_NO_THREAD_BROKEN((!isRuntimeSuspended && GetThread() != NULL)); + BOOL bEESuspendedHere = FALSE; + if(!isRuntimeSuspended && m_currentWriteBarrier != WRITE_BARRIER_UNINITIALIZED) { ThreadSuspend::SuspendEE(ThreadSuspend::SUSPEND_FOR_GC_PREP); - bEESuspended = TRUE; - } - + bEESuspendedHere = TRUE; + } + _ASSERTE(m_currentWriteBarrier != newWriteBarrier); m_currentWriteBarrier = newWriteBarrier; - + // the memcpy must come before the switch statment because the asserts inside the switch // are actually looking into the JIT_WriteBarrier buffer memcpy((PVOID)JIT_WriteBarrier, (LPVOID)GetCurrentWriteBarrierCode(), GetCurrentWriteBarrierSize()); switch (newWriteBarrier) { - case WRITE_BARRIER_PREGROW32: - { - m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow32, PatchLabel_Lower, 3); - m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow32, PatchLabel_CardTable_Check, 2); - m_pCardTableImmediate2 = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow32, PatchLabel_CardTable_Update, 2); - - // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). 
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0 == *(DWORD*)m_pLowerBoundImmediate); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0 == *(DWORD*)m_pCardTableImmediate); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0 == *(DWORD*)m_pCardTableImmediate2); - break; - } - case WRITE_BARRIER_PREGROW64: { m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow64, Patch_Label_Lower, 2); @@ -244,22 +251,7 @@ void WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier) _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); break; } - - case WRITE_BARRIER_POSTGROW32: - { - m_pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow32, PatchLabel_Upper, 3); - m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow32, PatchLabel_Lower, 3); - m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow32, PatchLabel_CheckCardTable, 2); - m_pCardTableImmediate2 = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow32, PatchLabel_UpdateCardTable, 2); - // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). 
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0 == *(DWORD*)m_pUpperBoundImmediate); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0 == *(DWORD*)m_pLowerBoundImmediate); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0 == *(DWORD*)m_pCardTableImmediate); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0 == *(DWORD*)m_pCardTableImmediate2); - break; - } - case WRITE_BARRIER_POSTGROW64: { m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_Lower, 2); @@ -274,35 +266,67 @@ void WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier) } #ifdef FEATURE_SVR_GC - case WRITE_BARRIER_SVR32: + case WRITE_BARRIER_SVR64: { - m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_SVR32, PatchLabel_CheckCardTable, 2); - m_pCardTableImmediate2 = CALC_PATCH_LOCATION(JIT_WriteBarrier_SVR32, PatchLabel_UpdateCardTable, 2); + m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_SVR64, PatchLabel_CardTable, 2); // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). 
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0 == *(DWORD*)m_pCardTableImmediate); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0 == *(DWORD*)m_pCardTableImmediate2); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); + break; + } +#endif // FEATURE_SVR_GC + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + case WRITE_BARRIER_WRITE_WATCH_PREGROW64: + { + m_pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_WriteWatchTable, 2); + m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_Lower, 2); + m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_CardTable, 2); + + // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pWriteWatchTableImmediate); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); break; } - case WRITE_BARRIER_SVR64: + case WRITE_BARRIER_WRITE_WATCH_POSTGROW64: { - m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_SVR64, PatchLabel_CardTable, 2); + m_pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_WriteWatchTable, 2); + m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_Lower, 2); + m_pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_Upper, 2); + m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_CardTable, 2); // Make sure that we will be bashing the right 
places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pWriteWatchTableImmediate); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate); _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); - break; + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pUpperBoundImmediate); + break; } -#endif + +#ifdef FEATURE_SVR_GC + case WRITE_BARRIER_WRITE_WATCH_SVR64: + { + m_pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_SVR64, PatchLabel_WriteWatchTable, 2); + m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_SVR64, PatchLabel_CardTable, 2); + + // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pWriteWatchTableImmediate); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); + break; + } +#endif // FEATURE_SVR_GC +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP default: UNREACHABLE_MSG("unexpected write barrier type!"); } - UpdateEphemeralBounds(); - UpdateCardTableLocation(FALSE); + UpdateEphemeralBounds(true); + UpdateWriteWatchAndCardTableLocations(true, false); - if(bEESuspended) + if(bEESuspendedHere) { ThreadSuspend::RestartEE(FALSE, TRUE); } @@ -325,21 +349,25 @@ void WriteBarrierManager::Initialize() // write barrier implementations. 
size_t cbWriteBarrierBuffer = GetSpecificWriteBarrierSize(WRITE_BARRIER_BUFFER); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_PREGROW32)); _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_PREGROW64)); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_POSTGROW32)); _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_POSTGROW64)); #ifdef FEATURE_SVR_GC - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_SVR32)); _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_SVR64)); -#endif +#endif // FEATURE_SVR_GC +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_PREGROW64)); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_POSTGROW64)); +#ifdef FEATURE_SVR_GC + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_SVR64)); +#endif // FEATURE_SVR_GC +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP #if !defined(CODECOVERAGE) Validate(); #endif } -bool WriteBarrierManager::NeedDifferentWriteBarrier(BOOL bReqUpperBoundsCheck, WriteBarrierType* pNewWriteBarrierType) +bool WriteBarrierManager::NeedDifferentWriteBarrier(bool bReqUpperBoundsCheck, WriteBarrierType* pNewWriteBarrierType) { // Init code for the JIT_WriteBarrier assembly routine. 
Since it will be bashed everytime the GC Heap // changes size, we want to do most of the work just once. @@ -362,22 +390,9 @@ bool WriteBarrierManager::NeedDifferentWriteBarrier(BOOL bReqUpperBoundsCheck, W } #endif - writeBarrierType = GCHeap::IsServerHeap() ? WRITE_BARRIER_SVR32 : WRITE_BARRIER_PREGROW32; + writeBarrierType = GCHeap::IsServerHeap() ? WRITE_BARRIER_SVR64 : WRITE_BARRIER_PREGROW64; continue; - case WRITE_BARRIER_PREGROW32: - if (bReqUpperBoundsCheck) - { - writeBarrierType = WRITE_BARRIER_POSTGROW32; - continue; - } - - if (!FitsInI4((size_t)g_card_table) || !FitsInI4((size_t)g_ephemeral_low)) - { - writeBarrierType = WRITE_BARRIER_PREGROW64; - } - break; - case WRITE_BARRIER_PREGROW64: if (bReqUpperBoundsCheck) { @@ -385,27 +400,30 @@ bool WriteBarrierManager::NeedDifferentWriteBarrier(BOOL bReqUpperBoundsCheck, W } break; - case WRITE_BARRIER_POSTGROW32: - if (!FitsInI4((size_t)g_card_table) || !FitsInI4((size_t)g_ephemeral_low) || !FitsInI4((size_t)g_ephemeral_high)) - { - writeBarrierType = WRITE_BARRIER_POSTGROW64; - } - break; - case WRITE_BARRIER_POSTGROW64: break; #ifdef FEATURE_SVR_GC - case WRITE_BARRIER_SVR32: - if (!FitsInI4((size_t)g_card_table)) + case WRITE_BARRIER_SVR64: + break; +#endif // FEATURE_SVR_GC + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + case WRITE_BARRIER_WRITE_WATCH_PREGROW64: + if (bReqUpperBoundsCheck) { - writeBarrierType = WRITE_BARRIER_SVR64; + writeBarrierType = WRITE_BARRIER_WRITE_WATCH_POSTGROW64; } break; - case WRITE_BARRIER_SVR64: + case WRITE_BARRIER_WRITE_WATCH_POSTGROW64: break; -#endif + +#ifdef FEATURE_SVR_GC + case WRITE_BARRIER_WRITE_WATCH_SVR64: + break; +#endif // FEATURE_SVR_GC +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP default: UNREACHABLE_MSG("unexpected write barrier type!"); @@ -417,14 +435,14 @@ bool WriteBarrierManager::NeedDifferentWriteBarrier(BOOL bReqUpperBoundsCheck, W return m_currentWriteBarrier != writeBarrierType; } -void 
WriteBarrierManager::UpdateEphemeralBounds() +void WriteBarrierManager::UpdateEphemeralBounds(bool isRuntimeSuspended) { bool needToFlushCache = false; WriteBarrierType newType; - if (NeedDifferentWriteBarrier(FALSE, &newType)) + if (NeedDifferentWriteBarrier(false, &newType)) { - ChangeWriteBarrierTo(newType); + ChangeWriteBarrierTo(newType, isRuntimeSuspended); return; } @@ -436,31 +454,10 @@ void WriteBarrierManager::UpdateEphemeralBounds() switch (m_currentWriteBarrier) { - - case WRITE_BARRIER_POSTGROW32: - { - // Change immediate if different from new g_ephermeral_high. - if (*(INT32*)m_pUpperBoundImmediate != (INT32)(size_t)g_ephemeral_high) - { - *(INT32*)m_pUpperBoundImmediate = (INT32)(size_t)g_ephemeral_high; - needToFlushCache = true; - } - } - // - // INTENTIONAL FALL-THROUGH! - // - case WRITE_BARRIER_PREGROW32: - { - // Change immediate if different from new g_ephermeral_low. - if (*(INT32*)m_pLowerBoundImmediate != (INT32)(size_t)g_ephemeral_low) - { - *(INT32*)m_pLowerBoundImmediate = (INT32)(size_t)g_ephemeral_low; - needToFlushCache = true; - } - break; - } - case WRITE_BARRIER_POSTGROW64: +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + case WRITE_BARRIER_WRITE_WATCH_POSTGROW64: +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP { // Change immediate if different from new g_ephermeral_high. if (*(UINT64*)m_pUpperBoundImmediate != (size_t)g_ephemeral_high) @@ -473,6 +470,9 @@ void WriteBarrierManager::UpdateEphemeralBounds() // INTENTIONAL FALL-THROUGH! // case WRITE_BARRIER_PREGROW64: +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + case WRITE_BARRIER_WRITE_WATCH_PREGROW64: +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP { // Change immediate if different from new g_ephermeral_low. 
if (*(UINT64*)m_pLowerBoundImmediate != (size_t)g_ephemeral_low) @@ -484,12 +484,14 @@ void WriteBarrierManager::UpdateEphemeralBounds() } #ifdef FEATURE_SVR_GC - case WRITE_BARRIER_SVR32: case WRITE_BARRIER_SVR64: +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + case WRITE_BARRIER_WRITE_WATCH_SVR64: +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP { break; } -#endif +#endif // FEATURE_SVR_GC default: UNREACHABLE_MSG("unexpected m_currentWriteBarrier in UpdateEphemeralBounds"); @@ -501,7 +503,7 @@ void WriteBarrierManager::UpdateEphemeralBounds() } } -void WriteBarrierManager::UpdateCardTableLocation(BOOL bReqUpperBoundsCheck) +void WriteBarrierManager::UpdateWriteWatchAndCardTableLocations(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) { // If we are told that we require an upper bounds check (GC did some heap // reshuffling), we need to switch to the WriteBarrier_PostGrow function for @@ -510,7 +512,7 @@ void WriteBarrierManager::UpdateCardTableLocation(BOOL bReqUpperBoundsCheck) WriteBarrierType newType; if (NeedDifferentWriteBarrier(bReqUpperBoundsCheck, &newType)) { - ChangeWriteBarrierTo(newType); + ChangeWriteBarrierTo(newType, isRuntimeSuspended); return; } @@ -522,24 +524,30 @@ void WriteBarrierManager::UpdateCardTableLocation(BOOL bReqUpperBoundsCheck) bool fFlushCache = false; - if (m_currentWriteBarrier == WRITE_BARRIER_PREGROW32 || - m_currentWriteBarrier == WRITE_BARRIER_POSTGROW32 || - m_currentWriteBarrier == WRITE_BARRIER_SVR32) +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + switch (m_currentWriteBarrier) { - if (*(INT32*)m_pCardTableImmediate != (INT32)(size_t)g_card_table) - { - *(INT32*)m_pCardTableImmediate = (INT32)(size_t)g_card_table; - *(INT32*)m_pCardTableImmediate2 = (INT32)(size_t)g_card_table; - fFlushCache = true; - } + case WRITE_BARRIER_WRITE_WATCH_PREGROW64: + case WRITE_BARRIER_WRITE_WATCH_POSTGROW64: +#ifdef FEATURE_SVR_GC + case WRITE_BARRIER_WRITE_WATCH_SVR64: +#endif // FEATURE_SVR_GC + if 
(*(UINT64*)m_pWriteWatchTableImmediate != (size_t)SoftwareWriteWatch::GetTable()) + { + *(UINT64*)m_pWriteWatchTableImmediate = (size_t)SoftwareWriteWatch::GetTable(); + fFlushCache = true; + } + break; + + default: + break; // clang seems to require all enum values to be covered for some reason } - else +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + + if (*(UINT64*)m_pCardTableImmediate != (size_t)g_card_table) { - if (*(UINT64*)m_pCardTableImmediate != (size_t)g_card_table) - { - *(UINT64*)m_pCardTableImmediate = (size_t)g_card_table; - fFlushCache = true; - } + *(UINT64*)m_pCardTableImmediate = (size_t)g_card_table; + fFlushCache = true; } if (fFlushCache) @@ -548,23 +556,100 @@ void WriteBarrierManager::UpdateCardTableLocation(BOOL bReqUpperBoundsCheck) } } +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +void WriteBarrierManager::SwitchToWriteWatchBarrier(bool isRuntimeSuspended) +{ + WriteBarrierType newWriteBarrierType; + switch (m_currentWriteBarrier) + { + case WRITE_BARRIER_UNINITIALIZED: + // Using the debug-only write barrier + return; + + case WRITE_BARRIER_PREGROW64: + newWriteBarrierType = WRITE_BARRIER_WRITE_WATCH_PREGROW64; + break; + + case WRITE_BARRIER_POSTGROW64: + newWriteBarrierType = WRITE_BARRIER_WRITE_WATCH_POSTGROW64; + break; + +#ifdef FEATURE_SVR_GC + case WRITE_BARRIER_SVR64: + newWriteBarrierType = WRITE_BARRIER_WRITE_WATCH_SVR64; + break; +#endif // FEATURE_SVR_GC + + default: + UNREACHABLE(); + } + + ChangeWriteBarrierTo(newWriteBarrierType, isRuntimeSuspended); +} + +void WriteBarrierManager::SwitchToNonWriteWatchBarrier(bool isRuntimeSuspended) +{ + WriteBarrierType newWriteBarrierType; + switch (m_currentWriteBarrier) + { + case WRITE_BARRIER_UNINITIALIZED: + // Using the debug-only write barrier + return; + + case WRITE_BARRIER_WRITE_WATCH_PREGROW64: + newWriteBarrierType = WRITE_BARRIER_PREGROW64; + break; + + case WRITE_BARRIER_WRITE_WATCH_POSTGROW64: + newWriteBarrierType = WRITE_BARRIER_POSTGROW64; + break; + 
+#ifdef FEATURE_SVR_GC + case WRITE_BARRIER_WRITE_WATCH_SVR64: + newWriteBarrierType = WRITE_BARRIER_SVR64; + break; +#endif // FEATURE_SVR_GC + + default: + UNREACHABLE(); + } + + ChangeWriteBarrierTo(newWriteBarrierType, isRuntimeSuspended); +} +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP // This function bashes the super fast amd64 version of the JIT_WriteBarrier // helper. It should be called by the GC whenever the ephermeral region // bounds get changed, but still remain on the top of the GC Heap. -void StompWriteBarrierEphemeral() +void StompWriteBarrierEphemeral(bool isRuntimeSuspended) { WRAPPER_NO_CONTRACT; - g_WriteBarrierManager.UpdateEphemeralBounds(); + g_WriteBarrierManager.UpdateEphemeralBounds(isRuntimeSuspended); } // This function bashes the super fast amd64 versions of the JIT_WriteBarrier // helpers. It should be called by the GC whenever the ephermeral region gets moved // from being at the top of the GC Heap, and/or when the cards table gets moved. -void StompWriteBarrierResize(BOOL bReqUpperBoundsCheck) +void StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) +{ + WRAPPER_NO_CONTRACT; + + g_WriteBarrierManager.UpdateWriteWatchAndCardTableLocations(isRuntimeSuspended, bReqUpperBoundsCheck); +} + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +void SwitchToWriteWatchBarrier(bool isRuntimeSuspended) +{ + WRAPPER_NO_CONTRACT; + + g_WriteBarrierManager.SwitchToWriteWatchBarrier(isRuntimeSuspended); +} + +void SwitchToNonWriteWatchBarrier(bool isRuntimeSuspended) { WRAPPER_NO_CONTRACT; - g_WriteBarrierManager.UpdateCardTableLocation(bReqUpperBoundsCheck); + g_WriteBarrierManager.SwitchToNonWriteWatchBarrier(isRuntimeSuspended); } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP diff --git a/src/vm/arm/stubs.cpp b/src/vm/arm/stubs.cpp index cd58f294c2..2f1377fe8a 100644 --- a/src/vm/arm/stubs.cpp +++ b/src/vm/arm/stubs.cpp @@ -378,7 +378,7 @@ void ValidateWriteBarriers() // Update the instructions in 
our various write barrier implementations that refer directly to the values // of GC globals such as g_lowest_address and g_card_table. We don't particularly care which values have // changed on each of these callbacks, it's pretty cheap to refresh them all. -void UpdateGCWriteBarriers(BOOL postGrow = false) +void UpdateGCWriteBarriers(bool postGrow = false) { // Define a helper macro that abstracts the minutia of patching the instructions to access the value of a // particular GC global. @@ -455,7 +455,7 @@ void UpdateGCWriteBarriers(BOOL postGrow = false) FlushInstructionCache(GetCurrentProcess(), pbAlteredRange, cbAlteredRange); } -void StompWriteBarrierResize(BOOL bReqUpperBoundsCheck) +void StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) { // The runtime is not always suspended when this is called (unlike StompWriteBarrierEphemeral) but we have // no way to update the barrier code atomically on ARM since each 32-bit value we change is loaded over @@ -469,7 +469,7 @@ void StompWriteBarrierResize(BOOL bReqUpperBoundsCheck) GCStressPolicy::InhibitHolder iholder; bool fSuspended = false; - if (!g_fEEInit && !GCHeap::IsGCInProgress()) + if (!isRuntimeSuspended) { ThreadSuspend::SuspendEE(ThreadSuspend::SUSPEND_OTHER); fSuspended = true; @@ -481,9 +481,10 @@ void StompWriteBarrierResize(BOOL bReqUpperBoundsCheck) ThreadSuspend::RestartEE(FALSE, TRUE); } -void StompWriteBarrierEphemeral(void) +void StompWriteBarrierEphemeral(bool isRuntimeSuspended) { - _ASSERTE(GCHeap::IsGCInProgress() || g_fEEInit); + UNREFERENCED_PARAMETER(isRuntimeSuspended); + _ASSERTE(isRuntimeSuspended); UpdateGCWriteBarriers(); } #endif // CROSSGEN_COMPILE diff --git a/src/vm/arm64/stubs.cpp b/src/vm/arm64/stubs.cpp index 943adb9765..bd71784ad3 100644 --- a/src/vm/arm64/stubs.cpp +++ b/src/vm/arm64/stubs.cpp @@ -1180,13 +1180,13 @@ extern "C" void getFPReturn(int fpSize, INT64 *retval) _ASSERTE(!"ARM64:NYI"); } -void StompWriteBarrierEphemeral() +void 
StompWriteBarrierEphemeral(bool isRuntimeSuspended) { //ARM64TODO: implement this return; } -void StompWriteBarrierResize(BOOL bReqUpperBoundsCheck) +void StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) { //ARM64TODO: implement this return; diff --git a/src/vm/gcenv.h b/src/vm/gcenv.h index 26c2099e75..08dcc711ae 100644 --- a/src/vm/gcenv.h +++ b/src/vm/gcenv.h @@ -53,6 +53,8 @@ #include "gcenv.interlocked.h" #include "gcenv.interlocked.inl" +#include "../gc/softwarewritewatch.h" + namespace ETW { typedef enum _GC_ROOT_KIND { diff --git a/src/vm/gchelpers.cpp b/src/vm/gchelpers.cpp index a894797627..bf81847716 100644 --- a/src/vm/gchelpers.cpp +++ b/src/vm/gchelpers.cpp @@ -35,6 +35,7 @@ #endif // FEATURE_COMINTEROP #include "rcwwalker.h" +#include "../gc/softwarewritewatch.h" //======================================================================== // @@ -1182,6 +1183,13 @@ extern "C" HCIMPL2_RAW(VOID, JIT_CheckedWriteBarrier, Object **dst, Object *ref) updateGCShadow(dst, ref); // support debugging write barrier #endif +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + if (SoftwareWriteWatch::IsEnabledForGCHeap()) + { + SoftwareWriteWatch::SetDirty(dst, sizeof(*dst)); + } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + #ifdef FEATURE_COUNT_GC_WRITE_BARRIERS if((BYTE*) dst >= g_ephemeral_low && (BYTE*) dst < g_ephemeral_high) { @@ -1232,6 +1240,13 @@ extern "C" HCIMPL2_RAW(VOID, JIT_WriteBarrier, Object **dst, Object *ref) updateGCShadow(dst, ref); // support debugging write barrier #endif +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + if (SoftwareWriteWatch::IsEnabledForGCHeap()) + { + SoftwareWriteWatch::SetDirty(dst, sizeof(*dst)); + } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + #ifdef FEATURE_COUNT_GC_WRITE_BARRIERS if((BYTE*) dst >= g_ephemeral_low && (BYTE*) dst < g_ephemeral_high) { @@ -1292,7 +1307,14 @@ void ErectWriteBarrier(OBJECTREF *dst, OBJECTREF ref) #ifdef WRITE_BARRIER_CHECK 
updateGCShadow((Object**) dst, OBJECTREFToObject(ref)); // support debugging write barrier #endif - + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + if (SoftwareWriteWatch::IsEnabledForGCHeap()) + { + SoftwareWriteWatch::SetDirty(dst, sizeof(*dst)); + } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + if((BYTE*) OBJECTREFToObject(ref) >= g_ephemeral_low && (BYTE*) OBJECTREFToObject(ref) < g_ephemeral_high) { // VolatileLoadWithoutBarrier() is used here to prevent fetch of g_card_table from being reordered @@ -1319,6 +1341,13 @@ void ErectWriteBarrierForMT(MethodTable **dst, MethodTable *ref) if (ref->Collectible()) { +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + if (SoftwareWriteWatch::IsEnabledForGCHeap()) + { + SoftwareWriteWatch::SetDirty(dst, sizeof(*dst)); + } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + BYTE *refObject = *(BYTE **)((MethodTable*)ref)->GetLoaderAllocatorObjectHandle(); if((BYTE*) refObject >= g_ephemeral_low && (BYTE*) refObject < g_ephemeral_high) { diff --git a/src/vm/gchelpers.h b/src/vm/gchelpers.h index a461f933f9..f5590beebe 100644 --- a/src/vm/gchelpers.h +++ b/src/vm/gchelpers.h @@ -107,8 +107,8 @@ OBJECTREF AllocateObject(MethodTable *pMT #endif ); -extern void StompWriteBarrierEphemeral(); -extern void StompWriteBarrierResize(BOOL bReqUpperBoundsCheck); +extern void StompWriteBarrierEphemeral(bool isRuntimeSuspended); +extern void StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck); extern void ThrowOutOfMemoryDimensionsExceeded(); diff --git a/src/vm/i386/jitinterfacex86.cpp b/src/vm/i386/jitinterfacex86.cpp index 4834e63db9..ff0ea93a3e 100644 --- a/src/vm/i386/jitinterfacex86.cpp +++ b/src/vm/i386/jitinterfacex86.cpp @@ -1727,7 +1727,7 @@ void ValidateWriteBarrierHelpers() // When a GC happens, the upper and lower bounds of the ephemeral // generation change. This routine updates the WriteBarrier thunks // with the new values. 
-void StompWriteBarrierEphemeral() +void StompWriteBarrierEphemeral(bool /* isRuntimeSuspended */) { CONTRACTL { NOTHROW; @@ -1785,7 +1785,7 @@ void StompWriteBarrierEphemeral() // to the PostGrow thunk that checks both upper and lower bounds. // regardless we need to update the thunk with the // card_table - lowest_address. -void StompWriteBarrierResize(BOOL bReqUpperBoundsCheck) +void StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) { CONTRACTL { NOTHROW; @@ -1801,7 +1801,7 @@ void StompWriteBarrierResize(BOOL bReqUpperBoundsCheck) bool bWriteBarrierIsPreGrow = WriteBarrierIsPreGrow(); bool bStompWriteBarrierEphemeral = false; - BOOL bEESuspended = FALSE; + BOOL bEESuspendedHere = FALSE; for (int iBarrier = 0; iBarrier < NUM_WRITE_BARRIERS; iBarrier++) { @@ -1817,9 +1817,9 @@ void StompWriteBarrierResize(BOOL bReqUpperBoundsCheck) if (bReqUpperBoundsCheck) { GCX_MAYBE_COOP_NO_THREAD_BROKEN((GetThread()!=NULL)); - if( !IsGCThread() && !bEESuspended) { + if( !isRuntimeSuspended && !bEESuspendedHere) { ThreadSuspend::SuspendEE(ThreadSuspend::SUSPEND_FOR_GC_PREP); - bEESuspended = TRUE; + bEESuspendedHere = TRUE; } pfunc = (size_t *) JIT_WriteBarrierReg_PostGrow; @@ -1906,12 +1906,17 @@ void StompWriteBarrierResize(BOOL bReqUpperBoundsCheck) } if (bStompWriteBarrierEphemeral) - StompWriteBarrierEphemeral(); + { + _ASSERTE(isRuntimeSuspended || bEESuspendedHere); + StompWriteBarrierEphemeral(true); + } else - FlushInstructionCache(GetCurrentProcess(), (void *)JIT_PatchedWriteBarrierStart, + { + FlushInstructionCache(GetCurrentProcess(), (void *)JIT_PatchedWriteBarrierStart, (BYTE*)JIT_PatchedWriteBarrierLast - (BYTE*)JIT_PatchedWriteBarrierStart); + } - if(bEESuspended) + if(bEESuspendedHere) ThreadSuspend::RestartEE(FALSE, TRUE); } diff --git a/src/vm/jitinterface.h b/src/vm/jitinterface.h index 00edc6e212..6780fe0911 100644 --- a/src/vm/jitinterface.h +++ b/src/vm/jitinterface.h @@ -265,39 +265,50 @@ class WriteBarrierManager public: enum 
WriteBarrierType { - WRITE_BARRIER_UNINITIALIZED = 0, - WRITE_BARRIER_PREGROW32 = 1, - WRITE_BARRIER_PREGROW64 = 2, - WRITE_BARRIER_POSTGROW32 = 3, - WRITE_BARRIER_POSTGROW64 = 4, - WRITE_BARRIER_SVR32 = 5, - WRITE_BARRIER_SVR64 = 6, - WRITE_BARRIER_BUFFER = 7, + WRITE_BARRIER_UNINITIALIZED, + WRITE_BARRIER_PREGROW64, + WRITE_BARRIER_POSTGROW64, +#ifdef FEATURE_SVR_GC + WRITE_BARRIER_SVR64, +#endif // FEATURE_SVR_GC +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + WRITE_BARRIER_WRITE_WATCH_PREGROW64, + WRITE_BARRIER_WRITE_WATCH_POSTGROW64, +#ifdef FEATURE_SVR_GC + WRITE_BARRIER_WRITE_WATCH_SVR64, +#endif // FEATURE_SVR_GC +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + WRITE_BARRIER_BUFFER }; WriteBarrierManager(); void Initialize(); - void UpdateEphemeralBounds(); - void UpdateCardTableLocation(BOOL bReqUpperBoundsCheck); + void UpdateEphemeralBounds(bool isRuntimeSuspended); + void UpdateWriteWatchAndCardTableLocations(bool isRuntimeSuspended, bool bReqUpperBoundsCheck); + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + void SwitchToWriteWatchBarrier(bool isRuntimeSuspended); + void SwitchToNonWriteWatchBarrier(bool isRuntimeSuspended); +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP protected: size_t GetCurrentWriteBarrierSize(); size_t GetSpecificWriteBarrierSize(WriteBarrierType writeBarrier); PBYTE CalculatePatchLocation(LPVOID base, LPVOID label, int offset); PCODE GetCurrentWriteBarrierCode(); - void ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier); - bool NeedDifferentWriteBarrier(BOOL bReqUpperBoundsCheck, WriteBarrierType* pNewWriteBarrierType); + void ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier, bool isRuntimeSuspended); + bool NeedDifferentWriteBarrier(bool bReqUpperBoundsCheck, WriteBarrierType* pNewWriteBarrierType); private: void Validate(); WriteBarrierType m_currentWriteBarrier; - PBYTE m_pLowerBoundImmediate; // PREGROW32 | PREGROW64 | POSTGROW32 | POSTGROW64 | | - PBYTE m_pCardTableImmediate; // 
PREGROW32 | PREGROW64 | POSTGROW32 | POSTGROW64 | SVR32 | - PBYTE m_pUpperBoundImmediate; // | | POSTGROW32 | POSTGROW64 | | - PBYTE m_pCardTableImmediate2; // PREGROW32 | | POSTGROW32 | | SVR32 | + PBYTE m_pWriteWatchTableImmediate; // PREGROW | POSTGROW | SVR | WRITE_WATCH | + PBYTE m_pLowerBoundImmediate; // PREGROW | POSTGROW | | WRITE_WATCH | + PBYTE m_pCardTableImmediate; // PREGROW | POSTGROW | SVR | WRITE_WATCH | + PBYTE m_pUpperBoundImmediate; // | POSTGROW | | WRITE_WATCH | }; #endif // _TARGET_AMD64_ diff --git a/tests/runtest.sh b/tests/runtest.sh index f853eae31d..8cf2603cee 100755 --- a/tests/runtest.sh +++ b/tests/runtest.sh @@ -340,11 +340,16 @@ function create_core_overlay { fi mkdir "$coreOverlayDir" - (cd $coreFxBinDir && find . -iname '*.dll' \! -iwholename '*netstandard13aot*' \! -iwholename '*netstandard15aot*' \! -iwholename '*netcore50aot*' \! -iwholename '*test*' \! -iwholename '*/ToolRuntime/*' \! -iwholename '*RemoteExecutorConsoleApp*' -exec cp -f '{}' "$coreOverlayDir/" \;) + (cd $coreFxBinDir && find . -iname '*.dll' \! -iwholename '*test*' \! -iwholename '*/ToolRuntime/*' \! -iwholename '*/RemoteExecutorConsoleApp/*' \! -iwholename '*/net*' \! -iwholename '*aot*' -exec cp -f '{}' "$coreOverlayDir/" \;) cp -f "$coreFxNativeBinDir/Native/"*."$libExtension" "$coreOverlayDir/" 2>/dev/null cp -f "$coreClrBinDir/"* "$coreOverlayDir/" 2>/dev/null + cp -f "$mscorlibDir/mscorlib.dll" "$coreOverlayDir/" cp -n "$testDependenciesDir"/* "$coreOverlayDir/" 2>/dev/null + if [ -f "$coreOverlayDir/mscorlib.ni.dll" ]; then + # Test dependencies come from a Windows build, and mscorlib.ni.dll would be the one from Windows + rm -f "$coreOverlayDir/mscorlib.ni.dll" + fi } function precompile_overlay_assemblies { @@ -407,7 +412,7 @@ declare -a failingTests ((runFailingTestsOnly = 0)) function load_unsupported_tests { - # Load the list of tests that fail and on this platform. These tests are disabled (skipped), pending investigation. 
+ # Load the list of tests that are not supported on this platform. These tests are disabled (skipped) permanently. # 'readarray' is not used here, as it includes the trailing linefeed in lines placed in the array. while IFS='' read -r line || [ -n "$line" ]; do unsupportedTests[${#unsupportedTests[@]}]=$line @@ -415,7 +420,7 @@ function load_unsupported_tests { } function load_failing_tests { - # Load the list of tests that fail and on this platform. These tests are disabled (skipped), pending investigation. + # Load the list of tests that fail on this platform. These tests are disabled (skipped) temporarily, pending investigation. # 'readarray' is not used here, as it includes the trailing linefeed in lines placed in the array. while IFS='' read -r line || [ -n "$line" ]; do failingTests[${#failingTests[@]}]=$line @@ -733,8 +738,8 @@ do esac done -if (( disableEventLogging == 0)); then - export COMPlus_EnableEventLog=1 +if ((disableEventLogging == 0)); then + export COMPlus_EnableEventLog=1 fi export CORECLR_SERVER_GC="$serverGC" @@ -749,10 +754,11 @@ if [ ! -d "$testRootDir" ]; then exit $EXIT_CODE_EXCEPTION fi - # Copy native interop test libraries over to the mscorlib path in # order for interop tests to run on linux. -cp $mscorlibDir/bin/* $mscorlibDir +if [ -d $mscorlibDir/bin ]; then + cp $mscorlibDir/bin/* $mscorlibDir +fi # If this is a coverage run, make sure the appropriate args have been passed if [ "$CoreClrCoverage" == "ON" ] |