summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Koundinya Veluri <kouvel@users.noreply.github.com> 2016-04-12 21:19:01 -0700
committer Koundinya Veluri <kouvel@users.noreply.github.com> 2016-04-12 21:19:01 -0700
commit afdce3a592e5f6f2047bed057d121225be91743d (patch)
tree 7ab5637597f3daae58dbf1929abd363cbfe0b619
parent 5ef243f597b8198efce1add587f16aae59b1f568 (diff)
parent c235ae17cd3a87f8032948bdcb838641d8e6c055 (diff)
download coreclr-afdce3a592e5f6f2047bed057d121225be91743d.tar.gz
coreclr-afdce3a592e5f6f2047bed057d121225be91743d.tar.bz2
coreclr-afdce3a592e5f6f2047bed057d121225be91743d.zip
Merge pull request #4074 from kouvel/SoftwareWriteWatch
Implement software write watch and make concurrent GC functional outs…
-rw-r--r-- CMakeLists.txt 3
-rw-r--r-- src/gc/env/gcenv.base.h 6
-rw-r--r-- src/gc/gc.cpp 346
-rw-r--r-- src/gc/gcpriv.h 8
-rw-r--r-- src/gc/sample/CMakeLists.txt 1
-rw-r--r-- src/gc/sample/gcenv.ee.cpp 12
-rw-r--r-- src/gc/sample/gcenv.h 6
-rw-r--r-- src/gc/softwarewritewatch.cpp 243
-rw-r--r-- src/gc/softwarewritewatch.h 339
-rw-r--r-- src/inc/stdmacros.h 11
-rw-r--r-- src/vm/CMakeLists.txt 1
-rw-r--r-- src/vm/amd64/JitHelpers_Fast.asm 84
-rw-r--r-- src/vm/amd64/JitHelpers_FastWriteBarriers.asm 228
-rw-r--r-- src/vm/amd64/JitHelpers_Slow.asm 18
-rw-r--r-- src/vm/amd64/jithelpers_fast.S 160
-rw-r--r-- src/vm/amd64/jithelpers_fastwritebarriers.S 247
-rw-r--r-- src/vm/amd64/jithelpers_slow.S 15
-rw-r--r-- src/vm/amd64/jitinterfaceamd64.cpp 455
-rw-r--r-- src/vm/arm/stubs.cpp 11
-rw-r--r-- src/vm/arm64/stubs.cpp 4
-rw-r--r-- src/vm/gcenv.h 2
-rw-r--r-- src/vm/gchelpers.cpp 31
-rw-r--r-- src/vm/gchelpers.h 4
-rw-r--r-- src/vm/i386/jitinterfacex86.cpp 21
-rw-r--r-- src/vm/jitinterface.h 43
-rwxr-xr-x tests/runtest.sh 20
26 files changed, 1814 insertions, 505 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c8b64c21a6..4317736408 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -857,6 +857,9 @@ if(CLR_CMAKE_PLATFORM_UNIX_TARGET_AMD64)
add_definitions(-DFEATURE_UNIX_AMD64_STRUCT_PASSING_ITF)
endif (CLR_CMAKE_PLATFORM_UNIX_TARGET_AMD64)
add_definitions(-DFEATURE_USE_ASM_GC_WRITE_BARRIERS)
+if(CLR_CMAKE_PLATFORM_ARCH_AMD64 AND NOT WIN32)
+ add_definitions(-DFEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP)
+endif(CLR_CMAKE_PLATFORM_ARCH_AMD64 AND NOT WIN32)
add_definitions(-DFEATURE_VERSIONING)
if(WIN32)
add_definitions(-DFEATURE_VERSIONING_LOG)
diff --git a/src/gc/env/gcenv.base.h b/src/gc/env/gcenv.base.h
index f3317fc79e..96d6917696 100644
--- a/src/gc/env/gcenv.base.h
+++ b/src/gc/env/gcenv.base.h
@@ -244,6 +244,8 @@ typedef uintptr_t TADDR;
extern type var
#define GVAL_IMPL(type, var) \
type var
+#define GVAL_IMPL_INIT(type, var, init) \
+ type var = init
#define GPTR_DECL(type, var) \
extern type* var
@@ -543,8 +545,8 @@ void LogSpewAlways(const char *fmt, ...);
// -----------------------------------------------------------------------------------------------------------
-void StompWriteBarrierEphemeral();
-void StompWriteBarrierResize(bool bReqUpperBoundsCheck);
+void StompWriteBarrierEphemeral(bool isRuntimeSuspended);
+void StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck);
class CLRConfig
{
diff --git a/src/gc/gc.cpp b/src/gc/gc.cpp
index 3147a58072..6c3141d682 100644
--- a/src/gc/gc.cpp
+++ b/src/gc/gc.cpp
@@ -78,6 +78,10 @@ BOOL bgc_heap_walk_for_etw_p = FALSE;
int compact_ratio = 0;
#endif //GC_CONFIG_DRIVEN
+#if defined(FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP) && defined(NO_WRITE_BARRIER)
+#error Software write watch requires write barriers.
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP && NO_WRITE_BARRIER
+
// See comments in reset_memory.
BOOL reset_mm_p = TRUE;
@@ -597,7 +601,8 @@ enum gc_join_stage
gc_join_after_commit_soh_no_gc = 35,
gc_join_expand_loh_no_gc = 36,
gc_join_final_no_gc = 37,
- gc_join_max = 38
+ gc_join_disable_software_write_watch = 38,
+ gc_join_max = 39
};
enum gc_join_flavor
@@ -1435,19 +1440,21 @@ void reset_memory (uint8_t* o, size_t sizeo);
#ifdef WRITE_WATCH
-static bool virtual_alloc_write_watch = false;
+#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+static bool virtual_alloc_hardware_write_watch = false;
+#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
-static bool write_watch_capability = false;
+static bool hardware_write_watch_capability = false;
#ifndef DACCESS_COMPILE
//check if the write watch APIs are supported.
-void write_watch_api_supported()
+void hardware_write_watch_api_supported()
{
if (GCToOSInterface::SupportsWriteWatch())
{
- write_watch_capability = true;
+ hardware_write_watch_capability = true;
dprintf (2, ("WriteWatch supported"));
}
else
@@ -1458,9 +1465,23 @@ void write_watch_api_supported()
#endif //!DACCESS_COMPILE
-inline bool can_use_write_watch()
+inline bool can_use_hardware_write_watch()
+{
+ return hardware_write_watch_capability;
+}
+
+inline bool can_use_write_watch_for_gc_heap()
+{
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ return true;
+#else // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ return can_use_hardware_write_watch();
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+}
+
+inline bool can_use_write_watch_for_card_table()
{
- return write_watch_capability;
+ return can_use_hardware_write_watch();
}
#else
@@ -4255,7 +4276,13 @@ void* virtual_alloc (size_t size)
}
}
- uint32_t flags = virtual_alloc_write_watch ? VirtualReserveFlags::WriteWatch : VirtualReserveFlags::None;
+ uint32_t flags = VirtualReserveFlags::None;
+#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ if (virtual_alloc_hardware_write_watch)
+ {
+ flags = VirtualReserveFlags::WriteWatch;
+ }
+#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
void* prgmem = GCToOSInterface::VirtualReserve (0, requested_size, card_size * card_word_width, flags);
void *aligned_mem = prgmem;
@@ -6490,7 +6517,7 @@ uint32_t* translate_card_bundle_table (uint32_t* cb)
void gc_heap::enable_card_bundles ()
{
- if (can_use_write_watch() && (!card_bundles_enabled()))
+ if (can_use_write_watch_for_card_table() && (!card_bundles_enabled()))
{
dprintf (3, ("Enabling card bundles"));
//set all of the card bundles
@@ -6826,13 +6853,21 @@ void release_card_table (uint32_t* c_table)
destroy_card_table (c_table);
// sever the link from the parent
if (&g_card_table[card_word (gcard_of(g_lowest_address))] == c_table)
+ {
g_card_table = 0;
- uint32_t* p_table = &g_card_table[card_word (gcard_of(g_lowest_address))];
- if (p_table)
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ SoftwareWriteWatch::StaticClose();
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ }
+ else
{
- while (p_table && (card_table_next (p_table) != c_table))
- p_table = card_table_next (p_table);
- card_table_next (p_table) = 0;
+ uint32_t* p_table = &g_card_table[card_word (gcard_of(g_lowest_address))];
+ if (p_table)
+ {
+ while (p_table && (card_table_next (p_table) != c_table))
+ p_table = card_table_next (p_table);
+ card_table_next (p_table) = 0;
+ }
}
}
}
@@ -6866,13 +6901,24 @@ uint32_t* gc_heap::make_card_table (uint8_t* start, uint8_t* end)
size_t cb = 0;
#ifdef CARD_BUNDLE
- if (can_use_write_watch())
+ if (can_use_write_watch_for_card_table())
{
virtual_reserve_flags |= VirtualReserveFlags::WriteWatch;
cb = size_card_bundle_of (g_lowest_address, g_highest_address);
}
#endif //CARD_BUNDLE
+ size_t wws = 0;
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ size_t sw_ww_table_offset = 0;
+ if (gc_can_use_concurrent)
+ {
+ size_t sw_ww_size_before_table = sizeof(card_table_info) + cs + bs + cb;
+ sw_ww_table_offset = SoftwareWriteWatch::GetTableStartByteOffset(sw_ww_size_before_table);
+ wws = sw_ww_table_offset - sw_ww_size_before_table + SoftwareWriteWatch::GetTableByteSize(start, end);
+ }
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+
#ifdef GROWABLE_SEG_MAPPING_TABLE
size_t st = size_seg_mapping_table_of (g_lowest_address, g_highest_address);
#else //GROWABLE_SEG_MAPPING_TABLE
@@ -6881,29 +6927,29 @@ uint32_t* gc_heap::make_card_table (uint8_t* start, uint8_t* end)
// it is impossible for alloc_size to overflow due bounds on each of
// its components.
- size_t alloc_size = sizeof (uint8_t)*(bs + cs + cb + ms + st + sizeof (card_table_info));
+ size_t alloc_size = sizeof (uint8_t)*(sizeof(card_table_info) + cs + bs + cb + wws + st + ms);
size_t alloc_size_aligned = Align (alloc_size, g_SystemInfo.dwAllocationGranularity-1);
- uint32_t* ct = (uint32_t*)GCToOSInterface::VirtualReserve (0, alloc_size_aligned, 0, virtual_reserve_flags);
+ uint8_t* mem = (uint8_t*)GCToOSInterface::VirtualReserve (0, alloc_size_aligned, 0, virtual_reserve_flags);
- if (!ct)
+ if (!mem)
return 0;
- dprintf (2, ("init - table alloc for %Id bytes: [%Ix, %Ix[",
- alloc_size, (size_t)ct, (size_t)((uint8_t*)ct+alloc_size)));
+ dprintf (2, ("Init - Card table alloc for %Id bytes: [%Ix, %Ix[",
+ alloc_size, (size_t)mem, (size_t)(mem+alloc_size)));
// mark array will be committed separately (per segment).
size_t commit_size = alloc_size - ms;
- if (!GCToOSInterface::VirtualCommit ((uint8_t*)ct, commit_size))
+ if (!GCToOSInterface::VirtualCommit (mem, commit_size))
{
- dprintf (2, ("Table commit failed"));
- GCToOSInterface::VirtualRelease ((uint8_t*)ct, alloc_size_aligned);
+ dprintf (2, ("Card table commit failed"));
+ GCToOSInterface::VirtualRelease (mem, alloc_size_aligned);
return 0;
}
// initialize the ref count
- ct = (uint32_t*)((uint8_t*)ct+sizeof (card_table_info));
+ uint32_t* ct = (uint32_t*)(mem+sizeof (card_table_info));
card_table_refcount (ct) = 0;
card_table_lowest_address (ct) = start;
card_table_highest_address (ct) = end;
@@ -6915,15 +6961,22 @@ uint32_t* gc_heap::make_card_table (uint8_t* start, uint8_t* end)
card_table_card_bundle_table (ct) = (uint32_t*)((uint8_t*)card_table_brick_table (ct) + bs);
#endif //CARD_BUNDLE
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ if (gc_can_use_concurrent)
+ {
+ SoftwareWriteWatch::InitializeUntranslatedTable(mem + sw_ww_table_offset, start);
+ }
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+
#ifdef GROWABLE_SEG_MAPPING_TABLE
- seg_mapping_table = (seg_mapping*)((uint8_t*)card_table_brick_table (ct) + bs + cb);
+ seg_mapping_table = (seg_mapping*)((uint8_t*)card_table_brick_table (ct) + bs + cb + wws);
seg_mapping_table = (seg_mapping*)((uint8_t*)seg_mapping_table -
size_seg_mapping_table_of (0, (align_lower_segment (g_lowest_address))));
#endif //GROWABLE_SEG_MAPPING_TABLE
#ifdef MARK_ARRAY
if (gc_can_use_concurrent)
- card_table_mark_array (ct) = (uint32_t*)((uint8_t*)card_table_brick_table (ct) + bs + cb + st);
+ card_table_mark_array (ct) = (uint32_t*)((uint8_t*)card_table_brick_table (ct) + bs + cb + wws + st);
else
card_table_mark_array (ct) = NULL;
#endif //MARK_ARRAY
@@ -7033,13 +7086,27 @@ int gc_heap::grow_brick_card_tables (uint8_t* start,
size_t cb = 0;
#ifdef CARD_BUNDLE
- if (can_use_write_watch())
+ if (can_use_write_watch_for_card_table())
{
virtual_reserve_flags = VirtualReserveFlags::WriteWatch;
cb = size_card_bundle_of (saved_g_lowest_address, saved_g_highest_address);
}
#endif //CARD_BUNDLE
+ size_t wws = 0;
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ size_t sw_ww_table_offset = 0;
+ if (gc_can_use_concurrent)
+ {
+ size_t sw_ww_size_before_table = sizeof(card_table_info) + cs + bs + cb;
+ sw_ww_table_offset = SoftwareWriteWatch::GetTableStartByteOffset(sw_ww_size_before_table);
+ wws =
+ sw_ww_table_offset -
+ sw_ww_size_before_table +
+ SoftwareWriteWatch::GetTableByteSize(saved_g_lowest_address, saved_g_highest_address);
+ }
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+
#ifdef GROWABLE_SEG_MAPPING_TABLE
size_t st = size_seg_mapping_table_of (saved_g_lowest_address, saved_g_highest_address);
#else //GROWABLE_SEG_MAPPING_TABLE
@@ -7048,10 +7115,10 @@ int gc_heap::grow_brick_card_tables (uint8_t* start,
// it is impossible for alloc_size to overflow due bounds on each of
// its components.
- size_t alloc_size = sizeof (uint8_t)*(bs + cs + cb + ms +st + sizeof (card_table_info));
+ size_t alloc_size = sizeof (uint8_t)*(sizeof(card_table_info) + cs + bs + cb + wws + st + ms);
size_t alloc_size_aligned = Align (alloc_size, g_SystemInfo.dwAllocationGranularity-1);
- dprintf (GC_TABLE_LOG, ("brick table: %Id; card table: %Id; mark array: %Id, card bundle: %Id, seg table: %Id",
- bs, cs, ms, cb, st));
+ dprintf (GC_TABLE_LOG, ("card table: %Id; brick table: %Id; card bundle: %Id; sw ww table: %Id; seg table: %Id; mark array: %Id",
+ cs, bs, cb, wws, st, ms));
uint8_t* mem = (uint8_t*)GCToOSInterface::VirtualReserve (0, alloc_size_aligned, 0, virtual_reserve_flags);
@@ -7104,7 +7171,7 @@ int gc_heap::grow_brick_card_tables (uint8_t* start,
#ifdef GROWABLE_SEG_MAPPING_TABLE
{
- seg_mapping* new_seg_mapping_table = (seg_mapping*)((uint8_t*)card_table_brick_table (ct) + bs + cb);
+ seg_mapping* new_seg_mapping_table = (seg_mapping*)((uint8_t*)card_table_brick_table (ct) + bs + cb + wws);
new_seg_mapping_table = (seg_mapping*)((uint8_t*)new_seg_mapping_table -
size_seg_mapping_table_of (0, (align_lower_segment (saved_g_lowest_address))));
memcpy(&new_seg_mapping_table[seg_mapping_word_of(g_lowest_address)],
@@ -7117,7 +7184,7 @@ int gc_heap::grow_brick_card_tables (uint8_t* start,
#ifdef MARK_ARRAY
if(gc_can_use_concurrent)
- card_table_mark_array (ct) = (uint32_t*)((uint8_t*)card_table_brick_table (ct) + bs + cb + st);
+ card_table_mark_array (ct) = (uint32_t*)((uint8_t*)card_table_brick_table (ct) + bs + cb + wws + st);
else
card_table_mark_array (ct) = NULL;
#endif //MARK_ARRAY
@@ -7155,11 +7222,51 @@ int gc_heap::grow_brick_card_tables (uint8_t* start,
}
#endif //BACKGROUND_GC
- // This passes a bool telling whether we need to switch to the post
- // grow version of the write barrier. This test tells us if the new
- // segment was allocated at a lower address than the old, requiring
- // that we start doing an upper bounds check in the write barrier.
- StompWriteBarrierResize(la != saved_g_lowest_address);
+ {
+ bool write_barrier_updated = false;
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ if (gc_can_use_concurrent)
+ {
+ // The current design of software write watch requires that the runtime is suspended during resize. Suspending
+ // on resize is preferred because it is a far less frequent operation than GetWriteWatch() / ResetWriteWatch().
+ // Suspending here allows copying dirty state from the old table into the new table, and not have to merge old
+ // table info lazily as done for card tables.
+
+ BOOL is_runtime_suspended = IsSuspendEEThread();
+ if (!is_runtime_suspended)
+ {
+ suspend_EE();
+ }
+
+ SoftwareWriteWatch::SetResizedUntranslatedTable(
+ mem + sw_ww_table_offset,
+ saved_g_lowest_address,
+ saved_g_highest_address);
+
+ // Since the runtime is already suspended, update the write barrier here as well.
+ // This passes a bool telling whether we need to switch to the post
+ // grow version of the write barrier. This test tells us if the new
+ // segment was allocated at a lower address than the old, requiring
+ // that we start doing an upper bounds check in the write barrier.
+ StompWriteBarrierResize(true, la != saved_g_lowest_address);
+ write_barrier_updated = true;
+
+ if (!is_runtime_suspended)
+ {
+ restart_EE();
+ }
+ }
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+
+ if (!write_barrier_updated)
+ {
+ // This passes a bool telling whether we need to switch to the post
+ // grow version of the write barrier. This test tells us if the new
+ // segment was allocated at a lower address than the old, requiring
+ // that we start doing an upper bounds check in the write barrier.
+ StompWriteBarrierResize(!!IsSuspendEEThread(), la != saved_g_lowest_address);
+ }
+ }
// We need to make sure that other threads executing checked write barriers
// will see the g_card_table update before g_lowest/highest_address updates.
@@ -7367,8 +7474,6 @@ void gc_heap::copy_brick_card_table()
#else //GROWABLE_SEG_MAPPING_TABLE
size_t st = 0;
#endif //GROWABLE_SEG_MAPPING_TABLE
- assert (!gc_can_use_concurrent ||
- (((uint8_t*)card_table_card_bundle_table (ct) + size_card_bundle_of (g_lowest_address, g_highest_address) + st) == (uint8_t*)card_table_mark_array (ct)));
#endif //MARK_ARRAY && _DEBUG
card_bundle_table = translate_card_bundle_table (card_table_card_bundle_table (ct));
assert (&card_bundle_table [card_bundle_word (cardw_card_bundle (card_word (card_of (g_lowest_address))))] ==
@@ -9279,6 +9384,27 @@ void gc_heap::update_card_table_bundle()
}
#endif //CARD_BUNDLE
+// static
+void gc_heap::reset_write_watch_for_gc_heap(void* base_address, size_t region_size)
+{
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ SoftwareWriteWatch::ClearDirty(base_address, region_size);
+#else // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ GCToOSInterface::ResetWriteWatch(base_address, region_size);
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+}
+
+// static
+void gc_heap::get_write_watch_for_gc_heap(bool reset, void *base_address, size_t region_size, void** dirty_pages, uintptr_t* dirty_page_count_ref, bool is_runtime_suspended)
+{
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ SoftwareWriteWatch::GetDirty(base_address, region_size, dirty_pages, dirty_page_count_ref, reset, is_runtime_suspended);
+#else // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ bool success = GCToOSInterface::GetWriteWatch(reset, base_address, region_size, dirty_pages, dirty_page_count_ref);
+ assert(success);
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+}
+
const size_t ww_reset_quantum = 128*1024*1024;
inline
@@ -9302,7 +9428,7 @@ void gc_heap::reset_ww_by_chunk (uint8_t* start_address, size_t total_reset_size
next_reset_size = ((remaining_reset_size >= ww_reset_quantum) ? ww_reset_quantum : remaining_reset_size);
if (next_reset_size)
{
- GCToOSInterface::ResetWriteWatch (start_address, next_reset_size);
+ reset_write_watch_for_gc_heap(start_address, next_reset_size);
reset_size += next_reset_size;
switch_one_quantum();
@@ -9333,6 +9459,11 @@ void gc_heap::switch_on_reset (BOOL concurrent_p, size_t* current_total_reset_si
void gc_heap::reset_write_watch (BOOL concurrent_p)
{
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ // Software write watch currently requires the runtime to be suspended during reset. See SoftwareWriteWatch::ClearDirty().
+ assert(!concurrent_p);
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+
heap_segment* seg = heap_segment_rw (generation_start_segment (generation_of (max_generation)));
PREFIX_ASSUME(seg != NULL);
@@ -9371,7 +9502,7 @@ void gc_heap::reset_write_watch (BOOL concurrent_p)
#endif //TIME_WRITE_WATCH
dprintf (3, ("h%d: soh ww: [%Ix(%Id)", heap_number, (size_t)base_address, region_size));
//reset_ww_by_chunk (base_address, region_size);
- GCToOSInterface::ResetWriteWatch (base_address, region_size);
+ reset_write_watch_for_gc_heap(base_address, region_size);
#ifdef TIME_WRITE_WATCH
unsigned int time_stop = GetCycleCount32();
@@ -9414,7 +9545,7 @@ void gc_heap::reset_write_watch (BOOL concurrent_p)
#endif //TIME_WRITE_WATCH
dprintf (3, ("h%d: loh ww: [%Ix(%Id)", heap_number, (size_t)base_address, region_size));
//reset_ww_by_chunk (base_address, region_size);
- GCToOSInterface::ResetWriteWatch (base_address, region_size);
+ reset_write_watch_for_gc_heap(base_address, region_size);
#ifdef TIME_WRITE_WATCH
unsigned int time_stop = GetCycleCount32();
@@ -9514,7 +9645,7 @@ void gc_heap::adjust_ephemeral_limits ()
(size_t)ephemeral_low, (size_t)ephemeral_high))
// This updates the write barrier helpers with the new info.
- StompWriteBarrierEphemeral();
+ StompWriteBarrierEphemeral(!!IsSuspendEEThread());
}
#if defined(TRACE_GC) || defined(GC_CONFIG_DRIVEN)
@@ -9636,12 +9767,14 @@ HRESULT gc_heap::initialize_gc (size_t segment_size,
HRESULT hres = S_OK;
#ifdef WRITE_WATCH
- write_watch_api_supported();
+ hardware_write_watch_api_supported();
#ifdef BACKGROUND_GC
- if (can_use_write_watch () && g_pConfig->GetGCconcurrent()!=0)
+ if (can_use_write_watch_for_gc_heap() && g_pConfig->GetGCconcurrent() != 0)
{
gc_can_use_concurrent = true;
- virtual_alloc_write_watch = true;
+#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ virtual_alloc_hardware_write_watch = true;
+#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
}
else
{
@@ -9673,10 +9806,11 @@ HRESULT gc_heap::initialize_gc (size_t segment_size,
uint64_t th = (uint64_t)SH_TH_CARD_BUNDLE;
#endif //MULTIPLE_HEAPS
- if ((can_use_write_watch() && reserved_memory >= th))
+ if (can_use_write_watch_for_card_table() && reserved_memory >= th)
{
settings.card_bundles = TRUE;
- } else
+ }
+ else
{
settings.card_bundles = FALSE;
}
@@ -18428,10 +18562,11 @@ void gc_heap::fix_card_table ()
continue;
}
else
- {
+ {
break;
}
}
+
uint8_t* base_address = align_lower_page (heap_segment_mem (seg));
uint8_t* high_address = align_on_page (
(seg != ephemeral_heap_segment) ?
@@ -18451,15 +18586,14 @@ void gc_heap::fix_card_table ()
#ifdef TIME_WRITE_WATCH
unsigned int time_start = GetCycleCount32();
#endif //TIME_WRITE_WATCH
- bool success = GCToOSInterface::GetWriteWatch(reset_watch_state, base_address, region_size,
- (void**)g_addresses,
- &bcount);
- assert (success);
+ get_write_watch_for_gc_heap(reset_watch_state, base_address, region_size,
+ (void**)g_addresses,
+ &bcount, true);
#ifdef TIME_WRITE_WATCH
unsigned int time_stop = GetCycleCount32();
tot_cycles += time_stop - time_start;
- printf ("GetWriteWatch Duration: %d, total: %d\n",
+ printf ("get_write_watch_for_gc_heap Duration: %d, total: %d\n",
time_stop - time_start, tot_cycles);
#endif //TIME_WRITE_WATCH
@@ -18483,6 +18617,7 @@ void gc_heap::fix_card_table ()
} while (bcount >= array_size);
seg = heap_segment_next_rw (seg);
}
+
#ifdef BACKGROUND_GC
if (settings.concurrent)
{
@@ -18491,7 +18626,7 @@ void gc_heap::fix_card_table ()
align_on_page (generation_allocation_start (generation_of (0)));
size_t region_size =
heap_segment_allocated (ephemeral_heap_segment) - base_address;
- GCToOSInterface::ResetWriteWatch (base_address, region_size);
+ reset_write_watch_for_gc_heap(base_address, region_size);
}
#endif //BACKGROUND_GC
#endif //WRITE_WATCH
@@ -24295,6 +24430,15 @@ void gc_heap::gcmemcopy (uint8_t* dest, uint8_t* src, size_t len, BOOL copy_car
//dprintf(3,(" Memcopy [%Ix->%Ix, %Ix->%Ix[", (size_t)src, (size_t)dest, (size_t)src+len, (size_t)dest+len));
dprintf(3,(" mc: [%Ix->%Ix, %Ix->%Ix[", (size_t)src, (size_t)dest, (size_t)src+len, (size_t)dest+len));
memcopy (dest - plug_skew, src - plug_skew, (int)len);
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ if (SoftwareWriteWatch::IsEnabledForGCHeap())
+ {
+ // The ranges [src - plug_skew .. src[ and [src + len - plug_skew .. src + len[ are ObjHeaders, which don't have GC
+ // references, and are not relevant for write watch. The latter range actually corresponds to the ObjHeader for the
+ // object at (src + len), so it can be ignored anyway.
+ SoftwareWriteWatch::SetDirtyRegion(dest, len - plug_skew);
+ }
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
copy_cards_range (dest, src, len, copy_cards_p);
}
}
@@ -25570,6 +25714,28 @@ void gc_heap::background_mark_phase ()
if (bgc_t_join.joined())
#endif //MULTIPLE_HEAPS
{
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ SoftwareWriteWatch::EnableForGCHeap();
+
+ // Resetting write watch for software write watch is pretty fast, much faster than for hardware write watch. Reset
+ // can be done while the runtime is suspended or after the runtime is restarted, the preference was to reset while
+ // the runtime is suspended. The reset for hardware write watch is done after the runtime is restarted below.
+#ifdef WRITE_WATCH
+ concurrent_print_time_delta ("CRWW begin");
+
+#ifdef MULTIPLE_HEAPS
+ for (int i = 0; i < n_heaps; i++)
+ {
+ g_heaps[i]->reset_write_watch (FALSE);
+ }
+#else
+ reset_write_watch (FALSE);
+#endif //MULTIPLE_HEAPS
+
+ concurrent_print_time_delta ("CRWW");
+#endif //WRITE_WATCH
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+
num_sizedrefs = SystemDomain::System()->GetTotalNumSizedRefHandles();
// this c_write is not really necessary because restart_vm
@@ -25597,12 +25763,16 @@ void gc_heap::background_mark_phase ()
{
disable_preemptive (current_thread, TRUE);
+#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ // When software write watch is enabled, resetting write watch is done while the runtime is suspended above. The
+ // post-reset call to revisit_written_pages is only necessary for concurrent reset_write_watch, to discard dirtied
+ // pages during the concurrent reset.
+
#ifdef WRITE_WATCH
concurrent_print_time_delta ("CRWW begin");
#ifdef MULTIPLE_HEAPS
- int i;
- for (i = 0; i < n_heaps; i++)
+ for (int i = 0; i < n_heaps; i++)
{
g_heaps[i]->reset_write_watch (TRUE);
}
@@ -25614,7 +25784,7 @@ void gc_heap::background_mark_phase ()
#endif //WRITE_WATCH
#ifdef MULTIPLE_HEAPS
- for (i = 0; i < n_heaps; i++)
+ for (int i = 0; i < n_heaps; i++)
{
g_heaps[i]->revisit_written_pages (TRUE, TRUE);
}
@@ -25623,9 +25793,10 @@ void gc_heap::background_mark_phase ()
#endif //MULTIPLE_HEAPS
concurrent_print_time_delta ("CRW");
+#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
#ifdef MULTIPLE_HEAPS
- for (i = 0; i < n_heaps; i++)
+ for (int i = 0; i < n_heaps; i++)
{
g_heaps[i]->current_bgc_state = bgc_mark_handles;
}
@@ -25818,6 +25989,23 @@ void gc_heap::background_mark_phase ()
//concurrent_print_time_delta ("nonconcurrent revisit dirtied pages on LOH");
concurrent_print_time_delta ("NRre LOH");
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+#ifdef MULTIPLE_HEAPS
+ bgc_t_join.join(this, gc_join_disable_software_write_watch);
+ if (bgc_t_join.joined())
+#endif // MULTIPLE_HEAPS
+ {
+ // The runtime is suspended, and we will be doing a final query of dirty pages, so pause tracking written pages to
+ // avoid further perf penalty after the runtime is restarted
+ SoftwareWriteWatch::DisableForGCHeap();
+
+#ifdef MULTIPLE_HEAPS
+ dprintf(3, ("Restarting BGC threads after disabling software write watch"));
+ bgc_t_join.restart();
+#endif // MULTIPLE_HEAPS
+ }
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+
dprintf (2, ("before NR 1st Hov count: %d", bgc_overflow_count));
bgc_overflow_count = 0;
@@ -26223,6 +26411,7 @@ void gc_heap::revisit_written_pages (BOOL concurrent_p, BOOL reset_only_p)
PREFIX_ASSUME(seg != NULL);
bool reset_watch_state = !!concurrent_p;
+ bool is_runtime_suspended = !concurrent_p;
BOOL small_object_segments = TRUE;
int align_const = get_alignment_constant (small_object_segments);
@@ -26327,18 +26516,27 @@ void gc_heap::revisit_written_pages (BOOL concurrent_p, BOOL reset_only_p)
ptrdiff_t region_size = high_address - base_address;
dprintf (3, ("h%d: gw: [%Ix(%Id)", heap_number, (size_t)base_address, (size_t)region_size));
- bool success = GCToOSInterface::GetWriteWatch (reset_watch_state, base_address, region_size,
- (void**)background_written_addresses,
- &bcount);
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ // When the runtime is not suspended, it's possible for the table to be resized concurrently with the scan
+ // for dirty pages below. Prevent that by synchronizing with grow_brick_card_tables(). When the runtime is
+ // suspended, it's ok to scan for dirty pages concurrently from multiple background GC threads for disjoint
+ // memory regions.
+ if (!is_runtime_suspended)
+ {
+ enter_spin_lock(&gc_lock);
+ }
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+
+ get_write_watch_for_gc_heap (reset_watch_state, base_address, region_size,
+ (void**)background_written_addresses,
+ &bcount, is_runtime_suspended);
- //#ifdef _DEBUG
- if (!success)
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ if (!is_runtime_suspended)
{
- printf ("GetWriteWatch Error ");
- printf ("Probing pages [%Ix, %Ix[\n", (size_t)base_address, (size_t)high_address);
+ leave_spin_lock(&gc_lock);
}
- //#endif
- assert (success);
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
if (bcount != 0)
{
@@ -33396,6 +33594,9 @@ HRESULT GCHeap::Shutdown ()
{
destroy_card_table (ct);
g_card_table = 0;
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ SoftwareWriteWatch::StaticClose();
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
}
//destroy all segments on the standby list
@@ -33555,7 +33756,7 @@ HRESULT GCHeap::Initialize ()
WaitForGCEvent->CreateManualEvent(TRUE);
- StompWriteBarrierResize(FALSE);
+ StompWriteBarrierResize(true, false);
#ifndef FEATURE_REDHAWK // Redhawk forces relocation a different way
#if defined (STRESS_HEAP) && !defined (MULTIPLE_HEAPS)
@@ -35903,6 +36104,13 @@ GCHeap::SetCardsAfterBulkCopy( Object **StartPoint, size_t len )
updateGCShadow(&StartPoint[i], StartPoint[i]);
#endif //WRITE_BARRIER_CHECK && !SERVER_GC
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ if (SoftwareWriteWatch::IsEnabledForGCHeap())
+ {
+ SoftwareWriteWatch::SetDirtyRegion(StartPoint, len);
+ }
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+
// If destination is in Gen 0 don't bother
if (
#ifdef BACKGROUND_GC
diff --git a/src/gc/gcpriv.h b/src/gc/gcpriv.h
index 5f41f5e9ce..bfb6f8146d 100644
--- a/src/gc/gcpriv.h
+++ b/src/gc/gcpriv.h
@@ -106,7 +106,7 @@ inline void FATAL_GC_ERROR()
#define MARK_ARRAY //Mark bit in an array
#endif //BACKGROUND_GC
-#if defined(BACKGROUND_GC) || defined (CARD_BUNDLE)
+#if defined(BACKGROUND_GC) || defined (CARD_BUNDLE) || defined(FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP)
#define WRITE_WATCH //Write Watch feature
#endif //BACKGROUND_GC || CARD_BUNDLE
@@ -1648,6 +1648,12 @@ protected:
void rearrange_large_heap_segments();
PER_HEAP
void rearrange_heap_segments(BOOL compacting);
+
+ PER_HEAP_ISOLATED
+ void reset_write_watch_for_gc_heap(void* base_address, size_t region_size);
+ PER_HEAP_ISOLATED
+ void get_write_watch_for_gc_heap(bool reset, void *base_address, size_t region_size, void** dirty_pages, uintptr_t* dirty_page_count_ref, bool is_runtime_suspended);
+
PER_HEAP
void switch_one_quantum();
PER_HEAP
diff --git a/src/gc/sample/CMakeLists.txt b/src/gc/sample/CMakeLists.txt
index 45cdbb2b9d..f0ba28edc7 100644
--- a/src/gc/sample/CMakeLists.txt
+++ b/src/gc/sample/CMakeLists.txt
@@ -17,6 +17,7 @@ set(SOURCES
../handletablecore.cpp
../handletablescan.cpp
../objecthandle.cpp
+ ../softwarewritewatch.cpp
)
if(WIN32)
diff --git a/src/gc/sample/gcenv.ee.cpp b/src/gc/sample/gcenv.ee.cpp
index 7180165f86..3205900c91 100644
--- a/src/gc/sample/gcenv.ee.cpp
+++ b/src/gc/sample/gcenv.ee.cpp
@@ -241,11 +241,19 @@ bool IsGCSpecialThread()
return false;
}
-void StompWriteBarrierEphemeral()
+void StompWriteBarrierEphemeral(bool /* isRuntimeSuspended */)
{
}
-void StompWriteBarrierResize(bool /*bReqUpperBoundsCheck*/)
+void StompWriteBarrierResize(bool /* isRuntimeSuspended */, bool /*bReqUpperBoundsCheck*/)
+{
+}
+
+void SwitchToWriteWatchBarrier()
+{
+}
+
+void SwitchToNonWriteWatchBarrier()
{
}
diff --git a/src/gc/sample/gcenv.h b/src/gc/sample/gcenv.h
index c4e8015392..1798cde7f6 100644
--- a/src/gc/sample/gcenv.h
+++ b/src/gc/sample/gcenv.h
@@ -79,6 +79,12 @@ public:
Thread * GetThread();
+inline BOOL IsSuspendEEThread()
+{
+ // TODO: Implement
+ return false;
+}
+
class ThreadStore
{
public:
diff --git a/src/gc/softwarewritewatch.cpp b/src/gc/softwarewritewatch.cpp
new file mode 100644
index 0000000000..bbd37ef94b
--- /dev/null
+++ b/src/gc/softwarewritewatch.cpp
@@ -0,0 +1,243 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "common.h"
+#include "softwarewritewatch.h"
+
+#include "../inc/static_assert.h"
+#include "gcenv.h"
+
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+#ifndef DACCESS_COMPILE
+
+static_assert_no_msg((static_cast<size_t>(1) << SOFTWARE_WRITE_WATCH_AddressToTableByteIndexShift) == OS_PAGE_SIZE);
+
+extern "C"
+{
+ uint8_t *g_sw_ww_table = nullptr; // translated write watch table; nullptr until a table is set, and again after StaticClose()
+ bool g_sw_ww_enabled_for_gc_heap = false; // whether write watch is currently enabled for the GC heap
+}
+
+// Resets the write watch globals to their initial state. Does nothing when no table has been set.
+void SoftwareWriteWatch::StaticClose()
+{
+    if (GetTable() != nullptr)
+    {
+        // Mark write watch as disabled and forget the table pointer
+        g_sw_ww_enabled_for_gc_heap = false;
+        g_sw_ww_table = nullptr;
+    }
+}
+// Scans one size_t-sized block of the table and appends to dirtyPages the address of each dirty page whose table byte is in [startByteIndex, endByteIndex). Returns false as soon as dirtyPages holds dirtyPageCount entries, true otherwise.
+bool SoftwareWriteWatch::GetDirtyFromBlock(
+ uint8_t *block,
+ uint8_t *firstPageAddressInBlock,
+ size_t startByteIndex,
+ size_t endByteIndex,
+ void **dirtyPages,
+ size_t *dirtyPageIndexRef,
+ size_t dirtyPageCount,
+ bool clearDirty)
+{
+ assert(block != nullptr);
+ assert(ALIGN_DOWN(block, sizeof(size_t)) == block);
+ assert(firstPageAddressInBlock == reinterpret_cast<uint8_t *>(GetPageAddress(block - GetTable())));
+ assert(startByteIndex < endByteIndex);
+ assert(endByteIndex <= sizeof(size_t));
+ assert(dirtyPages != nullptr);
+ assert(dirtyPageIndexRef != nullptr);
+ // dirtyPageIndex aliases the caller's running index, so entries appended here advance the caller's position
+ size_t &dirtyPageIndex = *dirtyPageIndexRef;
+ assert(dirtyPageIndex < dirtyPageCount);
+ // Read all table bytes of the block as one word; a zero word means no byte in the block is marked dirty
+ size_t dirtyBytes = *reinterpret_cast<size_t *>(block);
+ if (dirtyBytes == 0)
+ {
+ return true;
+ }
+ // Discard the bits for table bytes outside [startByteIndex, endByteIndex) by shifting them out and back
+ if (startByteIndex != 0)
+ {
+ size_t numLowBitsToClear = startByteIndex * 8;
+ dirtyBytes >>= numLowBitsToClear;
+ dirtyBytes <<= numLowBitsToClear;
+ }
+ if (endByteIndex != sizeof(size_t))
+ {
+ size_t numHighBitsToClear = (sizeof(size_t) - endByteIndex) * 8;
+ dirtyBytes <<= numHighBitsToClear;
+ dirtyBytes >>= numHighBitsToClear;
+ }
+ // Process the remaining set bytes one at a time until none are left or dirtyPages is full
+ while (dirtyBytes != 0)
+ {
+ DWORD bitIndex;
+ static_assert_no_msg(sizeof(size_t) <= 8);
+ if (sizeof(size_t) == 8)
+ {
+ BitScanForward64(&bitIndex, static_cast<DWORD64>(dirtyBytes));
+ }
+ else
+ {
+ BitScanForward(&bitIndex, static_cast<DWORD>(dirtyBytes));
+ }
+
+ // Each byte is only ever set to 0 or 0xff, so the found bit starts a fully-set byte; remove that byte from the word
+ assert(bitIndex % 8 == 0);
+ size_t byteMask = static_cast<size_t>(0xff) << bitIndex;
+ assert((dirtyBytes & byteMask) == byteMask);
+ dirtyBytes ^= byteMask;
+ // bitIndex is a multiple of 8 (asserted above), so this is the index of the dirty byte within the block
+ DWORD byteIndex = bitIndex / 8;
+ if (clearDirty)
+ {
+ // Clear only the bytes for which pages are recorded as dirty
+ block[byteIndex] = 0;
+ }
+ // Each table byte stands for one page of OS_PAGE_SIZE bytes, counted from the first page of this block
+ void *pageAddress = firstPageAddressInBlock + byteIndex * OS_PAGE_SIZE;
+ assert(pageAddress >= GetHeapStartAddress());
+ assert(pageAddress < GetHeapEndAddress());
+ assert(dirtyPageIndex < dirtyPageCount);
+ dirtyPages[dirtyPageIndex] = pageAddress;
+ ++dirtyPageIndex;
+ if (dirtyPageIndex == dirtyPageCount)
+ {
+ return false;
+ }
+ }
+ return true;
+}
+// Records into dirtyPages the addresses of dirty pages in [baseAddress, baseAddress + regionByteSize). On input *dirtyPageCountRef is the capacity of dirtyPages; on return it is the number of entries recorded. When clearDirty is set, the table bytes of the recorded pages are cleared.
+void SoftwareWriteWatch::GetDirty(
+ void *baseAddress,
+ size_t regionByteSize,
+ void **dirtyPages,
+ size_t *dirtyPageCountRef,
+ bool clearDirty,
+ bool isRuntimeSuspended)
+{
+ VerifyCreated();
+ VerifyMemoryRegion(baseAddress, regionByteSize);
+ assert(dirtyPages != nullptr);
+ assert(dirtyPageCountRef != nullptr);
+ // A capacity of zero leaves nothing to record
+ size_t dirtyPageCount = *dirtyPageCountRef;
+ if (dirtyPageCount == 0)
+ {
+ return;
+ }
+
+ if (!isRuntimeSuspended)
+ {
+ // When a page is marked as dirty, a memory barrier is not issued after the write most of the time. Issue a memory
+ // barrier on all active threads of the process now to make recent changes to dirty state visible to this thread.
+ GCToOSInterface::FlushProcessWriteBuffers();
+ }
+ // Table bytes [tableRegionStart, tableRegionEnd) correspond to the requested memory region
+ uint8_t *tableRegionStart;
+ size_t tableRegionByteSize;
+ TranslateToTableRegion(baseAddress, regionByteSize, &tableRegionStart, &tableRegionByteSize);
+ uint8_t *tableRegionEnd = tableRegionStart + tableRegionByteSize;
+ // The table is scanned in size_t-aligned blocks: possibly a partial first block, then full blocks, then possibly a partial last block
+ uint8_t *blockStart = ALIGN_DOWN(tableRegionStart, sizeof(size_t));
+ assert(blockStart >= GetUntranslatedTable());
+ uint8_t *blockEnd = ALIGN_UP(tableRegionEnd, sizeof(size_t));
+ assert(blockEnd <= GetUntranslatedTableEnd());
+ uint8_t *fullBlockEnd = ALIGN_DOWN(tableRegionEnd, sizeof(size_t));
+
+ size_t dirtyPageIndex = 0;
+ uint8_t *currentBlock = blockStart;
+ uint8_t *firstPageAddressInCurrentBlock = reinterpret_cast<uint8_t *>(GetPageAddress(currentBlock - GetTable()));
+ // Single-pass do/while so that 'break' skips the remaining steps; when dirtyPages fills up, *dirtyPageCountRef is left at its input value, which equals the number recorded
+ do
+ {
+ if (blockStart == fullBlockEnd) // the start and end of the table region fall within the same block
+ {
+ if (GetDirtyFromBlock(
+ currentBlock,
+ firstPageAddressInCurrentBlock,
+ tableRegionStart - blockStart,
+ tableRegionEnd - fullBlockEnd,
+ dirtyPages,
+ &dirtyPageIndex,
+ dirtyPageCount,
+ clearDirty))
+ {
+ *dirtyPageCountRef = dirtyPageIndex;
+ }
+ break;
+ }
+ // Partial first block: scan from the region's first table byte to the end of the block
+ if (tableRegionStart != blockStart)
+ {
+ if (!GetDirtyFromBlock(
+ currentBlock,
+ firstPageAddressInCurrentBlock,
+ tableRegionStart - blockStart,
+ sizeof(size_t),
+ dirtyPages,
+ &dirtyPageIndex,
+ dirtyPageCount,
+ clearDirty))
+ {
+ break;
+ }
+ currentBlock += sizeof(size_t);
+ firstPageAddressInCurrentBlock += sizeof(size_t) * OS_PAGE_SIZE;
+ }
+ // Full blocks: each block advances the page address by sizeof(size_t) pages
+ while (currentBlock < fullBlockEnd)
+ {
+ if (!GetDirtyFromBlock(
+ currentBlock,
+ firstPageAddressInCurrentBlock,
+ 0,
+ sizeof(size_t),
+ dirtyPages,
+ &dirtyPageIndex,
+ dirtyPageCount,
+ clearDirty))
+ {
+ break;
+ }
+ currentBlock += sizeof(size_t);
+ firstPageAddressInCurrentBlock += sizeof(size_t) * OS_PAGE_SIZE;
+ }
+ if (currentBlock < fullBlockEnd) // the loop above stopped early because dirtyPages is full
+ {
+ break;
+ }
+ // Partial last block, if the region does not end on a block boundary
+ if (tableRegionEnd != fullBlockEnd &&
+ !GetDirtyFromBlock(
+ currentBlock,
+ firstPageAddressInCurrentBlock,
+ 0,
+ tableRegionEnd - fullBlockEnd,
+ dirtyPages,
+ &dirtyPageIndex,
+ dirtyPageCount,
+ clearDirty))
+ {
+ break;
+ }
+ // The whole region was scanned without filling dirtyPages; report how many entries were recorded
+ *dirtyPageCountRef = dirtyPageIndex;
+ } while (false);
+
+ if (!isRuntimeSuspended && clearDirty && dirtyPageIndex != 0)
+ {
+ // When dirtying a page, the dirty state of the page is first checked to see if the page is already dirty. If already
+ // dirty, the write to mark it as dirty is skipped. So, when the dirty state of a page is cleared, we need to make sure
+ // the cleared state is visible to other threads that may dirty the page, before marking through objects in the page, so
+ // that the GC will not miss marking through dirtied objects in the page. Issue a memory barrier on all active threads
+ // of the process now.
+ MemoryBarrier(); // flush writes from this thread first to guarantee ordering
+ GCToOSInterface::FlushProcessWriteBuffers();
+ }
+}
+
+#endif // !DACCESS_COMPILE
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
diff --git a/src/gc/softwarewritewatch.h b/src/gc/softwarewritewatch.h
new file mode 100644
index 0000000000..3c8491cecb
--- /dev/null
+++ b/src/gc/softwarewritewatch.h
@@ -0,0 +1,339 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef __SOFTWARE_WRITE_WATCH_H__
+#define __SOFTWARE_WRITE_WATCH_H__
+
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+#ifndef DACCESS_COMPILE
+
+extern void SwitchToWriteWatchBarrier(bool isRuntimeSuspended); // called by SoftwareWriteWatch::EnableForGCHeap()
+extern void SwitchToNonWriteWatchBarrier(bool isRuntimeSuspended); // called by SoftwareWriteWatch::DisableForGCHeap()
+// Right-shift that turns an address into its table byte index; softwarewritewatch.cpp static-asserts that (1 << shift) == OS_PAGE_SIZE
+#define SOFTWARE_WRITE_WATCH_AddressToTableByteIndexShift 0xc
+
+extern "C"
+{
+ // Table containing the dirty state. This table is translated to exclude the lowest address it represents, see
+ // TranslateTableToExcludeHeapStartAddress.
+ extern uint8_t *g_sw_ww_table;
+
+ // Write watch may be disabled when it is not needed (between GCs for instance). This indicates whether it is enabled.
+ extern bool g_sw_ww_enabled_for_gc_heap;
+
+ extern uint8_t *g_lowest_address; // start address of the GC heap
+ extern uint8_t *g_highest_address; // end address of the GC heap
+}
+
+// Static-only interface to the software write watch table, which holds one dirty-state byte per page of the GC heap.
+class SoftwareWriteWatch
+{
+public:
+    // Table lifetime and sizing
+    static uint8_t *GetTable();
+    static void InitializeUntranslatedTable(uint8_t *untranslatedTable, void *heapStartAddress);
+    static void SetResizedUntranslatedTable(uint8_t *untranslatedTable, void *heapStartAddress, void *heapEndAddress);
+    static void StaticClose();
+    static size_t GetTableByteSize(void *heapStartAddress, void *heapEndAddress);
+    static size_t GetTableStartByteOffset(size_t byteSizeBeforeTable);
+
+    // Enabled state
+    static bool IsEnabledForGCHeap();
+    static void EnableForGCHeap();
+    static void DisableForGCHeap();
+
+    // Dirty state updates and queries
+    static void ClearDirty(void *baseAddress, size_t regionByteSize);
+    static void SetDirty(void *address, size_t writeByteSize);
+    static void SetDirtyRegion(void *baseAddress, size_t regionByteSize);
+    static void GetDirty(void *baseAddress, size_t regionByteSize, void **dirtyPages, size_t *dirtyPageCountRef, bool clearDirty, bool isRuntimeSuspended);
+
+private:
+    // Dirty state is kept at page granularity, one table byte per page, so that changing the dirty state needs no
+    // synchronization. An address shifted right by this amount is the index of its byte in the table; that is,
+    // GetTable()[address >> AddressToTableByteIndexShift] is the byte covering the memory at 'address'.
+    static const uint8_t AddressToTableByteIndexShift = SOFTWARE_WRITE_WATCH_AddressToTableByteIndexShift;
+
+    // Assertion helpers
+    static void VerifyCreated();
+    static void VerifyMemoryRegion(void *baseAddress, size_t regionByteSize);
+    static void VerifyMemoryRegion(void *baseAddress, size_t regionByteSize, void *heapStartAddress, void *heapEndAddress);
+
+    // Table pointer translation
+    static uint8_t *GetUntranslatedTable();
+    static uint8_t *GetUntranslatedTable(uint8_t *table, void *heapStartAddress);
+    static uint8_t *GetUntranslatedTableEnd();
+    static uint8_t *GetUntranslatedTableEnd(uint8_t *table, void *heapEndAddress);
+    static void SetUntranslatedTable(uint8_t *untranslatedTable, void *heapStartAddress);
+    static uint8_t *TranslateTableToExcludeHeapStartAddress(uint8_t *table, void *heapStartAddress);
+    static void TranslateToTableRegion(void *baseAddress, size_t regionByteSize, uint8_t **tableBaseAddressRef, size_t *tableRegionByteSizeRef);
+
+    // Heap bounds and address/index conversion
+    static void *GetHeapStartAddress();
+    static void *GetHeapEndAddress();
+    static size_t GetTableByteIndex(void *address);
+    static void *GetPageAddress(size_t tableByteIndex);
+
+    // Per-block scan used by GetDirty()
+    static bool GetDirtyFromBlock(uint8_t *block, uint8_t *firstPageAddressInBlock, size_t startByteIndex, size_t endByteIndex, void **dirtyPages, size_t *dirtyPageIndexRef, size_t dirtyPageCount, bool clearDirty);
+};
+// Asserts that a table has been set and that the GC heap start and end addresses are non-null with start below end.
+inline void SoftwareWriteWatch::VerifyCreated()
+{
+ assert(GetTable() != nullptr);
+ assert(GetHeapStartAddress() != nullptr);
+ assert(GetHeapEndAddress() != nullptr);
+ assert(GetHeapStartAddress() < GetHeapEndAddress());
+}
+
+// Verifies a memory region against the current bounds of the whole GC heap.
+inline void SoftwareWriteWatch::VerifyMemoryRegion(void *baseAddress, size_t regionByteSize)
+{
+    void *const heapStart = GetHeapStartAddress();
+    void *const heapEnd = GetHeapEndAddress();
+    VerifyMemoryRegion(baseAddress, regionByteSize, heapStart, heapEnd);
+}
+// Asserts that [baseAddress, baseAddress + regionByteSize) is a non-empty range inside [heapStartAddress, heapEndAddress), and that the latter lies within the current GC heap bounds.
+inline void SoftwareWriteWatch::VerifyMemoryRegion(
+ void *baseAddress,
+ size_t regionByteSize,
+ void *heapStartAddress,
+ void *heapEndAddress)
+{
+ VerifyCreated();
+ assert(baseAddress != nullptr);
+ assert(heapStartAddress != nullptr);
+ assert(heapStartAddress >= GetHeapStartAddress());
+ assert(heapEndAddress != nullptr);
+ assert(heapEndAddress <= GetHeapEndAddress());
+ assert(baseAddress >= heapStartAddress);
+ assert(baseAddress < heapEndAddress);
+ assert(regionByteSize != 0);
+ assert(regionByteSize <= reinterpret_cast<size_t>(heapEndAddress) - reinterpret_cast<size_t>(baseAddress));
+}
+// Returns g_sw_ww_table, the translated table pointer that is indexed directly by (address >> AddressToTableByteIndexShift).
+inline uint8_t *SoftwareWriteWatch::GetTable()
+{
+ return g_sw_ww_table;
+}
+
+// Returns the address of the table byte that corresponds to the current GC heap start address.
+inline uint8_t *SoftwareWriteWatch::GetUntranslatedTable()
+{
+    VerifyCreated();
+
+    uint8_t *const translatedTable = GetTable();
+    void *const heapStart = GetHeapStartAddress();
+    return GetUntranslatedTable(translatedTable, heapStart);
+}
+
+// Returns the address of the byte in the translated 'table' that corresponds to heapStartAddress. The result is
+// asserted to be size_t-aligned.
+inline uint8_t *SoftwareWriteWatch::GetUntranslatedTable(uint8_t *table, void *heapStartAddress)
+{
+    assert(table != nullptr);
+    assert(heapStartAddress != nullptr);
+    assert(heapStartAddress >= GetHeapStartAddress());
+
+    uint8_t *const tableStart = &table[GetTableByteIndex(heapStartAddress)];
+    assert(ALIGN_DOWN(tableStart, sizeof(size_t)) == tableStart);
+    return tableStart;
+}
+
+// Returns the size_t-aligned end of the table bytes that cover the current GC heap.
+inline uint8_t *SoftwareWriteWatch::GetUntranslatedTableEnd()
+{
+    VerifyCreated();
+
+    uint8_t *const translatedTable = GetTable();
+    void *const heapEnd = GetHeapEndAddress();
+    return GetUntranslatedTableEnd(translatedTable, heapEnd);
+}
+
+// Returns the address just past the table byte that covers the last byte before heapEndAddress, rounded up to a
+// size_t boundary.
+inline uint8_t *SoftwareWriteWatch::GetUntranslatedTableEnd(uint8_t *table, void *heapEndAddress)
+{
+    assert(table != nullptr);
+    assert(heapEndAddress != nullptr);
+    assert(heapEndAddress <= GetHeapEndAddress());
+
+    // heapEndAddress is exclusive, so look up the table byte of the last address inside the heap
+    const size_t lastByteIndex = GetTableByteIndex(reinterpret_cast<uint8_t *>(heapEndAddress) - 1);
+    uint8_t *const unalignedEnd = &table[lastByteIndex + 1];
+    return ALIGN_UP(unalignedEnd, sizeof(size_t));
+}
+// Sets the table for the first time: asserts that no table is currently set, then delegates to SetUntranslatedTable().
+inline void SoftwareWriteWatch::InitializeUntranslatedTable(uint8_t *untranslatedTable, void *heapStartAddress)
+{
+ assert(GetTable() == nullptr);
+ SetUntranslatedTable(untranslatedTable, heapStartAddress);
+}
+
+// Stores into g_sw_ww_table the translated form of 'untranslatedTable', whose first byte corresponds to
+// heapStartAddress. The untranslated table must be size_t-aligned.
+inline void SoftwareWriteWatch::SetUntranslatedTable(uint8_t *untranslatedTable, void *heapStartAddress)
+{
+    assert(untranslatedTable != nullptr);
+    assert(ALIGN_DOWN(untranslatedTable, sizeof(size_t)) == untranslatedTable);
+    assert(heapStartAddress != nullptr);
+
+    uint8_t *const translatedTable = TranslateTableToExcludeHeapStartAddress(untranslatedTable, heapStartAddress);
+    g_sw_ww_table = translatedTable;
+}
+
+// Replaces the current table with 'untranslatedTable', which covers the grown range [heapStartAddress, heapEndAddress),
+// and copies the dirty state of the old table into the corresponding position of the new one.
+//
+// The runtime must be suspended for the duration of this call, and background GC threads must synchronize their calls
+// to ClearDirty() and GetDirty() so that those never run concurrently with this function.
+inline void SoftwareWriteWatch::SetResizedUntranslatedTable(
+    uint8_t *untranslatedTable,
+    void *heapStartAddress,
+    void *heapEndAddress)
+{
+    VerifyCreated();
+    assert(untranslatedTable != nullptr);
+    assert(ALIGN_DOWN(untranslatedTable, sizeof(size_t)) == untranslatedTable);
+    assert(heapStartAddress != nullptr);
+    assert(heapEndAddress != nullptr);
+    assert(heapStartAddress <= GetHeapStartAddress());
+    assert(heapEndAddress >= GetHeapEndAddress());
+    assert(heapStartAddress < GetHeapStartAddress() || heapEndAddress > GetHeapEndAddress());
+
+    // Capture everything that describes the old table before the table pointer is replaced
+    uint8_t *const previousUntranslatedTable = GetUntranslatedTable();
+    void *const previousHeapStart = GetHeapStartAddress();
+    const size_t previousTableByteSize = GetTableByteSize(previousHeapStart, GetHeapEndAddress());
+
+    SetUntranslatedTable(untranslatedTable, heapStartAddress);
+
+    // Copy the old dirty state to the bytes of the new table that cover the old heap range
+    uint8_t *const copyDestination = GetTable() + GetTableByteIndex(previousHeapStart);
+    memcpy(copyDestination, previousUntranslatedTable, previousTableByteSize);
+}
+// Returns g_sw_ww_enabled_for_gc_heap, which EnableForGCHeap() sets and DisableForGCHeap() clears.
+inline bool SoftwareWriteWatch::IsEnabledForGCHeap()
+{
+ return g_sw_ww_enabled_for_gc_heap;
+}
+
+inline void SoftwareWriteWatch::EnableForGCHeap()
+{
+ // The runtime needs to be suspended during this call. This is how it currently guarantees that GC heap writes from other
+ // threads between calls to EnableForGCHeap() and DisableForGCHeap() will be tracked.
+ // Requires that a table is set and that write watch is not already enabled (both asserted below).
+ VerifyCreated();
+ assert(!IsEnabledForGCHeap());
+ // Record the enabled state, then switch write barriers; 'true' is passed for isRuntimeSuspended per the requirement above
+ g_sw_ww_enabled_for_gc_heap = true;
+ SwitchToWriteWatchBarrier(true);
+}
+
+inline void SoftwareWriteWatch::DisableForGCHeap()
+{
+ // The runtime needs to be suspended during this call. This is how it currently guarantees that GC heap writes from other
+ // threads between calls to EnableForGCHeap() and DisableForGCHeap() will be tracked.
+ // Requires that a table is set and that write watch is currently enabled (both asserted below).
+ VerifyCreated();
+ assert(IsEnabledForGCHeap());
+ // Record the disabled state, then switch write barriers; 'true' is passed for isRuntimeSuspended per the requirement above
+ g_sw_ww_enabled_for_gc_heap = false;
+ SwitchToNonWriteWatchBarrier(true);
+}
+// Returns g_lowest_address, which is declared in this header as the start address of the GC heap.
+inline void *SoftwareWriteWatch::GetHeapStartAddress()
+{
+ return g_lowest_address;
+}
+// Returns g_highest_address, which is declared in this header as the end address of the GC heap.
+inline void *SoftwareWriteWatch::GetHeapEndAddress()
+{
+ return g_highest_address;
+}
+
+// Converts an address into the index of the byte that represents it in the translated table.
+inline size_t SoftwareWriteWatch::GetTableByteIndex(void *address)
+{
+    assert(address != nullptr);
+
+    const size_t addressValue = reinterpret_cast<size_t>(address);
+    const size_t byteIndex = addressValue >> AddressToTableByteIndexShift;
+    assert(byteIndex != 0);
+    return byteIndex;
+}
+
+// Converts a translated-table byte index back into the address of the page it represents. The resulting address is
+// asserted to be page-aligned and inside the GC heap.
+inline void *SoftwareWriteWatch::GetPageAddress(size_t tableByteIndex)
+{
+    assert(tableByteIndex != 0);
+
+    const size_t pageAddressValue = tableByteIndex << AddressToTableByteIndexShift;
+    void *page = reinterpret_cast<void *>(pageAddressValue);
+    assert(page >= GetHeapStartAddress());
+    assert(page < GetHeapEndAddress());
+    assert(ALIGN_DOWN(page, OS_PAGE_SIZE) == page);
+    return page;
+}
+
+// Returns the number of table bytes needed to cover [heapStartAddress, heapEndAddress), rounded up to a multiple of
+// sizeof(size_t).
+inline size_t SoftwareWriteWatch::GetTableByteSize(void *heapStartAddress, void *heapEndAddress)
+{
+    assert(heapStartAddress != nullptr);
+    assert(heapEndAddress != nullptr);
+    assert(heapStartAddress < heapEndAddress);
+
+    // heapEndAddress is exclusive, so the last covered byte is the one just before it
+    const size_t lastByteIndex = GetTableByteIndex(reinterpret_cast<uint8_t *>(heapEndAddress) - 1);
+    const size_t firstByteIndex = GetTableByteIndex(heapStartAddress);
+    const size_t unalignedByteSize = lastByteIndex - firstByteIndex + 1;
+    return ALIGN_UP(unalignedByteSize, sizeof(size_t));
+}
+
+// Returns byteSizeBeforeTable rounded up to a multiple of sizeof(size_t), because the start of the table needs to be
+// aligned to size_t.
+inline size_t SoftwareWriteWatch::GetTableStartByteOffset(size_t byteSizeBeforeTable)
+{
+    const size_t alignedOffset = ALIGN_UP(byteSizeBeforeTable, sizeof(size_t));
+    return alignedOffset;
+}
+
+// Returns 'table' moved back by the table byte index of heapStartAddress. Indexing the result with
+// (address >> AddressToTableByteIndexShift) then reaches the right byte directly, so lookups by address do not have to
+// compute (address - heapStartAddress) each time.
+inline uint8_t *SoftwareWriteWatch::TranslateTableToExcludeHeapStartAddress(uint8_t *table, void *heapStartAddress)
+{
+    assert(table != nullptr);
+    assert(heapStartAddress != nullptr);
+
+    const size_t heapStartByteIndex = GetTableByteIndex(heapStartAddress);
+    return table - heapStartByteIndex;
+}
+
+// Maps the memory region [baseAddress, baseAddress + regionByteSize) onto the table. *tableBaseAddressRef receives the
+// address of the first table byte of the region and *tableRegionByteSizeRef the number of table bytes it spans.
+inline void SoftwareWriteWatch::TranslateToTableRegion(
+    void *baseAddress,
+    size_t regionByteSize,
+    uint8_t **tableBaseAddressRef,
+    size_t *tableRegionByteSizeRef)
+{
+    VerifyCreated();
+    VerifyMemoryRegion(baseAddress, regionByteSize);
+    assert(tableBaseAddressRef != nullptr);
+    assert(tableRegionByteSizeRef != nullptr);
+
+    const size_t firstByteIndex = GetTableByteIndex(baseAddress);
+    *tableBaseAddressRef = GetTable() + firstByteIndex;
+
+    // The span is inclusive of the table byte that covers the last byte of the region
+    uint8_t *const lastByteAddress = reinterpret_cast<uint8_t *>(baseAddress) + (regionByteSize - 1);
+    const size_t lastByteIndex = GetTableByteIndex(lastByteAddress);
+    *tableRegionByteSizeRef = lastByteIndex - firstByteIndex + 1;
+}
+
+// Clears the dirty state of every page touched by [baseAddress, baseAddress + regionByteSize).
+inline void SoftwareWriteWatch::ClearDirty(void *baseAddress, size_t regionByteSize)
+{
+    VerifyCreated();
+    VerifyMemoryRegion(baseAddress, regionByteSize);
+
+    uint8_t *firstTableByte;
+    size_t tableByteCount;
+    TranslateToTableRegion(baseAddress, regionByteSize, &firstTableByte, &tableByteCount);
+
+    // A table byte of zero means the page is not dirty
+    memset(firstTableByte, 0, tableByteCount);
+}
+
+// Marks as dirty the page containing a write of writeByteSize bytes at 'address'. The write may be at most
+// pointer-sized and must lie within a single page (both asserted).
+inline void SoftwareWriteWatch::SetDirty(void *address, size_t writeByteSize)
+{
+    VerifyCreated();
+    VerifyMemoryRegion(address, writeByteSize);
+    assert(address != nullptr);
+    assert(writeByteSize <= sizeof(void *));
+
+    const size_t pageByteIndex = GetTableByteIndex(address);
+    assert(GetTableByteIndex(reinterpret_cast<uint8_t *>(address) + (writeByteSize - 1)) == pageByteIndex);
+
+    // Store only when the page is not yet marked, so an already-dirty page costs a read but no write
+    uint8_t &dirtyByte = GetTable()[pageByteIndex];
+    if (dirtyByte == 0)
+    {
+        dirtyByte = 0xff;
+    }
+}
+
+// Marks as dirty every page touched by [baseAddress, baseAddress + regionByteSize).
+inline void SoftwareWriteWatch::SetDirtyRegion(void *baseAddress, size_t regionByteSize)
+{
+    VerifyCreated();
+    VerifyMemoryRegion(baseAddress, regionByteSize);
+
+    uint8_t *firstTableByte;
+    size_t tableByteCount;
+    TranslateToTableRegion(baseAddress, regionByteSize, &firstTableByte, &tableByteCount);
+
+    // Every byte of the region becomes 0xff, the value that marks a page as dirty
+    memset(firstTableByte, 0xff, tableByteCount);
+}
+
+#endif // !DACCESS_COMPILE
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+#endif // !__SOFTWARE_WRITE_WATCH_H__
diff --git a/src/inc/stdmacros.h b/src/inc/stdmacros.h
index 6f27c211c5..ab77a2cd91 100644
--- a/src/inc/stdmacros.h
+++ b/src/inc/stdmacros.h
@@ -188,6 +188,12 @@ inline void* ALIGN_UP( void* val, size_t alignment )
return (void*) ALIGN_UP( (size_t)val, alignment );
}
+// Byte-pointer overload: rounds the address up through the size_t overload and converts the result back to uint8_t*.
+inline uint8_t* ALIGN_UP( uint8_t* val, size_t alignment )
+{
+    WRAPPER_NO_CONTRACT;
+
+    const size_t alignedValue = ALIGN_UP( reinterpret_cast<size_t>(val), alignment );
+    return reinterpret_cast<uint8_t*>(alignedValue);
+}
inline size_t ALIGN_DOWN( size_t val, size_t alignment )
{
@@ -203,6 +209,11 @@ inline void* ALIGN_DOWN( void* val, size_t alignment )
WRAPPER_NO_CONTRACT;
return (void*) ALIGN_DOWN( (size_t)val, alignment );
}
+// Byte-pointer overload: rounds the address down through the size_t overload and converts the result back to uint8_t*.
+inline uint8_t* ALIGN_DOWN( uint8_t* val, size_t alignment )
+{
+    WRAPPER_NO_CONTRACT;
+    const size_t alignedValue = ALIGN_DOWN( reinterpret_cast<size_t>(val), alignment );
+    return reinterpret_cast<uint8_t*>(alignedValue);
+}
inline BOOL IS_ALIGNED( size_t val, size_t alignment )
{
diff --git a/src/vm/CMakeLists.txt b/src/vm/CMakeLists.txt
index a2676b5385..13106a4a68 100644
--- a/src/vm/CMakeLists.txt
+++ b/src/vm/CMakeLists.txt
@@ -122,6 +122,7 @@ set(VM_SOURCES_DAC_AND_WKS_COMMON
securitydescriptorassembly.cpp
sigformat.cpp
siginfo.cpp
+ ../gc/softwarewritewatch.cpp
stackwalk.cpp
stublink.cpp
stubmgr.cpp
diff --git a/src/vm/amd64/JitHelpers_Fast.asm b/src/vm/amd64/JitHelpers_Fast.asm
index 8e39a6d39f..90185205af 100644
--- a/src/vm/amd64/JitHelpers_Fast.asm
+++ b/src/vm/amd64/JitHelpers_Fast.asm
@@ -27,6 +27,11 @@ EXTERN g_lowest_address:QWORD
EXTERN g_highest_address:QWORD
EXTERN g_card_table:QWORD
+ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+EXTERN g_sw_ww_table:QWORD
+EXTERN g_sw_ww_enabled_for_gc_heap:BYTE
+endif
+
ifdef WRITE_BARRIER_CHECK
; Those global variables are always defined, but should be 0 for Server GC
g_GCShadow TEXTEQU <?g_GCShadow@@3PEAEEA>
@@ -466,6 +471,67 @@ ifdef _DEBUG
jmp JIT_WriteBarrier_Debug
endif
+ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ ; JIT_WriteBarrier_WriteWatch_PostGrow64
+
+ ; Regarding patchable constants:
+ ; - 64-bit constants have to be loaded into a register
+ ; - The constants have to be aligned to 8 bytes so that they can be patched easily
+ ; - The constant loads have been located to minimize NOP padding required to align the constants
+ ; - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special
+ ; non-volatile calling convention, this should be changed to use just one register.
+
+ ; Do the move into the GC . It is correct to take an AV here, the EH code
+ ; figures out that this came from a WriteBarrier and correctly maps it back
+ ; to the managed method which called the WriteBarrier (see setup in
+ ; InitializeExceptionHandling, vm\exceptionhandling.cpp).
+ mov [rcx], rdx
+
+ ; Update the write watch table if necessary
+ mov rax, rcx
+ mov r8, 0F0F0F0F0F0F0F0F0h
+ shr rax, 0Ch ; SoftwareWriteWatch::AddressToTableByteIndexShift
+ NOP_2_BYTE ; padding for alignment of constant
+ mov r9, 0F0F0F0F0F0F0F0F0h
+ add rax, r8
+ cmp byte ptr [rax], 0h
+ jne CheckCardTable
+ mov byte ptr [rax], 0FFh
+
+ NOP_3_BYTE ; padding for alignment of constant
+
+ ; Check the lower and upper ephemeral region bounds
+ CheckCardTable:
+ cmp rdx, r9
+ jb Exit
+
+ NOP_3_BYTE ; padding for alignment of constant
+
+ mov r8, 0F0F0F0F0F0F0F0F0h
+
+ cmp rdx, r8
+ jae Exit
+
+ nop ; padding for alignment of constant
+
+ mov rax, 0F0F0F0F0F0F0F0F0h
+
+ ; Touch the card table entry, if not already dirty.
+ shr rcx, 0Bh
+ cmp byte ptr [rcx + rax], 0FFh
+ jne UpdateCardTable
+ REPRET
+
+ UpdateCardTable:
+ mov byte ptr [rcx + rax], 0FFh
+ ret
+
+ align 16
+ Exit:
+ REPRET
+else
+ ; JIT_WriteBarrier_PostGrow64
+
; Do the move into the GC . It is correct to take an AV here, the EH code
; figures out that this came from a WriteBarrier and correctly maps it back
; to the managed method which called the WriteBarrier (see setup in
@@ -510,6 +576,8 @@ endif
align 16
Exit:
REPRET
+endif
+
; make sure this guy is bigger than any of the other guys
align 16
nop
@@ -577,7 +645,8 @@ LEAF_END JIT_PatchedCodeLast, _TEXT
; Entry:
; RDI - address of ref-field (assigned to)
; RSI - address of the data (source)
-; RCX can be trashed
+; RCX is trashed
+; RAX is trashed when FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP is defined
; Exit:
; RDI, RSI are incremented by SIZEOF(LPVOID)
LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT
@@ -653,7 +722,20 @@ ifdef WRITE_BARRIER_CHECK
pop r10
endif
+ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ ; Update the write watch table if necessary
+ cmp byte ptr [g_sw_ww_enabled_for_gc_heap], 0h
+ je CheckCardTable
+ mov rax, rdi
+ shr rax, 0Ch ; SoftwareWriteWatch::AddressToTableByteIndexShift
+ add rax, qword ptr [g_sw_ww_table]
+ cmp byte ptr [rax], 0h
+ jne CheckCardTable
+ mov byte ptr [rax], 0FFh
+endif
+
; See if we can just quick out
+ CheckCardTable:
cmp rcx, [g_ephemeral_low]
jb Exit
cmp rcx, [g_ephemeral_high]
diff --git a/src/vm/amd64/JitHelpers_FastWriteBarriers.asm b/src/vm/amd64/JitHelpers_FastWriteBarriers.asm
index 17730142ed..07e985f94f 100644
--- a/src/vm/amd64/JitHelpers_FastWriteBarriers.asm
+++ b/src/vm/amd64/JitHelpers_FastWriteBarriers.asm
@@ -41,39 +41,6 @@ include asmconstants.inc
; (card table, ephemeral region ranges, etc) are naturally aligned since
; there are codepaths that will overwrite these values while the EE is running.
;
-LEAF_ENTRY JIT_WriteBarrier_PreGrow32, _TEXT
- align 4
- ; Do the move into the GC . It is correct to take an AV here, the EH code
- ; figures out that this came from a WriteBarrier and correctly maps it back
- ; to the managed method which called the WriteBarrier (see setup in
- ; InitializeExceptionHandling, vm\exceptionhandling.cpp).
- mov [rcx], rdx
-
- NOP_2_BYTE ; padding for alignment of constant
-
-PATCH_LABEL JIT_WriteBarrier_PreGrow32_PatchLabel_Lower
- cmp rdx, 0F0F0F0F0h
- jb Exit
-
- shr rcx, 0Bh
-PATCH_LABEL JIT_WriteBarrier_PreGrow32_PatchLabel_CardTable_Check
- cmp byte ptr [rcx + 0F0F0F0F0h], 0FFh
- jne UpdateCardTable
- REPRET
-
- nop ; padding for alignment of constant
-
-PATCH_LABEL JIT_WriteBarrier_PreGrow32_PatchLabel_CardTable_Update
- UpdateCardTable:
- mov byte ptr [rcx + 0F0F0F0F0h], 0FFh
- ret
-
- align 16
- Exit:
- REPRET
-LEAF_END_MARKED JIT_WriteBarrier_PreGrow32, _TEXT
-
-
LEAF_ENTRY JIT_WriteBarrier_PreGrow64, _TEXT
align 8
; Do the move into the GC . It is correct to take an AV here, the EH code
@@ -165,57 +132,107 @@ PATCH_LABEL JIT_WriteBarrier_PostGrow64_Patch_Label_CardTable
REPRET
LEAF_END_MARKED JIT_WriteBarrier_PostGrow64, _TEXT
-LEAF_ENTRY JIT_WriteBarrier_PostGrow32, _TEXT
- align 4
+
+ifdef FEATURE_SVR_GC
+
+LEAF_ENTRY JIT_WriteBarrier_SVR64, _TEXT
+ align 8
+ ;
+ ; SVR GC has multiple heaps, so it cannot provide one single
+ ; ephemeral region to bounds check against, so we just skip the
+ ; bounds checking all together and do our card table update
+ ; unconditionally.
+ ;
+
; Do the move into the GC . It is correct to take an AV here, the EH code
; figures out that this came from a WriteBarrier and correctly maps it back
; to the managed method which called the WriteBarrier (see setup in
; InitializeExceptionHandling, vm\exceptionhandling.cpp).
mov [rcx], rdx
- NOP_2_BYTE ; padding for alignment of constant
+ NOP_3_BYTE ; padding for alignment of constant
- ; Check the lower and upper ephemeral region bounds
+PATCH_LABEL JIT_WriteBarrier_SVR64_PatchLabel_CardTable
+ mov rax, 0F0F0F0F0F0F0F0F0h
-PATCH_LABEL JIT_WriteBarrier_PostGrow32_PatchLabel_Lower
- cmp rdx, 0F0F0F0F0h
- jb Exit
+ shr rcx, 0Bh
- NOP_3_BYTE ; padding for alignment of constant
+ cmp byte ptr [rcx + rax], 0FFh
+ jne UpdateCardTable
+ REPRET
-PATCH_LABEL JIT_WriteBarrier_PostGrow32_PatchLabel_Upper
- cmp rdx, 0F0F0F0F0h
- jae Exit
+ UpdateCardTable:
+ mov byte ptr [rcx + rax], 0FFh
+ ret
+LEAF_END_MARKED JIT_WriteBarrier_SVR64, _TEXT
+
+endif
+
+
+ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+
+LEAF_ENTRY JIT_WriteBarrier_WriteWatch_PreGrow64, _TEXT
+ align 8
+
+ ; Regarding patchable constants:
+ ; - 64-bit constants have to be loaded into a register
+ ; - The constants have to be aligned to 8 bytes so that they can be patched easily
+ ; - The constant loads have been located to minimize NOP padding required to align the constants
+ ; - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special
+ ; non-volatile calling convention, this should be changed to use just one register.
+
+ ; Do the move into the GC . It is correct to take an AV here, the EH code
+ ; figures out that this came from a WriteBarrier and correctly maps it back
+ ; to the managed method which called the WriteBarrier (see setup in
+ ; InitializeExceptionHandling, vm\exceptionhandling.cpp).
+ mov [rcx], rdx
+
+ ; Update the write watch table if necessary
+ mov rax, rcx
+PATCH_LABEL JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_WriteWatchTable
+ mov r8, 0F0F0F0F0F0F0F0F0h
+ shr rax, 0Ch ; SoftwareWriteWatch::AddressToTableByteIndexShift
+ NOP_2_BYTE ; padding for alignment of constant
+PATCH_LABEL JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_Lower
+ mov r9, 0F0F0F0F0F0F0F0F0h
+ add rax, r8
+ cmp byte ptr [rax], 0h
+ jne CheckCardTable
+ mov byte ptr [rax], 0FFh
+
+ ; Check the lower ephemeral region bound.
+ CheckCardTable:
+ cmp rdx, r9
+ jb Exit
; Touch the card table entry, if not already dirty.
shr rcx, 0Bh
-
-PATCH_LABEL JIT_WriteBarrier_PostGrow32_PatchLabel_CheckCardTable
- cmp byte ptr [rcx + 0F0F0F0F0h], 0FFh
+ NOP_2_BYTE ; padding for alignment of constant
+PATCH_LABEL JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_CardTable
+ mov rax, 0F0F0F0F0F0F0F0F0h
+ cmp byte ptr [rcx + rax], 0FFh
jne UpdateCardTable
REPRET
- nop ; padding for alignment of constant
-
-PATCH_LABEL JIT_WriteBarrier_PostGrow32_PatchLabel_UpdateCardTable
UpdateCardTable:
- mov byte ptr [rcx + 0F0F0F0F0h], 0FFh
+ mov byte ptr [rcx + rax], 0FFh
ret
align 16
Exit:
REPRET
-LEAF_END_MARKED JIT_WriteBarrier_PostGrow32, _TEXT
+LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_PreGrow64, _TEXT
-LEAF_ENTRY JIT_WriteBarrier_SVR32, _TEXT
- align 4
- ;
- ; SVR GC has multiple heaps, so it cannot provide one single
- ; ephemeral region to bounds check against, so we just skip the
- ; bounds checking all together and do our card table update
- ; unconditionally.
- ;
+LEAF_ENTRY JIT_WriteBarrier_WriteWatch_PostGrow64, _TEXT
+ align 8
+
+ ; Regarding patchable constants:
+ ; - 64-bit constants have to be loaded into a register
+ ; - The constants have to be aligned to 8 bytes so that they can be patched easily
+ ; - The constant loads have been located to minimize NOP padding required to align the constants
+ ; - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special
+ ; non-volatile calling convention, this should be changed to use just one register.
; Do the move into the GC . It is correct to take an AV here, the EH code
; figures out that this came from a WriteBarrier and correctly maps it back
@@ -223,25 +240,67 @@ LEAF_ENTRY JIT_WriteBarrier_SVR32, _TEXT
; InitializeExceptionHandling, vm\exceptionhandling.cpp).
mov [rcx], rdx
- shr rcx, 0Bh
+ ; Update the write watch table if necessary
+ mov rax, rcx
+PATCH_LABEL JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_WriteWatchTable
+ mov r8, 0F0F0F0F0F0F0F0F0h
+ shr rax, 0Ch ; SoftwareWriteWatch::AddressToTableByteIndexShift
+ NOP_2_BYTE ; padding for alignment of constant
+PATCH_LABEL JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_Lower
+ mov r9, 0F0F0F0F0F0F0F0F0h
+ add rax, r8
+ cmp byte ptr [rax], 0h
+ jne CheckCardTable
+ mov byte ptr [rax], 0FFh
NOP_3_BYTE ; padding for alignment of constant
-PATCH_LABEL JIT_WriteBarrier_SVR32_PatchLabel_CheckCardTable
- cmp byte ptr [rcx + 0F0F0F0F0h], 0FFh
- jne UpdateCardTable
- REPRET
+ ; Check the lower and upper ephemeral region bounds
+ CheckCardTable:
+ cmp rdx, r9
+ jb Exit
+
+ NOP_3_BYTE ; padding for alignment of constant
+
+PATCH_LABEL JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_Upper
+ mov r8, 0F0F0F0F0F0F0F0F0h
+
+ cmp rdx, r8
+ jae Exit
nop ; padding for alignment of constant
-PATCH_LABEL JIT_WriteBarrier_SVR32_PatchLabel_UpdateCardTable
+PATCH_LABEL JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_CardTable
+ mov rax, 0F0F0F0F0F0F0F0F0h
+
+ ; Touch the card table entry, if not already dirty.
+ shr rcx, 0Bh
+ cmp byte ptr [rcx + rax], 0FFh
+ jne UpdateCardTable
+ REPRET
+
UpdateCardTable:
- mov byte ptr [rcx + 0F0F0F0F0h], 0FFh
+ mov byte ptr [rcx + rax], 0FFh
ret
-LEAF_END_MARKED JIT_WriteBarrier_SVR32, _TEXT
-LEAF_ENTRY JIT_WriteBarrier_SVR64, _TEXT
+ align 16
+ Exit:
+ REPRET
+LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_PostGrow64, _TEXT
+
+
+ifdef FEATURE_SVR_GC
+
+LEAF_ENTRY JIT_WriteBarrier_WriteWatch_SVR64, _TEXT
align 8
+
+ ; Regarding patchable constants:
+ ; - 64-bit constants have to be loaded into a register
+ ; - The constants have to be aligned to 8 bytes so that they can be patched easily
+ ; - The constant loads have been located to minimize NOP padding required to align the constants
+ ; - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special
+ ; non-volatile calling convention, this should be changed to use just one register.
+
;
; SVR GC has multiple heaps, so it cannot provide one single
; ephemeral region to bounds check against, so we just skip the
@@ -255,21 +314,32 @@ LEAF_ENTRY JIT_WriteBarrier_SVR64, _TEXT
; InitializeExceptionHandling, vm\exceptionhandling.cpp).
mov [rcx], rdx
- NOP_3_BYTE ; padding for alignment of constant
-
-PATCH_LABEL JIT_WriteBarrier_SVR64_PatchLabel_CardTable
- mov rax, 0F0F0F0F0F0F0F0F0h
-
+ ; Update the write watch table if necessary
+ mov rax, rcx
+PATCH_LABEL JIT_WriteBarrier_WriteWatch_SVR64_PatchLabel_WriteWatchTable
+ mov r8, 0F0F0F0F0F0F0F0F0h
+ shr rax, 0Ch ; SoftwareWriteWatch::AddressToTableByteIndexShift
+ NOP_2_BYTE ; padding for alignment of constant
+PATCH_LABEL JIT_WriteBarrier_WriteWatch_SVR64_PatchLabel_CardTable
+ mov r9, 0F0F0F0F0F0F0F0F0h
+ add rax, r8
+ cmp byte ptr [rax], 0h
+ jne CheckCardTable
+ mov byte ptr [rax], 0FFh
+
+ CheckCardTable:
shr rcx, 0Bh
-
- cmp byte ptr [rcx + rax], 0FFh
+ cmp byte ptr [rcx + r9], 0FFh
jne UpdateCardTable
REPRET
UpdateCardTable:
- mov byte ptr [rcx + rax], 0FFh
+ mov byte ptr [rcx + r9], 0FFh
ret
-LEAF_END_MARKED JIT_WriteBarrier_SVR64, _TEXT
+LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_SVR64, _TEXT
- end
+endif
+endif
+
+ end
diff --git a/src/vm/amd64/JitHelpers_Slow.asm b/src/vm/amd64/JitHelpers_Slow.asm
index 51829cad42..64b9a82e61 100644
--- a/src/vm/amd64/JitHelpers_Slow.asm
+++ b/src/vm/amd64/JitHelpers_Slow.asm
@@ -28,6 +28,11 @@ EXTERN g_lowest_address:QWORD
EXTERN g_highest_address:QWORD
EXTERN g_card_table:QWORD
+ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+EXTERN g_sw_ww_table:QWORD
+EXTERN g_sw_ww_enabled_for_gc_heap:BYTE
+endif
+
ifdef WRITE_BARRIER_CHECK
; Those global variables are always defined, but should be 0 for Server GC
g_GCShadow TEXTEQU <?g_GCShadow@@3PEAEEA>
@@ -118,6 +123,19 @@ ifdef WRITE_BARRIER_CHECK
DoneShadow:
endif
+ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ ; Update the write watch table if necessary
+ cmp byte ptr [g_sw_ww_enabled_for_gc_heap], 0h
+ je CheckCardTable
+ mov r10, rcx
+ shr r10, 0Ch ; SoftwareWriteWatch::AddressToTableByteIndexShift
+ add r10, qword ptr [g_sw_ww_table]
+ cmp byte ptr [r10], 0h
+ jne CheckCardTable
+ mov byte ptr [r10], 0FFh
+endif
+
+ CheckCardTable:
; See if we can just quick out
cmp rax, [g_ephemeral_low]
jb Exit
diff --git a/src/vm/amd64/jithelpers_fast.S b/src/vm/amd64/jithelpers_fast.S
index 22f21bb8de..a0650759f6 100644
--- a/src/vm/amd64/jithelpers_fast.S
+++ b/src/vm/amd64/jithelpers_fast.S
@@ -10,6 +10,45 @@ LEAF_ENTRY JIT_PatchedCodeStart, _TEXT
ret
LEAF_END JIT_PatchedCodeStart, _TEXT
+
+// There is an even more optimized version of these helpers possible which takes
+// advantage of knowledge of which way the ephemeral heap is growing to only do 1/2
+// that check (this is more significant in the JIT_WriteBarrier case).
+//
+// Additionally we can look into providing helpers which will take the src/dest from
+// specific registers (like x86) which _could_ (??) make for easier register allocation
+// for the JIT64, however it might lead to having to have some nasty code that treats
+// these guys really special like... :(.
+//
+// Version that does the move, checks whether or not it's in the GC and whether or not
+// it needs to have its card updated
+//
+// void JIT_CheckedWriteBarrier(Object** dst, Object* src)
+LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT
+
+ // When WRITE_BARRIER_CHECK is defined _NotInHeap will write the reference
+ // but if it isn't then it will just return.
+ //
+ // See if this is in GCHeap
+ PREPARE_EXTERNAL_VAR g_lowest_address, rax
+ cmp rdi, [rax]
+ // jb NotInHeap
+ .byte 0x72, 0x0e
+ PREPARE_EXTERNAL_VAR g_highest_address, rax
+ cmp rdi, [rax]
+ // jnb NotInHeap
+ .byte 0x73, 0x02
+
+ // call C_FUNC(JIT_WriteBarrier)
+ .byte 0xeb, 0x05
+
+ NotInHeap:
+ // See comment above about possible AV
+ mov [rdi], rsi
+ ret
+LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT
+
+
// This is used by the mechanism to hold either the JIT_WriteBarrier_PreGrow
// or JIT_WriteBarrier_PostGrow code (depending on the state of the GC). It _WILL_
// change at runtime as the GC changes. Initially it should simply be a copy of the
@@ -22,6 +61,71 @@ LEAF_ENTRY JIT_WriteBarrier, _TEXT
jmp C_FUNC(JIT_WriteBarrier_Debug)
#endif
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ // JIT_WriteBarrier_WriteWatch_PostGrow64
+
+ // Regarding patchable constants:
+ // - 64-bit constants have to be loaded into a register
+ // - The constants have to be aligned to 8 bytes so that they can be patched easily
+ // - The constant loads have been located to minimize NOP padding required to align the constants
+ // - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special
+ // non-volatile calling convention, this should be changed to use just one register.
+
+ // Do the move into the GC . It is correct to take an AV here, the EH code
+ // figures out that this came from a WriteBarrier and correctly maps it back
+ // to the managed method which called the WriteBarrier (see setup in
+ // InitializeExceptionHandling, vm\exceptionhandling.cpp).
+ mov [rdi], rsi
+
+ // Update the write watch table if necessary
+ mov rax, rdi
+ movabs r10, 0xF0F0F0F0F0F0F0F0
+ shr rax, 0Ch // SoftwareWriteWatch::AddressToTableByteIndexShift
+ NOP_2_BYTE // padding for alignment of constant
+ movabs r11, 0xF0F0F0F0F0F0F0F0
+ add rax, r10
+ cmp byte ptr [rax], 0h
+ .byte 0x75, 0x06
+ // jne CheckCardTable
+ mov byte ptr [rax], 0FFh
+
+ NOP_3_BYTE // padding for alignment of constant
+
+ // Check the lower and upper ephemeral region bounds
+ CheckCardTable:
+ cmp rsi, r11
+ .byte 0x72,0x3D
+ // jb Exit
+
+ NOP_3_BYTE // padding for alignment of constant
+
+ movabs r10, 0xF0F0F0F0F0F0F0F0
+
+ cmp rsi, r10
+ .byte 0x73,0x2B
+ // jae Exit
+
+ nop // padding for alignment of constant
+
+ movabs rax, 0xF0F0F0F0F0F0F0F0
+
+ // Touch the card table entry, if not already dirty.
+ shr rdi, 0Bh
+ cmp byte ptr [rdi + rax], 0FFh
+ .byte 0x75, 0x02
+ // jne UpdateCardTable
+ REPRET
+
+ UpdateCardTable:
+ mov byte ptr [rdi + rax], 0FFh
+ ret
+
+ .balign 16
+ Exit:
+ REPRET
+#else
+ // JIT_WriteBarrier_PostGrow64
+
// Do the move into the GC . It is correct to take an AV here, the EH code
// figures out that this came from a WriteBarrier and correctly maps it back
// to the managed method which called the WriteBarrier (see setup in
@@ -69,6 +173,8 @@ LEAF_ENTRY JIT_WriteBarrier, _TEXT
.balign 16
Exit:
REPRET
+#endif
+
// make sure this guy is bigger than any of the other guys
.balign 16
nop
@@ -79,43 +185,6 @@ LEAF_ENTRY JIT_PatchedCodeLast, _TEXT
ret
LEAF_END JIT_PatchedCodeLast, _TEXT
-// There is an even more optimized version of these helpers possible which takes
-// advantage of knowledge of which way the ephemeral heap is growing to only do 1/2
-// that check (this is more significant in the JIT_WriteBarrier case).
-//
-// Additionally we can look into providing helpers which will take the src/dest from
-// specific registers (like x86) which _could_ (??) make for easier register allocation
-// for the JIT64, however it might lead to having to have some nasty code that treats
-// these guys really special like... :(.
-//
-// Version that does the move, checks whether or not it's in the GC and whether or not
-// it needs to have it's card updated
-//
-// void JIT_CheckedWriteBarrier(Object** dst, Object* src)
-LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT
-
- // When WRITE_BARRIER_CHECK is defined _NotInHeap will write the reference
- // but if it isn't then it will just return.
- //
- // See if this is in GCHeap
- PREPARE_EXTERNAL_VAR g_lowest_address, rax
- cmp rdi, [rax]
- // jb NotInHeap
- .byte 0x72, 0x0e
- PREPARE_EXTERNAL_VAR g_highest_address, rax
- cmp rdi, [rax]
- // jnb NotInHeap
- .byte 0x73, 0x02
-
- // call C_FUNC(JIT_WriteBarrier)
- .byte 0xeb, 0x84
-
- NotInHeap:
- // See comment above about possible AV
- mov [rdi], rsi
- ret
-LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT
-
// JIT_ByRefWriteBarrier has weird semantics, see usage in StubLinkerX86.cpp
//
// Entry:
@@ -128,7 +197,7 @@ LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT
//
// RCX is trashed
// RAX is trashed
-// R10 is trashed on Debug build
+// R10 is trashed
// R11 is trashed on Debug build
// Exit:
// RDI, RSI are incremented by SIZEOF(LPVOID)
@@ -202,6 +271,21 @@ LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT
DoneShadow_ByRefWriteBarrier:
#endif
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ // Update the write watch table if necessary
+ PREPARE_EXTERNAL_VAR g_sw_ww_enabled_for_gc_heap, rax
+ cmp byte ptr [rax], 0h
+ je CheckCardTable_ByRefWriteBarrier
+ mov rax, rdi
+ shr rax, 0Ch // SoftwareWriteWatch::AddressToTableByteIndexShift
+ PREPARE_EXTERNAL_VAR g_sw_ww_table, r10
+ add rax, qword ptr [r10]
+ cmp byte ptr [rax], 0h
+ jne CheckCardTable_ByRefWriteBarrier
+ mov byte ptr [rax], 0FFh
+#endif
+
+ CheckCardTable_ByRefWriteBarrier:
// See if we can just quick out
PREPARE_EXTERNAL_VAR g_ephemeral_low, rax
cmp rcx, [rax]
diff --git a/src/vm/amd64/jithelpers_fastwritebarriers.S b/src/vm/amd64/jithelpers_fastwritebarriers.S
index f8d41cb88d..085f85bc8b 100644
--- a/src/vm/amd64/jithelpers_fastwritebarriers.S
+++ b/src/vm/amd64/jithelpers_fastwritebarriers.S
@@ -5,39 +5,6 @@
.intel_syntax noprefix
#include "unixasmmacros.inc"
- .balign 4
-LEAF_ENTRY JIT_WriteBarrier_PreGrow32, _TEXT
- // Do the move into the GC . It is correct to take an AV here, the EH code
- // figures out that this came from a WriteBarrier and correctly maps it back
- // to the managed method which called the WriteBarrier (see setup in
- // InitializeExceptionHandling, vm\exceptionhandling.cpp).
- mov [rdi], rsi
-
- NOP_2_BYTE // padding for alignment of constant
-
-PATCH_LABEL JIT_WriteBarrier_PreGrow32_PatchLabel_Lower
- cmp rsi, -0F0F0F10h // 0F0F0F0F0h
- .byte 0x72, 0x22
- // jb Exit_PreGrow32
-
- shr rdi, 0Bh
-PATCH_LABEL JIT_WriteBarrier_PreGrow32_PatchLabel_CardTable_Check
- cmp byte ptr [rdi + 0F0F0F0F0h], 0FFh
- .byte 0x75, 0x03
- // jne UpdateCardTable_PreGrow32
- REPRET
-
- nop // padding for alignment of constant
-
-PATCH_LABEL JIT_WriteBarrier_PreGrow32_PatchLabel_CardTable_Update
- UpdateCardTable_PreGrow32:
- mov byte ptr [rdi + 0F0F0F0F0h], 0FFh
- ret
-
- .balign 16
- Exit_PreGrow32:
- REPRET
-LEAF_END_MARKED JIT_WriteBarrier_PreGrow32, _TEXT
.balign 8
LEAF_ENTRY JIT_WriteBarrier_PreGrow64, _TEXT
@@ -80,6 +47,7 @@ PATCH_LABEL JIT_WriteBarrier_PreGrow64_Patch_Label_CardTable
REPRET
LEAF_END_MARKED JIT_WriteBarrier_PreGrow64, _TEXT
+
.balign 8
// See comments for JIT_WriteBarrier_PreGrow (above).
LEAF_ENTRY JIT_WriteBarrier_PostGrow64, _TEXT
@@ -134,60 +102,109 @@ PATCH_LABEL JIT_WriteBarrier_PostGrow64_Patch_Label_CardTable
REPRET
LEAF_END_MARKED JIT_WriteBarrier_PostGrow64, _TEXT
- .balign 4
-LEAF_ENTRY JIT_WriteBarrier_PostGrow32, _TEXT
+
+#ifdef FEATURE_SVR_GC
+
+ .balign 8
+LEAF_ENTRY JIT_WriteBarrier_SVR64, _TEXT
+ //
+ // SVR GC has multiple heaps, so it cannot provide one single
+ // ephemeral region to bounds check against, so we just skip the
+ // bounds checking altogether and do our card table update
+ // unconditionally.
+ //
+
// Do the move into the GC . It is correct to take an AV here, the EH code
// figures out that this came from a WriteBarrier and correctly maps it back
// to the managed method which called the WriteBarrier (see setup in
// InitializeExceptionHandling, vm\exceptionhandling.cpp).
mov [rdi], rsi
- NOP_2_BYTE // padding for alignment of constant
+ NOP_3_BYTE // padding for alignment of constant
- // Check the lower and upper ephemeral region bounds
+PATCH_LABEL JIT_WriteBarrier_SVR64_PatchLabel_CardTable
+ movabs rax, 0xF0F0F0F0F0F0F0F0
-PATCH_LABEL JIT_WriteBarrier_PostGrow32_PatchLabel_Lower
- cmp rsi, -0F0F0F10h // 0F0F0F0F0h
- .byte 0x72, 0x2e
- // jb Exit_PostGrow32
+ shr rdi, 0Bh
- NOP_3_BYTE // padding for alignment of constant
+ cmp byte ptr [rdi + rax], 0FFh
+ .byte 0x75, 0x02
+ // jne UpdateCardTable_SVR64
+ REPRET
-PATCH_LABEL JIT_WriteBarrier_PostGrow32_PatchLabel_Upper
- cmp rsi, -0F0F0F10h // 0F0F0F0F0h
- .byte 0x73, 0x22
- // jae Exit_PostGrow32
+ UpdateCardTable_SVR64:
+ mov byte ptr [rdi + rax], 0FFh
+ ret
+LEAF_END_MARKED JIT_WriteBarrier_SVR64, _TEXT
- // Touch the card table entry, if not already dirty.
- shr rdi, 0Bh
+#endif
+
+
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+
+ .balign 8
+LEAF_ENTRY JIT_WriteBarrier_WriteWatch_PreGrow64, _TEXT
+ // Regarding patchable constants:
+ // - 64-bit constants have to be loaded into a register
+ // - The constants have to be aligned to 8 bytes so that they can be patched easily
+ // - The constant loads have been located to minimize NOP padding required to align the constants
+ // - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special
+ // non-volatile calling convention, this should be changed to use just one register.
+
+ // Do the move into the GC . It is correct to take an AV here, the EH code
+ // figures out that this came from a WriteBarrier and correctly maps it back
+ // to the managed method which called the WriteBarrier (see setup in
+ // InitializeExceptionHandling, vm\exceptionhandling.cpp).
+ mov [rdi], rsi
-PATCH_LABEL JIT_WriteBarrier_PostGrow32_PatchLabel_CheckCardTable
- cmp byte ptr [rdi + 0F0F0F0F0h], 0FFh
+ // Update the write watch table if necessary
+ mov rax, rdi
+PATCH_LABEL JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_WriteWatchTable
+ movabs r10, 0xF0F0F0F0F0F0F0F0
+ shr rax, 0Ch // SoftwareWriteWatch::AddressToTableByteIndexShift
+ NOP_2_BYTE // padding for alignment of constant
+PATCH_LABEL JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_Lower
+ movabs r11, 0xF0F0F0F0F0F0F0F0
+ add rax, r10
+ cmp byte ptr [rax], 0h
.byte 0x75, 0x03
- // jne UpdateCardTable_PostGrow32
- REPRET
+ // jne CheckCardTable_WriteWatch_PreGrow64
+ mov byte ptr [rax], 0FFh
- nop // padding for alignment of constant
+ CheckCardTable_WriteWatch_PreGrow64:
+ // Check the lower ephemeral region bound.
+ cmp rsi, r11
+ .byte 0x72, 0x20
+ // jb Exit_WriteWatch_PreGrow64
-PATCH_LABEL JIT_WriteBarrier_PostGrow32_PatchLabel_UpdateCardTable
- UpdateCardTable_PostGrow32:
- mov byte ptr [rdi + 0F0F0F0F0h], 0FFh
+ // Touch the card table entry, if not already dirty.
+ shr rdi, 0Bh
+ NOP_2_BYTE // padding for alignment of constant
+PATCH_LABEL JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_CardTable
+ movabs rax, 0xF0F0F0F0F0F0F0F0
+ cmp byte ptr [rdi + rax], 0FFh
+ .byte 0x75, 0x02
+ // jne UpdateCardTable_WriteWatch_PreGrow64
+ REPRET
+
+ UpdateCardTable_WriteWatch_PreGrow64:
+ mov byte ptr [rdi + rax], 0FFh
ret
.balign 16
- Exit_PostGrow32:
+ Exit_WriteWatch_PreGrow64:
REPRET
-LEAF_END_MARKED JIT_WriteBarrier_PostGrow32, _TEXT
+LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_PreGrow64, _TEXT
- .balign 4
-LEAF_ENTRY JIT_WriteBarrier_SVR32, _TEXT
- //
- // SVR GC has multiple heaps, so it cannot provide one single
- // ephemeral region to bounds check against, so we just skip the
- // bounds checking all together and do our card table update
- // unconditionally.
- //
+ .balign 8
+LEAF_ENTRY JIT_WriteBarrier_WriteWatch_PostGrow64, _TEXT
+ // Regarding patchable constants:
+ // - 64-bit constants have to be loaded into a register
+ // - The constants have to be aligned to 8 bytes so that they can be patched easily
+ // - The constant loads have been located to minimize NOP padding required to align the constants
+ // - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special
+ // non-volatile calling convention, this should be changed to use just one register.
// Do the move into the GC . It is correct to take an AV here, the EH code
// figures out that this came from a WriteBarrier and correctly maps it back
@@ -195,26 +212,70 @@ LEAF_ENTRY JIT_WriteBarrier_SVR32, _TEXT
// InitializeExceptionHandling, vm\exceptionhandling.cpp).
mov [rdi], rsi
- shr rdi, 0Bh
+ // Update the write watch table if necessary
+ mov rax, rdi
+PATCH_LABEL JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_WriteWatchTable
+ movabs r10, 0xF0F0F0F0F0F0F0F0
+ shr rax, 0Ch // SoftwareWriteWatch::AddressToTableByteIndexShift
+ NOP_2_BYTE // padding for alignment of constant
+PATCH_LABEL JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_Lower
+ movabs r11, 0xF0F0F0F0F0F0F0F0
+ add rax, r10
+ cmp byte ptr [rax], 0h
+ .byte 0x75, 0x06
+ // jne CheckCardTable_WriteWatch_PostGrow64
+ mov byte ptr [rax], 0FFh
NOP_3_BYTE // padding for alignment of constant
-PATCH_LABEL JIT_WriteBarrier_SVR32_PatchLabel_CheckCardTable
- cmp byte ptr [rdi + 0F0F0F0F0h], 0FFh
- .byte 0x75, 0x03
- // jne UpdateCardTable_SVR32
- REPRET
+ // Check the lower and upper ephemeral region bounds
+ CheckCardTable_WriteWatch_PostGrow64:
+ cmp rsi, r11
+ .byte 0x72, 0x3d
+ // jb Exit_WriteWatch_PostGrow64
+
+ NOP_3_BYTE // padding for alignment of constant
+
+PATCH_LABEL JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_Upper
+ movabs r10, 0xF0F0F0F0F0F0F0F0
+
+ cmp rsi, r10
+ .byte 0x73, 0x2b
+ // jae Exit_WriteWatch_PostGrow64
nop // padding for alignment of constant
-PATCH_LABEL JIT_WriteBarrier_SVR32_PatchLabel_UpdateCardTable
- UpdateCardTable_SVR32:
- mov byte ptr [rdi + 0F0F0F0F0h], 0FFh
+PATCH_LABEL JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_CardTable
+ movabs rax, 0xF0F0F0F0F0F0F0F0
+
+ // Touch the card table entry, if not already dirty.
+ shr rdi, 0Bh
+ cmp byte ptr [rdi + rax], 0FFh
+ .byte 0x75, 0x02
+ // jne UpdateCardTable_WriteWatch_PostGrow64
+ REPRET
+
+ UpdateCardTable_WriteWatch_PostGrow64:
+ mov byte ptr [rdi + rax], 0FFh
ret
-LEAF_END_MARKED JIT_WriteBarrier_SVR32, _TEXT
+
+ .balign 16
+ Exit_WriteWatch_PostGrow64:
+ REPRET
+LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_PostGrow64, _TEXT
+
+
+#ifdef FEATURE_SVR_GC
.balign 8
-LEAF_ENTRY JIT_WriteBarrier_SVR64, _TEXT
+LEAF_ENTRY JIT_WriteBarrier_WriteWatch_SVR64, _TEXT
+ // Regarding patchable constants:
+ // - 64-bit constants have to be loaded into a register
+ // - The constants have to be aligned to 8 bytes so that they can be patched easily
+ // - The constant loads have been located to minimize NOP padding required to align the constants
+ // - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special
+ // non-volatile calling convention, this should be changed to use just one register.
+
//
// SVR GC has multiple heaps, so it cannot provide one single
// ephemeral region to bounds check against, so we just skip the
@@ -228,19 +289,31 @@ LEAF_ENTRY JIT_WriteBarrier_SVR64, _TEXT
// InitializeExceptionHandling, vm\exceptionhandling.cpp).
mov [rdi], rsi
- NOP_3_BYTE // padding for alignment of constant
-
-PATCH_LABEL JIT_WriteBarrier_SVR64_PatchLabel_CardTable
- movabs rax, 0xF0F0F0F0F0F0F0F0
+ // Update the write watch table if necessary
+ mov rax, rdi
+PATCH_LABEL JIT_WriteBarrier_WriteWatch_SVR64_PatchLabel_WriteWatchTable
+ movabs r10, 0xF0F0F0F0F0F0F0F0
+ shr rax, 0Ch // SoftwareWriteWatch::AddressToTableByteIndexShift
+ NOP_2_BYTE // padding for alignment of constant
+PATCH_LABEL JIT_WriteBarrier_WriteWatch_SVR64_PatchLabel_CardTable
+ movabs r11, 0xF0F0F0F0F0F0F0F0
+ add rax, r10
+ cmp byte ptr [rax], 0h
+ .byte 0x75, 0x03
+ // jne CheckCardTable_WriteWatch_SVR64
+ mov byte ptr [rax], 0FFh
+ CheckCardTable_WriteWatch_SVR64:
shr rdi, 0Bh
-
- cmp byte ptr [rdi + rax], 0FFh
+ cmp byte ptr [rdi + r11], 0FFh
.byte 0x75, 0x02
- // jne UpdateCardTable_SVR64
+ // jne UpdateCardTable_WriteWatch_SVR64
REPRET
- UpdateCardTable_SVR64:
- mov byte ptr [rdi + rax], 0FFh
+ UpdateCardTable_WriteWatch_SVR64:
+ mov byte ptr [rdi + r11], 0FFh
ret
-LEAF_END_MARKED JIT_WriteBarrier_SVR64, _TEXT
+LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_SVR64, _TEXT
+
+#endif
+#endif
diff --git a/src/vm/amd64/jithelpers_slow.S b/src/vm/amd64/jithelpers_slow.S
index 4d18e4356c..6c8d9077b8 100644
--- a/src/vm/amd64/jithelpers_slow.S
+++ b/src/vm/amd64/jithelpers_slow.S
@@ -68,6 +68,21 @@ LEAF_ENTRY JIT_WriteBarrier_Debug, _TEXT
DoneShadow:
#endif
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ // Update the write watch table if necessary
+ PREPARE_EXTERNAL_VAR g_sw_ww_enabled_for_gc_heap, r10
+ cmp byte ptr [r10], 0h
+ je CheckCardTable_Debug
+ mov r10, rdi
+ shr r10, 0Ch // SoftwareWriteWatch::AddressToTableByteIndexShift
+ PREPARE_EXTERNAL_VAR g_sw_ww_table, r11
+ add r10, qword ptr [r11]
+ cmp byte ptr [r10], 0h
+ jne CheckCardTable_Debug
+ mov byte ptr [r10], 0FFh
+#endif
+
+ CheckCardTable_Debug:
// See if we can just quick out
PREPARE_EXTERNAL_VAR g_ephemeral_low, r10
cmp rax, [r10]
diff --git a/src/vm/amd64/jitinterfaceamd64.cpp b/src/vm/amd64/jitinterfaceamd64.cpp
index cfcca1d372..39c2e05c2f 100644
--- a/src/vm/amd64/jitinterfaceamd64.cpp
+++ b/src/vm/amd64/jitinterfaceamd64.cpp
@@ -16,6 +16,7 @@
#include "eeconfig.h"
#include "excep.h"
#include "threadsuspend.h"
+#include "../../gc/softwarewritewatch.h"
extern uint8_t* g_ephemeral_low;
extern uint8_t* g_ephemeral_high;
@@ -24,24 +25,11 @@ extern uint32_t* g_card_table;
// Patch Labels for the various write barriers
EXTERN_C void JIT_WriteBarrier_End();
-EXTERN_C void JIT_WriteBarrier_PreGrow32(Object **dst, Object *ref);
-EXTERN_C void JIT_WriteBarrier_PreGrow32_PatchLabel_Lower();
-EXTERN_C void JIT_WriteBarrier_PreGrow32_PatchLabel_CardTable_Check();
-EXTERN_C void JIT_WriteBarrier_PreGrow32_PatchLabel_CardTable_Update();
-EXTERN_C void JIT_WriteBarrier_PreGrow32_End();
-
EXTERN_C void JIT_WriteBarrier_PreGrow64(Object **dst, Object *ref);
EXTERN_C void JIT_WriteBarrier_PreGrow64_Patch_Label_Lower();
EXTERN_C void JIT_WriteBarrier_PreGrow64_Patch_Label_CardTable();
EXTERN_C void JIT_WriteBarrier_PreGrow64_End();
-EXTERN_C void JIT_WriteBarrier_PostGrow32(Object **dst, Object *ref);
-EXTERN_C void JIT_WriteBarrier_PostGrow32_PatchLabel_Lower();
-EXTERN_C void JIT_WriteBarrier_PostGrow32_PatchLabel_Upper();
-EXTERN_C void JIT_WriteBarrier_PostGrow32_PatchLabel_CheckCardTable();
-EXTERN_C void JIT_WriteBarrier_PostGrow32_PatchLabel_UpdateCardTable();
-EXTERN_C void JIT_WriteBarrier_PostGrow32_End();
-
EXTERN_C void JIT_WriteBarrier_PostGrow64(Object **dst, Object *ref);
EXTERN_C void JIT_WriteBarrier_PostGrow64_Patch_Label_Lower();
EXTERN_C void JIT_WriteBarrier_PostGrow64_Patch_Label_Upper();
@@ -49,15 +37,32 @@ EXTERN_C void JIT_WriteBarrier_PostGrow64_Patch_Label_CardTable();
EXTERN_C void JIT_WriteBarrier_PostGrow64_End();
#ifdef FEATURE_SVR_GC
-EXTERN_C void JIT_WriteBarrier_SVR32(Object **dst, Object *ref);
-EXTERN_C void JIT_WriteBarrier_SVR32_PatchLabel_CheckCardTable();
-EXTERN_C void JIT_WriteBarrier_SVR32_PatchLabel_UpdateCardTable();
-EXTERN_C void JIT_WriteBarrier_SVR32_End();
-
EXTERN_C void JIT_WriteBarrier_SVR64(Object **dst, Object *ref);
EXTERN_C void JIT_WriteBarrier_SVR64_PatchLabel_CardTable();
EXTERN_C void JIT_WriteBarrier_SVR64_End();
-#endif
+#endif // FEATURE_SVR_GC
+
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+EXTERN_C void JIT_WriteBarrier_WriteWatch_PreGrow64(Object **dst, Object *ref);
+EXTERN_C void JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_WriteWatchTable();
+EXTERN_C void JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_Lower();
+EXTERN_C void JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_CardTable();
+EXTERN_C void JIT_WriteBarrier_WriteWatch_PreGrow64_End();
+
+EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64(Object **dst, Object *ref);
+EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_WriteWatchTable();
+EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_Lower();
+EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_Upper();
+EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_CardTable();
+EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64_End();
+
+#ifdef FEATURE_SVR_GC
+EXTERN_C void JIT_WriteBarrier_WriteWatch_SVR64(Object **dst, Object *ref);
+EXTERN_C void JIT_WriteBarrier_WriteWatch_SVR64_PatchLabel_WriteWatchTable();
+EXTERN_C void JIT_WriteBarrier_WriteWatch_SVR64_PatchLabel_CardTable();
+EXTERN_C void JIT_WriteBarrier_WriteWatch_SVR64_End();
+#endif // FEATURE_SVR_GC
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
WriteBarrierManager g_WriteBarrierManager;
@@ -90,28 +95,13 @@ void WriteBarrierManager::Validate()
// are places where these values are updated while the EE is running
// NOTE: we can't call this from the ctor since our infrastructure isn't ready for assert dialogs
- PBYTE pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow32, PatchLabel_Lower, 3);
- PBYTE pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow32, PatchLabel_CardTable_Check, 2);
- PBYTE pCardTableImmediate2 = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow32, PatchLabel_CardTable_Update, 2);
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pLowerBoundImmediate) & 0x3) == 0);
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate) & 0x3) == 0);
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate2) & 0x3) == 0);
+ PBYTE pLowerBoundImmediate, pUpperBoundImmediate, pCardTableImmediate;
pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow64, Patch_Label_Lower, 2);
pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow64, Patch_Label_CardTable, 2);
_ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pLowerBoundImmediate) & 0x7) == 0);
_ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate) & 0x7) == 0);
- PBYTE pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow32, PatchLabel_Upper, 3);
- pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow32, PatchLabel_Lower, 3);
- pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow32, PatchLabel_CheckCardTable, 2);
- pCardTableImmediate2 = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow32, PatchLabel_UpdateCardTable, 2);
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pUpperBoundImmediate) & 0x3) == 0);
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pLowerBoundImmediate) & 0x3) == 0);
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate) & 0x3) == 0);
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate2) & 0x3) == 0);
-
-
pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_Lower, 2);
pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_Upper, 2);
pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_CardTable, 2);
@@ -120,14 +110,36 @@ void WriteBarrierManager::Validate()
_ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate) & 0x7) == 0);
#ifdef FEATURE_SVR_GC
- pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_SVR32, PatchLabel_CheckCardTable, 2);
- pCardTableImmediate2 = CALC_PATCH_LOCATION(JIT_WriteBarrier_SVR32, PatchLabel_UpdateCardTable, 2);
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate) & 0x3) == 0);
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate2) & 0x3) == 0);
-
pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_SVR64, PatchLabel_CardTable, 2);
_ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate) & 0x7) == 0);
-#endif
+#endif // FEATURE_SVR_GC
+
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ PBYTE pWriteWatchTableImmediate;
+
+ pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_WriteWatchTable, 2);
+ pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_Lower, 2);
+ pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_CardTable, 2);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pWriteWatchTableImmediate) & 0x7) == 0);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pLowerBoundImmediate) & 0x7) == 0);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate) & 0x7) == 0);
+
+ pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_WriteWatchTable, 2);
+ pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_Lower, 2);
+ pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_Upper, 2);
+ pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_CardTable, 2);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pWriteWatchTableImmediate) & 0x7) == 0);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pLowerBoundImmediate) & 0x7) == 0);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pUpperBoundImmediate) & 0x7) == 0);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate) & 0x7) == 0);
+
+#ifdef FEATURE_SVR_GC
+ pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_SVR64, PatchLabel_WriteWatchTable, 2);
+ pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_SVR64, PatchLabel_CardTable, 2);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pWriteWatchTableImmediate) & 0x7) == 0);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate) & 0x7) == 0);
+#endif // FEATURE_SVR_GC
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
}
#endif // CODECOVERAGE
@@ -139,20 +151,24 @@ PCODE WriteBarrierManager::GetCurrentWriteBarrierCode()
switch (m_currentWriteBarrier)
{
- case WRITE_BARRIER_PREGROW32:
- return GetEEFuncEntryPoint(JIT_WriteBarrier_PreGrow32);
case WRITE_BARRIER_PREGROW64:
return GetEEFuncEntryPoint(JIT_WriteBarrier_PreGrow64);
- case WRITE_BARRIER_POSTGROW32:
- return GetEEFuncEntryPoint(JIT_WriteBarrier_PostGrow32);
case WRITE_BARRIER_POSTGROW64:
return GetEEFuncEntryPoint(JIT_WriteBarrier_PostGrow64);
#ifdef FEATURE_SVR_GC
- case WRITE_BARRIER_SVR32:
- return GetEEFuncEntryPoint(JIT_WriteBarrier_SVR32);
case WRITE_BARRIER_SVR64:
return GetEEFuncEntryPoint(JIT_WriteBarrier_SVR64);
-#endif
+#endif // FEATURE_SVR_GC
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ case WRITE_BARRIER_WRITE_WATCH_PREGROW64:
+ return GetEEFuncEntryPoint(JIT_WriteBarrier_WriteWatch_PreGrow64);
+ case WRITE_BARRIER_WRITE_WATCH_POSTGROW64:
+ return GetEEFuncEntryPoint(JIT_WriteBarrier_WriteWatch_PostGrow64);
+#ifdef FEATURE_SVR_GC
+ case WRITE_BARRIER_WRITE_WATCH_SVR64:
+ return GetEEFuncEntryPoint(JIT_WriteBarrier_WriteWatch_SVR64);
+#endif // FEATURE_SVR_GC
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
default:
UNREACHABLE_MSG("unexpected m_currentWriteBarrier!");
};
@@ -167,20 +183,24 @@ size_t WriteBarrierManager::GetSpecificWriteBarrierSize(WriteBarrierType writeBa
switch (writeBarrier)
{
- case WRITE_BARRIER_PREGROW32:
- return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_PreGrow32);
case WRITE_BARRIER_PREGROW64:
return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_PreGrow64);
- case WRITE_BARRIER_POSTGROW32:
- return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_PostGrow32);
case WRITE_BARRIER_POSTGROW64:
return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_PostGrow64);
#ifdef FEATURE_SVR_GC
- case WRITE_BARRIER_SVR32:
- return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_SVR32);
case WRITE_BARRIER_SVR64:
return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_SVR64);
-#endif
+#endif // FEATURE_SVR_GC
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ case WRITE_BARRIER_WRITE_WATCH_PREGROW64:
+ return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_WriteWatch_PreGrow64);
+ case WRITE_BARRIER_WRITE_WATCH_POSTGROW64:
+ return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_WriteWatch_PostGrow64);
+#ifdef FEATURE_SVR_GC
+ case WRITE_BARRIER_WRITE_WATCH_SVR64:
+ return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_WriteWatch_SVR64);
+#endif // FEATURE_SVR_GC
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
case WRITE_BARRIER_BUFFER:
return MARKED_FUNCTION_SIZE(JIT_WriteBarrier);
default:
@@ -202,38 +222,25 @@ PBYTE WriteBarrierManager::CalculatePatchLocation(LPVOID base, LPVOID label, int
return ((LPBYTE)GetEEFuncEntryPoint(JIT_WriteBarrier) + ((LPBYTE)GetEEFuncEntryPoint(label) - (LPBYTE)GetEEFuncEntryPoint(base) + offset));
}
-void WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier)
-{
- GCX_MAYBE_COOP_NO_THREAD_BROKEN((GetThread() != NULL));
- BOOL bEESuspended = FALSE;
- if(m_currentWriteBarrier != WRITE_BARRIER_UNINITIALIZED && !IsGCThread())
+void WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier, bool isRuntimeSuspended)
+{
+ GCX_MAYBE_COOP_NO_THREAD_BROKEN((!isRuntimeSuspended && GetThread() != NULL));
+ BOOL bEESuspendedHere = FALSE;
+ if(!isRuntimeSuspended && m_currentWriteBarrier != WRITE_BARRIER_UNINITIALIZED)
{
ThreadSuspend::SuspendEE(ThreadSuspend::SUSPEND_FOR_GC_PREP);
- bEESuspended = TRUE;
- }
-
+ bEESuspendedHere = TRUE;
+ }
+
_ASSERTE(m_currentWriteBarrier != newWriteBarrier);
m_currentWriteBarrier = newWriteBarrier;
-
+
// the memcpy must come before the switch statment because the asserts inside the switch
// are actually looking into the JIT_WriteBarrier buffer
memcpy((PVOID)JIT_WriteBarrier, (LPVOID)GetCurrentWriteBarrierCode(), GetCurrentWriteBarrierSize());
switch (newWriteBarrier)
{
- case WRITE_BARRIER_PREGROW32:
- {
- m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow32, PatchLabel_Lower, 3);
- m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow32, PatchLabel_CardTable_Check, 2);
- m_pCardTableImmediate2 = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow32, PatchLabel_CardTable_Update, 2);
-
- // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0).
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0 == *(DWORD*)m_pLowerBoundImmediate);
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0 == *(DWORD*)m_pCardTableImmediate);
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0 == *(DWORD*)m_pCardTableImmediate2);
- break;
- }
-
case WRITE_BARRIER_PREGROW64:
{
m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow64, Patch_Label_Lower, 2);
@@ -244,22 +251,7 @@ void WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier)
_ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate);
break;
}
-
- case WRITE_BARRIER_POSTGROW32:
- {
- m_pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow32, PatchLabel_Upper, 3);
- m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow32, PatchLabel_Lower, 3);
- m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow32, PatchLabel_CheckCardTable, 2);
- m_pCardTableImmediate2 = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow32, PatchLabel_UpdateCardTable, 2);
- // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0).
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0 == *(DWORD*)m_pUpperBoundImmediate);
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0 == *(DWORD*)m_pLowerBoundImmediate);
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0 == *(DWORD*)m_pCardTableImmediate);
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0 == *(DWORD*)m_pCardTableImmediate2);
- break;
- }
-
case WRITE_BARRIER_POSTGROW64:
{
m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_Lower, 2);
@@ -274,35 +266,67 @@ void WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier)
}
#ifdef FEATURE_SVR_GC
- case WRITE_BARRIER_SVR32:
+ case WRITE_BARRIER_SVR64:
{
- m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_SVR32, PatchLabel_CheckCardTable, 2);
- m_pCardTableImmediate2 = CALC_PATCH_LOCATION(JIT_WriteBarrier_SVR32, PatchLabel_UpdateCardTable, 2);
+ m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_SVR64, PatchLabel_CardTable, 2);
// Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0).
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0 == *(DWORD*)m_pCardTableImmediate);
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0 == *(DWORD*)m_pCardTableImmediate2);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate);
+ break;
+ }
+#endif // FEATURE_SVR_GC
+
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ case WRITE_BARRIER_WRITE_WATCH_PREGROW64:
+ {
+ m_pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_WriteWatchTable, 2);
+ m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_Lower, 2);
+ m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_CardTable, 2);
+
+ // Make sure that we will be bashing the right places (immediates should be hardcoded to 0xf0f0f0f0f0f0f0f0).
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pWriteWatchTableImmediate);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate);
break;
}
- case WRITE_BARRIER_SVR64:
+ case WRITE_BARRIER_WRITE_WATCH_POSTGROW64:
{
- m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_SVR64, PatchLabel_CardTable, 2);
+ m_pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_WriteWatchTable, 2);
+ m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_Lower, 2);
+ m_pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_Upper, 2);
+ m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_CardTable, 2);
// Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0).
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pWriteWatchTableImmediate);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate);
_ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate);
- break;
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pUpperBoundImmediate);
+ break;
}
-#endif
+
+#ifdef FEATURE_SVR_GC
+ case WRITE_BARRIER_WRITE_WATCH_SVR64:
+ {
+ m_pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_SVR64, PatchLabel_WriteWatchTable, 2);
+ m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_SVR64, PatchLabel_CardTable, 2);
+
+ // Make sure that we will be bashing the right places (immediates should be hardcoded to 0xf0f0f0f0f0f0f0f0).
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pWriteWatchTableImmediate);
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate);
+ break;
+ }
+#endif // FEATURE_SVR_GC
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
default:
UNREACHABLE_MSG("unexpected write barrier type!");
}
- UpdateEphemeralBounds();
- UpdateCardTableLocation(FALSE);
+ UpdateEphemeralBounds(true);
+ UpdateWriteWatchAndCardTableLocations(true, false);
- if(bEESuspended)
+ if(bEESuspendedHere)
{
ThreadSuspend::RestartEE(FALSE, TRUE);
}
@@ -325,21 +349,25 @@ void WriteBarrierManager::Initialize()
// write barrier implementations.
size_t cbWriteBarrierBuffer = GetSpecificWriteBarrierSize(WRITE_BARRIER_BUFFER);
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_PREGROW32));
_ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_PREGROW64));
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_POSTGROW32));
_ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_POSTGROW64));
#ifdef FEATURE_SVR_GC
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_SVR32));
_ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_SVR64));
-#endif
+#endif // FEATURE_SVR_GC
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_PREGROW64));
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_POSTGROW64));
+#ifdef FEATURE_SVR_GC
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_SVR64));
+#endif // FEATURE_SVR_GC
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
#if !defined(CODECOVERAGE)
Validate();
#endif
}
-bool WriteBarrierManager::NeedDifferentWriteBarrier(BOOL bReqUpperBoundsCheck, WriteBarrierType* pNewWriteBarrierType)
+bool WriteBarrierManager::NeedDifferentWriteBarrier(bool bReqUpperBoundsCheck, WriteBarrierType* pNewWriteBarrierType)
{
// Init code for the JIT_WriteBarrier assembly routine. Since it will be bashed everytime the GC Heap
// changes size, we want to do most of the work just once.
@@ -362,22 +390,9 @@ bool WriteBarrierManager::NeedDifferentWriteBarrier(BOOL bReqUpperBoundsCheck, W
}
#endif
- writeBarrierType = GCHeap::IsServerHeap() ? WRITE_BARRIER_SVR32 : WRITE_BARRIER_PREGROW32;
+ writeBarrierType = GCHeap::IsServerHeap() ? WRITE_BARRIER_SVR64 : WRITE_BARRIER_PREGROW64;
continue;
- case WRITE_BARRIER_PREGROW32:
- if (bReqUpperBoundsCheck)
- {
- writeBarrierType = WRITE_BARRIER_POSTGROW32;
- continue;
- }
-
- if (!FitsInI4((size_t)g_card_table) || !FitsInI4((size_t)g_ephemeral_low))
- {
- writeBarrierType = WRITE_BARRIER_PREGROW64;
- }
- break;
-
case WRITE_BARRIER_PREGROW64:
if (bReqUpperBoundsCheck)
{
@@ -385,27 +400,30 @@ bool WriteBarrierManager::NeedDifferentWriteBarrier(BOOL bReqUpperBoundsCheck, W
}
break;
- case WRITE_BARRIER_POSTGROW32:
- if (!FitsInI4((size_t)g_card_table) || !FitsInI4((size_t)g_ephemeral_low) || !FitsInI4((size_t)g_ephemeral_high))
- {
- writeBarrierType = WRITE_BARRIER_POSTGROW64;
- }
- break;
-
case WRITE_BARRIER_POSTGROW64:
break;
#ifdef FEATURE_SVR_GC
- case WRITE_BARRIER_SVR32:
- if (!FitsInI4((size_t)g_card_table))
+ case WRITE_BARRIER_SVR64:
+ break;
+#endif // FEATURE_SVR_GC
+
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ case WRITE_BARRIER_WRITE_WATCH_PREGROW64:
+ if (bReqUpperBoundsCheck)
{
- writeBarrierType = WRITE_BARRIER_SVR64;
+ writeBarrierType = WRITE_BARRIER_WRITE_WATCH_POSTGROW64;
}
break;
- case WRITE_BARRIER_SVR64:
+ case WRITE_BARRIER_WRITE_WATCH_POSTGROW64:
break;
-#endif
+
+#ifdef FEATURE_SVR_GC
+ case WRITE_BARRIER_WRITE_WATCH_SVR64:
+ break;
+#endif // FEATURE_SVR_GC
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
default:
UNREACHABLE_MSG("unexpected write barrier type!");
@@ -417,14 +435,14 @@ bool WriteBarrierManager::NeedDifferentWriteBarrier(BOOL bReqUpperBoundsCheck, W
return m_currentWriteBarrier != writeBarrierType;
}
-void WriteBarrierManager::UpdateEphemeralBounds()
+void WriteBarrierManager::UpdateEphemeralBounds(bool isRuntimeSuspended)
{
bool needToFlushCache = false;
WriteBarrierType newType;
- if (NeedDifferentWriteBarrier(FALSE, &newType))
+ if (NeedDifferentWriteBarrier(false, &newType))
{
- ChangeWriteBarrierTo(newType);
+ ChangeWriteBarrierTo(newType, isRuntimeSuspended);
return;
}
@@ -436,31 +454,10 @@ void WriteBarrierManager::UpdateEphemeralBounds()
switch (m_currentWriteBarrier)
{
-
- case WRITE_BARRIER_POSTGROW32:
- {
- // Change immediate if different from new g_ephermeral_high.
- if (*(INT32*)m_pUpperBoundImmediate != (INT32)(size_t)g_ephemeral_high)
- {
- *(INT32*)m_pUpperBoundImmediate = (INT32)(size_t)g_ephemeral_high;
- needToFlushCache = true;
- }
- }
- //
- // INTENTIONAL FALL-THROUGH!
- //
- case WRITE_BARRIER_PREGROW32:
- {
- // Change immediate if different from new g_ephermeral_low.
- if (*(INT32*)m_pLowerBoundImmediate != (INT32)(size_t)g_ephemeral_low)
- {
- *(INT32*)m_pLowerBoundImmediate = (INT32)(size_t)g_ephemeral_low;
- needToFlushCache = true;
- }
- break;
- }
-
case WRITE_BARRIER_POSTGROW64:
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ case WRITE_BARRIER_WRITE_WATCH_POSTGROW64:
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
{
// Change immediate if different from new g_ephermeral_high.
if (*(UINT64*)m_pUpperBoundImmediate != (size_t)g_ephemeral_high)
@@ -473,6 +470,9 @@ void WriteBarrierManager::UpdateEphemeralBounds()
// INTENTIONAL FALL-THROUGH!
//
case WRITE_BARRIER_PREGROW64:
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ case WRITE_BARRIER_WRITE_WATCH_PREGROW64:
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
{
// Change immediate if different from new g_ephermeral_low.
if (*(UINT64*)m_pLowerBoundImmediate != (size_t)g_ephemeral_low)
@@ -484,12 +484,14 @@ void WriteBarrierManager::UpdateEphemeralBounds()
}
#ifdef FEATURE_SVR_GC
- case WRITE_BARRIER_SVR32:
case WRITE_BARRIER_SVR64:
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ case WRITE_BARRIER_WRITE_WATCH_SVR64:
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
{
break;
}
-#endif
+#endif // FEATURE_SVR_GC
default:
UNREACHABLE_MSG("unexpected m_currentWriteBarrier in UpdateEphemeralBounds");
@@ -501,7 +503,7 @@ void WriteBarrierManager::UpdateEphemeralBounds()
}
}
-void WriteBarrierManager::UpdateCardTableLocation(BOOL bReqUpperBoundsCheck)
+void WriteBarrierManager::UpdateWriteWatchAndCardTableLocations(bool isRuntimeSuspended, bool bReqUpperBoundsCheck)
{
// If we are told that we require an upper bounds check (GC did some heap
// reshuffling), we need to switch to the WriteBarrier_PostGrow function for
@@ -510,7 +512,7 @@ void WriteBarrierManager::UpdateCardTableLocation(BOOL bReqUpperBoundsCheck)
WriteBarrierType newType;
if (NeedDifferentWriteBarrier(bReqUpperBoundsCheck, &newType))
{
- ChangeWriteBarrierTo(newType);
+ ChangeWriteBarrierTo(newType, isRuntimeSuspended);
return;
}
@@ -522,24 +524,30 @@ void WriteBarrierManager::UpdateCardTableLocation(BOOL bReqUpperBoundsCheck)
bool fFlushCache = false;
- if (m_currentWriteBarrier == WRITE_BARRIER_PREGROW32 ||
- m_currentWriteBarrier == WRITE_BARRIER_POSTGROW32 ||
- m_currentWriteBarrier == WRITE_BARRIER_SVR32)
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ switch (m_currentWriteBarrier)
{
- if (*(INT32*)m_pCardTableImmediate != (INT32)(size_t)g_card_table)
- {
- *(INT32*)m_pCardTableImmediate = (INT32)(size_t)g_card_table;
- *(INT32*)m_pCardTableImmediate2 = (INT32)(size_t)g_card_table;
- fFlushCache = true;
- }
+ case WRITE_BARRIER_WRITE_WATCH_PREGROW64:
+ case WRITE_BARRIER_WRITE_WATCH_POSTGROW64:
+#ifdef FEATURE_SVR_GC
+ case WRITE_BARRIER_WRITE_WATCH_SVR64:
+#endif // FEATURE_SVR_GC
+ if (*(UINT64*)m_pWriteWatchTableImmediate != (size_t)SoftwareWriteWatch::GetTable())
+ {
+ *(UINT64*)m_pWriteWatchTableImmediate = (size_t)SoftwareWriteWatch::GetTable();
+ fFlushCache = true;
+ }
+ break;
+
+ default:
+ break; // clang seems to require all enum values to be covered for some reason
}
- else
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+
+ if (*(UINT64*)m_pCardTableImmediate != (size_t)g_card_table)
{
- if (*(UINT64*)m_pCardTableImmediate != (size_t)g_card_table)
- {
- *(UINT64*)m_pCardTableImmediate = (size_t)g_card_table;
- fFlushCache = true;
- }
+ *(UINT64*)m_pCardTableImmediate = (size_t)g_card_table;
+ fFlushCache = true;
}
if (fFlushCache)
@@ -548,23 +556,100 @@ void WriteBarrierManager::UpdateCardTableLocation(BOOL bReqUpperBoundsCheck)
}
}
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+void WriteBarrierManager::SwitchToWriteWatchBarrier(bool isRuntimeSuspended) // Replaces the current barrier with its WRITE_WATCH counterpart
+{
+ WriteBarrierType newWriteBarrierType;
+ switch (m_currentWriteBarrier)
+ {
+ case WRITE_BARRIER_UNINITIALIZED:
+ // Using the debug-only write barrier; return without calling ChangeWriteBarrierTo
+ return;
+
+ case WRITE_BARRIER_PREGROW64:
+ newWriteBarrierType = WRITE_BARRIER_WRITE_WATCH_PREGROW64;
+ break;
+
+ case WRITE_BARRIER_POSTGROW64:
+ newWriteBarrierType = WRITE_BARRIER_WRITE_WATCH_POSTGROW64;
+ break;
+
+#ifdef FEATURE_SVR_GC
+ case WRITE_BARRIER_SVR64:
+ newWriteBarrierType = WRITE_BARRIER_WRITE_WATCH_SVR64;
+ break;
+#endif // FEATURE_SVR_GC
+
+ default:
+ UNREACHABLE(); // no other current type is expected here, including one that is already a WRITE_WATCH variant
+ }
+
+ ChangeWriteBarrierTo(newWriteBarrierType, isRuntimeSuspended); // isRuntimeSuspended is forwarded unchanged
+}
+
+void WriteBarrierManager::SwitchToNonWriteWatchBarrier(bool isRuntimeSuspended) // Replaces the current WRITE_WATCH barrier with its plain counterpart
+{
+ WriteBarrierType newWriteBarrierType;
+ switch (m_currentWriteBarrier)
+ {
+ case WRITE_BARRIER_UNINITIALIZED:
+ // Using the debug-only write barrier; return without calling ChangeWriteBarrierTo
+ return;
+
+ case WRITE_BARRIER_WRITE_WATCH_PREGROW64:
+ newWriteBarrierType = WRITE_BARRIER_PREGROW64;
+ break;
+
+ case WRITE_BARRIER_WRITE_WATCH_POSTGROW64:
+ newWriteBarrierType = WRITE_BARRIER_POSTGROW64;
+ break;
+
+#ifdef FEATURE_SVR_GC
+ case WRITE_BARRIER_WRITE_WATCH_SVR64:
+ newWriteBarrierType = WRITE_BARRIER_SVR64;
+ break;
+#endif // FEATURE_SVR_GC
+
+ default:
+ UNREACHABLE(); // no other current type is expected here, including one that is already a non-WRITE_WATCH variant
+ }
+
+ ChangeWriteBarrierTo(newWriteBarrierType, isRuntimeSuspended); // isRuntimeSuspended is forwarded unchanged
+}
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
// This function bashes the super fast amd64 version of the JIT_WriteBarrier
// helper. It should be called by the GC whenever the ephermeral region
// bounds get changed, but still remain on the top of the GC Heap.
-void StompWriteBarrierEphemeral()
+void StompWriteBarrierEphemeral(bool isRuntimeSuspended) // amd64 entry point; forwards to the global WriteBarrierManager
{
WRAPPER_NO_CONTRACT;
- g_WriteBarrierManager.UpdateEphemeralBounds();
+ g_WriteBarrierManager.UpdateEphemeralBounds(isRuntimeSuspended); // flag is passed through unchanged
}
// This function bashes the super fast amd64 versions of the JIT_WriteBarrier
// helpers. It should be called by the GC whenever the ephermeral region gets moved
// from being at the top of the GC Heap, and/or when the cards table gets moved.
-void StompWriteBarrierResize(BOOL bReqUpperBoundsCheck)
+void StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) // amd64 entry point; forwards to the global WriteBarrierManager
+{
+ WRAPPER_NO_CONTRACT;
+
+ g_WriteBarrierManager.UpdateWriteWatchAndCardTableLocations(isRuntimeSuspended, bReqUpperBoundsCheck); // both flags are passed through unchanged
+}
+
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+void SwitchToWriteWatchBarrier(bool isRuntimeSuspended) // free-function wrapper over WriteBarrierManager::SwitchToWriteWatchBarrier
+{
+ WRAPPER_NO_CONTRACT;
+
+ g_WriteBarrierManager.SwitchToWriteWatchBarrier(isRuntimeSuspended); // flag is passed through unchanged
+}
+
+void SwitchToNonWriteWatchBarrier(bool isRuntimeSuspended) // free-function wrapper over WriteBarrierManager::SwitchToNonWriteWatchBarrier
{
WRAPPER_NO_CONTRACT;
- g_WriteBarrierManager.UpdateCardTableLocation(bReqUpperBoundsCheck);
+ g_WriteBarrierManager.SwitchToNonWriteWatchBarrier(isRuntimeSuspended); // flag is passed through unchanged
}
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
diff --git a/src/vm/arm/stubs.cpp b/src/vm/arm/stubs.cpp
index cd58f294c2..2f1377fe8a 100644
--- a/src/vm/arm/stubs.cpp
+++ b/src/vm/arm/stubs.cpp
@@ -378,7 +378,7 @@ void ValidateWriteBarriers()
// Update the instructions in our various write barrier implementations that refer directly to the values
// of GC globals such as g_lowest_address and g_card_table. We don't particularly care which values have
// changed on each of these callbacks, it's pretty cheap to refresh them all.
-void UpdateGCWriteBarriers(BOOL postGrow = false)
+void UpdateGCWriteBarriers(bool postGrow = false)
{
// Define a helper macro that abstracts the minutia of patching the instructions to access the value of a
// particular GC global.
@@ -455,7 +455,7 @@ void UpdateGCWriteBarriers(BOOL postGrow = false)
FlushInstructionCache(GetCurrentProcess(), pbAlteredRange, cbAlteredRange);
}
-void StompWriteBarrierResize(BOOL bReqUpperBoundsCheck)
+void StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck)
{
// The runtime is not always suspended when this is called (unlike StompWriteBarrierEphemeral) but we have
// no way to update the barrier code atomically on ARM since each 32-bit value we change is loaded over
@@ -469,7 +469,7 @@ void StompWriteBarrierResize(BOOL bReqUpperBoundsCheck)
GCStressPolicy::InhibitHolder iholder;
bool fSuspended = false;
- if (!g_fEEInit && !GCHeap::IsGCInProgress())
+ if (!isRuntimeSuspended)
{
ThreadSuspend::SuspendEE(ThreadSuspend::SUSPEND_OTHER);
fSuspended = true;
@@ -481,9 +481,10 @@ void StompWriteBarrierResize(BOOL bReqUpperBoundsCheck)
ThreadSuspend::RestartEE(FALSE, TRUE);
}
-void StompWriteBarrierEphemeral(void)
+void StompWriteBarrierEphemeral(bool isRuntimeSuspended) // ARM: refreshes the patched barrier code via UpdateGCWriteBarriers
{
- _ASSERTE(GCHeap::IsGCInProgress() || g_fEEInit);
+ UNREFERENCED_PARAMETER(isRuntimeSuspended); // NOTE(review): presumably needed because _ASSERTE may compile away, leaving the parameter unused - confirm
+ _ASSERTE(isRuntimeSuspended); // this function does not suspend the runtime itself, so the caller is expected to have done so
UpdateGCWriteBarriers();
}
#endif // CROSSGEN_COMPILE
diff --git a/src/vm/arm64/stubs.cpp b/src/vm/arm64/stubs.cpp
index 18c1f5f091..c50fdca7f7 100644
--- a/src/vm/arm64/stubs.cpp
+++ b/src/vm/arm64/stubs.cpp
@@ -1184,13 +1184,13 @@ extern "C" void getFPReturn(int fpSize, INT64 *retval)
_ASSERTE(!"ARM64:NYI");
}
-void StompWriteBarrierEphemeral()
+void StompWriteBarrierEphemeral(bool isRuntimeSuspended) // ARM64 stub: the parameter is accepted but not used
{
//ARM64TODO: implement this
return;
}
-void StompWriteBarrierResize(BOOL bReqUpperBoundsCheck)
+void StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) // ARM64 stub: neither parameter is used
{
//ARM64TODO: implement this
return;
diff --git a/src/vm/gcenv.h b/src/vm/gcenv.h
index 26c2099e75..08dcc711ae 100644
--- a/src/vm/gcenv.h
+++ b/src/vm/gcenv.h
@@ -53,6 +53,8 @@
#include "gcenv.interlocked.h"
#include "gcenv.interlocked.inl"
+#include "../gc/softwarewritewatch.h"
+
namespace ETW
{
typedef enum _GC_ROOT_KIND {
diff --git a/src/vm/gchelpers.cpp b/src/vm/gchelpers.cpp
index a894797627..bf81847716 100644
--- a/src/vm/gchelpers.cpp
+++ b/src/vm/gchelpers.cpp
@@ -35,6 +35,7 @@
#endif // FEATURE_COMINTEROP
#include "rcwwalker.h"
+#include "../gc/softwarewritewatch.h"
//========================================================================
//
@@ -1182,6 +1183,13 @@ extern "C" HCIMPL2_RAW(VOID, JIT_CheckedWriteBarrier, Object **dst, Object *ref)
updateGCShadow(dst, ref); // support debugging write barrier
#endif
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ if (SoftwareWriteWatch::IsEnabledForGCHeap())
+ {
+ SoftwareWriteWatch::SetDirty(dst, sizeof(*dst));
+ }
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+
#ifdef FEATURE_COUNT_GC_WRITE_BARRIERS
if((BYTE*) dst >= g_ephemeral_low && (BYTE*) dst < g_ephemeral_high)
{
@@ -1232,6 +1240,13 @@ extern "C" HCIMPL2_RAW(VOID, JIT_WriteBarrier, Object **dst, Object *ref)
updateGCShadow(dst, ref); // support debugging write barrier
#endif
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ if (SoftwareWriteWatch::IsEnabledForGCHeap())
+ {
+ SoftwareWriteWatch::SetDirty(dst, sizeof(*dst));
+ }
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+
#ifdef FEATURE_COUNT_GC_WRITE_BARRIERS
if((BYTE*) dst >= g_ephemeral_low && (BYTE*) dst < g_ephemeral_high)
{
@@ -1292,7 +1307,14 @@ void ErectWriteBarrier(OBJECTREF *dst, OBJECTREF ref)
#ifdef WRITE_BARRIER_CHECK
updateGCShadow((Object**) dst, OBJECTREFToObject(ref)); // support debugging write barrier
#endif
-
+
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ if (SoftwareWriteWatch::IsEnabledForGCHeap())
+ {
+ SoftwareWriteWatch::SetDirty(dst, sizeof(*dst));
+ }
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+
if((BYTE*) OBJECTREFToObject(ref) >= g_ephemeral_low && (BYTE*) OBJECTREFToObject(ref) < g_ephemeral_high)
{
// VolatileLoadWithoutBarrier() is used here to prevent fetch of g_card_table from being reordered
@@ -1319,6 +1341,13 @@ void ErectWriteBarrierForMT(MethodTable **dst, MethodTable *ref)
if (ref->Collectible())
{
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ if (SoftwareWriteWatch::IsEnabledForGCHeap())
+ {
+ SoftwareWriteWatch::SetDirty(dst, sizeof(*dst));
+ }
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+
BYTE *refObject = *(BYTE **)((MethodTable*)ref)->GetLoaderAllocatorObjectHandle();
if((BYTE*) refObject >= g_ephemeral_low && (BYTE*) refObject < g_ephemeral_high)
{
diff --git a/src/vm/gchelpers.h b/src/vm/gchelpers.h
index a461f933f9..f5590beebe 100644
--- a/src/vm/gchelpers.h
+++ b/src/vm/gchelpers.h
@@ -107,8 +107,8 @@ OBJECTREF AllocateObject(MethodTable *pMT
#endif
);
-extern void StompWriteBarrierEphemeral();
-extern void StompWriteBarrierResize(BOOL bReqUpperBoundsCheck);
+extern void StompWriteBarrierEphemeral(bool isRuntimeSuspended);
+extern void StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck);
extern void ThrowOutOfMemoryDimensionsExceeded();
diff --git a/src/vm/i386/jitinterfacex86.cpp b/src/vm/i386/jitinterfacex86.cpp
index 4834e63db9..ff0ea93a3e 100644
--- a/src/vm/i386/jitinterfacex86.cpp
+++ b/src/vm/i386/jitinterfacex86.cpp
@@ -1727,7 +1727,7 @@ void ValidateWriteBarrierHelpers()
// When a GC happens, the upper and lower bounds of the ephemeral
// generation change. This routine updates the WriteBarrier thunks
// with the new values.
-void StompWriteBarrierEphemeral()
+void StompWriteBarrierEphemeral(bool /* isRuntimeSuspended */)
{
CONTRACTL {
NOTHROW;
@@ -1785,7 +1785,7 @@ void StompWriteBarrierEphemeral()
// to the PostGrow thunk that checks both upper and lower bounds.
// regardless we need to update the thunk with the
// card_table - lowest_address.
-void StompWriteBarrierResize(BOOL bReqUpperBoundsCheck)
+void StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck)
{
CONTRACTL {
NOTHROW;
@@ -1801,7 +1801,7 @@ void StompWriteBarrierResize(BOOL bReqUpperBoundsCheck)
bool bWriteBarrierIsPreGrow = WriteBarrierIsPreGrow();
bool bStompWriteBarrierEphemeral = false;
- BOOL bEESuspended = FALSE;
+ BOOL bEESuspendedHere = FALSE;
for (int iBarrier = 0; iBarrier < NUM_WRITE_BARRIERS; iBarrier++)
{
@@ -1817,9 +1817,9 @@ void StompWriteBarrierResize(BOOL bReqUpperBoundsCheck)
if (bReqUpperBoundsCheck)
{
GCX_MAYBE_COOP_NO_THREAD_BROKEN((GetThread()!=NULL));
- if( !IsGCThread() && !bEESuspended) {
+ if( !isRuntimeSuspended && !bEESuspendedHere) {
ThreadSuspend::SuspendEE(ThreadSuspend::SUSPEND_FOR_GC_PREP);
- bEESuspended = TRUE;
+ bEESuspendedHere = TRUE;
}
pfunc = (size_t *) JIT_WriteBarrierReg_PostGrow;
@@ -1906,12 +1906,17 @@ void StompWriteBarrierResize(BOOL bReqUpperBoundsCheck)
}
if (bStompWriteBarrierEphemeral)
- StompWriteBarrierEphemeral();
+ {
+ _ASSERTE(isRuntimeSuspended || bEESuspendedHere);
+ StompWriteBarrierEphemeral(true);
+ }
else
- FlushInstructionCache(GetCurrentProcess(), (void *)JIT_PatchedWriteBarrierStart,
+ {
+ FlushInstructionCache(GetCurrentProcess(), (void *)JIT_PatchedWriteBarrierStart,
(BYTE*)JIT_PatchedWriteBarrierLast - (BYTE*)JIT_PatchedWriteBarrierStart);
+ }
- if(bEESuspended)
+ if(bEESuspendedHere)
ThreadSuspend::RestartEE(FALSE, TRUE);
}
diff --git a/src/vm/jitinterface.h b/src/vm/jitinterface.h
index 00edc6e212..6780fe0911 100644
--- a/src/vm/jitinterface.h
+++ b/src/vm/jitinterface.h
@@ -265,39 +265,50 @@ class WriteBarrierManager
public:
enum WriteBarrierType
{
- WRITE_BARRIER_UNINITIALIZED = 0,
- WRITE_BARRIER_PREGROW32 = 1,
- WRITE_BARRIER_PREGROW64 = 2,
- WRITE_BARRIER_POSTGROW32 = 3,
- WRITE_BARRIER_POSTGROW64 = 4,
- WRITE_BARRIER_SVR32 = 5,
- WRITE_BARRIER_SVR64 = 6,
- WRITE_BARRIER_BUFFER = 7,
+ WRITE_BARRIER_UNINITIALIZED, // first enumerator, so still 0; every later value is now implicit and shifts with the feature #ifdefs below
+ WRITE_BARRIER_PREGROW64, // SwitchToWriteWatchBarrier maps this to WRITE_BARRIER_WRITE_WATCH_PREGROW64
+ WRITE_BARRIER_POSTGROW64, // SwitchToWriteWatchBarrier maps this to WRITE_BARRIER_WRITE_WATCH_POSTGROW64
+#ifdef FEATURE_SVR_GC
+ WRITE_BARRIER_SVR64, // SwitchToWriteWatchBarrier maps this to WRITE_BARRIER_WRITE_WATCH_SVR64
+#endif // FEATURE_SVR_GC
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ WRITE_BARRIER_WRITE_WATCH_PREGROW64, // SwitchToNonWriteWatchBarrier maps this back to WRITE_BARRIER_PREGROW64
+ WRITE_BARRIER_WRITE_WATCH_POSTGROW64, // SwitchToNonWriteWatchBarrier maps this back to WRITE_BARRIER_POSTGROW64
+#ifdef FEATURE_SVR_GC
+ WRITE_BARRIER_WRITE_WATCH_SVR64, // SwitchToNonWriteWatchBarrier maps this back to WRITE_BARRIER_SVR64
+#endif // FEATURE_SVR_GC
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ WRITE_BARRIER_BUFFER // GetSpecificWriteBarrierSize returns the size of JIT_WriteBarrier itself for this value
};
WriteBarrierManager();
void Initialize();
- void UpdateEphemeralBounds();
- void UpdateCardTableLocation(BOOL bReqUpperBoundsCheck);
+ void UpdateEphemeralBounds(bool isRuntimeSuspended);
+ void UpdateWriteWatchAndCardTableLocations(bool isRuntimeSuspended, bool bReqUpperBoundsCheck);
+
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ void SwitchToWriteWatchBarrier(bool isRuntimeSuspended);
+ void SwitchToNonWriteWatchBarrier(bool isRuntimeSuspended);
+#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
protected:
size_t GetCurrentWriteBarrierSize();
size_t GetSpecificWriteBarrierSize(WriteBarrierType writeBarrier);
PBYTE CalculatePatchLocation(LPVOID base, LPVOID label, int offset);
PCODE GetCurrentWriteBarrierCode();
- void ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier);
- bool NeedDifferentWriteBarrier(BOOL bReqUpperBoundsCheck, WriteBarrierType* pNewWriteBarrierType);
+ void ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier, bool isRuntimeSuspended);
+ bool NeedDifferentWriteBarrier(bool bReqUpperBoundsCheck, WriteBarrierType* pNewWriteBarrierType);
private:
void Validate();
WriteBarrierType m_currentWriteBarrier;
- PBYTE m_pLowerBoundImmediate; // PREGROW32 | PREGROW64 | POSTGROW32 | POSTGROW64 | |
- PBYTE m_pCardTableImmediate; // PREGROW32 | PREGROW64 | POSTGROW32 | POSTGROW64 | SVR32 |
- PBYTE m_pUpperBoundImmediate; // | | POSTGROW32 | POSTGROW64 | |
- PBYTE m_pCardTableImmediate2; // PREGROW32 | | POSTGROW32 | | SVR32 |
+ PBYTE m_pWriteWatchTableImmediate; // PREGROW | POSTGROW | SVR | WRITE_WATCH |
+ PBYTE m_pLowerBoundImmediate; // PREGROW | POSTGROW | | WRITE_WATCH |
+ PBYTE m_pCardTableImmediate; // PREGROW | POSTGROW | SVR | WRITE_WATCH |
+ PBYTE m_pUpperBoundImmediate; // | POSTGROW | | WRITE_WATCH |
};
#endif // _TARGET_AMD64_
diff --git a/tests/runtest.sh b/tests/runtest.sh
index f853eae31d..8cf2603cee 100755
--- a/tests/runtest.sh
+++ b/tests/runtest.sh
@@ -340,11 +340,16 @@ function create_core_overlay {
fi
mkdir "$coreOverlayDir"
- (cd $coreFxBinDir && find . -iname '*.dll' \! -iwholename '*netstandard13aot*' \! -iwholename '*netstandard15aot*' \! -iwholename '*netcore50aot*' \! -iwholename '*test*' \! -iwholename '*/ToolRuntime/*' \! -iwholename '*RemoteExecutorConsoleApp*' -exec cp -f '{}' "$coreOverlayDir/" \;)
+ (cd $coreFxBinDir && find . -iname '*.dll' \! -iwholename '*test*' \! -iwholename '*/ToolRuntime/*' \! -iwholename '*/RemoteExecutorConsoleApp/*' \! -iwholename '*/net*' \! -iwholename '*aot*' -exec cp -f '{}' "$coreOverlayDir/" \;)
cp -f "$coreFxNativeBinDir/Native/"*."$libExtension" "$coreOverlayDir/" 2>/dev/null
cp -f "$coreClrBinDir/"* "$coreOverlayDir/" 2>/dev/null
+ cp -f "$mscorlibDir/mscorlib.dll" "$coreOverlayDir/"
cp -n "$testDependenciesDir"/* "$coreOverlayDir/" 2>/dev/null
+ if [ -f "$coreOverlayDir/mscorlib.ni.dll" ]; then
+ # Test dependencies come from a Windows build, and mscorlib.ni.dll would be the one from Windows
+ rm -f "$coreOverlayDir/mscorlib.ni.dll"
+ fi
}
function precompile_overlay_assemblies {
@@ -407,7 +412,7 @@ declare -a failingTests
((runFailingTestsOnly = 0))
function load_unsupported_tests {
- # Load the list of tests that fail and on this platform. These tests are disabled (skipped), pending investigation.
+ # Load the list of tests that are not supported on this platform. These tests are disabled (skipped) permanently.
# 'readarray' is not used here, as it includes the trailing linefeed in lines placed in the array.
while IFS='' read -r line || [ -n "$line" ]; do
unsupportedTests[${#unsupportedTests[@]}]=$line
@@ -415,7 +420,7 @@ function load_unsupported_tests {
}
function load_failing_tests {
- # Load the list of tests that fail and on this platform. These tests are disabled (skipped), pending investigation.
+ # Load the list of tests that fail on this platform. These tests are disabled (skipped) temporarily, pending investigation.
# 'readarray' is not used here, as it includes the trailing linefeed in lines placed in the array.
while IFS='' read -r line || [ -n "$line" ]; do
failingTests[${#failingTests[@]}]=$line
@@ -733,8 +738,8 @@ do
esac
done
-if (( disableEventLogging == 0)); then
- export COMPlus_EnableEventLog=1
+if ((disableEventLogging == 0)); then
+ export COMPlus_EnableEventLog=1
fi
export CORECLR_SERVER_GC="$serverGC"
@@ -749,10 +754,11 @@ if [ ! -d "$testRootDir" ]; then
exit $EXIT_CODE_EXCEPTION
fi
-
# Copy native interop test libraries over to the mscorlib path in
# order for interop tests to run on linux.
-cp $mscorlibDir/bin/* $mscorlibDir
+if [ -d "$mscorlibDir/bin" ]; then # the bin directory is optional; skip the copy when it is absent
+ cp "$mscorlibDir/bin/"* "$mscorlibDir" # quoted so a path containing spaces or glob characters is not word-split; the trailing * still expands
+fi
# If this is a coverage run, make sure the appropriate args have been passed
if [ "$CoreClrCoverage" == "ON" ]