author     Koundinya Veluri <kouvel@microsoft.com>    2016-01-07 11:21:27 -0800
committer  Koundinya Veluri <kouvel@microsoft.com>    2016-04-12 16:29:38 -0700
commit     c235ae17cd3a87f8032948bdcb838641d8e6c055 (patch)
tree       cc2c3756157456898b4720709c9efbfc4c90ccd8 /src/vm/amd64/jithelpers_fast.S
parent     7f95d79740d5f5b13d6a0df1b94654e622053a5f (diff)
Implement software write watch and make concurrent GC functional outside Windows
- Implemented software write watch using write barriers
  - A new set of write barriers is introduced, each corresponding to an existing one, but which also updates the write watch table. The GC switches to a write watch barrier during concurrent GC, and switches back to a non write watch barrier after the final query for dirty pages.
  - The write watch table is allocated along with the card table.
  - Since the card table is used differently, different synchronization is used for the write watch table. The runtime is suspended during resize, since that is the most infrequently occurring of the operations (resize, ResetWriteWatch, and GetWriteWatch).
  - ResetWriteWatch() doesn't need a suspend, but since the software write watch version is much faster than the Windows version, it was moved into the suspended region to avoid some synchronization that would otherwise be required.
  - The background calls to GetWriteWatch() neither need nor perform a suspend. They only need to synchronize with the resize path, not for correctness, but so that dirty pages are not missed and concurrent GC stays effective.
- Miscellaneous:
  - Fixed runtests.sh to copy mscorlib.dll and delete the Windows version of mscorlib.ni.dll
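As a rough illustration of the write watch table described above (not the runtime's actual declarations: the wrapper name, the one-byte-per-4-KB granularity, and the table layout are assumptions read off the barrier code in the diff below), the table update amounts to a shift and a conditional byte store performed after the reference store and before the card table check:

    #include <cstdint>
    #include <cstddef>

    // Illustrative stand-in for the runtime's global; in the patched barriers the
    // table address is baked into the instruction stream rather than loaded from memory.
    extern uint8_t* g_sw_ww_table;                    // one byte per watched heap region
    const size_t kAddressToTableByteIndexShift = 12;  // 0Ch in the assembly below

    // Record that the region containing 'dst' was written, as the new
    // write-watch-enabled barriers do in addition to the usual card table work.
    inline void SoftwareWriteWatch_Record(void** dst)
    {
        size_t index = reinterpret_cast<size_t>(dst) >> kAddressToTableByteIndexShift;
        if (g_sw_ww_table[index] == 0)    // test first so already-dirty entries are not rewritten
            g_sw_ww_table[index] = 0xFF;  // any nonzero value marks the region dirty
    }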
Diffstat (limited to 'src/vm/amd64/jithelpers_fast.S')
-rw-r--r--  src/vm/amd64/jithelpers_fast.S  160
1 file changed, 122 insertions(+), 38 deletions(-)
diff --git a/src/vm/amd64/jithelpers_fast.S b/src/vm/amd64/jithelpers_fast.S
index 22f21bb8de..a0650759f6 100644
--- a/src/vm/amd64/jithelpers_fast.S
+++ b/src/vm/amd64/jithelpers_fast.S
@@ -10,6 +10,45 @@ LEAF_ENTRY JIT_PatchedCodeStart, _TEXT
ret
LEAF_END JIT_PatchedCodeStart, _TEXT
+
+// There is an even more optimized version of these helpers possible which takes
+// advantage of knowledge of which way the ephemeral heap is growing to only do 1/2
+// that check (this is more significant in the JIT_WriteBarrier case).
+//
+// Additionally we can look into providing helpers which will take the src/dest from
+// specific registers (like x86) which _could_ (??) make for easier register allocation
+// for the JIT64, however it might lead to having to have some nasty code that treats
+// these guys really special like... :(.
+//
+// Version that does the move, checks whether or not it's in the GC and whether or not
+// it needs to have its card updated
+//
+// void JIT_CheckedWriteBarrier(Object** dst, Object* src)
+LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT
+
+ // When WRITE_BARRIER_CHECK is defined _NotInHeap will write the reference
+ // but if it isn't then it will just return.
+ //
+ // See if this is in GCHeap
+ PREPARE_EXTERNAL_VAR g_lowest_address, rax
+ cmp rdi, [rax]
+ // jb NotInHeap
+ .byte 0x72, 0x0e
+ PREPARE_EXTERNAL_VAR g_highest_address, rax
+ cmp rdi, [rax]
+ // jnb NotInHeap
+ .byte 0x73, 0x02
+
+ // call C_FUNC(JIT_WriteBarrier)
+ .byte 0xeb, 0x05
+
+ NotInHeap:
+ // See comment above about possible AV
+ mov [rdi], rsi
+ ret
+LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT
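In C-like terms, the checked barrier above range-checks the destination against the GC heap bounds and either falls through to the full barrier or performs a plain store. A hedged sketch follows: g_lowest_address and g_highest_address are the globals referenced via PREPARE_EXTERNAL_VAR, while FullWriteBarrier is an illustrative stand-in for the patched JIT_WriteBarrier body, not a runtime function.

    #include <cstdint>

    extern uint8_t* g_lowest_address;              // GC heap lower bound
    extern uint8_t* g_highest_address;             // GC heap upper bound
    void FullWriteBarrier(void** dst, void* ref);  // stand-in for JIT_WriteBarrier

    void CheckedWriteBarrier(void** dst, void* ref)
    {
        uint8_t* p = reinterpret_cast<uint8_t*>(dst);
        if (p < g_lowest_address || p >= g_highest_address)
        {
            *dst = ref;              // destination outside the GC heap: plain store, no barrier work
            return;
        }
        FullWriteBarrier(dst, ref);  // in-heap destination: take the full (tail-jumped) barrier path
    }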
+
+
// This is used by the mechanism to hold either the JIT_WriteBarrier_PreGrow
// or JIT_WriteBarrier_PostGrow code (depending on the state of the GC). It _WILL_
// change at runtime as the GC changes. Initially it should simply be a copy of the
@@ -22,6 +61,71 @@ LEAF_ENTRY JIT_WriteBarrier, _TEXT
jmp C_FUNC(JIT_WriteBarrier_Debug)
#endif
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ // JIT_WriteBarrier_WriteWatch_PostGrow64
+
+ // Regarding patchable constants:
+ // - 64-bit constants have to be loaded into a register
+ // - The constants have to be aligned to 8 bytes so that they can be patched easily
+ // - The constant loads have been located to minimize NOP padding required to align the constants
+ // - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special
+ // non-volatile calling convention, this should be changed to use just one register.
+
+ // Do the move into the GC . It is correct to take an AV here, the EH code
+ // figures out that this came from a WriteBarrier and correctly maps it back
+ // to the managed method which called the WriteBarrier (see setup in
+ // InitializeExceptionHandling, vm\exceptionhandling.cpp).
+ mov [rdi], rsi
+
+ // Update the write watch table if necessary
+ mov rax, rdi
+ movabs r10, 0xF0F0F0F0F0F0F0F0
+ shr rax, 0Ch // SoftwareWriteWatch::AddressToTableByteIndexShift
+ NOP_2_BYTE // padding for alignment of constant
+ movabs r11, 0xF0F0F0F0F0F0F0F0
+ add rax, r10
+ cmp byte ptr [rax], 0h
+ .byte 0x75, 0x06
+ // jne CheckCardTable
+ mov byte ptr [rax], 0FFh
+
+ NOP_3_BYTE // padding for alignment of constant
+
+ // Check the lower and upper ephemeral region bounds
+ CheckCardTable:
+ cmp rsi, r11
+ .byte 0x72,0x3D
+ // jb Exit
+
+ NOP_3_BYTE // padding for alignment of constant
+
+ movabs r10, 0xF0F0F0F0F0F0F0F0
+
+ cmp rsi, r10
+ .byte 0x73,0x2B
+ // jae Exit
+
+ nop // padding for alignment of constant
+
+ movabs rax, 0xF0F0F0F0F0F0F0F0
+
+ // Touch the card table entry, if not already dirty.
+ shr rdi, 0Bh
+ cmp byte ptr [rdi + rax], 0FFh
+ .byte 0x75, 0x02
+ // jne UpdateCardTable
+ REPRET
+
+ UpdateCardTable:
+ mov byte ptr [rdi + rax], 0FFh
+ ret
+
+ .balign 16
+ Exit:
+ REPRET
+#else
+ // JIT_WriteBarrier_PostGrow64
+
// Do the move into the GC . It is correct to take an AV here, the EH code
// figures out that this came from a WriteBarrier and correctly maps it back
// to the managed method which called the WriteBarrier (see setup in
@@ -69,6 +173,8 @@ LEAF_ENTRY JIT_WriteBarrier, _TEXT
.balign 16
Exit:
REPRET
+#endif
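Reading the write-watch flavor of the barrier (the #ifdef branch above) as C-like pseudocode may help when following the hand-encoded jumps. The 0xF0F0F0F0F0F0F0F0 movabs constants are placeholders that the runtime patches with real addresses and bounds; they appear below as ordinary globals with assumed names, and the 4 KB and 2 KB granularities are read off the 0Ch and 0Bh shifts rather than taken from a header. A sketch, not the runtime's implementation:

    #include <cstdint>
    #include <cstddef>

    extern uint8_t* g_sw_ww_table;    // software write watch table (patched constant in the assembly)
    extern uint8_t* g_card_table;     // card table base (patched constant)
    extern uint8_t* g_ephemeral_low;  // ephemeral generation bounds (patched constants)
    extern uint8_t* g_ephemeral_high;

    void WriteBarrier_WriteWatch_PostGrow(void** dst, void* ref)
    {
        *dst = ref;  // the store itself; an AV here is mapped back to the managed caller

        // Write watch: one byte per 4 KB region (shr rax, 0Ch above), dirtied only once.
        size_t ww = reinterpret_cast<size_t>(dst) >> 12;
        if (g_sw_ww_table[ww] == 0)
            g_sw_ww_table[ww] = 0xFF;

        // Only stores of references into the ephemeral range need a card update.
        uint8_t* r = reinterpret_cast<uint8_t*>(ref);
        if (r < g_ephemeral_low || r >= g_ephemeral_high)
            return;

        // Card table: one byte per 2 KB region (shr rdi, 0Bh above); skip the write if already dirty.
        size_t card = reinterpret_cast<size_t>(dst) >> 11;
        if (g_card_table[card] != 0xFF)
            g_card_table[card] = 0xFF;
    }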
+
// make sure this guy is bigger than any of the other guys
.balign 16
nop
@@ -79,43 +185,6 @@ LEAF_ENTRY JIT_PatchedCodeLast, _TEXT
ret
LEAF_END JIT_PatchedCodeLast, _TEXT
-// There is an even more optimized version of these helpers possible which takes
-// advantage of knowledge of which way the ephemeral heap is growing to only do 1/2
-// that check (this is more significant in the JIT_WriteBarrier case).
-//
-// Additionally we can look into providing helpers which will take the src/dest from
-// specific registers (like x86) which _could_ (??) make for easier register allocation
-// for the JIT64, however it might lead to having to have some nasty code that treats
-// these guys really special like... :(.
-//
-// Version that does the move, checks whether or not it's in the GC and whether or not
-// it needs to have it's card updated
-//
-// void JIT_CheckedWriteBarrier(Object** dst, Object* src)
-LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT
-
- // When WRITE_BARRIER_CHECK is defined _NotInHeap will write the reference
- // but if it isn't then it will just return.
- //
- // See if this is in GCHeap
- PREPARE_EXTERNAL_VAR g_lowest_address, rax
- cmp rdi, [rax]
- // jb NotInHeap
- .byte 0x72, 0x0e
- PREPARE_EXTERNAL_VAR g_highest_address, rax
- cmp rdi, [rax]
- // jnb NotInHeap
- .byte 0x73, 0x02
-
- // call C_FUNC(JIT_WriteBarrier)
- .byte 0xeb, 0x84
-
- NotInHeap:
- // See comment above about possible AV
- mov [rdi], rsi
- ret
-LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT
-
// JIT_ByRefWriteBarrier has weird semantics, see usage in StubLinkerX86.cpp
//
// Entry:
@@ -128,7 +197,7 @@ LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT
//
// RCX is trashed
// RAX is trashed
-// R10 is trashed on Debug build
+// R10 is trashed
// R11 is trashed on Debug build
// Exit:
// RDI, RSI are incremented by SIZEOF(LPVOID)
@@ -202,6 +271,21 @@ LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT
DoneShadow_ByRefWriteBarrier:
#endif
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ // Update the write watch table if necessary
+ PREPARE_EXTERNAL_VAR g_sw_ww_enabled_for_gc_heap, rax
+ cmp byte ptr [rax], 0h
+ je CheckCardTable_ByRefWriteBarrier
+ mov rax, rdi
+ shr rax, 0Ch // SoftwareWriteWatch::AddressToTableByteIndexShift
+ PREPARE_EXTERNAL_VAR g_sw_ww_table, r10
+ add rax, qword ptr [r10]
+ cmp byte ptr [rax], 0h
+ jne CheckCardTable_ByRefWriteBarrier
+ mov byte ptr [rax], 0FFh
+#endif
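Because JIT_ByRefWriteBarrier sits outside the patchable barrier region, the write watch update above is gated by a runtime flag and loads the table pointer through PREPARE_EXTERNAL_VAR instead of a patched constant. A minimal sketch of that gate, with the global names taken from the diff and the wrapper function itself purely illustrative:

    #include <cstdint>
    #include <cstddef>

    extern uint8_t  g_sw_ww_enabled_for_gc_heap;  // nonzero while the GC wants write watch updates
    extern uint8_t* g_sw_ww_table;

    inline void ByRefWriteBarrier_RecordWrite(void** dst)
    {
        if (g_sw_ww_enabled_for_gc_heap == 0)
            return;  // write watch inactive: fall through to the card table check

        size_t index = reinterpret_cast<size_t>(dst) >> 12;  // SoftwareWriteWatch::AddressToTableByteIndexShift
        if (g_sw_ww_table[index] == 0)
            g_sw_ww_table[index] = 0xFF;
    }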
+
+ CheckCardTable_ByRefWriteBarrier:
// See if we can just quick out
PREPARE_EXTERNAL_VAR g_ephemeral_low, rax
cmp rcx, [rax]