summaryrefslogtreecommitdiff
path: root/src/vm/i386/jithelp.S
diff options
context:
space:
mode:
Diffstat (limited to 'src/vm/i386/jithelp.S')
-rw-r--r--src/vm/i386/jithelp.S749
1 files changed, 749 insertions, 0 deletions
diff --git a/src/vm/i386/jithelp.S b/src/vm/i386/jithelp.S
new file mode 100644
index 0000000000..66ae9fb451
--- /dev/null
+++ b/src/vm/i386/jithelp.S
@@ -0,0 +1,749 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+.intel_syntax noprefix
+#include "unixasmmacros.inc"
+#include "asmconstants.h"
+
+
+// ***
+// JIT_WriteBarrier* - GC write barrier helper
+//
+// Purpose:
+// Helper calls in order to assign an object to a field
+// Enables book-keeping of the GC.
+//
+// Entry:
+// EDX - address of ref-field (assigned to)
+// the resp. other reg - RHS of assignment
+//
+// Exit:
+//
+// Uses:
+// EDX is destroyed.
+//
+// Exceptions:
+//
+// *******************************************************************************
+
+// The code here is tightly coupled with AdjustContextForWriteBarrier, if you change
+// anything here, you might need to change AdjustContextForWriteBarrier as well
+.macro WriteBarrierHelper rg
+.align 4
+
+// The entry point is the fully 'safe' one in which we check if EDX (the REF
+// begin updated) is actually in the GC heap
+NESTED_ENTRY JIT_CheckedWriteBarrier\rg, _TEXT, NoHandler
+ // check in the REF being updated is in the GC heap
+ push eax
+ PREPARE_EXTERNAL_VAR g_lowest_address, eax
+ cmp edx, [eax]
+ pop eax
+ jb LOCAL_LABEL(WriteBarrier_NotInHeap_\rg)
+ push eax
+ PREPARE_EXTERNAL_VAR g_highest_address, eax
+ cmp edx, [eax]
+ pop eax
+ jae LOCAL_LABEL(WriteBarrier_NotInHeap_\rg)
+
+ // fall through to unchecked routine
+ // note that its entry point also happens to be aligned
+
+#ifdef WRITE_BARRIER_CHECK
+ // This entry point is used when you know the REF pointer being updated
+ // is in the GC heap
+PATCH_LABEL JIT_DebugWriteBarrier\rg
+#endif // WRITE_BARRIER_CHECK
+
+#ifdef _DEBUG
+ push edx
+ push ecx
+ push eax
+
+ push \rg
+ push edx
+ call C_FUNC(WriteBarrierAssert)
+
+ pop eax
+ pop ecx
+ pop edx
+#endif // _DEBUG
+
+ // in the !WRITE_BARRIER_CHECK case this will be the move for all
+ // addresses in the GCHeap, addresses outside the GCHeap will get
+ // taken care of below at WriteBarrier_NotInHeap_&rg
+
+#ifndef WRITE_BARRIER_CHECK
+ mov DWORD PTR [edx], \rg
+#endif // !WRITE_BARRIER_CHECK
+
+#ifdef WRITE_BARRIER_CHECK
+ // Test dest here so if it is bad AV would happen before we change register/stack
+ // status. This makes job of AdjustContextForWriteBarrier easier.
+ cmp BYTE PTR [edx], 0
+ // ALSO update the shadow GC heap if that is enabled
+ // Make ebp into the temporary src register. We need to do this so that we can use ecx
+ // in the calculation of the shadow GC address, but still have access to the src register
+ push ecx
+ push ebp
+ mov ebp, \rg
+
+ // if g_GCShadow is 0, don't perform the check
+ push eax
+ PREPARE_EXTERNAL_VAR g_GCShadow, eax
+ cmp DWORD PTR [eax], 0
+ pop eax
+ je LOCAL_LABEL(WriteBarrier_NoShadow_\rg)
+
+ mov ecx, edx
+ push eax
+ PREPARE_EXTERNAL_VAR g_lowest_address, eax
+ sub ecx, [eax]
+ pop eax
+ jb LOCAL_LABEL(WriteBarrier_NoShadow_\rg)
+ push edx
+ PREPARE_EXTERNAL_VAR g_GCShadow, edx
+ mov [edx], edx
+ add ecx, [edx]
+ PREPARE_EXTERNAL_VAR g_GCShadowEnd, edx
+ mov [edx], edx
+ cmp ecx, [edx]
+ pop edx
+ ja LOCAL_LABEL(WriteBarrier_NoShadow_\rg)
+
+ // TODO: In Orcas timeframe if we move to P4+ only on X86 we should enable
+ // mfence barriers on either side of these two writes to make sure that
+ // they stay as close together as possible
+
+ // edx contains address in GC
+ // ecx contains address in ShadowGC
+ // ebp temporarially becomes the src register
+
+ // When we're writing to the shadow GC heap we want to be careful to minimize
+ // the risk of a race that can occur here where the GC and ShadowGC don't match
+ mov DWORD PTR [edx], ebp
+ mov DWORD PTR [ecx], ebp
+
+ // We need a scratch register to verify the shadow heap. We also need to
+ // construct a memory barrier so that the write to the shadow heap happens
+ // before the read from the GC heap. We can do both by using SUB/XCHG
+ // rather than PUSH.
+ //
+ // TODO: Should be changed to a push if the mfence described above is added.
+ //
+ sub esp, 4
+ xchg [esp], eax
+
+ // As part of our race avoidance (see above) we will now check whether the values
+ // in the GC and ShadowGC match. There is a possibility that we're wrong here but
+ // being overaggressive means we might mask a case where someone updates GC refs
+ // without going to a write barrier, but by its nature it will be indeterminant
+ // and we will find real bugs whereas the current implementation is indeterminant
+ // but only leads to investigations that find that this code is fundamentally flawed
+ mov eax, [edx]
+ cmp [ecx], eax
+ je LOCAL_LABEL(WriteBarrier_CleanupShadowCheck_\rg)
+ mov DWORD PTR [ecx], INVALIDGCVALUE
+
+LOCAL_LABEL(WriteBarrier_CleanupShadowCheck_\rg):
+ pop eax
+
+ jmp LOCAL_LABEL(WriteBarrier_ShadowCheckEnd_\rg)
+
+LOCAL_LABEL(WriteBarrier_NoShadow_\rg):
+ // If we come here then we haven't written the value to the GC and need to.
+ // ebp contains rg
+ // We restore ebp/ecx immediately after this, and if either of them is the src
+ // register it will regain its value as the src register.
+ mov DWORD PTR [edx], ebp
+LOCAL_LABEL(WriteBarrier_ShadowCheckEnd_\rg):
+ pop ebp
+ pop ecx
+#endif // WRITE_BARRIER_CHECK
+
+ push eax
+ push ebx
+ mov eax, \rg
+ PREPARE_EXTERNAL_VAR g_ephemeral_low, ebx
+ cmp eax, [ebx]
+ pop ebx
+ pop eax
+ jb LOCAL_LABEL(WriteBarrier_NotInEphemeral_\rg)
+ push eax
+ push ebx
+ mov eax, \rg
+ PREPARE_EXTERNAL_VAR g_ephemeral_high, ebx
+ cmp eax, [ebx]
+ pop ebx
+ pop eax
+ jae LOCAL_LABEL(WriteBarrier_NotInEphemeral_\rg)
+
+ shr edx, 10
+ push eax
+ PREPARE_EXTERNAL_VAR g_card_table, eax
+ add edx, [eax]
+ pop eax
+ cmp BYTE PTR [edx], 0FFh
+ jne LOCAL_LABEL(WriteBarrier_UpdateCardTable_\rg)
+ ret
+
+LOCAL_LABEL(WriteBarrier_UpdateCardTable_\rg):
+ mov BYTE PTR [edx], 0FFh
+ ret
+
+LOCAL_LABEL(WriteBarrier_NotInHeap_\rg):
+ // If it wasn't in the heap then we haven't updated the dst in memory yet
+ mov DWORD PTR [edx], \rg
+
+LOCAL_LABEL(WriteBarrier_NotInEphemeral_\rg):
+ // If it is in the GC Heap but isn't in the ephemeral range we've already
+ // updated the Heap with the Object*.
+ ret
+NESTED_END JIT_CheckedWriteBarrier\rg, _TEXT
+
+.endm
+
+
+// ***
+// JIT_ByRefWriteBarrier* - GC write barrier helper
+//
+// Purpose:
+// Helper calls in order to assign an object to a byref field
+// Enables book-keeping of the GC.
+//
+// Entry:
+// EDI - address of ref-field (assigned to)
+// ESI - address of the data (source)
+// ECX can be trashed
+//
+// Exit:
+//
+// Uses:
+// EDI and ESI are incremented by a DWORD
+//
+// Exceptions:
+//
+// *******************************************************************************
+//
+// The code here is tightly coupled with AdjustContextForWriteBarrier, if you change
+// anything here, you might need to change AdjustContextForWriteBarrier as well
+//
+.macro ByRefWriteBarrierHelper
+.align 4
+
+LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT
+ // test for dest in range
+ mov ecx, [esi]
+ push eax
+ PREPARE_EXTERNAL_VAR g_lowest_address, eax
+ cmp edi, [eax]
+ pop eax
+ jb LOCAL_LABEL(ByRefWriteBarrier_NotInHeap)
+ push eax
+ PREPARE_EXTERNAL_VAR g_highest_address, eax
+ cmp edi, [eax]
+ pop eax
+ jae LOCAL_LABEL(ByRefWriteBarrier_NotInHeap)
+
+#ifndef WRITE_BARRIER_CHECK
+ // write barrier
+ mov [edi], ecx
+#endif // !WRITE_BARRIER_CHECK
+
+#ifdef WRITE_BARRIER_CHECK
+ // Test dest here so if it is bad AV would happen before we change register/stack
+ // status. This makes job of AdjustContextForWriteBarrier easier.
+ cmp BYTE PTR [edi], 0
+
+ // ALSO update the shadow GC heap if that is enabled
+
+ // use edx for address in GC Shadow,
+ push edx
+
+ // if g_GCShadow is 0, don't do the update
+ push ebx
+ PREPARE_EXTERNAL_VAR g_GCShadow, ebx
+ cmp DWORD PTR [ebx], 0
+ pop ebx
+ je LOCAL_LABEL(ByRefWriteBarrier_NoShadow)
+
+ mov edx, edi
+ push ebx
+ PREPARE_EXTERNAL_VAR g_lowest_address, ebx
+ sub edx, [ebx] // U/V
+ pop ebx
+ jb LOCAL_LABEL(ByRefWriteBarrier_NoShadow)
+ push eax
+ PREPARE_EXTERNAL_VAR g_GCShadow, eax
+ mov eax, [eax]
+ add edx, [eax]
+ PREPARE_EXTERNAL_VAR g_GCShadowEnd, eax
+ mov eax, [eax]
+ cmp edx, [eax]
+ pop eax
+ ja LOCAL_LABEL(ByRefWriteBarrier_NoShadow)
+
+ // TODO: In Orcas timeframe if we move to P4+ only on X86 we should enable
+ // mfence barriers on either side of these two writes to make sure that
+ // they stay as close together as possible
+
+ // edi contains address in GC
+ // edx contains address in ShadowGC
+ // ecx is the value to assign
+
+ // When we're writing to the shadow GC heap we want to be careful to minimize
+ // the risk of a race that can occur here where the GC and ShadowGC don't match
+ mov DWORD PTR [edi], ecx
+ mov DWORD PTR [edx], ecx
+
+ // We need a scratch register to verify the shadow heap. We also need to
+ // construct a memory barrier so that the write to the shadow heap happens
+ // before the read from the GC heap. We can do both by using SUB/XCHG
+ // rather than PUSH.
+ //
+ // TODO: Should be changed to a push if the mfence described above is added.
+ //
+ sub esp, 4
+ xchg [esp], eax
+
+ // As part of our race avoidance (see above) we will now check whether the values
+ // in the GC and ShadowGC match. There is a possibility that we're wrong here but
+ // being overaggressive means we might mask a case where someone updates GC refs
+ // without going to a write barrier, but by its nature it will be indeterminant
+ // and we will find real bugs whereas the current implementation is indeterminant
+ // but only leads to investigations that find that this code is fundamentally flawed
+
+ mov eax, [edi]
+ cmp [edx], eax
+ je LOCAL_LABEL(ByRefWriteBarrier_CleanupShadowCheck)
+ mov DWORD PTR [edx], INVALIDGCVALUE
+LOCAL_LABEL(ByRefWriteBarrier_CleanupShadowCheck):
+ pop eax
+ jmp LOCAL_LABEL(ByRefWriteBarrier_ShadowCheckEnd)
+
+LOCAL_LABEL(ByRefWriteBarrier_NoShadow):
+ // If we come here then we haven't written the value to the GC and need to.
+ mov DWORD PTR [edi], ecx
+
+LOCAL_LABEL(ByRefWriteBarrier_ShadowCheckEnd):
+ pop edx
+#endif // WRITE_BARRIER_CHECK
+
+ // test for *src in ephemeral segement
+ push eax
+ PREPARE_EXTERNAL_VAR g_ephemeral_low, eax
+ cmp ecx, [eax]
+ pop eax
+ jb LOCAL_LABEL(ByRefWriteBarrier_NotInEphemeral)
+ push eax
+ PREPARE_EXTERNAL_VAR g_ephemeral_high, eax
+ cmp ecx, [eax]
+ pop eax
+ jae LOCAL_LABEL(ByRefWriteBarrier_NotInEphemeral)
+
+ mov ecx, edi
+ add esi, 4
+ add edi, 4
+
+ shr ecx, 10
+ push eax
+ PREPARE_EXTERNAL_VAR g_card_table, eax
+ add ecx, [eax]
+ pop eax
+ cmp BYTE PTR [ecx], 0FFh
+ jne LOCAL_LABEL(ByRefWriteBarrier_UpdateCardTable)
+ ret
+LOCAL_LABEL(ByRefWriteBarrier_UpdateCardTable):
+ mov BYTE PTR [ecx], 0FFh
+ ret
+
+LOCAL_LABEL(ByRefWriteBarrier_NotInHeap):
+ // If it wasn't in the heap then we haven't updated the dst in memory yet
+ mov [edi], ecx
+LOCAL_LABEL(ByRefWriteBarrier_NotInEphemeral):
+ // If it is in the GC Heap but isn't in the ephemeral range we've already
+ // updated the Heap with the Object*.
+ add esi, 4
+ add edi, 4
+ ret
+NESTED_END JIT_ByRefWriteBarrier, _TEXT
+
+.endm
+
+// WriteBarrierStart and WriteBarrierEnd are used to determine bounds of
+// WriteBarrier functions so can determine if got AV in them.
+//
+LEAF_ENTRY JIT_WriteBarrierStart, _TEXT
+ ret
+LEAF_END JIT_WriteBarrierStart, _TEXT
+
+#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS
+// *******************************************************************************
+// Write barrier wrappers with fcall calling convention
+//
+.macro UniversalWriteBarrierHelper name
+.align 4
+
+LEAF_ENTRY JIT_\name, _TEXT
+ mov eax, edx
+ mov edx, ecx
+ jmp C_FUNC(JIT_\name\()EAX)
+LEAF_END JIT_\name, _TEXT
+
+.endm
+
+// Only define these if we're using the ASM GC write barriers; if this flag is not defined,
+// we'll use C++ versions of these write barriers.
+UniversalWriteBarrierHelper CheckedWriteBarrier
+UniversalWriteBarrierHelper WriteBarrier
+#endif // FEATURE_USE_ASM_GC_WRITE_BARRIERS
+
+WriteBarrierHelper EAX
+WriteBarrierHelper EBX
+WriteBarrierHelper ECX
+WriteBarrierHelper ESI
+WriteBarrierHelper EDI
+WriteBarrierHelper EBP
+
+ByRefWriteBarrierHelper
+
+LEAF_ENTRY JIT_WriteBarrierLast, _TEXT
+ ret
+LEAF_END JIT_WriteBarrierLast, _TEXT
+
+// This is the first function outside the "keep together range". Used by BBT scripts.
+LEAF_ENTRY JIT_WriteBarrierEnd, _TEXT
+ ret
+LEAF_END JIT_WriteBarrierEnd, _TEXT
+
+// *********************************************************************/
+// In cases where we support it we have an optimized GC Poll callback.
+// Normall (when we're not trying to suspend for GC, the CORINFO_HELP_POLL_GC
+// helper points to this nop routine. When we're ready to suspend for GC,
+// we whack the Jit Helper table entry to point to the real helper. When we're
+// done with GC we whack it back.
+LEAF_ENTRY JIT_PollGC_Nop, _TEXT
+ ret
+LEAF_END JIT_PollGC_Nop, _TEXT
+
+// *********************************************************************/
+// llshl - long shift left
+//
+// Purpose:
+// Does a Long Shift Left (signed and unsigned are identical)
+// Shifts a long left any number of bits.
+//
+// NOTE: This routine has been adapted from the Microsoft CRTs.
+//
+// Entry:
+// EDX:EAX - long value to be shifted
+// ECX - number of bits to shift by
+//
+// Exit:
+// EDX:EAX - shifted value
+//
+.align 16
+LEAF_ENTRY JIT_LLsh, _TEXT
+ cmp ecx, 32
+ jae LOCAL_LABEL(LLshMORE32)
+
+ // Handle shifts of between bits 0 and 31
+ shld edx, eax, cl
+ shl eax, cl
+ ret
+
+LOCAL_LABEL(LLshMORE32):
+ // Handle shifts of between bits 32 and 63
+ // The x86 shift instructions only use the lower 5 bits.
+ mov edx, eax
+ xor eax, eax
+ shl edx, cl
+ ret
+LEAF_END JIT_LLsh, _TEXT
+
+// *********************************************************************/
+// LRsh - long shift right
+//
+// Purpose:
+// Does a signed Long Shift Right
+// Shifts a long right any number of bits.
+//
+// NOTE: This routine has been adapted from the Microsoft CRTs.
+//
+// Entry:
+// EDX:EAX - long value to be shifted
+// ECX - number of bits to shift by
+//
+// Exit:
+// EDX:EAX - shifted value
+//
+.align 16
+LEAF_ENTRY JIT_LRsh, _TEXT
+ cmp ecx, 32
+ jae LOCAL_LABEL(LRshMORE32)
+
+ // Handle shifts of between bits 0 and 31
+ shrd eax, edx, cl
+ sar edx, cl
+ ret
+
+LOCAL_LABEL(LRshMORE32):
+ // Handle shifts of between bits 32 and 63
+ // The x86 shift instructions only use the lower 5 bits.
+ mov eax, edx
+ sar edx, 31
+ sar eax, cl
+ ret
+LEAF_END JIT_LRsh, _TEXT
+
+// *********************************************************************/
+// LRsz:
+// Purpose:
+// Does a unsigned Long Shift Right
+// Shifts a long right any number of bits.
+//
+// NOTE: This routine has been adapted from the Microsoft CRTs.
+//
+// Entry:
+// EDX:EAX - long value to be shifted
+// ECX - number of bits to shift by
+//
+// Exit:
+// EDX:EAX - shifted value
+//
+.align 16
+LEAF_ENTRY JIT_LRsz, _TEXT
+ cmp ecx, 32
+ jae LOCAL_LABEL(LRszMORE32)
+
+ // Handle shifts of between bits 0 and 31
+ shrd eax, edx, cl
+ shr edx, cl
+ ret
+
+LOCAL_LABEL(LRszMORE32):
+ // Handle shifts of between bits 32 and 63
+ // The x86 shift instructions only use the lower 5 bits.
+ mov eax, edx
+ xor edx, edx
+ shr eax, cl
+ ret
+LEAF_END JIT_LRsz, _TEXT
+
+// *********************************************************************/
+// JIT_Dbl2LngP4x87
+//
+// Purpose:
+// converts a double to a long truncating toward zero (C semantics)
+//
+// uses stdcall calling conventions
+//
+// This code is faster on a P4 than the Dbl2Lng code above, but is
+// slower on a PIII. Hence we choose this code when on a P4 or above.
+//
+LEAF_ENTRY JIT_Dbl2LngP4x87, _TEXT
+ // get some local space
+ sub esp, 8
+
+ #define arg1 [esp + 0Ch]
+ fld QWORD PTR arg1 // fetch arg
+ fnstcw WORD PTR arg1 // store FPCW
+ movzx eax, WORD PTR arg1 // zero extend - wide
+ or ah, 0Ch // turn on OE and DE flags
+ mov DWORD PTR [esp], eax // store new FPCW bits
+ fldcw WORD PTR [esp] // reload FPCW with new bits
+ fistp QWORD PTR [esp] // convert
+
+ // reload FP result
+ mov eax, DWORD PTR [esp]
+ mov edx, DWORD PTR [esp + 4]
+
+ // reload original FPCW value
+ fldcw WORD PTR arg1
+ #undef arg1
+
+ // restore stack
+ add esp, 8
+
+ ret 8
+LEAF_END JIT_Dbl2LngP4x87, _TEXT
+
+// *********************************************************************/
+// JIT_Dbl2LngSSE3
+//
+// Purpose:
+// converts a double to a long truncating toward zero (C semantics)
+//
+// uses stdcall calling conventions
+//
+// This code is faster than the above P4 x87 code for Intel processors
+// equal or later than Core2 and Atom that have SSE3 support
+//
+LEAF_ENTRY JIT_Dbl2LngSSE3, _TEXT
+ // get some local space
+ sub esp, 8
+
+ fld QWORD PTR [esp + 0Ch] // fetch arg
+ fisttp QWORD PTR [esp] // convert
+ mov eax, DWORD PTR [esp] // reload FP result
+ mov edx, DWORD PTR [esp + 4]
+
+ // restore stack
+ add esp, 8
+
+ ret 8
+LEAF_END JIT_Dbl2LngSSE3, _TEXT
+
+// *********************************************************************/
+// JIT_Dbl2IntSSE2
+//
+// Purpose:
+// converts a double to a long truncating toward zero (C semantics)
+//
+// uses stdcall calling conventions
+//
+// This code is even faster than the P4 x87 code for Dbl2LongP4x87,
+// but only returns a 32 bit value (only good for int).
+//
+LEAF_ENTRY JIT_Dbl2IntSSE2, _TEXT
+ movsd xmm0, [esp + 4]
+ cvttsd2si eax, xmm0
+ ret 8
+LEAF_END JIT_Dbl2IntSSE2, _TEXT
+
+// *********************************************************************/
+// This is the small write barrier thunk we use when we know the
+// ephemeral generation is higher in memory than older generations.
+// The 0x0F0F0F0F values are bashed by the two functions above.
+// This the generic version - wherever the code says ECX,
+// the specific register is patched later into a copy
+// Note: do not replace ECX by EAX - there is a smaller encoding for
+// the compares just for EAX, which won't work for other registers.
+//
+// READ THIS!!!!!!
+// it is imperative that the addresses of of the values that we overwrite
+// (card table, ephemeral region ranges, etc) are naturally aligned since
+// there are codepaths that will overwrite these values while the EE is running.
+//
+LEAF_ENTRY JIT_WriteBarrierReg_PreGrow, _TEXT
+ mov DWORD PTR [edx], ecx
+ cmp ecx, 0F0F0F0F0h
+ jb LOCAL_LABEL(NoWriteBarrierPre)
+
+ shr edx, 10
+ nop // padding for alignment of constant
+ cmp BYTE PTR [edx + 0F0F0F0F0h], 0FFh
+ jne LOCAL_LABEL(WriteBarrierPre)
+
+LOCAL_LABEL(NoWriteBarrierPre):
+ ret
+ nop // padding for alignment of constant
+ nop // padding for alignment of constant
+
+LOCAL_LABEL(WriteBarrierPre):
+ mov BYTE PTR [edx+0F0F0F0F0h], 0FFh
+ ret
+LEAF_END JIT_WriteBarrierReg_PreGrow, _TEXT
+
+// *********************************************************************/
+// This is the larger write barrier thunk we use when we know that older
+// generations may be higher in memory than the ephemeral generation
+// The 0x0F0F0F0F values are bashed by the two functions above.
+// This the generic version - wherever the code says ECX,
+// the specific register is patched later into a copy
+// Note: do not replace ECX by EAX - there is a smaller encoding for
+// the compares just for EAX, which won't work for other registers.
+// NOTE: we need this aligned for our validation to work properly
+.align 4
+LEAF_ENTRY JIT_WriteBarrierReg_PostGrow, _TEXT
+ mov DWORD PTR [edx], ecx
+ cmp ecx, 0F0F0F0F0h
+ jb LOCAL_LABEL(NoWriteBarrierPost)
+ cmp ecx, 0F0F0F0F0h
+ jae LOCAL_LABEL(NoWriteBarrierPost)
+
+ shr edx, 10
+ nop // padding for alignment of constant
+ cmp BYTE PTR [edx + 0F0F0F0F0h], 0FFh
+ jne LOCAL_LABEL(WriteBarrierPost)
+
+LOCAL_LABEL(NoWriteBarrierPost):
+ ret
+ nop // padding for alignment of constant
+ nop // padding for alignment of constant
+
+LOCAL_LABEL(WriteBarrierPost):
+ mov BYTE PTR [edx + 0F0F0F0F0h], 0FFh
+ ret
+LEAF_END JIT_WriteBarrierReg_PostGrow,_TEXT
+
+// PatchedCodeStart and PatchedCodeEnd are used to determine bounds of patched code.
+//
+
+LEAF_ENTRY JIT_PatchedCodeStart, _TEXT
+ ret
+LEAF_END JIT_PatchedCodeStart, _TEXT
+
+// **********************************************************************
+// Write barriers generated at runtime
+
+LEAF_ENTRY JIT_PatchedWriteBarrierStart, _TEXT
+ ret
+LEAF_END JIT_PatchedWriteBarrierStart, _TEXT
+
+.macro PatchedWriteBarrierHelper rg
+.align 8
+LEAF_ENTRY JIT_WriteBarrier\rg, _TEXT
+ // Just allocate space that will be filled in at runtime
+ .space 0CCH, 48
+LEAF_END JIT_WriteBarrier\rg, _TEXT
+
+.endm
+
+PatchedWriteBarrierHelper EAX
+PatchedWriteBarrierHelper EBX
+PatchedWriteBarrierHelper ECX
+PatchedWriteBarrierHelper ESI
+PatchedWriteBarrierHelper EDI
+PatchedWriteBarrierHelper EBP
+
+LEAF_ENTRY JIT_PatchedWriteBarrierLast, _TEXT
+ ret
+LEAF_END JIT_PatchedWriteBarrierLast, _TEXT
+
+LEAF_ENTRY JIT_PatchedCodeLast, _TEXT
+ ret
+LEAF_END JIT_PatchedCodeLast, _TEXT
+
+// This is the first function outside the "keep together range". Used by BBT scripts.
+LEAF_ENTRY JIT_PatchedCodeEnd, _TEXT
+ ret
+LEAF_END JIT_PatchedCodeEnd, _TEXT
+
+// Note that the debugger skips this entirely when doing SetIP,
+// since COMPlusCheckForAbort should always return 0. Excep.cpp:LeaveCatch
+// asserts that to be true. If this ends up doing more work, then the
+// debugger may need additional support.
+// void __stdcall JIT_EndCatch();
+NESTED_ENTRY JIT_EndCatch, _TEXT, NoHandler
+ // make temp storage for return address, and push the address of that
+ // as the last arg to COMPlusEndCatch
+ mov ecx, [esp]
+ push ecx
+ push esp
+
+ // push the rest of COMPlusEndCatch's args, right-to-left
+ push esi
+ push edi
+ push ebx
+ push ebp
+
+ // returns old esp value in eax, stores jump address
+ call C_FUNC(COMPlusEndCatch)
+ // now eax = new esp, [esp] = new eip
+
+ pop edx // edx = new eip
+ mov esp, eax // esp = new esp
+ jmp edx // eip = new eip
+NESTED_END JIT_EndCatch, _TEXT