Diffstat (limited to 'src/vm/amd64/JitHelpers_Fast.asm')
-rw-r--r--  src/vm/amd64/JitHelpers_Fast.asm  1028
1 file changed, 1028 insertions, 0 deletions
diff --git a/src/vm/amd64/JitHelpers_Fast.asm b/src/vm/amd64/JitHelpers_Fast.asm
new file mode 100644
index 0000000000..f004be549e
--- /dev/null
+++ b/src/vm/amd64/JitHelpers_Fast.asm
@@ -0,0 +1,1028 @@
+; Licensed to the .NET Foundation under one or more agreements.
+; The .NET Foundation licenses this file to you under the MIT license.
+; See the LICENSE file in the project root for more information.
+
+; ==++==
+;
+
+;
+; ==--==
+; ***********************************************************************
+; File: JitHelpers_Fast.asm, see jithelp.asm for history
+;
+; Notes: routines that we believe to be on the hot path for managed
+; code in most scenarios.
+; ***********************************************************************
+
+
+include AsmMacros.inc
+include asmconstants.inc
+
+; Min amount of stack space that a nested function should allocate.
+MIN_SIZE equ 28h
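+; (28h = 20h bytes of register home space for any outgoing call, plus 8h so that
+; rsp stays 16-byte aligned after the call instruction pushes a return address.)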
+
+EXTERN g_ephemeral_low:QWORD
+EXTERN g_ephemeral_high:QWORD
+EXTERN g_lowest_address:QWORD
+EXTERN g_highest_address:QWORD
+EXTERN g_card_table:QWORD
+
+ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+EXTERN g_sw_ww_table:QWORD
+EXTERN g_sw_ww_enabled_for_gc_heap:BYTE
+endif
+
+ifdef WRITE_BARRIER_CHECK
+; These global variables are always defined, but should be 0 for Server GC
+g_GCShadow TEXTEQU <?g_GCShadow@@3PEAEEA>
+g_GCShadowEnd TEXTEQU <?g_GCShadowEnd@@3PEAEEA>
+EXTERN g_GCShadow:QWORD
+EXTERN g_GCShadowEnd:QWORD
+endif
+
+INVALIDGCVALUE equ 0CCCCCCCDh
+
+ifdef _DEBUG
+extern JIT_WriteBarrier_Debug:proc
+endif
+
+extern JIT_InternalThrow:proc
+
+extern JITutil_ChkCastInterface:proc
+extern JITutil_IsInstanceOfInterface:proc
+extern JITutil_ChkCastAny:proc
+extern JITutil_IsInstanceOfAny:proc
+
+;EXTERN_C Object* JIT_IsInstanceOfClass(MethodTable* pMT, Object* pObject);
+LEAF_ENTRY JIT_IsInstanceOfClass, _TEXT
+ ; move rdx into rax in case of a match or null
+ mov rax, rdx
+
+ ; check if the instance is null
+ test rdx, rdx
+ je IsNullInst
+
+ ; check if the MethodTable for the instance matches pMT
+ cmp rcx, qword ptr [rdx]
+ jne JIT_IsInstanceOfClass2
+
+ IsNullInst:
+ REPRET
+LEAF_END JIT_IsInstanceOfClass, _TEXT
+
+LEAF_ENTRY JIT_IsInstanceOfClass2, _TEXT
+ ; check if the parent class matches.
+ ; start by putting the MethodTable for the instance in rdx
+ mov rdx, qword ptr [rdx]
+
+ align 16
+ CheckParent:
+ ; NULL parent MethodTable* indicates that we're at the top of the hierarchy
+
+ ; unroll 0
+ mov rdx, qword ptr [rdx + OFFSETOF__MethodTable__m_pParentMethodTable]
+ cmp rcx, rdx
+ je IsInst
+ test rdx, rdx
+ je DoneWithLoop
+
+ ; unroll 1
+ mov rdx, qword ptr [rdx + OFFSETOF__MethodTable__m_pParentMethodTable]
+ cmp rcx, rdx
+ je IsInst
+ test rdx, rdx
+ je DoneWithLoop
+
+ ; unroll 2
+ mov rdx, qword ptr [rdx + OFFSETOF__MethodTable__m_pParentMethodTable]
+ cmp rcx, rdx
+ je IsInst
+ test rdx, rdx
+ je DoneWithLoop
+
+ ; unroll 3
+ mov rdx, qword ptr [rdx + OFFSETOF__MethodTable__m_pParentMethodTable]
+ cmp rcx, rdx
+ je IsInst
+ test rdx, rdx
+ jne CheckParent
+
+ align 16
+ DoneWithLoop:
+if METHODTABLE_EQUIVALENCE_FLAGS gt 0
+ ; check if the instance is a proxy or has type equivalence
+ ; get the MethodTable of the original Object (stored earlier in rax)
+ mov rdx, [rax]
+ test dword ptr [rdx + OFFSETOF__MethodTable__m_dwFlags], METHODTABLE_EQUIVALENCE_FLAGS
+ jne SlowPath
+endif ; METHODTABLE_EQUIVALENCE_FLAGS gt 0
+
+ ; we didn't find a match in the ParentMethodTable hierarchy
+ ; and it isn't a proxy and doesn't have type equivalence, return NULL
+ xor eax, eax
+ ret
+if METHODTABLE_EQUIVALENCE_FLAGS gt 0
+ SlowPath:
+ ; Set up the args to call JITutil_IsInstanceOfAny. Note that rcx already contains
+ ; the MethodTable*
+ mov rdx, rax ; rdx = Object*
+
+ ; Call out to JITutil_IsInstanceOfAny to handle the proxy/equivalence case.
+ jmp JITutil_IsInstanceOfAny
+endif ; METHODTABLE_EQUIVALENCE_FLAGS gt 0
+ ; if it is a null instance then rax is null
+ ; if they match then rax contains the instance
+ align 16
+ IsInst:
+ REPRET
+LEAF_END JIT_IsInstanceOfClass2, _TEXT
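+
+; Illustrative C-level shape of JIT_IsInstanceOfClass / JIT_IsInstanceOfClass2 above
+; (a sketch only, using the field names referenced in this file, not the real C++ code):
+;
+;   Object* JIT_IsInstanceOfClass(MethodTable* pMT, Object* pObject)
+;   {
+;       if (pObject == NULL)
+;           return NULL;
+;       for (MethodTable* pCur = pObject->GetMethodTable(); pCur != NULL;
+;            pCur = pCur->m_pParentMethodTable)
+;       {
+;           if (pCur == pMT)
+;               return pObject;
+;       }
+;       // optional slow path: proxy / type equivalence via JITutil_IsInstanceOfAny
+;       return NULL;
+;   }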
+
+; TODO: this is not necessary... we will be calling JIT_ChkCastClass2 all of the time
+; now that the JIT inlines the null check and the exact MT comparison... Or are
+; they only doing it on the IBC hot path??? Look into that. If it will turn out
+; to be cold then put it down at the bottom.
+
+;EXTERN_C Object* JIT_ChkCastClass(MethodTable* pMT, Object* pObject);
+LEAF_ENTRY JIT_ChkCastClass, _TEXT
+ ; check if the instance is null
+ test rdx, rdx
+ je IsNullInst
+
+ ; check if the MethodTable for the instance matches pMT
+ cmp rcx, qword ptr [rdx]
+ jne JIT_ChkCastClassSpecial
+
+ IsNullInst:
+ ; setup the return value for a match or null
+ mov rax, rdx
+ ret
+LEAF_END JIT_ChkCastClass, _TEXT
+
+LEAF_ENTRY JIT_ChkCastClassSpecial, _TEXT
+ ; save off the instance in case it is a proxy, and to setup
+ ; our return value for a match
+ mov rax, rdx
+
+ ; check if the parent class matches.
+ ; start by putting the MethodTable for the instance in rdx
+ mov rdx, qword ptr [rdx]
+ align 16
+ CheckParent:
+ ; NULL parent MethodTable* indicates that we're at the top of the hierarchy
+
+ ; unroll 0
+ mov rdx, qword ptr [rdx + OFFSETOF__MethodTable__m_pParentMethodTable]
+ cmp rcx, rdx
+ je IsInst
+ test rdx, rdx
+ je DoneWithLoop
+
+ ; unroll 1
+ mov rdx, qword ptr [rdx + OFFSETOF__MethodTable__m_pParentMethodTable]
+ cmp rcx, rdx
+ je IsInst
+ test rdx, rdx
+ je DoneWithLoop
+
+ ; unroll 2
+ mov rdx, qword ptr [rdx + OFFSETOF__MethodTable__m_pParentMethodTable]
+ cmp rcx, rdx
+ je IsInst
+ test rdx, rdx
+ je DoneWithLoop
+
+ ; unroll 3
+ mov rdx, qword ptr [rdx + OFFSETOF__MethodTable__m_pParentMethodTable]
+ cmp rcx, rdx
+ je IsInst
+ test rdx, rdx
+ jne CheckParent
+
+ align 16
+ DoneWithLoop:
+ ; Set up the args to call JITutil_ChkCastAny. Note that rcx already contains the MethodTable*
+ mov rdx, rax ; rdx = Object*
+
+ ; Call out to JITutil_ChkCastAny to handle the proxy case and throw a rich
+ ; InvalidCastException in case of failure.
+ jmp JITutil_ChkCastAny
+
+ ; if it is a null instance then rax is null
+ ; if they match then rax contains the instance
+ align 16
+ IsInst:
+ REPRET
+LEAF_END JIT_ChkCastClassSpecial, _TEXT
+
+FIX_INDIRECTION macro Reg
+ifdef FEATURE_PREJIT
+ test Reg, 1
+ jz @F
+ mov Reg, [Reg-1]
+ @@:
+endif
+endm
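+
+; FIX_INDIRECTION: with FEATURE_PREJIT, an interface map entry may hold a tagged
+; fixup pointer (low bit set) instead of the MethodTable* itself, in which case the
+; real pointer lives in the indirection cell at [Reg - 1]. Roughly, in C-like terms:
+;
+;   if (entry & 1) entry = *(UINT_PTR*)(entry - 1);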
+
+; PERF TODO: consider prefetching the entire interface map into the cache
+
+; For all bizarre casts this quickly fails and falls back onto the JITutil_IsInstanceOfInterface
+; helper; this means that all failure cases take the slow path as well.
+;
+; This can trash r10/r11
+LEAF_ENTRY JIT_IsInstanceOfInterface, _TEXT
+ test rdx, rdx
+ jz IsNullInst
+
+ ; get methodtable
+ mov rax, [rdx]
+ mov r11w, word ptr [rax + OFFSETOF__MethodTable__m_wNumInterfaces]
+
+ test r11w, r11w
+ jz DoBizarre
+
+ ; fetch interface map ptr
+ mov rax, [rax + OFFSETOF__MethodTable__m_pInterfaceMap]
+
+ ; r11 holds number of interfaces
+ ; rax is pointer to beginning of interface map list
+ align 16
+ Top:
+ ; rax -> InterfaceInfo_t* into the interface map, aligned to 4 entries
+ ; use offsets of SIZEOF__InterfaceInfo_t to get at entry 1, 2, 3 in this
+ ; block. If we make it through the full 4 without a hit we'll move to
+ ; the next block of 4 and try again.
+
+ ; unroll 0
+ifdef FEATURE_PREJIT
+ mov r10, [rax + OFFSETOF__InterfaceInfo_t__m_pMethodTable]
+ FIX_INDIRECTION r10
+ cmp rcx, r10
+else
+ cmp rcx, [rax + OFFSETOF__InterfaceInfo_t__m_pMethodTable]
+endif
+ je Found
+ ; move to next entry in list
+ dec r11w
+ jz DoBizarre
+
+ ; unroll 1
+ifdef FEATURE_PREJIT
+ mov r10, [rax + SIZEOF__InterfaceInfo_t + OFFSETOF__InterfaceInfo_t__m_pMethodTable]
+ FIX_INDIRECTION r10
+ cmp rcx, r10
+else
+ cmp rcx, [rax + SIZEOF__InterfaceInfo_t + OFFSETOF__InterfaceInfo_t__m_pMethodTable]
+endif
+ je Found
+ ; move to next entry in list
+ dec r11w
+ jz DoBizarre
+
+ ; unroll 2
+ifdef FEATURE_PREJIT
+ mov r10, [rax + 2 * SIZEOF__InterfaceInfo_t + OFFSETOF__InterfaceInfo_t__m_pMethodTable]
+ FIX_INDIRECTION r10
+ cmp rcx, r10
+else
+ cmp rcx, [rax + 2 * SIZEOF__InterfaceInfo_t + OFFSETOF__InterfaceInfo_t__m_pMethodTable]
+endif
+ je Found
+ ; move to next entry in list
+ dec r11w
+ jz DoBizarre
+
+ ; unroll 3
+ifdef FEATURE_PREJIT
+ mov r10, [rax + 3 * SIZEOF__InterfaceInfo_t + OFFSETOF__InterfaceInfo_t__m_pMethodTable]
+ FIX_INDIRECTION r10
+ cmp rcx, r10
+else
+ cmp rcx, [rax + 3 * SIZEOF__InterfaceInfo_t + OFFSETOF__InterfaceInfo_t__m_pMethodTable]
+endif
+ je Found
+ ; move to next entry in list
+ dec r11w
+ jz DoBizarre
+
+ ; if we didn't find the entry in this loop jump to the next 4 entries in the map
+ add rax, 4 * SIZEOF__InterfaceInfo_t
+ jmp Top
+
+ DoBizarre:
+ mov rax, [rdx]
+ test dword ptr [rax + OFFSETOF__MethodTable__m_dwFlags], METHODTABLE_NONTRIVIALINTERFACECAST_FLAGS
+ jnz NonTrivialCast
+ xor rax,rax
+ ret
+
+ align 16
+ Found:
+ IsNullInst:
+ ; return the successful instance
+ mov rax, rdx
+ ret
+
+ NonTrivialCast:
+ jmp JITutil_IsInstanceOfInterface
+LEAF_END JIT_IsInstanceOfInterface, _TEXT
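+
+; Illustrative C-level shape of the scan above (sketch only; names as used in this file):
+;
+;   Object* JIT_IsInstanceOfInterface(MethodTable* pInterfaceMT, Object* pObject)
+;   {
+;       if (pObject == NULL)
+;           return NULL;
+;       MethodTable* pMT = pObject->GetMethodTable();
+;       InterfaceInfo_t* pMap = pMT->m_pInterfaceMap;
+;       for (unsigned i = 0; i < pMT->m_wNumInterfaces; i++)
+;           if (pMap[i].m_pMethodTable == pInterfaceMT)     // after FIX_INDIRECTION
+;               return pObject;
+;       if (pMT->m_dwFlags & METHODTABLE_NONTRIVIALINTERFACECAST_FLAGS)
+;           return JITutil_IsInstanceOfInterface(pInterfaceMT, pObject);    // slow path
+;       return NULL;
+;   }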
+
+; For all bizarre casts this quickly fails and falls back onto the JITutil_ChkCastInterface
+; helper; this means that all failure cases take the slow path as well.
+;
+; This can trash r10/r11
+LEAF_ENTRY JIT_ChkCastInterface, _TEXT
+ test rdx, rdx
+ jz IsNullInst
+
+ ; get methodtable
+ mov rax, [rdx]
+ mov r11w, word ptr [rax + OFFSETOF__MethodTable__m_wNumInterfaces]
+
+ ; speculatively fetch interface map ptr
+ mov rax, [rax + OFFSETOF__MethodTable__m_pInterfaceMap]
+
+ test r11w, r11w
+ jz DoBizarre
+
+ ; r11 holds number of interfaces
+ ; rax is pointer to beginning of interface map list
+ align 16
+ Top:
+ ; rax -> InterfaceInfo_t* into the interface map, aligned to 4 entries
+ ; use offsets of SIZEOF__InterfaceInfo_t to get at entry 1, 2, 3 in this
+ ; block. If we make it through the full 4 without a hit we'll move to
+ ; the next block of 4 and try again.
+
+ ; unroll 0
+ifdef FEATURE_PREJIT
+ mov r10, [rax + OFFSETOF__InterfaceInfo_t__m_pMethodTable]
+ FIX_INDIRECTION r10
+ cmp rcx, r10
+else
+ cmp rcx, [rax + OFFSETOF__InterfaceInfo_t__m_pMethodTable]
+endif
+ je Found
+ ; move to next entry in list
+ dec r11w
+ jz DoBizarre
+
+ ; unroll 1
+ifdef FEATURE_PREJIT
+ mov r10, [rax + SIZEOF__InterfaceInfo_t + OFFSETOF__InterfaceInfo_t__m_pMethodTable]
+ FIX_INDIRECTION r10
+ cmp rcx, r10
+else
+ cmp rcx, [rax + SIZEOF__InterfaceInfo_t + OFFSETOF__InterfaceInfo_t__m_pMethodTable]
+endif
+ je Found
+ ; move to next entry in list
+ dec r11w
+ jz DoBizarre
+
+ ; unroll 2
+ifdef FEATURE_PREJIT
+ mov r10, [rax + 2 * SIZEOF__InterfaceInfo_t + OFFSETOF__InterfaceInfo_t__m_pMethodTable]
+ FIX_INDIRECTION r10
+ cmp rcx, r10
+else
+ cmp rcx, [rax + 2 * SIZEOF__InterfaceInfo_t + OFFSETOF__InterfaceInfo_t__m_pMethodTable]
+endif
+ je Found
+ ; move to next entry in list
+ dec r11w
+ jz DoBizarre
+
+ ; unroll 3
+ifdef FEATURE_PREJIT
+ mov r10, [rax + 3 * SIZEOF__InterfaceInfo_t + OFFSETOF__InterfaceInfo_t__m_pMethodTable]
+ FIX_INDIRECTION r10
+ cmp rcx, r10
+else
+ cmp rcx, [rax + 3 * SIZEOF__InterfaceInfo_t + OFFSETOF__InterfaceInfo_t__m_pMethodTable]
+endif
+ je Found
+ ; move to next entry in list
+ dec r11w
+ jz DoBizarre
+
+ ; if we didn't find the entry in this loop jump to the next 4 entries in the map
+ add rax, 4 * SIZEOF__InterfaceInfo_t
+ jmp Top
+
+ DoBizarre:
+ jmp JITutil_ChkCastInterface
+
+ align 16
+ Found:
+ IsNullInst:
+ ; return either NULL or the successful instance
+ mov rax, rdx
+ ret
+LEAF_END JIT_ChkCastInterface, _TEXT
+
+; There is an even more optimized version of these helpers possible which takes
+; advantage of knowledge of which way the ephemeral heap is growing to do only half
+; of that check (this is more significant in the JIT_WriteBarrier case).
+;
+; Additionally we can look into providing helpers which will take the src/dest from
+; specific registers (like x86) which _could_ (??) make for easier register allocation
+; for the JIT64, however it might lead to having to have some nasty code that treats
+; these guys really special like... :(.
+;
+; Version that does the move, checks whether or not it's in the GC heap and whether
+; or not its card needs to be updated
+;
+; void JIT_CheckedWriteBarrier(Object** dst, Object* src)
+LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT
+
+ ; When WRITE_BARRIER_CHECK is defined _NotInHeap will write the reference
+ ; but if it isn't then it will just return.
+ ;
+ ; See if this is in GCHeap
+ cmp rcx, [g_lowest_address]
+ jb NotInHeap
+ cmp rcx, [g_highest_address]
+ jnb NotInHeap
+
+ jmp JIT_WriteBarrier
+
+ NotInHeap:
+ ; See the comment in JIT_WriteBarrier below about why it is acceptable to take an AV here
+ mov [rcx], rdx
+ ret
+LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT
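+
+; Illustrative C-level sketch of the checked barrier above (illustrative only;
+; global names as used in this file):
+;
+;   void JIT_CheckedWriteBarrier(Object** dst, Object* src)
+;   {
+;       if ((size_t)dst < g_lowest_address || (size_t)dst >= g_highest_address)
+;       {
+;           *dst = src;                 // destination is not in the GC heap, plain store
+;           return;
+;       }
+;       JIT_WriteBarrier(dst, src);     // in-heap store goes through the full barrier
+;   }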
+
+; Mark start of the code region that we patch at runtime
+LEAF_ENTRY JIT_PatchedCodeStart, _TEXT
+ ret
+LEAF_END JIT_PatchedCodeStart, _TEXT
+
+
+; This is used by the mechanism to hold either the JIT_WriteBarrier_PreGrow
+; or JIT_WriteBarrier_PostGrow code (depending on the state of the GC). It _WILL_
+; change at runtime as the GC changes. Initially it should simply be a copy of the
+; larger of the two functions (JIT_WriteBarrier_PostGrow) to ensure we have created
+; enough space to copy that code in.
+LEAF_ENTRY JIT_WriteBarrier, _TEXT
+ align 16
+
+ifdef _DEBUG
+ ; In debug builds, this just contains a jump to the debug version of the write barrier by default
+ jmp JIT_WriteBarrier_Debug
+endif
+
+ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ ; JIT_WriteBarrier_WriteWatch_PostGrow64
+
+ ; Regarding patchable constants:
+ ; - 64-bit constants have to be loaded into a register
+ ; - The constants have to be aligned to 8 bytes so that they can be patched easily
+ ; - The constant loads have been located to minimize NOP padding required to align the constants
+ ; - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special
+ ; non-volatile calling convention, this should be changed to use just one register.
+
+ ; Do the move into the GC heap. It is correct to take an AV here; the EH code
+ ; figures out that this came from a WriteBarrier and correctly maps it back
+ ; to the managed method which called the WriteBarrier (see setup in
+ ; InitializeExceptionHandling, vm\exceptionhandling.cpp).
+ mov [rcx], rdx
+
+ ; Update the write watch table if necessary
+ mov rax, rcx
+ mov r8, 0F0F0F0F0F0F0F0F0h
+ shr rax, 0Ch ; SoftwareWriteWatch::AddressToTableByteIndexShift
+ NOP_2_BYTE ; padding for alignment of constant
+ mov r9, 0F0F0F0F0F0F0F0F0h
+ add rax, r8
+ cmp byte ptr [rax], 0h
+ jne CheckCardTable
+ mov byte ptr [rax], 0FFh
+
+ NOP_3_BYTE ; padding for alignment of constant
+
+ ; Check the lower and upper ephemeral region bounds
+ CheckCardTable:
+ cmp rdx, r9
+ jb Exit
+
+ NOP_3_BYTE ; padding for alignment of constant
+
+ mov r8, 0F0F0F0F0F0F0F0F0h
+
+ cmp rdx, r8
+ jae Exit
+
+ nop ; padding for alignment of constant
+
+ mov rax, 0F0F0F0F0F0F0F0F0h
+
+ ; Touch the card table entry, if not already dirty.
+ shr rcx, 0Bh
+ cmp byte ptr [rcx + rax], 0FFh
+ jne UpdateCardTable
+ REPRET
+
+ UpdateCardTable:
+ mov byte ptr [rcx + rax], 0FFh
+ ret
+
+ align 16
+ Exit:
+ REPRET
+else
+ ; JIT_WriteBarrier_PostGrow64
+
+ ; Do the move into the GC heap. It is correct to take an AV here; the EH code
+ ; figures out that this came from a WriteBarrier and correctly maps it back
+ ; to the managed method which called the WriteBarrier (see setup in
+ ; InitializeExceptionHandling, vm\exceptionhandling.cpp).
+ mov [rcx], rdx
+
+ NOP_3_BYTE ; padding for alignment of constant
+
+ ; Can't compare against a 64-bit immediate, so we have to move the values into a
+ ; register. The values of these immediates will be patched at runtime.
+ ; By using two registers we can pipeline better. Should we decide to use
+ ; a special non-volatile calling convention, this should be changed to
+ ; just one.
+
+ mov rax, 0F0F0F0F0F0F0F0F0h
+
+ ; Check the lower and upper ephemeral region bounds
+ cmp rdx, rax
+ jb Exit
+
+ nop ; padding for alignment of constant
+
+ mov r8, 0F0F0F0F0F0F0F0F0h
+
+ cmp rdx, r8
+ jae Exit
+
+ nop ; padding for alignment of constant
+
+ mov rax, 0F0F0F0F0F0F0F0F0h
+
+ ; Touch the card table entry, if not already dirty.
+ shr rcx, 0Bh
+ cmp byte ptr [rcx + rax], 0FFh
+ jne UpdateCardTable
+ REPRET
+
+ UpdateCardTable:
+ mov byte ptr [rcx + rax], 0FFh
+ ret
+
+ align 16
+ Exit:
+ REPRET
+endif
+
+ ; make sure this guy is bigger than any of the other guys
+ align 16
+ nop
+LEAF_END_MARKED JIT_WriteBarrier, _TEXT
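+
+; Illustrative C-level sketch of the (PostGrow, non-write-watch) barrier above.
+; The 0F0F0F0F0F0F0F0F0h immediates are placeholders that get patched at runtime
+; with the ephemeral range bounds and the card table address (sketch only):
+;
+;   void JIT_WriteBarrier(Object** dst, Object* src)
+;   {
+;       *dst = src;
+;       if ((size_t)src < g_ephemeral_low || (size_t)src >= g_ephemeral_high)
+;           return;                                  // source is not ephemeral
+;       BYTE* pCard = g_card_table + ((size_t)dst >> 11);
+;       if (*pCard != 0xFF)
+;           *pCard = 0xFF;                           // dirty the card
+;   }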
+
+ifndef FEATURE_IMPLICIT_TLS
+LEAF_ENTRY GetThread, _TEXT
+ ; the default implementation will just jump to one that returns null until
+ ; MakeOptimizedTlsGetter is run which will overwrite this with the actual
+ ; implementation.
+ jmp short GetTLSDummy
+
+ ;
+ ; insert enough padding to be able to hold the largest optimized TLS getter
+ ; that we might need; it is important that the TLS getter doesn't overwrite
+ ; the dummy getter.
+ ;
+ db (TLS_GETTER_MAX_SIZE_ASM - 2) DUP (0CCh)
+
+LEAF_END GetThread, _TEXT
+
+LEAF_ENTRY GetAppDomain, _TEXT
+ ; the default implementation will just jump to one that returns null until
+ ; MakeOptimizedTlsGetter is run which will overwrite this with the actual
+ ; implementation.
+ jmp short GetTLSDummy
+
+ ;
+ ; insert enough padding to be able to hold the largest optimized TLS getter
+ ; that we might need; it is important that the TLS getter doesn't overwrite
+ ; the dummy getter.
+ ;
+ db (TLS_GETTER_MAX_SIZE_ASM - 2) DUP (0CCh)
+
+LEAF_END GetAppDomain, _TEXT
+
+LEAF_ENTRY GetTLSDummy, _TEXT
+ xor rax, rax
+ ret
+LEAF_END GetTLSDummy, _TEXT
+
+LEAF_ENTRY ClrFlsGetBlock, _TEXT
+ ; the default implementation will just jump to one that returns null until
+ ; MakeOptimizedTlsGetter is run which will overwrite this with the actual
+ ; implementation.
+ jmp short GetTLSDummy
+
+ ;
+ ; insert enough padding to be able to hold the largest optimized TLS getter
+ ; that we might need; it is important that the TLS getter doesn't overwrite
+ ; the dummy getter.
+ ;
+ db (TLS_GETTER_MAX_SIZE_ASM - 2) DUP (0CCh)
+
+LEAF_END ClrFlsGetBlock, _TEXT
+endif
+
+; Mark end of the code region that we patch at runtime
+LEAF_ENTRY JIT_PatchedCodeLast, _TEXT
+ ret
+LEAF_END JIT_PatchedCodeLast, _TEXT
+
+; JIT_ByRefWriteBarrier has unusual semantics, see usage in StubLinkerX86.cpp
+;
+; Entry:
+; RDI - address of ref-field (assigned to)
+; RSI - address of the data (source)
+; RCX is trashed
+; RAX is trashed when FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP is defined
+; Exit:
+; RDI, RSI are incremented by SIZEOF(LPVOID)
+LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT
+ mov rcx, [rsi]
+
+; If !WRITE_BARRIER_CHECK do the write first, otherwise we might have to do some ShadowGC stuff
+ifndef WRITE_BARRIER_CHECK
+ ; rcx is [rsi]
+ mov [rdi], rcx
+endif
+
+ ; When WRITE_BARRIER_CHECK is defined _NotInHeap will write the reference
+ ; but if it isn't then it will just return.
+ ;
+ ; See if this is in GCHeap
+ cmp rdi, [g_lowest_address]
+ jb NotInHeap
+ cmp rdi, [g_highest_address]
+ jnb NotInHeap
+
+ifdef WRITE_BARRIER_CHECK
+ ; we can only trash rcx in this function so in _DEBUG we need to save
+ ; some scratch registers.
+ push r10
+ push r11
+ push rax
+
+ ; **ALSO update the shadow GC heap if that is enabled**
+ ; Do not perform the work if g_GCShadow is 0
+ cmp g_GCShadow, 0
+ je NoShadow
+
+ ; If we end up outside of the heap don't corrupt random memory
+ mov r10, rdi
+ sub r10, [g_lowest_address]
+ jb NoShadow
+
+ ; Check that our adjusted destination is somewhere in the shadow gc
+ add r10, [g_GCShadow]
+ cmp r10, [g_GCShadowEnd]
+ ja NoShadow
+
+ ; Write ref into real GC
+ mov [rdi], rcx
+ ; Write ref into shadow GC
+ mov [r10], rcx
+
+ ; Ensure that the write to the shadow heap occurs before the read from
+ ; the GC heap so that race conditions are caught by INVALIDGCVALUE
+ mfence
+
+ ; Check that GC/ShadowGC values match
+ mov r11, [rdi]
+ mov rax, [r10]
+ cmp rax, r11
+ je DoneShadow
+ mov r11, INVALIDGCVALUE
+ mov [r10], r11
+
+ jmp DoneShadow
+
+ ; If we don't have a shadow GC we won't have done the write yet
+ NoShadow:
+ mov [rdi], rcx
+
+ ; If we had a shadow GC then we already wrote to the real GC at the same time
+ ; as the shadow GC so we want to jump over the real write immediately above.
+ ; Additionally we know for sure that we are inside the heap and therefore don't
+ ; need to replicate the above checks.
+ DoneShadow:
+ pop rax
+ pop r11
+ pop r10
+endif
+
+ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ ; Update the write watch table if necessary
+ cmp byte ptr [g_sw_ww_enabled_for_gc_heap], 0h
+ je CheckCardTable
+ mov rax, rdi
+ shr rax, 0Ch ; SoftwareWriteWatch::AddressToTableByteIndexShift
+ add rax, qword ptr [g_sw_ww_table]
+ cmp byte ptr [rax], 0h
+ jne CheckCardTable
+ mov byte ptr [rax], 0FFh
+endif
+
+ ; See if we can just quick out
+ CheckCardTable:
+ cmp rcx, [g_ephemeral_low]
+ jb Exit
+ cmp rcx, [g_ephemeral_high]
+ jnb Exit
+
+ ; move current rdi value into rcx and then increment the pointers
+ mov rcx, rdi
+ add rsi, 8h
+ add rdi, 8h
+
+ ; Check if we need to update the card table
+ ; Calc pCardByte
+ shr rcx, 0Bh
+ add rcx, [g_card_table]
+
+ ; Check if this card is dirty
+ cmp byte ptr [rcx], 0FFh
+ jne UpdateCardTable
+ REPRET
+
+ UpdateCardTable:
+ mov byte ptr [rcx], 0FFh
+ ret
+
+ align 16
+ NotInHeap:
+; If WRITE_BARRIER_CHECK then we won't have already done the mov and should do it here
+; If !WRITE_BARRIER_CHECK we want _NotInHeap and _Leave to be the same and have both
+; 16 byte aligned.
+ifdef WRITE_BARRIER_CHECK
+ ; rcx is [rsi]
+ mov [rdi], rcx
+endif
+ Exit:
+ ; Increment the pointers before leaving
+ add rdi, 8h
+ add rsi, 8h
+ ret
+LEAF_END JIT_ByRefWriteBarrier, _TEXT
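+
+; Illustrative C-level sketch of the by-ref barrier above (sketch only; the
+; WRITE_BARRIER_CHECK shadow-heap logic and the write watch update are omitted):
+;
+;   // rdi = address of the destination ref-field, rsi = address of the source;
+;   // both are advanced by sizeof(LPVOID) on exit
+;   void JIT_ByRefWriteBarrier()
+;   {
+;       Object* val = *(Object**)rsi;
+;       *(Object**)rdi = val;
+;       if ((size_t)rdi >= g_lowest_address && (size_t)rdi < g_highest_address &&
+;           (size_t)val >= g_ephemeral_low  && (size_t)val < g_ephemeral_high)
+;       {
+;           BYTE* pCard = g_card_table + ((size_t)rdi >> 11);
+;           if (*pCard != 0xFF)
+;               *pCard = 0xFF;
+;       }
+;       rdi += 8; rsi += 8;
+;   }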
+
+
+g_pObjectClass equ ?g_pObjectClass@@3PEAVMethodTable@@EA
+
+EXTERN g_pObjectClass:qword
+extern ArrayStoreCheck:proc
+extern ObjIsInstanceOfNoGC:proc
+
+; TODO: put definition for this in asmconstants.h
+CanCast equ 1
+
+;__declspec(naked) void F_CALL_CONV JIT_Stelem_Ref(PtrArray* array, unsigned idx, Object* val)
+LEAF_ENTRY JIT_Stelem_Ref, _TEXT
+ ; check for null PtrArray*
+ test rcx, rcx
+ je ThrowNullReferenceException
+
+ ; we only want the lower 32 bits of the index; writing edx zeroes the dirty upper half of rdx
+ or edx, edx
+
+ ; check that index is in bounds
+ cmp edx, dword ptr [rcx + OFFSETOF__PtrArray__m_NumComponents] ; 8h -> array size offset
+ jae ThrowIndexOutOfRangeException
+
+ ; r10 = Array MT
+ mov r10, [rcx]
+
+ ; if we're assigning a null object* then we don't need a write barrier
+ test r8, r8
+ jz AssigningNull
+
+ifdef CHECK_APP_DOMAIN_LEAKS
+ ; get Array TypeHandle
+ mov r9, [r10 + OFFSETOF__MethodTable__m_ElementType] ; 10h -> typehandle offset
+ ; check for non-MT
+ test r9, 2
+ jnz NoCheck
+
+ ; Check VMflags of element type
+ mov r9, [r9 + OFFSETOF__MethodTable__m_pEEClass]
+ mov r9d, dword ptr [r9 + OFFSETOF__EEClass__m_wAuxFlags]
+ test r9d, EEClassFlags
+ jnz JIT_Stelem_Ref__ArrayStoreCheck_Helper
+
+ NoCheck:
+endif
+
+ mov r9, [r10 + OFFSETOF__MethodTable__m_ElementType] ; 10h -> typehandle offset
+
+ ; check for exact match
+ cmp r9, [r8]
+ jne NotExactMatch
+
+ DoWrite:
+ lea rcx, [rcx + 8*rdx + OFFSETOF__PtrArray__m_Array]
+ mov rdx, r8
+
+ ; JIT_WriteBarrier(Object** dst, Object* src)
+ jmp JIT_WriteBarrier
+
+ AssigningNull:
+ ; write barrier is not needed for assignment of NULL references
+ mov [rcx + 8*rdx + OFFSETOF__PtrArray__m_Array], r8
+ ret
+
+ NotExactMatch:
+ cmp r9, [g_pObjectClass]
+ je DoWrite
+
+ jmp JIT_Stelem_Ref__ObjIsInstanceOfNoGC_Helper
+
+ ThrowNullReferenceException:
+ mov rcx, CORINFO_NullReferenceException_ASM
+ jmp JIT_InternalThrow
+
+ ThrowIndexOutOfRangeException:
+ mov rcx, CORINFO_IndexOutOfRangeException_ASM
+ jmp JIT_InternalThrow
+LEAF_END JIT_Stelem_Ref, _TEXT
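+
+; Illustrative C-level shape of the fast path above (sketch only; names as used in this file):
+;
+;   void JIT_Stelem_Ref(PtrArray* pArray, unsigned idx, Object* val)
+;   {
+;       if (pArray == NULL)                  throw NullReferenceException;
+;       if (idx >= pArray->m_NumComponents)  throw IndexOutOfRangeException;
+;       if (val == NULL)                     { pArray->m_Array[idx] = NULL; return; }
+;       TypeHandle elemTH = pArray->GetMethodTable()->m_ElementType;
+;       if (elemTH == val->GetMethodTable() || elemTH == g_pObjectClass)
+;           JIT_WriteBarrier(&pArray->m_Array[idx], val);   // exact or Object[] match
+;       else
+;           /* slow path: ObjIsInstanceOfNoGC, then ArrayStoreCheck if needed */ ;
+;   }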
+
+NESTED_ENTRY JIT_Stelem_Ref__ObjIsInstanceOfNoGC_Helper, _TEXT
+ alloc_stack MIN_SIZE
+ save_reg_postrsp rcx, MIN_SIZE + 8h
+ save_reg_postrsp rdx, MIN_SIZE + 10h
+ save_reg_postrsp r8, MIN_SIZE + 18h
+ END_PROLOGUE
+
+ ; need to get TypeHandle before setting rcx to be the Obj* because that trashes the PtrArray*
+ mov rdx, r9
+ mov rcx, r8
+
+ ; TypeHandle::CastResult ObjIsInstanceOfNoGC(Object *pElement, TypeHandle toTypeHnd)
+ call ObjIsInstanceOfNoGC
+
+ mov rcx, [rsp + MIN_SIZE + 8h]
+ mov rdx, [rsp + MIN_SIZE + 10h]
+ mov r8, [rsp + MIN_SIZE + 18h]
+
+ cmp eax, CanCast
+ jne NeedCheck
+
+ lea rcx, [rcx + 8*rdx + OFFSETOF__PtrArray__m_Array]
+ mov rdx, r8
+ add rsp, MIN_SIZE
+
+ ; JIT_WriteBarrier(Object** dst, Object* src)
+ jmp JIT_WriteBarrier
+
+ NeedCheck:
+ add rsp, MIN_SIZE
+ jmp JIT_Stelem_Ref__ArrayStoreCheck_Helper
+NESTED_END JIT_Stelem_Ref__ObjIsInstanceOfNoGC_Helper, _TEXT
+
+; Need to save r8 to provide a stack address for the Object*
+NESTED_ENTRY JIT_Stelem_Ref__ArrayStoreCheck_Helper, _TEXT
+ alloc_stack MIN_SIZE
+ save_reg_postrsp rcx, MIN_SIZE + 8h
+ save_reg_postrsp rdx, MIN_SIZE + 10h
+ save_reg_postrsp r8, MIN_SIZE + 18h
+ END_PROLOGUE
+
+ lea rcx, [rsp + MIN_SIZE + 18h]
+ lea rdx, [rsp + MIN_SIZE + 8h]
+
+ ; HCIMPL2(FC_INNER_RET, ArrayStoreCheck, Object** pElement, PtrArray** pArray)
+ call ArrayStoreCheck
+
+ mov rcx, [rsp + MIN_SIZE + 8h]
+ mov rdx, [rsp + MIN_SIZE + 10h]
+ mov r8, [rsp + MIN_SIZE + 18h]
+
+ lea rcx, [rcx + 8*rdx + OFFSETOF__PtrArray__m_Array]
+ mov rdx, r8
+ add rsp, MIN_SIZE
+
+ ; JIT_WriteBarrier(Object** dst, Object* src)
+ jmp JIT_WriteBarrier
+
+NESTED_END JIT_Stelem_Ref__ArrayStoreCheck_Helper, _TEXT
+
+
+extern JIT_FailFast:proc
+extern s_gsCookie:qword
+
+OFFSETOF_GSCOOKIE equ 0h
+OFFSETOF_FRAME equ OFFSETOF_GSCOOKIE + \
+ 8h
+
+;
+; incoming:
+;
+; rsp -> return address
+; :
+;
+; Stack Layout:
+;
+; rsp-> callee scratch
+; + 8h callee scratch
+; +10h callee scratch
+; +18h callee scratch
+; :
+; stack arguments
+; :
+; r13-> gsCookie
+; + 8h __VFN_table
+; +10h m_Next
+; +18h m_pGCLayout
+; +20h m_padding
+; +28h m_rdi
+; +30h m_rsi
+; +38h m_rbx
+; +40h m_rbp
+; +48h m_r12
+; +50h m_r13
+; +58h m_r14
+; +60h m_r15
+; +68h m_ReturnAddress
+; r12 -> // Caller's SP
+;
+; r14 = GetThread();
+; r15 = GetThread()->GetFrame(); // For restoring/popping the frame
+;
+NESTED_ENTRY TailCallHelperStub, _TEXT
+ PUSH_CALLEE_SAVED_REGISTERS
+
+ alloc_stack 48h ; m_padding, m_pGCLayout, m_Next, __VFN_table, gsCookie, outgoing shadow area
+
+ set_frame r13, 20h
+ END_PROLOGUE
+
+ ;
+ ; This part is never executed, but we keep it here for reference
+ ;
+ int 3
+
+if 0 ne 0
+ ; Save the caller's SP
+ mov r12, rsp + ...
+
+ ;
+ ; fully initialize the TailCallFrame
+ ;
+ call TCF_GETMETHODFRAMEVPTR
+ mov [r13 + OFFSETOF_FRAME], rax
+
+ mov rax, s_gsCookie
+ mov [r13 + OFFSETOF_GSCOOKIE], rax
+
+ ;
+ ; link the TailCallFrame
+ ;
+ CALL_GETTHREAD
+ mov r14, rax
+ mov r15, [rax + OFFSETOF__Thread__m_pFrame]
+ mov [r13 + OFFSETOF_FRAME + OFFSETOF__Frame__m_Next], r15
+ lea r10, [r13 + OFFSETOF_FRAME]
+ mov [rax + OFFSETOF__Thread__m_pFrame], r10
+endif
+
+ ; the pretend call would be here
+ ; with the return address pointing to this real epilog
+
+PATCH_LABEL JIT_TailCallHelperStub_ReturnAddress
+
+ ; our epilog (which also unlinks the TailCallFrame)
+
+ifdef _DEBUG
+ mov rcx, s_gsCookie
+ cmp [r13 + OFFSETOF_GSCOOKIE], rcx
+ je GoodGSCookie
+ call JIT_FailFast
+GoodGSCookie:
+endif ; _DEBUG
+
+ ;
+ ; unlink the TailCallFrame
+ ;
+ mov [r14 + OFFSETOF__Thread__m_pFrame], r15
+
+ ;
+ ; epilog
+ ;
+
+ lea rsp, [r13 + 28h]
+ POP_CALLEE_SAVED_REGISTERS
+ ret
+
+NESTED_END TailCallHelperStub, _TEXT
+
+ end
+