diff options
Diffstat (limited to 'src/vm/amd64/jithelpers_fast.S')
-rw-r--r-- | src/vm/amd64/jithelpers_fast.S | 473 |
1 file changed, 473 insertions, 0 deletions
diff --git a/src/vm/amd64/jithelpers_fast.S b/src/vm/amd64/jithelpers_fast.S new file mode 100644 index 0000000000..8076655ad9 --- /dev/null +++ b/src/vm/amd64/jithelpers_fast.S @@ -0,0 +1,473 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +.intel_syntax noprefix +#include "unixasmmacros.inc" +#include "asmconstants.h" + +// Mark start of the code region that we patch at runtime +LEAF_ENTRY JIT_PatchedCodeStart, _TEXT + ret +LEAF_END JIT_PatchedCodeStart, _TEXT + + +// There is an even more optimized version of these helpers possible which takes +// advantage of knowledge of which way the ephemeral heap is growing to only do 1/2 +// that check (this is more significant in the JIT_WriteBarrier case). +// +// Additionally we can look into providing helpers which will take the src/dest from +// specific registers (like x86) which _could_ (??) make for easier register allocation +// for the JIT64, however it might lead to having to have some nasty code that treats +// these guys really special like... :(. +// +// Version that does the move, checks whether or not it's in the GC and whether or not +// it needs to have its card updated +// +// void JIT_CheckedWriteBarrier(Object** dst, Object* src) +LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT + + // When WRITE_BARRIER_CHECK is defined _NotInHeap will write the reference + // but if it isn't then it will just return. 
+ // + // See if this is in GCHeap + PREPARE_EXTERNAL_VAR g_lowest_address, rax + cmp rdi, [rax] + // jb NotInHeap + .byte 0x72, 0x0e + PREPARE_EXTERNAL_VAR g_highest_address, rax + cmp rdi, [rax] + // jnb NotInHeap + .byte 0x73, 0x02 + + // call C_FUNC(JIT_WriteBarrier) + .byte 0xeb, 0x05 + + NotInHeap: + // See comment above about possible AV + mov [rdi], rsi + ret +LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT + + +// This is used by the mechanism to hold either the JIT_WriteBarrier_PreGrow +// or JIT_WriteBarrier_PostGrow code (depending on the state of the GC). It _WILL_ +// change at runtime as the GC changes. Initially it should simply be a copy of the +// larger of the two functions (JIT_WriteBarrier_PostGrow) to ensure we have created +// enough space to copy that code in. +.balign 16 +LEAF_ENTRY JIT_WriteBarrier, _TEXT +#ifdef _DEBUG + // In debug builds, this just contains jump to the debug version of the write barrier by default + jmp C_FUNC(JIT_WriteBarrier_Debug) +#endif + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + // JIT_WriteBarrier_WriteWatch_PostGrow64 + + // Regarding patchable constants: + // - 64-bit constants have to be loaded into a register + // - The constants have to be aligned to 8 bytes so that they can be patched easily + // - The constant loads have been located to minimize NOP padding required to align the constants + // - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special + // non-volatile calling convention, this should be changed to use just one register. + + // Do the move into the GC . It is correct to take an AV here, the EH code + // figures out that this came from a WriteBarrier and correctly maps it back + // to the managed method which called the WriteBarrier (see setup in + // InitializeExceptionHandling, vm\exceptionhandling.cpp). 
+ mov [rdi], rsi + + // Update the write watch table if necessary + mov rax, rdi + movabs r10, 0xF0F0F0F0F0F0F0F0 + shr rax, 0Ch // SoftwareWriteWatch::AddressToTableByteIndexShift + NOP_2_BYTE // padding for alignment of constant + movabs r11, 0xF0F0F0F0F0F0F0F0 + add rax, r10 + cmp byte ptr [rax], 0h + .byte 0x75, 0x06 + // jne CheckCardTable + mov byte ptr [rax], 0FFh + + NOP_3_BYTE // padding for alignment of constant + + // Check the lower and upper ephemeral region bounds + CheckCardTable: + cmp rsi, r11 + .byte 0x72,0x3D + // jb Exit + + NOP_3_BYTE // padding for alignment of constant + + movabs r10, 0xF0F0F0F0F0F0F0F0 + + cmp rsi, r10 + .byte 0x73,0x2B + // jae Exit + + nop // padding for alignment of constant + + movabs rax, 0xF0F0F0F0F0F0F0F0 + + // Touch the card table entry, if not already dirty. + shr rdi, 0x0B + cmp byte ptr [rdi + rax], 0FFh + .byte 0x75, 0x02 + // jne UpdateCardTable + REPRET + + UpdateCardTable: + mov byte ptr [rdi + rax], 0FFh + ret + + .balign 16 + Exit: + REPRET +#else + // JIT_WriteBarrier_PostGrow64 + + // Do the move into the GC . It is correct to take an AV here, the EH code + // figures out that this came from a WriteBarrier and correctly maps it back + // to the managed method which called the WriteBarrier (see setup in + // InitializeExceptionHandling, vm\exceptionhandling.cpp). + mov [rdi], rsi + + NOP_3_BYTE // padding for alignment of constant + + // Can't compare a 64 bit immediate, so we have to move them into a + // register. Values of these immediates will be patched at runtime. + // By using two registers we can pipeline better. Should we decide to use + // a special non-volatile calling convention, this should be changed to + // just one. 
+ + movabs rax, 0xF0F0F0F0F0F0F0F0 + + // Check the lower and upper ephemeral region bounds + cmp rsi, rax + // jb Exit + .byte 0x72, 0x36 + + nop // padding for alignment of constant + + movabs r8, 0xF0F0F0F0F0F0F0F0 + + cmp rsi, r8 + // jae Exit + .byte 0x73, 0x26 + + nop // padding for alignment of constant + + movabs rax, 0xF0F0F0F0F0F0F0F0 + + // Touch the card table entry, if not already dirty. + shr rdi, 0Bh + cmp byte ptr [rdi + rax], 0FFh + // jne UpdateCardTable + .byte 0x75, 0x02 + REPRET + + UpdateCardTable: + mov byte ptr [rdi + rax], 0FFh + ret + + .balign 16 + Exit: + REPRET +#endif + + // make sure this guy is bigger than any of the other guys + .balign 16 + nop +LEAF_END_MARKED JIT_WriteBarrier, _TEXT + +// Mark end of the code region that we patch at runtime +LEAF_ENTRY JIT_PatchedCodeLast, _TEXT + ret +LEAF_END JIT_PatchedCodeLast, _TEXT + +// JIT_ByRefWriteBarrier has weird semantics, see usage in StubLinkerX86.cpp +// +// Entry: +// RDI - address of ref-field (assigned to) +// RSI - address of the data (source) +// +// Note: RyuJIT assumes that all volatile registers can be trashed by +// the CORINFO_HELP_ASSIGN_BYREF helper (i.e. JIT_ByRefWriteBarrier). +// The precise set is defined by RBM_CALLEE_TRASH. +// +// RCX is trashed +// RAX is trashed +// R10 is trashed +// R11 is trashed on Debug build +// Exit: +// RDI, RSI are incremented by SIZEOF(LPVOID) +LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT + mov rcx, [rsi] + +// If !WRITE_BARRIER_CHECK do the write first, otherwise we might have to do some ShadowGC stuff +#ifndef WRITE_BARRIER_CHECK + // rcx is [rsi] + mov [rdi], rcx +#endif + + // When WRITE_BARRIER_CHECK is defined _NotInHeap will write the reference + // but if it isn't then it will just return. 
+ // + // See if this is in GCHeap + PREPARE_EXTERNAL_VAR g_lowest_address, rax + cmp rdi, [rax] + jb NotInHeap_ByRefWriteBarrier + PREPARE_EXTERNAL_VAR g_highest_address, rax + cmp rdi, [rax] + jnb NotInHeap_ByRefWriteBarrier + +#ifdef WRITE_BARRIER_CHECK + // **ALSO update the shadow GC heap if that is enabled** + // Do not perform the work if g_GCShadow is 0 + PREPARE_EXTERNAL_VAR g_GCShadow, rax + cmp qword ptr [rax], 0 + je NoShadow_ByRefWriteBarrier + + // If we end up outside of the heap don't corrupt random memory + mov r10, rdi + PREPARE_EXTERNAL_VAR g_lowest_address, rax + sub r10, [rax] + jb NoShadow_ByRefWriteBarrier + + // Check that our adjusted destination is somewhere in the shadow gc + PREPARE_EXTERNAL_VAR g_GCShadow, rax + add r10, [rax] + PREPARE_EXTERNAL_VAR g_GCShadowEnd, rax + cmp r10, [rax] + ja NoShadow_ByRefWriteBarrier + + // Write ref into real GC + mov [rdi], rcx + // Write ref into shadow GC + mov [r10], rcx + + // Ensure that the write to the shadow heap occurs before the read from + // the GC heap so that race conditions are caught by INVALIDGCVALUE + mfence + + // Check that GC/ShadowGC values match + mov r11, [rdi] + mov rax, [r10] + cmp rax, r11 + je DoneShadow_ByRefWriteBarrier + movabs r11, INVALIDGCVALUE + mov [r10], r11 + + jmp DoneShadow_ByRefWriteBarrier + + // If we don't have a shadow GC we won't have done the write yet + NoShadow_ByRefWriteBarrier: + mov [rdi], rcx + + // If we had a shadow GC then we already wrote to the real GC at the same time + // as the shadow GC so we want to jump over the real write immediately above. + // Additionally we know for sure that we are inside the heap and therefore don't + // need to replicate the above checks. 
+ DoneShadow_ByRefWriteBarrier: +#endif + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + // Update the write watch table if necessary + PREPARE_EXTERNAL_VAR g_sw_ww_enabled_for_gc_heap, rax + cmp byte ptr [rax], 0h + je CheckCardTable_ByRefWriteBarrier + mov rax, rdi + shr rax, 0Ch // SoftwareWriteWatch::AddressToTableByteIndexShift + PREPARE_EXTERNAL_VAR g_sw_ww_table, r10 + add rax, qword ptr [r10] + cmp byte ptr [rax], 0h + jne CheckCardTable_ByRefWriteBarrier + mov byte ptr [rax], 0FFh +#endif + + CheckCardTable_ByRefWriteBarrier: + // See if we can just quick out + PREPARE_EXTERNAL_VAR g_ephemeral_low, rax + cmp rcx, [rax] + jb Exit_ByRefWriteBarrier + PREPARE_EXTERNAL_VAR g_ephemeral_high, rax + cmp rcx, [rax] + jnb Exit_ByRefWriteBarrier + + // move current rdi value into rcx and then increment the pointers + mov rcx, rdi + add rsi, 8h + add rdi, 8h + + // Check if we need to update the card table + // Calc pCardByte + shr rcx, 0x0B + PREPARE_EXTERNAL_VAR g_card_table, rax + add rcx, [rax] + + // Check if this card is dirty + cmp byte ptr [rcx], 0FFh + jne UpdateCardTable_ByRefWriteBarrier + REPRET + + UpdateCardTable_ByRefWriteBarrier: + mov byte ptr [rcx], 0FFh + ret + + .balign 16 + NotInHeap_ByRefWriteBarrier: +// If WRITE_BARRIER_CHECK then we won't have already done the mov and should do it here +// If !WRITE_BARRIER_CHECK we want _NotInHeap and _Leave to be the same and have both +// 16 byte aligned. 
+#ifdef WRITE_BARRIER_CHECK + // rcx is [rsi] + mov [rdi], rcx +#endif + Exit_ByRefWriteBarrier: + // Increment the pointers before leaving + add rdi, 8h + add rsi, 8h + ret +LEAF_END JIT_ByRefWriteBarrier, _TEXT + +// TODO: put definition for this in asmconstants.h +#define CanCast 1 + +//__declspec(naked) void F_CALL_CONV JIT_Stelem_Ref(PtrArray* array, unsigned idx, Object* val) +.balign 16 +LEAF_ENTRY JIT_Stelem_Ref, _TEXT + // check for null PtrArray* + test rdi, rdi + je LOCAL_LABEL(ThrowNullReferenceException) + + // we only want the lower 32-bits of rsi, it might be dirty + or esi, esi + + // check that index is in bounds + cmp esi, dword ptr [rdi + OFFSETOF__PtrArray__m_NumComponents] // 8h -> array size offset + jae LOCAL_LABEL(ThrowIndexOutOfRangeException) + + // r10 = Array MT + mov r10, [rdi] + + // if we're assigning a null object* then we don't need a write barrier + test rdx, rdx + jz LOCAL_LABEL(AssigningNull) + +#ifdef CHECK_APP_DOMAIN_LEAKS + // get Array TypeHandle + mov rcx, [r10 + OFFSETOF__MethodTable__m_ElementType] // 10h -> typehandle offset, + // check for non-MT + test rcx, 2 + jnz LOCAL_LABEL(NoCheck) + + // Check VMflags of element type + mov rcx, [rcx + OFFSETOF__MethodTable__m_pEEClass] + mov ecx, dword ptr [rcx + OFFSETOF__EEClass__m_wAuxFlags] + test ecx, EEClassFlags + jnz C_FUNC(ArrayStoreCheck_Helper) + + LOCAL_LABEL(NoCheck): +#endif + + mov rcx, [r10 + OFFSETOF__MethodTable__m_ElementType] // 10h -> typehandle offset + + // check for exact match + cmp rcx, [rdx] + jne LOCAL_LABEL(NotExactMatch) + + LOCAL_LABEL(DoWrite): + lea rdi, [rdi + 8*rsi] + add rdi, OFFSETOF__PtrArray__m_Array + mov rsi, rdx + + // JIT_WriteBarrier(Object** dst, Object* src) + jmp C_FUNC(JIT_WriteBarrier) + + LOCAL_LABEL(AssigningNull): + // write barrier is not needed for assignment of NULL references + mov [rdi + 8*rsi + OFFSETOF__PtrArray__m_Array], rdx + ret + + LOCAL_LABEL(NotExactMatch): + PREPARE_EXTERNAL_VAR g_pObjectClass, r11 + cmp rcx, [r11] 
+ je LOCAL_LABEL(DoWrite) + + jmp C_FUNC(JIT_Stelem_Ref__ObjIsInstanceOfNoGC_Helper) + + LOCAL_LABEL(ThrowNullReferenceException): + mov rdi, CORINFO_NullReferenceException_ASM + jmp C_FUNC(JIT_InternalThrow) + + LOCAL_LABEL(ThrowIndexOutOfRangeException): + mov rdi, CORINFO_IndexOutOfRangeException_ASM + jmp C_FUNC(JIT_InternalThrow) +LEAF_END JIT_Stelem_Ref, _TEXT + +LEAF_ENTRY JIT_Stelem_Ref__ObjIsInstanceOfNoGC_Helper, _TEXT + push_nonvol_reg rbp + mov rbp, rsp + set_cfa_register rbp, 16 + + sub rsp, 0x20 + mov [rbp - 0x08], rdi + mov [rbp - 0x10], rsi + mov [rbp - 0x18], rdx + + // need to get TypeHandle before setting rcx to be the Obj* because that trashes the PtrArray* + mov rsi, rcx + mov rdi, rdx + + // TypeHandle::CastResult ObjIsInstanceOfNoGC(Object *pElement, TypeHandle toTypeHnd) + call C_FUNC(ObjIsInstanceOfNoGC) + + mov rdi, [rbp - 0x08] + mov rsi, [rbp - 0x10] + mov rdx, [rbp - 0x18] + + RESET_FRAME_WITH_RBP + + cmp eax, CanCast + jne LOCAL_LABEL(NeedCheck) + + lea rdi, [rdi + 8*rsi] + add rdi, OFFSETOF__PtrArray__m_Array + mov rsi, rdx + + // JIT_WriteBarrier(Object** dst, Object* src) + jmp C_FUNC(JIT_WriteBarrier) + + LOCAL_LABEL(NeedCheck): + jmp C_FUNC(JIT_Stelem_Ref__ArrayStoreCheck_Helper) +LEAF_END JIT_Stelem_Ref__ObjIsInstanceOfNoGC_Helper, _TEXT + +// Need to save reg to provide a stack address for the Object* +LEAF_ENTRY JIT_Stelem_Ref__ArrayStoreCheck_Helper, _TEXT + push_nonvol_reg rbp + mov rbp, rsp + set_cfa_register rbp, 16 + + sub rsp, 0x20 + mov [rbp - 0x10], rdi + mov [rbp - 0x18], rsi + mov [rbp - 0x20], rdx + + mov rdi, rsp + lea rsi, [rbp - 0x10] + // HCIMPL2(FC_INNER_RET, ArrayStoreCheck, Object** pElement, PtrArray** pArray) + call C_FUNC(ArrayStoreCheck) + mov rdi, [rbp - 0x10] + mov rsi, [rbp - 0x18] + mov rdx, [rbp - 0x20] + + lea rdi, [rdi + 8*rsi] + add rdi, OFFSETOF__PtrArray__m_Array + mov rsi, rdx + + RESET_FRAME_WITH_RBP + + // JIT_WriteBarrier(Object** dst, Object* src) + jmp C_FUNC(JIT_WriteBarrier) +LEAF_END 
JIT_Stelem_Ref__ArrayStoreCheck_Helper, _TEXT |