; Licensed to the .NET Foundation under one or more agreements.
; The .NET Foundation licenses this file to you under the MIT license.
; See the LICENSE file in the project root for more information.
; ***********************************************************************
; File: JIThelp.asm
;
; ***********************************************************************
;
; *** NOTE: If you make changes to this file, propagate the changes to
; jithelp.s in this directory
; This contains JITinterface routines that are 100% x86 assembly
.586
.model flat
include asmconstants.inc
option casemap:none
.code
;
; @TODO Switch to g_ephemeral_low and g_ephemeral_high
; @TODO instead of g_lowest_address, g_highest address
;
; Symbolic names for the two argument registers.
ARGUMENT_REG1 equ ecx
ARGUMENT_REG2 equ edx
; Text aliases: each readable name on the left expands to the decorated
; linker-level symbol on the right, so the code below can use the short name.
; GC heap bounds and card table (data symbols).
g_ephemeral_low TEXTEQU <_g_ephemeral_low>
g_ephemeral_high TEXTEQU <_g_ephemeral_high>
g_lowest_address TEXTEQU <_g_lowest_address>
g_highest_address TEXTEQU <_g_highest_address>
g_card_table TEXTEQU <_g_card_table>
; Code symbols. The trailing @N is the number of stack-argument bytes the
; callee removes (compare "ret 16" in JIT_LMul and "ret 8" in the Dbl2* helpers).
WriteBarrierAssert TEXTEQU <_WriteBarrierAssert@8>
JIT_LLsh TEXTEQU <_JIT_LLsh@0>
JIT_LRsh TEXTEQU <_JIT_LRsh@0>
JIT_LRsz TEXTEQU <_JIT_LRsz@0>
JIT_LMul TEXTEQU <@JIT_LMul@16>
JIT_Dbl2LngOvf TEXTEQU <@JIT_Dbl2LngOvf@8>
JIT_Dbl2Lng TEXTEQU <@JIT_Dbl2Lng@8>
JIT_Dbl2IntSSE2 TEXTEQU <@JIT_Dbl2IntSSE2@8>
JIT_Dbl2LngP4x87 TEXTEQU <@JIT_Dbl2LngP4x87@8>
JIT_Dbl2LngSSE3 TEXTEQU <@JIT_Dbl2LngSSE3@8>
JIT_InternalThrowFromHelper TEXTEQU <@JIT_InternalThrowFromHelper@4>
JIT_WriteBarrierReg_PreGrow TEXTEQU <_JIT_WriteBarrierReg_PreGrow@0>
JIT_WriteBarrierReg_PostGrow TEXTEQU <_JIT_WriteBarrierReg_PostGrow@0>
JIT_TailCall TEXTEQU <_JIT_TailCall@0>
JIT_TailCallLeave TEXTEQU <_JIT_TailCallLeave@0>
JIT_TailCallVSDLeave TEXTEQU <_JIT_TailCallVSDLeave@0>
JIT_TailCallHelper TEXTEQU <_JIT_TailCallHelper@4>
JIT_TailCallReturnFromVSD TEXTEQU <_JIT_TailCallReturnFromVSD@0>
; Data imported from elsewhere in the runtime (read by the write barriers below).
EXTERN g_ephemeral_low:DWORD
EXTERN g_ephemeral_high:DWORD
EXTERN g_lowest_address:DWORD
EXTERN g_highest_address:DWORD
EXTERN g_card_table:DWORD
; WriteBarrierAssert is only called from the _DEBUG section of WriteBarrierHelper.
ifdef _DEBUG
EXTERN WriteBarrierAssert:PROC
endif ; _DEBUG
EXTERN JIT_InternalThrowFromHelper:PROC
; JIT_TailCallHelper is only called from the FEATURE_HIJACK section of JIT_TailCall.
ifdef FEATURE_HIJACK
EXTERN JIT_TailCallHelper:PROC
endif
; Remaining imports are referenced by their decorated names directly.
EXTERN _g_TailCallFrameVptr:DWORD
EXTERN @JIT_FailFast@0:PROC
EXTERN _s_gsCookie:DWORD
EXTERN _GetThread@0:PROC
EXTERN @JITutil_IsInstanceOfInterface@8:PROC
EXTERN @JITutil_ChkCastInterface@8:PROC
EXTERN @JITutil_IsInstanceOfAny@8:PROC
EXTERN @JITutil_ChkCastAny@8:PROC
ifdef WRITE_BARRIER_CHECK
; Those global variables are always defined, but should be 0 for Server GC
;
; The aliases need a text operand: with an empty TEXTEQU the EXTERN lines
; below expand to "EXTERN :DWORD" and every use of g_GCShadow in the write
; barriers expands to nothing.
; NOTE(review): the decorated names assume the shadow-heap globals are C++
; variables of type "unsigned char *" -- confirm against their C++ declaration.
g_GCShadow      TEXTEQU <?g_GCShadow@@3PAEA>
g_GCShadowEnd   TEXTEQU <?g_GCShadowEnd@@3PAEA>
EXTERN g_GCShadow:DWORD
EXTERN g_GCShadowEnd:DWORD
; Value stored into a shadow-heap slot when it is found not to match the
; GC heap slot after a barrier write (see the write barrier macros).
INVALIDGCVALUE  equ 0CCCCCCCDh
endif
EXTERN _COMPlusEndCatch@20:PROC
.686P
.XMM
; The following macro is needed because of a MASM issue with the
; movsd mnemonic
;
;; $movsd op1, op2
;; Emits a MOVUPD op1, op2 instruction and then goes back and overwrites its
;; first byte with 0F2h, leaving the location counter at the end of the
;; instruction afterwards.
;; NOTE(review): this relies on the first byte of the assembled MOVUPD being
;; its mandatory prefix byte, so that replacing it with 0F2h yields the scalar
;; MOVSD encoding -- confirm against the instruction set reference.
$movsd MACRO op1, op2
LOCAL begin_movsd, end_movsd
begin_movsd:
movupd op1, op2
end_movsd:
;; rewind to the first byte of the instruction and replace it
org begin_movsd
db 0F2h
;; restore the location counter to just past the instruction
org end_movsd
ENDM
.586
; The following macro is used to match the JITs
; multi-byte NOP sequence
$nop3 MACRO
        ;; three consecutive single-byte 090h bytes
        db      090h, 090h, 090h
ENDM
;***
;JIT_WriteBarrier* - GC write barrier helper
;
;Purpose:
; Helper calls in order to assign an object to a field
; Enables book-keeping of the GC.
;
;Entry:
; EDX - address of ref-field (assigned to)
; the resp. other reg - RHS of assignment
;
;Exit:
;
;Uses:
; EDX is destroyed.
;
;Exceptions:
;
;*******************************************************************************
; The code here is tightly coupled with AdjustContextForWriteBarrier, if you change
; anything here, you might need to change AdjustContextForWriteBarrier as well
;; WriteBarrierHelper rg
;; Defines the procedure _JIT_CheckedWriteBarrier(rg)@0 for one source register.
;;   In:  EDX = address of the reference field being assigned
;;        rg  = the object reference being stored
;;   Out: [EDX] = rg; if rg lies in [g_ephemeral_low, g_ephemeral_high) the
;;        card-table byte for the destination is set to 0FFh.
;;   EDX is destroyed on the card-marking path. Under WRITE_BARRIER_CHECK the
;;   store is mirrored into the shadow heap and cross-checked.
WriteBarrierHelper MACRO rg
ALIGN 4
;; The entry point is the fully 'safe' one in which we check if EDX (the REF
;; being updated) is actually in the GC heap
PUBLIC _JIT_CheckedWriteBarrier&rg&@0
_JIT_CheckedWriteBarrier&rg&@0 PROC
;; check if the REF being updated is in the GC heap
;; (unsigned compare against [g_lowest_address, g_highest_address))
cmp edx, g_lowest_address
jb WriteBarrier_NotInHeap_&rg
cmp edx, g_highest_address
jae WriteBarrier_NotInHeap_&rg
;; fall through to unchecked routine
;; note that its entry point also happens to be aligned
ifdef WRITE_BARRIER_CHECK
;; This entry point is used when you know the REF pointer being updated
;; is in the GC heap
PUBLIC _JIT_DebugWriteBarrier&rg&@0
_JIT_DebugWriteBarrier&rg&@0:
endif
ifdef _DEBUG
;; Preserve edx/ecx/eax around the call, then pass (edx, rg) on the stack.
;; Only the three saved registers are popped afterwards, so the callee is
;; expected to remove its own two arguments (the alias ends in @8).
push edx
push ecx
push eax
push rg
push edx
call WriteBarrierAssert
pop eax
pop ecx
pop edx
endif ;_DEBUG
; in the !WRITE_BARRIER_CHECK case this will be the move for all
; addresses in the GCHeap, addresses outside the GCHeap will get
; taken care of below at WriteBarrier_NotInHeap_&rg
ifndef WRITE_BARRIER_CHECK
mov DWORD PTR [edx], rg
endif
ifdef WRITE_BARRIER_CHECK
; Test dest here so if it is bad AV would happen before we change register/stack
; status. This makes job of AdjustContextForWriteBarrier easier.
;; NOTE(review): this compare has no operand-size override; confirm the
;; assembler accepts it when WRITE_BARRIER_CHECK is defined.
cmp [edx], 0
;; ALSO update the shadow GC heap if that is enabled
; Make ebp into the temporary src register. We need to do this so that we can use ecx
; in the calculation of the shadow GC address, but still have access to the src register
push ecx
push ebp
mov ebp, rg
; if g_GCShadow is 0, don't perform the check
cmp g_GCShadow, 0
je WriteBarrier_NoShadow_&rg
;; ecx = g_GCShadow + (edx - g_lowest_address); skip the shadow update if the
;; subtraction borrows or the result lies above g_GCShadowEnd
mov ecx, edx
sub ecx, g_lowest_address ; U/V
jb WriteBarrier_NoShadow_&rg
add ecx, [g_GCShadow]
cmp ecx, [g_GCShadowEnd]
ja WriteBarrier_NoShadow_&rg
; TODO: In Orcas timeframe if we move to P4+ only on X86 we should enable
; mfence barriers on either side of these two writes to make sure that
; they stay as close together as possible
; edx contains address in GC
; ecx contains address in ShadowGC
; ebp temporarily becomes the src register
;; When we're writing to the shadow GC heap we want to be careful to minimize
;; the risk of a race that can occur here where the GC and ShadowGC don't match
mov DWORD PTR [edx], ebp
mov DWORD PTR [ecx], ebp
;; We need a scratch register to verify the shadow heap. We also need to
;; construct a memory barrier so that the write to the shadow heap happens
;; before the read from the GC heap. We can do both by using SUB/XCHG
;; rather than PUSH.
;;
;; TODO: Should be changed to a push if the mfence described above is added.
;;
sub esp, 4
xchg [esp], eax
;; As part of our race avoidance (see above) we will now check whether the values
;; in the GC and ShadowGC match. There is a possibility that we're wrong here but
;; being overaggressive means we might mask a case where someone updates GC refs
;; without going to a write barrier, but by its nature it will be indeterminate
;; and we will find real bugs whereas the current implementation is indeterminate
;; but only leads to investigations that find that this code is fundamentally flawed
mov eax, [edx]
cmp [ecx], eax
je WriteBarrier_CleanupShadowCheck_&rg
;; mismatch: poison the shadow slot
;; NOTE(review): no operand-size override on this store either; confirm.
mov [ecx], INVALIDGCVALUE
WriteBarrier_CleanupShadowCheck_&rg:
;; restore the eax that was swapped onto the stack above
pop eax
jmp WriteBarrier_ShadowCheckEnd_&rg
WriteBarrier_NoShadow_&rg:
; If we come here then we haven't written the value to the GC and need to.
; ebp contains rg
; We restore ebp/ecx immediately after this, and if either of them is the src
; register it will regain its value as the src register.
mov DWORD PTR [edx], ebp
WriteBarrier_ShadowCheckEnd_&rg:
pop ebp
pop ecx
endif
;; Card marking is only needed when the stored reference itself lies in
;; [g_ephemeral_low, g_ephemeral_high)
cmp rg, g_ephemeral_low
jb WriteBarrier_NotInEphemeral_&rg
cmp rg, g_ephemeral_high
jae WriteBarrier_NotInEphemeral_&rg
;; card byte address = g_card_table + (destination address / 1024)
shr edx, 10
add edx, [g_card_table]
;; the card byte is only written when it is not already 0FFh
cmp BYTE PTR [edx], 0FFh
jne WriteBarrier_UpdateCardTable_&rg
ret
WriteBarrier_UpdateCardTable_&rg:
mov BYTE PTR [edx], 0FFh
ret
WriteBarrier_NotInHeap_&rg:
; If it wasn't in the heap then we haven't updated the dst in memory yet
mov DWORD PTR [edx], rg
WriteBarrier_NotInEphemeral_&rg:
; If it is in the GC Heap but isn't in the ephemeral range we've already
; updated the Heap with the Object*.
ret
_JIT_CheckedWriteBarrier&rg&@0 ENDP
ENDM
;***
;JIT_ByRefWriteBarrier* - GC write barrier helper
;
;Purpose:
; Helper calls in order to assign an object to a byref field
; Enables book-keeping of the GC.
;
;Entry:
; EDI - address of ref-field (assigned to)
; ESI - address of the data (source)
; ECX can be trashed
;
;Exit:
;
;Uses:
; EDI and ESI are incremented by a DWORD
;
;Exceptions:
;
;*******************************************************************************
; The code here is tightly coupled with AdjustContextForWriteBarrier, if you change
; anything here, you might need to change AdjustContextForWriteBarrier as well
;; ByRefWriteBarrierHelper
;; Defines _JIT_ByRefWriteBarrier@0.
;;   In:  ESI = address of the source slot, EDI = address of the destination slot
;;   Out: [EDI] = [ESI]; ESI and EDI are both advanced by 4 on every path.
;;        ECX is used as scratch (value copied, then card byte address).
;;   If EDI is inside [g_lowest_address, g_highest_address) and the copied value
;;   is inside [g_ephemeral_low, g_ephemeral_high), the card byte for the
;;   destination is set to 0FFh.
ByRefWriteBarrierHelper MACRO
ALIGN 4
PUBLIC _JIT_ByRefWriteBarrier@0
_JIT_ByRefWriteBarrier@0 PROC
;;test for dest in range
;; ecx = value being copied; loaded before the range checks so that it is
;; available on the NotInHeap path as well
mov ecx, [esi]
cmp edi, g_lowest_address
jb ByRefWriteBarrier_NotInHeap
cmp edi, g_highest_address
jae ByRefWriteBarrier_NotInHeap
ifndef WRITE_BARRIER_CHECK
;;write barrier
mov [edi],ecx
endif
ifdef WRITE_BARRIER_CHECK
; Test dest here so if it is bad AV would happen before we change register/stack
; status. This makes job of AdjustContextForWriteBarrier easier.
;; NOTE(review): this compare has no operand-size override; confirm the
;; assembler accepts it when WRITE_BARRIER_CHECK is defined.
cmp [edi], 0
;; ALSO update the shadow GC heap if that is enabled
; use edx for address in GC Shadow,
push edx
;if g_GCShadow is 0, don't do the update
cmp g_GCShadow, 0
je ByRefWriteBarrier_NoShadow
;; edx = g_GCShadow + (edi - g_lowest_address); skip the shadow update if the
;; subtraction borrows or the result lies above g_GCShadowEnd
mov edx, edi
sub edx, g_lowest_address ; U/V
jb ByRefWriteBarrier_NoShadow
add edx, [g_GCShadow]
cmp edx, [g_GCShadowEnd]
ja ByRefWriteBarrier_NoShadow
; TODO: In Orcas timeframe if we move to P4+ only on X86 we should enable
; mfence barriers on either side of these two writes to make sure that
; they stay as close together as possible
; edi contains address in GC
; edx contains address in ShadowGC
; ecx is the value to assign
;; When we're writing to the shadow GC heap we want to be careful to minimize
;; the risk of a race that can occur here where the GC and ShadowGC don't match
mov DWORD PTR [edi], ecx
mov DWORD PTR [edx], ecx
;; We need a scratch register to verify the shadow heap. We also need to
;; construct a memory barrier so that the write to the shadow heap happens
;; before the read from the GC heap. We can do both by using SUB/XCHG
;; rather than PUSH.
;;
;; TODO: Should be changed to a push if the mfence described above is added.
;;
sub esp, 4
xchg [esp], eax
;; As part of our race avoidance (see above) we will now check whether the values
;; in the GC and ShadowGC match. There is a possibility that we're wrong here but
;; being overaggressive means we might mask a case where someone updates GC refs
;; without going to a write barrier, but by its nature it will be indeterminate
;; and we will find real bugs whereas the current implementation is indeterminate
;; but only leads to investigations that find that this code is fundamentally flawed
mov eax, [edi]
cmp [edx], eax
je ByRefWriteBarrier_CleanupShadowCheck
;; mismatch: poison the shadow slot
;; NOTE(review): no operand-size override on this store either; confirm.
mov [edx], INVALIDGCVALUE
ByRefWriteBarrier_CleanupShadowCheck:
;; restore the eax that was swapped onto the stack above
pop eax
jmp ByRefWriteBarrier_ShadowCheckEnd
ByRefWriteBarrier_NoShadow:
; If we come here then we haven't written the value to the GC and need to.
mov DWORD PTR [edi], ecx
ByRefWriteBarrier_ShadowCheckEnd:
pop edx
endif
;;test for *src in ephemeral segment
cmp ecx, g_ephemeral_low
jb ByRefWriteBarrier_NotInEphemeral
cmp ecx, g_ephemeral_high
jae ByRefWriteBarrier_NotInEphemeral
;; take a copy of the destination address before edi is advanced, then
;; card byte address = g_card_table + (destination address / 1024)
mov ecx, edi
add esi,4
add edi,4
shr ecx, 10
add ecx, [g_card_table]
;; the card byte is only written when it is not already 0FFh
cmp byte ptr [ecx], 0FFh
jne ByRefWriteBarrier_UpdateCardTable
ret
ByRefWriteBarrier_UpdateCardTable:
mov byte ptr [ecx], 0FFh
ret
ByRefWriteBarrier_NotInHeap:
; If it wasn't in the heap then we haven't updated the dst in memory yet
mov [edi],ecx
ByRefWriteBarrier_NotInEphemeral:
; If it is in the GC Heap but isn't in the ephemeral range we've already
; updated the Heap with the Object*.
add esi,4
add edi,4
ret
_JIT_ByRefWriteBarrier@0 ENDP
ENDM
;*******************************************************************************
; Write barrier wrappers with fcall calling convention
;
UniversalWriteBarrierHelper MACRO name
        ALIGN 4
;; @JIT_(name)@8: wrapper that takes the destination address in ECX and the
;; reference in EDX, moves them into the registers the EAX flavour of the
;; barrier expects (EDX = destination, EAX = reference) and tail-jumps to it.
@JIT_&name&@8 PROC PUBLIC
        mov     eax, edx                ;; eax = reference to store
        mov     edx, ecx                ;; edx = destination address
        jmp     _JIT_&name&EAX@0
@JIT_&name&@8 ENDP
ENDM
; WriteBarrierStart and WriteBarrierEnd are used to determine bounds of
; WriteBarrier functions so can determine if got AV in them.
;
; Marker procedure: its address is the lower bound of the write barrier group.
_JIT_WriteBarrierGroup@0 PROC PUBLIC
        ret
_JIT_WriteBarrierGroup@0 ENDP
ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS
; Only define these if we're using the ASM GC write barriers; if this flag is not defined,
; we'll use C++ versions of these write barriers.
;
; Each wrapper jumps to _JIT_(name)EAX@0, so the name argument must be the
; stem of a barrier family that has an EAX flavour.
UniversalWriteBarrierHelper <CheckedWriteBarrier>
UniversalWriteBarrierHelper <WriteBarrier>
endif
; One checked barrier per source register. The macro requires the register
; argument: it is pasted into the procedure name and used as an instruction
; operand. EDX is excluded because it always holds the destination address,
; and ESP cannot carry an object reference here.
WriteBarrierHelper <EAX>
WriteBarrierHelper <EBX>
WriteBarrierHelper <ECX>
WriteBarrierHelper <ESI>
WriteBarrierHelper <EDI>
WriteBarrierHelper <EBP>
ByRefWriteBarrierHelper
; This is the first function outside the "keep together range". Used by BBT scripts.
_JIT_WriteBarrierGroup_End@0 PROC PUBLIC
        ret                             ; marker only; first address past the group
_JIT_WriteBarrierGroup_End@0 ENDP
;*********************************************************************/
; In cases where we support it we have an optimized GC Poll callback. Normally (when we're not trying to
; suspend for GC), the CORINFO_HELP_POLL_GC helper points to this nop routine. When we're ready to suspend
; for GC, we whack the Jit Helper table entry to point to the real helper. When we're done with GC we
; whack it back.
@JIT_PollGC_Nop@0 PROC PUBLIC
        ret                             ; does nothing: returns immediately
@JIT_PollGC_Nop@0 ENDP
;*********************************************************************/
;llshl - long shift left
;
;Purpose:
; Does a Long Shift Left (signed and unsigned are identical)
; Shifts a long left any number of bits.
;
; NOTE: This routine has been adapted from the Microsoft CRTs.
;
;Entry:
; EDX:EAX - long value to be shifted
; ECX - number of bits to shift by
;
;Exit:
; EDX:EAX - shifted value
;
ALIGN 16
PUBLIC JIT_LLsh
; JIT_LLsh - 64-bit shift left.
;   In:    EDX:EAX = value, ECX = shift count
;   Out:   EDX:EAX = value shifted left by (count mod 64)
;   Clobb: ECX (reduced mod 64), flags
JIT_LLsh PROC
        ; Reduce the shift amount mod 64. Without this a count of 64 or more
        ; falls into the "32..63" path below and, because the hardware only
        ; looks at the low 5 bits of CL, is treated as (count AND 31) + 32.
        and     ecx, 63
        ; Handle shifts of between bits 0 and 31
        cmp     ecx, 32
        jae     short LLshMORE32
        shld    edx, eax, cl            ; high dword takes bits shifted out of low
        shl     eax, cl
        ret
        ; Handle shifts of between bits 32 and 63
LLshMORE32:
        ; The x86 shift instructions only use the lower 5 bits, so CL acts
        ; as (count - 32) here.
        mov     edx, eax                ; low dword moves into the high dword
        xor     eax, eax                ; low dword becomes zero
        shl     edx, cl
        ret
JIT_LLsh ENDP
;*********************************************************************/
;LRsh - long shift right
;
;Purpose:
; Does a signed Long Shift Right
; Shifts a long right any number of bits.
;
; NOTE: This routine has been adapted from the Microsoft CRTs.
;
;Entry:
; EDX:EAX - long value to be shifted
; ECX - number of bits to shift by
;
;Exit:
; EDX:EAX - shifted value
;
ALIGN 16
PUBLIC JIT_LRsh
; JIT_LRsh - 64-bit signed (arithmetic) shift right.
;   In:    EDX:EAX = value, ECX = shift count
;   Out:   EDX:EAX = value shifted right by (count mod 64), sign-filled
;   Clobb: ECX (reduced mod 64), flags
JIT_LRsh PROC
        ; Reduce the shift amount mod 64. Without this a count of 64 or more
        ; falls into the "32..63" path below and, because the hardware only
        ; looks at the low 5 bits of CL, is treated as (count AND 31) + 32.
        and     ecx, 63
        ; Handle shifts of between bits 0 and 31
        cmp     ecx, 32
        jae     short LRshMORE32
        shrd    eax, edx, cl            ; low dword takes bits shifted out of high
        sar     edx, cl
        ret
        ; Handle shifts of between bits 32 and 63
LRshMORE32:
        ; The x86 shift instructions only use the lower 5 bits, so CL acts
        ; as (count - 32) here.
        mov     eax, edx                ; high dword moves into the low dword
        sar     edx, 31                 ; high dword becomes all sign bits
        sar     eax, cl
        ret
JIT_LRsh ENDP
;*********************************************************************/
; LRsz:
;Purpose:
; Does a unsigned Long Shift Right
; Shifts a long right any number of bits.
;
; NOTE: This routine has been adapted from the Microsoft CRTs.
;
;Entry:
; EDX:EAX - long value to be shifted
; ECX - number of bits to shift by
;
;Exit:
; EDX:EAX - shifted value
;
ALIGN 16
PUBLIC JIT_LRsz
; JIT_LRsz - 64-bit unsigned (logical) shift right.
;   In:    EDX:EAX = value, ECX = shift count
;   Out:   EDX:EAX = value shifted right by (count mod 64), zero-filled
;   Clobb: ECX (reduced mod 64), flags
JIT_LRsz PROC
        ; Reduce the shift amount mod 64. Without this a count of 64 or more
        ; falls into the "32..63" path below and, because the hardware only
        ; looks at the low 5 bits of CL, is treated as (count AND 31) + 32.
        and     ecx, 63
        ; Handle shifts of between bits 0 and 31
        cmp     ecx, 32
        jae     short LRszMORE32
        shrd    eax, edx, cl            ; low dword takes bits shifted out of high
        shr     edx, cl
        ret
        ; Handle shifts of between bits 32 and 63
LRszMORE32:
        ; The x86 shift instructions only use the lower 5 bits, so CL acts
        ; as (count - 32) here.
        mov     eax, edx                ; high dword moves into the low dword
        xor     edx, edx                ; high dword becomes zero
        shr     eax, cl
        ret
JIT_LRsz ENDP
;*********************************************************************/
; LMul:
;Purpose:
; Does a long multiply (same for signed/unsigned)
;
; NOTE: This routine has been adapted from the Microsoft CRTs.
;
;Entry:
; Parameters are passed on the stack:
; 1st pushed: multiplier (QWORD)
; 2nd pushed: multiplicand (QWORD)
;
;Exit:
; EDX:EAX - product of multiplier and multiplicand
;
ALIGN 16
PUBLIC JIT_LMul
; JIT_LMul - 64 x 64 -> low 64 bits multiply.
;   Stack on entry:  [esp+4]  = ALO   [esp+8]  = AHI
;                    [esp+12] = BLO   [esp+16] = BHI
;   Out:   EDX:EAX = low 64 bits of A * B
;   Clobb: ECX, flags. EBX is saved and restored on the slow path.
;   The callee removes its 16 bytes of arguments.
;
;   result = ALO*BLO + ((ALO*BHI + AHI*BLO) << 32)
JIT_LMul PROC
        mov     eax, [esp + 8]          ; eax = AHI
        mov     ecx, [esp + 16]         ; ecx = BHI
        or      ecx, eax                ; ZF set only when both high dwords are zero
        mov     ecx, [esp + 12]         ; ecx = BLO (mov leaves the flags alone)
        jnz     LMul_hard

        ; Fast path: both high dwords are zero, a single 32x32 multiply suffices.
        mov     eax, [esp + 4]          ; eax = ALO
        mul     ecx                     ; edx:eax = ALO * BLO
        ret     16                      ; callee restores the stack

LMul_hard:
        push    ebx                     ; every [esp+N] below is shifted by 4

        mul     ecx                     ; eax = low dword of AHI * BLO
        mov     ebx, eax                ; ebx = running sum of the cross terms

        mov     eax, [esp + 8]          ; eax = ALO
        mul     dword ptr [esp + 20]    ; eax = low dword of ALO * BHI
        add     ebx, eax                ; ebx = (ALO * BHI) + (AHI * BLO)

        mov     eax, [esp + 8]          ; eax = ALO, ecx still holds BLO
        mul     ecx                     ; edx:eax = ALO * BLO
        add     edx, ebx                ; fold the cross terms into the high dword

        pop     ebx
        ret     16                      ; callee restores the stack
JIT_LMul ENDP
;*********************************************************************/
; JIT_Dbl2LngOvf
;Purpose:
; converts a double to a long truncating toward zero (C semantics)
; with check for overflow
;
; uses stdcall calling conventions
;
PUBLIC JIT_Dbl2LngOvf
; JIT_Dbl2LngOvf - double -> 64-bit integer with an overflow check.
;   In:  [esp+4] = double argument (8 bytes)
;   Out: EDX:EAX = converted value from JIT_Dbl2Lng, or a call to
;        JIT_InternalThrowFromHelper(CORINFO_OverflowException_ASM) when bit 0
;        of the x87 status word is set after the conversion.
;   The callee removes its 8 bytes of arguments.
JIT_Dbl2LngOvf PROC
        fnclex                          ; start from a clean x87 exception state
        fld     qword ptr [esp+4]       ; load the argument ...
        push    ecx                     ; ... reserve 8 bytes for the callee's copy
        push    ecx
        fstp    qword ptr [esp]         ; ... and store it as JIT_Dbl2Lng's argument
        call    JIT_Dbl2Lng             ; edx:eax = result; pops its own 8 bytes
        mov     ecx, eax                ; keep the low dword: fnstsw overwrites ax
        fnstsw  ax
        test    al, 1                   ; bit 0 of the status word
        jnz     Dbl2LngOvf_throw
        mov     eax, ecx                ; restore the low dword of the result
        ret     8

Dbl2LngOvf_throw:
        mov     ecx, CORINFO_OverflowException_ASM
        call    JIT_InternalThrowFromHelper
        ret     8
JIT_Dbl2LngOvf ENDP
;*********************************************************************/
; JIT_Dbl2Lng
;Purpose:
; converts a double to a long truncating toward zero (C semantics)
;
; uses stdcall calling conventions
;
; Note that changing the rounding mode is very expensive. This
; routine basically implements the truncation semantics without changing
; the rounding mode, resulting in a win.
;
PUBLIC JIT_Dbl2Lng
; JIT_Dbl2Lng - double -> 64-bit integer, truncating toward zero.
;   In:  [esp+4] = double argument (8 bytes)
;   Out: EDX:EAX = result. ECX is used as scratch.
;   The callee removes its 8 bytes of arguments.
; Method: convert with FISTP under the current rounding mode, convert that
; integer back with FILD, and use the sign of (arg - round(arg)) to step the
; integer by one toward zero when the rounding went away from zero.
JIT_Dbl2Lng PROC
fld qword ptr[ESP+4] ; fetch arg
lea ecx,[esp-8]
sub esp,16 ; allocate frame
and ecx,-8 ; align pointer on boundary of 8
fld st(0) ; duplicate top of stack
fistp qword ptr[ecx] ; leave arg on stack, also save in temp
fild qword ptr[ecx] ; arg, round(arg) now on stack
mov edx,[ecx+4] ; high dword of integer
mov eax,[ecx] ; low dword of integer
test eax,eax
je integer_QNaN_or_zero
arg_is_not_integer_QNaN:
fsubp st(1),st ; TOS=d-round(d),
; { st(1)=st(1)-st & pop ST }
test edx,edx ; what's sign of integer
jns positive
; number is negative
; dead cycle
; dead cycle
fstp dword ptr[ecx] ; result of subtraction
mov ecx,[ecx] ; dword of difference(single precision)
add esp,16
; flip the sign bit so that the add below carries exactly when the
; difference was strictly positive
xor ecx,80000000h
add ecx,7fffffffh ; if difference>0 then increment integer
adc eax,0 ; inc eax (add CARRY flag)
adc edx,0 ; propagate carry flag to upper bits
ret 8
positive:
fstp dword ptr[ecx] ;17-18 ; result of subtraction
mov ecx,[ecx] ; dword of difference (single precision)
add esp,16
add ecx,7fffffffh ; if difference<0 then decrement integer
sbb eax,0 ; dec eax (subtract CARRY flag)
sbb edx,0 ; propagate carry flag to upper bits
ret 8
integer_QNaN_or_zero:
; low dword is zero here; if the high dword is zero apart from its sign bit
; the integer is either 0 or 8000000000000000h and is returned unchanged
test edx,7fffffffh
jnz arg_is_not_integer_QNaN
fstp st(0) ;; pop round(arg)
fstp st(0) ;; arg
add esp,16
ret 8
JIT_Dbl2Lng ENDP
;*********************************************************************/
; JIT_Dbl2LngP4x87
;Purpose:
; converts a double to a long truncating toward zero (C semantics)
;
; uses stdcall calling conventions
;
; This code is faster on a P4 than the Dbl2Lng code above, but is
; slower on a PIII. Hence we choose this code when on a P4 or above.
;
PUBLIC JIT_Dbl2LngP4x87
; JIT_Dbl2LngP4x87 - double -> 64-bit integer by temporarily changing the
; x87 control word.
;   In:  double argument on the stack (8 bytes)
;   Out: EDX:EAX = converted value
;   The callee removes its 8 bytes of arguments.
; The argument slot is reused to hold the caller's control word once the
; argument itself has been loaded onto the FPU stack.
JIT_Dbl2LngP4x87 PROC
arg1 equ <[esp+0Ch]>
        sub     esp, 8                  ; 8 bytes of scratch; arg1 is the argument slot

        fld     qword ptr arg1          ; st(0) = argument
        fnstcw  word ptr arg1           ; save the current control word over the argument
        movzx   eax, word ptr arg1      ; eax = control word, zero extended
        or      ah, 0Ch                 ; set bits 10 and 11 of the control word
        mov     dword ptr [esp], eax    ; modified control word into scratch
        fldcw   word ptr [esp]          ; activate it
        fistp   qword ptr [esp]         ; convert; result overwrites scratch
        mov     eax, dword ptr [esp]    ; low dword of the result
        mov     edx, dword ptr [esp+4]  ; high dword of the result
        fldcw   word ptr arg1           ; put the caller's control word back

        add     esp, 8                  ; release scratch
        ret     8
JIT_Dbl2LngP4x87 ENDP
;*********************************************************************/
; JIT_Dbl2LngSSE3
;Purpose:
; converts a double to a long truncating toward zero (C semantics)
;
; uses stdcall calling conventions
;
; This code is faster than the above P4 x87 code for Intel processors
; equal or later than Core2 and Atom that have SSE3 support
;
.686P
.XMM
PUBLIC JIT_Dbl2LngSSE3
; JIT_Dbl2LngSSE3 - double -> 64-bit integer using FISTTP, so the control
; word does not have to be touched.
;   In:  double argument on the stack (8 bytes)
;   Out: EDX:EAX = converted value
;   The callee removes its 8 bytes of arguments.
JIT_Dbl2LngSSE3 PROC
arg1 equ <[esp+0Ch]>
        sub     esp, 8                  ; 8 bytes of scratch; arg1 is the argument slot

        fld     qword ptr arg1          ; st(0) = argument
        fisttp  qword ptr [esp]         ; truncating convert into scratch, pops st(0)
        mov     eax, dword ptr [esp]    ; low dword of the result
        mov     edx, dword ptr [esp+4]  ; high dword of the result

        add     esp, 8                  ; release scratch
        ret     8
JIT_Dbl2LngSSE3 ENDP
.586
;*********************************************************************/
; JIT_Dbl2IntSSE2
;Purpose:
; converts a double to a 32-bit int, truncating toward zero (C semantics)
;
; uses stdcall calling conventions
;
; This code is even faster than the P4 x87 code for Dbl2LongP4x87,
; but only returns a 32 bit value (only good for int).
;
.686P
.XMM
PUBLIC JIT_Dbl2IntSSE2
; JIT_Dbl2IntSSE2 - double -> 32-bit integer with CVTTSD2SI.
;   In:    [esp+4] = double argument (8 bytes)
;   Out:   EAX = converted value (32 bits only)
;   Clobb: XMM0
;   The callee removes its 8 bytes of arguments.
JIT_Dbl2IntSSE2 PROC
        $movsd  xmm0, [esp+4]           ; load the argument via the $movsd macro
        cvttsd2si eax, xmm0             ; truncating convert
        ret     8
JIT_Dbl2IntSSE2 ENDP
.586
;*********************************************************************/
; This is the small write barrier thunk we use when we know the
; ephemeral generation is higher in memory than older generations.
; The 0x0F0F0F0F values are bashed by the two functions above.
; This the generic version - wherever the code says ECX,
; the specific register is patched later into a copy
; Note: do not replace ECX by EAX - there is a smaller encoding for
; the compares just for EAX, which won't work for other registers.
;
; READ THIS!!!!!!
; it is imperative that the addresses of the values that we overwrite
; (card table, ephemeral region ranges, etc) are naturally aligned since
; there are codepaths that will overwrite these values while the EE is running.
;
PUBLIC JIT_WriteBarrierReg_PreGrow
; Template write barrier with a single lower-bound check.
;   In:  EDX = destination address, ECX = reference being stored
;   Out: [EDX] = ECX; when ECX is not below the bound, the byte at
;        (EDX shr 10) + table base is set to 0FFh unless it already is.
; Every 0F0F0F0F0h below is a placeholder operand (see the header comment
; above). The nop instructions exist only to position those operands, so
; no instruction may be added, removed or re-encoded here.
JIT_WriteBarrierReg_PreGrow PROC
mov DWORD PTR [edx], ecx
cmp ecx, 0F0F0F0F0h
jb NoWriteBarrierPre
shr edx, 10
nop ; padding for alignment of constant
cmp byte ptr [edx+0F0F0F0F0h], 0FFh
jne WriteBarrierPre
NoWriteBarrierPre:
ret
nop ; padding for alignment of constant
nop ; padding for alignment of constant
WriteBarrierPre:
mov byte ptr [edx+0F0F0F0F0h], 0FFh
ret
JIT_WriteBarrierReg_PreGrow ENDP
;*********************************************************************/
; This is the larger write barrier thunk we use when we know that older
; generations may be higher in memory than the ephemeral generation
; The 0x0F0F0F0F values are bashed by the two functions above.
; This the generic version - wherever the code says ECX,
; the specific register is patched later into a copy
; Note: do not replace ECX by EAX - there is a smaller encoding for
; the compares just for EAX, which won't work for other registers.
; NOTE: we need this aligned for our validation to work properly
ALIGN 4
PUBLIC JIT_WriteBarrierReg_PostGrow
; Template write barrier with both a lower-bound and an upper-bound check.
;   In:  EDX = destination address, ECX = reference being stored
;   Out: [EDX] = ECX; when ECX is inside [lower, upper), the byte at
;        (EDX shr 10) + table base is set to 0FFh unless it already is.
; Every 0F0F0F0F0h below is a placeholder operand (see the header comment
; above); the two compares against ECX only look identical because both
; bounds are still placeholders. The nop instructions exist only to position
; those operands, so no instruction may be added, removed or re-encoded here.
JIT_WriteBarrierReg_PostGrow PROC
mov DWORD PTR [edx], ecx
cmp ecx, 0F0F0F0F0h
jb NoWriteBarrierPost
cmp ecx, 0F0F0F0F0h
jae NoWriteBarrierPost
shr edx, 10
nop ; padding for alignment of constant
cmp byte ptr [edx+0F0F0F0F0h], 0FFh
jne WriteBarrierPost
NoWriteBarrierPost:
ret
nop ; padding for alignment of constant
nop ; padding for alignment of constant
WriteBarrierPost:
mov byte ptr [edx+0F0F0F0F0h], 0FFh
ret
JIT_WriteBarrierReg_PostGrow ENDP
;*********************************************************************/
;
; a fake virtual stub dispatch register indirect callsite
; The three pad bytes and the indirect call below are never meant to be
; executed by falling into them; they make the bytes immediately preceding
; JIT_TailCallReturnFromVSD look like a "call dword ptr [eax]" call site.
$nop3
call dword ptr [eax]
PUBLIC JIT_TailCallReturnFromVSD
; JIT_TailCallReturnFromVSD is the return address that the VSD path of
; JIT_TailCall stores for the tail-called target. When the target returns
; here, this code unlinks the TailCallFrame from the thread's frame chain,
; restores edi/esi/ebx/ebp from it and returns to the saved m_ReturnAddress.
; Expects ebx = pThread on entry.
JIT_TailCallReturnFromVSD:
ifdef _DEBUG
nop ; blessed callsite
endif
call VSDHelperLabel ; keep call-ret count balanced.
VSDHelperLabel:
; Stack at this point :
; ...
; m_ReturnAddress
; m_regs
; m_CallerAddress
; m_pThread
; vtbl
; GSCookie
; &VSDHelperLabel
; [esp+0] is the address pushed by the call above, [esp+4] the GSCookie,
; so the frame itself starts 8 bytes above esp.
OffsetOfTailCallFrame = 8
; ebx = pThread
ifdef _DEBUG
; fail fast if the cookie stored below the frame does not match the process cookie
mov esi, _s_gsCookie ; GetProcessGSCookie()
cmp dword ptr [esp+OffsetOfTailCallFrame-SIZEOF_GSCookie], esi
je TailCallFrameGSCookieIsValid
call @JIT_FailFast@0
TailCallFrameGSCookieIsValid:
endif
; remove the padding frame from the chain
mov esi, dword ptr [esp+OffsetOfTailCallFrame+4] ; esi = TailCallFrame::m_Next
mov dword ptr [ebx + Thread_m_pFrame], esi
; skip the frame
add esp, 20 ; &VSDHelperLabel, GSCookie, vtbl, m_Next, m_CallerAddress
pop edi ; restore callee saved registers
pop esi
pop ebx
pop ebp
ret ; return to m_ReturnAddress
;------------------------------------------------------------------------------
;
PUBLIC JIT_TailCall
; JIT_TailCall - tail call helper.
; Copies the outgoing arguments over the caller's own incoming argument area,
; re-creates the return address above them, restores the caller's registers
; and transfers to the target with a "retn" (see JIT_TailCallLeave and
; JIT_TailCallVSDLeave). When the flags word has bit 1 set (virtual stub
; dispatch) a TailCallFrame is inserted or reused first and the target is
; read through the indirection cell.
JIT_TailCall PROC
; the stack layout at this point is:
;
; ebp+8+4*nOldStackArgs <- end of argument destination
; ... ...
; ebp+8+ old args (size is nOldStackArgs)
; ... ...
; ebp+8 <- start of argument destination
; ebp+4 ret addr
; ebp+0 saved ebp
; ebp-c saved ebx, esi, edi (if have callee saved regs = 1)
;
; other stuff (local vars) in the jitted callers' frame
;
; esp+20+4*nNewStackArgs <- end of argument source
; ... ...
; esp+20+ new args (size is nNewStackArgs) to be passed to the target of the tail-call
; ... ...
; esp+20 <- start of argument source
; esp+16 nOldStackArgs
; esp+12 nNewStackArgs
; esp+8 flags (1 = have callee saved regs, 2 = virtual stub dispatch)
; esp+4 target addr
; esp+0 retaddr
;
; If you change this function, make sure you update code:TailCallStubManager as well.
; Offsets of the entry-time stack slots listed above (relative to esp at entry).
RetAddr equ 0
TargetAddr equ 4
nNewStackArgs equ 12
nOldStackArgs equ 16
NewArgs equ 20
; extra space is incremented as we push things on the stack along the way
; ("=" is used rather than "equ" because the value is reassigned below)
ExtraSpace = 0
; reserve the pThread slot first; it is filled in after GetThread returns
push 0 ; Thread*
; save ArgumentRegisters
push ecx
push edx
call _GetThread@0; eax = Thread*
mov [esp + 8], eax
ExtraSpace = 12 ; pThread, ecx, edx
ifdef FEATURE_HIJACK
; If the thread state has TS_Hijacked_ASM set, call JIT_TailCallHelper(Thread*)
; before the return address is moved around.
; NOTE(review): presumably the helper undoes the return-address hijack --
; confirm against JIT_TailCallHelper.
test dword ptr [eax+Thread_m_State], TS_Hijacked_ASM
jz NoHijack
; JIT_TailCallHelper(Thread *)
push eax
call JIT_TailCallHelper ; this is __stdcall
NoHijack:
endif
mov edx, dword ptr [esp+ExtraSpace+JIT_TailCall_StackOffsetToFlags] ; edx = flags
mov eax, dword ptr [esp+ExtraSpace+nOldStackArgs] ; eax = nOldStackArgs
mov ecx, dword ptr [esp+ExtraSpace+nNewStackArgs] ; ecx = nNewStackArgs
; restore callee saved registers
; @TODO : esp based - doesnt work with localloc
test edx, 1
jz NoCalleeSaveRegisters
mov edi, dword ptr [ebp-4] ; restore edi
mov esi, dword ptr [ebp-8] ; restore esi
mov ebx, dword ptr [ebp-12] ; restore ebx
NoCalleeSaveRegisters:
push dword ptr [ebp+4] ; save the original return address for later
push edi
push esi
ExtraSpace = 24 ; pThread, ecx, edx, orig retaddr, edi, esi
; offsets (from the current esp) of the values this helper has pushed
CallersEsi = 0
CallersEdi = 4
OrigRetAddr = 8
pThread = 20
lea edi, [ebp+8+4*eax] ; edi = the end of argument destination
lea esi, [esp+ExtraSpace+NewArgs+4*ecx] ; esi = the end of argument source
mov ebp, dword ptr [ebp] ; restore ebp (do not use ebp as scratch register to get a good stack trace in debugger)
test edx, 2
jnz VSDTailCall
; copy the arguments to the final destination
; (copied from the highest address downwards, one dword per iteration)
test ecx, ecx
jz ArgumentsCopied
ArgumentCopyLoop:
; At this point, this is the value of the registers :
; edi = end of argument dest
; esi = end of argument source
; ecx = nNewStackArgs
mov eax, dword ptr [esi-4]
sub edi, 4
sub esi, 4
mov dword ptr [edi], eax
dec ecx
jnz ArgumentCopyLoop
ArgumentsCopied:
; edi = the start of argument destination
mov eax, dword ptr [esp+4+4] ; return address
mov ecx, dword ptr [esp+ExtraSpace+TargetAddr] ; target address
; build "target, return address" directly below the copied arguments so that
; the retn at JIT_TailCallLeave pops the target and leaves the return address
; on top of the stack for the target
mov dword ptr [edi-4], eax ; return address
mov dword ptr [edi-8], ecx ; target address
lea eax, [edi-8] ; new value for esp
pop esi
pop edi
pop ecx ; skip original return address
pop edx
pop ecx
mov esp, eax
PUBLIC JIT_TailCallLeave ; add a label here so that TailCallStubManager can access it
JIT_TailCallLeave:
retn ; Will branch to targetAddr. This matches the
; "call" done by JITted code, keeping the
; call-ret count balanced.
;----------------------------------------------------------------------
VSDTailCall:
;----------------------------------------------------------------------
; For the Virtual Stub Dispatch, we create a fake callsite to fool
; the callsite probes. In order to create the call site, we need to insert TailCallFrame
; if we do not have one already.
;
; ecx = nNewStackArgs
; esi = the end of argument source
; edi = the end of argument destination
;
; The stub has pushed the following onto the stack at this point :
; pThread, ecx, edx, orig retaddr, edi, esi
; If the caller's own return address is already JIT_TailCallReturnFromVSD,
; a TailCallFrame from an earlier tail call is taken to be in place.
cmp dword ptr [esp+OrigRetAddr], JIT_TailCallReturnFromVSD
jz VSDTailCallFrameInserted_DoSlideUpArgs ; There is an existing TailCallFrame that can be reused
; try to allocate space for the frame / check whether there is enough space
; If there is sufficient space, we will setup the frame and then slide
; the arguments up the stack. Else, we first need to slide the arguments
; down the stack to make space for the TailCallFrame
sub edi, (SIZEOF_GSCookie + SIZEOF_TailCallFrame)
cmp edi, esi
jae VSDSpaceForFrameChecked
; There is not sufficient space to wedge in the TailCallFrame without
; overwriting the new arguments.
; We need to allocate the extra space on the stack,
; and slide down the new arguments
; eax = number of bytes missing; grow the stack by that amount
mov eax, esi
sub eax, edi
sub esp, eax
mov eax, ecx ; to subtract the size of arguments
mov edx, ecx ; for counter
neg eax
; copy down the arguments to the final destination, need to copy all temporary storage as well
; edx = dword count: the new arguments plus everything below them down to
; the old esp; esi/edi are rewound to the start of their respective blocks
add edx, (ExtraSpace+NewArgs)/4
lea esi, [esi+4*eax-(ExtraSpace+NewArgs)]
lea edi, [edi+4*eax-(ExtraSpace+NewArgs)]
VSDAllocFrameCopyLoop:
mov eax, dword ptr [esi]
mov dword ptr [edi], eax
add esi, 4
add edi, 4
dec edx
jnz VSDAllocFrameCopyLoop
; the argument source and destination are same now
mov esi, edi
VSDSpaceForFrameChecked:
; At this point, we have enough space on the stack for the TailCallFrame,
; and we may already have slid down the arguments
; Fill in the cookie and the frame fields; edi points at the GSCookie slot
; and the frame follows it at edi+SIZEOF_GSCookie.
mov eax, _s_gsCookie ; GetProcessGSCookie()
mov dword ptr [edi], eax ; set GSCookie
mov eax, _g_TailCallFrameVptr ; vptr
mov edx, dword ptr [esp+OrigRetAddr] ; orig return address
mov dword ptr [edi+SIZEOF_GSCookie], eax ; TailCallFrame::vptr
mov dword ptr [edi+SIZEOF_GSCookie+28], edx ; TailCallFrame::m_ReturnAddress
mov eax, dword ptr [esp+CallersEdi] ; restored edi
mov edx, dword ptr [esp+CallersEsi] ; restored esi
mov dword ptr [edi+SIZEOF_GSCookie+12], eax ; TailCallFrame::m_regs::edi
mov dword ptr [edi+SIZEOF_GSCookie+16], edx ; TailCallFrame::m_regs::esi
mov dword ptr [edi+SIZEOF_GSCookie+20], ebx ; TailCallFrame::m_regs::ebx
mov dword ptr [edi+SIZEOF_GSCookie+24], ebp ; TailCallFrame::m_regs::ebp
mov ebx, dword ptr [esp+pThread] ; ebx = pThread
mov eax, dword ptr [ebx+Thread_m_pFrame]
lea edx, [edi+SIZEOF_GSCookie]
mov dword ptr [edi+SIZEOF_GSCookie+4], eax ; TailCallFrame::m_pNext
mov dword ptr [ebx+Thread_m_pFrame], edx ; hook the new frame into the chain
; setup ebp chain
lea ebp, [edi+SIZEOF_GSCookie+24] ; TailCallFrame::m_regs::ebp
; Do not copy arguments again if they are in place already
; Otherwise, we will need to slide the new arguments up the stack
cmp esi, edi
jne VSDTailCallFrameInserted_DoSlideUpArgs
; At this point, we must have already previously slid down the new arguments,
; or the TailCallFrame is a perfect fit
; set the caller address
mov edx, dword ptr [esp+ExtraSpace+RetAddr] ; caller address
mov dword ptr [edi+SIZEOF_GSCookie+8], edx ; TailCallFrame::m_CallerAddress
; adjust edi as it would by copying
; (edi = edi - 4*nNewStackArgs, i.e. the start of the argument destination)
neg ecx
lea edi, [edi+4*ecx]
jmp VSDArgumentsCopied
VSDTailCallFrameInserted_DoSlideUpArgs:
; set the caller address
mov edx, dword ptr [esp+ExtraSpace+RetAddr] ; caller address
mov dword ptr [edi+SIZEOF_GSCookie+8], edx ; TailCallFrame::m_CallerAddress
; copy the arguments to the final destination
test ecx, ecx
jz VSDArgumentsCopied
VSDArgumentCopyLoop:
mov eax, dword ptr [esi-4]
sub edi, 4
sub esi, 4
mov dword ptr [edi], eax
dec ecx
jnz VSDArgumentCopyLoop
VSDArgumentsCopied:
; edi = the start of argument destination
mov ecx, dword ptr [esp+ExtraSpace+TargetAddr] ; target address
; Below the arguments, from high to low address: the fake return address,
; the real target (read through the indirection cell), and the indirection
; cell address itself, which is popped into eax just before the retn.
mov dword ptr [edi-4], JIT_TailCallReturnFromVSD ; return address
mov dword ptr [edi-12], ecx ; address of indirection cell
mov ecx, [ecx]
mov dword ptr [edi-8], ecx ; target address
; skip original return address and saved esi, edi
add esp, 12
pop edx
pop ecx
lea esp, [edi-12] ; new value for esp
pop eax
PUBLIC JIT_TailCallVSDLeave ; add a label here so that TailCallStubManager can access it
JIT_TailCallVSDLeave:
retn ; Will branch to targetAddr. This matches the
; "call" done by JITted code, keeping the
; call-ret count balanced.
JIT_TailCall ENDP
;------------------------------------------------------------------------------
; HCIMPL2_VV(float, JIT_FltRem, float dividend, float divisor)
@JIT_FltRem@8 proc public
        ; Single-precision floating-point remainder, computed on the x87 stack.
        ;   In:    [esp+4] = divisor, [esp+8] = dividend (32-bit floats)
        ;   Out:   st(0)   = remainder produced by fprem
        ;   Clobb: ax, flags
        ;   The 8 bytes of stack arguments are released by the final retn.
        fld     dword ptr [esp+4]       ; st(0) = divisor
        fld     dword ptr [esp+8]       ; st(0) = dividend, st(1) = divisor
FltRem_Reduce:
        fprem                           ; partial remainder of st(0) by st(1)
        fstsw   ax                      ; copy the FPU status word into ax
        fwait
        sahf                            ; ah -> flags: C2 (status bit 10) lands in PF
        jp      FltRem_Reduce           ; C2 set: reduction incomplete, run fprem again
        fxch    st(1)                   ; st(0) = divisor, st(1) = remainder
        fstp    st(0)                   ; discard the divisor; remainder becomes st(0)
        retn    8                       ; return and pop both float arguments
@JIT_FltRem@8 endp
; HCIMPL2_VV(double, JIT_DblRem, double dividend, double divisor)
@JIT_DblRem@16 proc public
        ; Double-precision floating-point remainder, computed on the x87 stack.
        ;   In:    [esp+4]  = divisor, [esp+12] = dividend (64-bit doubles)
        ;   Out:   st(0)    = remainder produced by fprem
        ;   Clobb: ax, flags
        ;   The 16 bytes of stack arguments are released by the final retn.
        fld     qword ptr [esp+4]       ; st(0) = divisor
        fld     qword ptr [esp+12]      ; st(0) = dividend, st(1) = divisor
DblRem_Reduce:
        fprem                           ; partial remainder of st(0) by st(1)
        fstsw   ax                      ; copy the FPU status word into ax
        fwait
        sahf                            ; ah -> flags: C2 (status bit 10) lands in PF
        jp      DblRem_Reduce           ; C2 set: reduction incomplete, run fprem again
        fxch    st(1)                   ; st(0) = divisor, st(1) = remainder
        fstp    st(0)                   ; discard the divisor; remainder becomes st(0)
        retn    16                      ; return and pop both double arguments
@JIT_DblRem@16 endp
;------------------------------------------------------------------------------
; PatchedCodeStart and PatchedCodeEnd are used to determine bounds of patched code.
;
; Marker procedure: its address is the lower bound of the patched-code range.
; The body is a lone ret, so calling it is harmless.
_JIT_PatchedCodeStart@0 proc public
ret
_JIT_PatchedCodeStart@0 endp
ALIGN 4
;**********************************************************************
; Write barriers generated at runtime
; Marker procedure placed immediately before the write-barrier slots that are
; filled in at runtime. The body is a lone ret.
; NOTE(review): presumably its address is used as the start of the patched
; write-barrier group - confirm against the code that references this symbol.
PUBLIC _JIT_PatchedWriteBarrierGroup@0
_JIT_PatchedWriteBarrierGroup@0 PROC
ret
_JIT_PatchedWriteBarrierGroup@0 ENDP
; PatchedWriteBarrierHelper rg
;   Emits one 8-byte-aligned PUBLIC procedure whose name is _JIT_WriteBarrier,
;   followed by the text of rg, followed by @0. The body is not real code: it
;   is 48 bytes of 0CCh (the x86 int 3 opcode) reserving space that is
;   overwritten at runtime.
;   rg - text spliced into the procedure name by the & operator. Every
;        invocation must pass a different value, otherwise the same symbol is
;        defined more than once.
;   NOTE(review): 48 is presumably the maximum size of a generated barrier -
;   confirm against the code that copies barriers into these slots.
PatchedWriteBarrierHelper MACRO rg
ALIGN 8
PUBLIC _JIT_WriteBarrier&rg&@0
_JIT_WriteBarrier&rg&@0 PROC
; Just allocate space that will be filled in at runtime
db (48) DUP (0CCh)
_JIT_WriteBarrier&rg&@0 ENDP
ENDM
; Instantiate one patchable write-barrier slot per register name. The macro
; pastes its argument into the procedure name, so each invocation must pass a
; distinct register; with no argument all six expansions would define the same
; PUBLIC symbol _JIT_WriteBarrier@0 and the file would not assemble.
; The argument is case-sensitive here (option casemap:none), so the resulting
; symbols are _JIT_WriteBarrierEAX@0, _JIT_WriteBarrierEBX@0, and so on.
; NOTE(review): the register set (EDX omitted) follows the x86 write-barrier
; helper naming used by the runtime - confirm against the code that looks up
; these symbols when patching.
PatchedWriteBarrierHelper EAX
PatchedWriteBarrierHelper EBX
PatchedWriteBarrierHelper ECX
PatchedWriteBarrierHelper ESI
PatchedWriteBarrierHelper EDI
PatchedWriteBarrierHelper EBP
; Marker procedure placed immediately after the runtime-filled write-barrier
; slots. The body is a lone ret.
; NOTE(review): presumably its address is used as the end of the patched
; write-barrier group - confirm against the code that references this symbol.
PUBLIC _JIT_PatchedWriteBarrierGroup_End@0
_JIT_PatchedWriteBarrierGroup_End@0 PROC
ret
_JIT_PatchedWriteBarrierGroup_End@0 ENDP
; Marker procedure consisting of a lone ret, located after the write-barrier
; group and before _JIT_PatchedCodeEnd@0.
; NOTE(review): presumably the last procedure inside the patched-code range -
; confirm against the code that references this symbol.
_JIT_PatchedCodeLast@0 proc public
ret
_JIT_PatchedCodeLast@0 endp
; This is the first function outside the "keep together range". Used by BBT scripts.
; Marker procedure: its address is the upper bound of the patched-code range.
; The body is a lone ret, so calling it is harmless.
_JIT_PatchedCodeEnd@0 proc public
ret
_JIT_PatchedCodeEnd@0 endp
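; This is the ASM portion of JIT_IsInstanceOfInterface. It scans the instance's interface map
; for an exact match. If none is found, it returns null unless the MethodTable has any of the
; NonTrivialInterfaceCastFlags set, in which case it falls back on the
; JITutil_IsInstanceOfInterface helper (the slow path).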
;
; ARGUMENT_REG1 = array or interface to check for.
; ARGUMENT_REG2 = instance to be cast.
ALIGN 16
PUBLIC @JIT_IsInstanceOfInterface@8
@JIT_IsInstanceOfInterface@8 PROC
; Fast path for an "is instance of interface" test.
;   In:    ARGUMENT_REG1 (ecx) = type to look for in the interface map
;          ARGUMENT_REG2 (edx) = instance to test; may be null
;   Out:   eax = the instance when a map entry matches, 0 for a null instance
;          or when nothing matches and no NonTrivialInterfaceCastFlags bit is
;          set; otherwise whatever @JITutil_IsInstanceOfInterface@8 returns
;   Saved: ebx and esi are pushed on entry and popped on every exit path
;   ecx and edx are never written, so the fallback helper receives the
;   original arguments.
test ARGUMENT_REG2, ARGUMENT_REG2
; null instance: return null before anything has been pushed
jz IsNullInst
mov eax, [ARGUMENT_REG2] ; get MethodTable
push ebx
push esi
; ebx = number of interface map entries left to examine
movzx ebx, word ptr [eax+MethodTable_m_wNumInterfaces]
; check if this MT implements any interfaces
test ebx, ebx
jz IsInstanceOfInterfaceDoBizarre
; move Interface map ptr into eax
mov eax, [eax+MethodTable_m_pInterfaceMap]
IsInstanceOfInterfaceTop:
; eax -> current InterfaceInfo_t entry in interface map list
ifdef FEATURE_PREJIT
; With FEATURE_PREJIT the entry may be tagged: bit 0 set means the value
; (minus the tag) is the address of a cell holding the type to compare.
mov esi, [eax]
test esi, 1
; Move the dereference out of line so that this jump is correctly predicted for the case
; when there is no indirection
jnz IsInstanceOfInterfaceIndir
cmp ARGUMENT_REG1, esi
else
cmp ARGUMENT_REG1, [eax]
endif
je IsInstanceOfInterfaceFound
IsInstanceOfInterfaceNext:
; advance to the next map entry and count it off
add eax, SIZEOF_InterfaceInfo_t
dec ebx
jnz IsInstanceOfInterfaceTop
; fall through to DoBizarre
IsInstanceOfInterfaceDoBizarre:
; No exact match. Restore the saved registers first so that every path below
; leaves with a balanced stack.
pop esi
pop ebx
mov eax, [ARGUMENT_REG2] ; get MethodTable
; only types flagged as needing a non-trivial cast go to the helper
test dword ptr [eax+MethodTable_m_dwFlags], NonTrivialInterfaceCastFlags
jnz IsInstanceOfInterfaceNonTrivialCast
IsNullInst:
; result is null
xor eax,eax
ret
ifdef FEATURE_PREJIT
IsInstanceOfInterfaceIndir:
; esi has bit 0 set; [esi-1] strips the tag and loads the type from the cell
cmp ARGUMENT_REG1,[esi-1]
jne IsInstanceOfInterfaceNext
; equal: fall through to Found
endif
IsInstanceOfInterfaceFound:
pop esi
pop ebx
mov eax, ARGUMENT_REG2 ; the successful instance
ret
IsInstanceOfInterfaceNonTrivialCast:
; tail-jump: the helper returns directly to our caller
jmp @JITutil_IsInstanceOfInterface@8
@JIT_IsInstanceOfInterface@8 endp
; This is the ASM portion of JIT_ChkCastInterface. It scans the instance's interface map for
; an exact match. If none is found, it falls back on the JITutil_ChkCastInterface helper, so
; all failure cases take the slow path, too.
;
; ARGUMENT_REG1 = array or interface to check for.
; ARGUMENT_REG2 = instance to be cast.
ALIGN 16
PUBLIC @JIT_ChkCastInterface@8
@JIT_ChkCastInterface@8 PROC
; Fast path for a checked cast to an interface.
;   In:    ARGUMENT_REG1 (ecx) = type to look for in the interface map
;          ARGUMENT_REG2 (edx) = instance to cast; may be null
;   Out:   eax = the instance (or null when the instance is null) if the cast
;          is satisfied here; otherwise whatever @JITutil_ChkCastInterface@8
;          returns
;   Saved: ebx and esi are pushed on entry and popped on every exit path
;   ecx and edx are never written, so the fallback helper receives the
;   original arguments.
test ARGUMENT_REG2, ARGUMENT_REG2
; null instance: return it unchanged before anything has been pushed
jz ChkCastInterfaceIsNullInst
mov eax, [ARGUMENT_REG2] ; get MethodTable
push ebx
push esi
; ebx = number of interface map entries left to examine
movzx ebx, word ptr [eax+MethodTable_m_wNumInterfaces]
; speculatively move Interface map ptr into eax
mov eax, [eax+MethodTable_m_pInterfaceMap]
; check if this MT implements any interfaces
test ebx, ebx
jz ChkCastInterfaceDoBizarre
ChkCastInterfaceTop:
; eax -> current InterfaceInfo_t entry in interface map list
ifdef FEATURE_PREJIT
; With FEATURE_PREJIT the entry may be tagged: bit 0 set means the value
; (minus the tag) is the address of a cell holding the type to compare.
mov esi, [eax]
test esi, 1
; Move the dereference out of line so that this jump is correctly predicted for the case
; when there is no indirection
jnz ChkCastInterfaceIndir
cmp ARGUMENT_REG1, esi
else
cmp ARGUMENT_REG1, [eax]
endif
je ChkCastInterfaceFound
ChkCastInterfaceNext:
; advance to the next map entry and count it off
add eax, SIZEOF_InterfaceInfo_t
dec ebx
jnz ChkCastInterfaceTop
; fall through to DoBizarre
ChkCastInterfaceDoBizarre:
; No exact match: restore the saved registers and tail-jump to the helper,
; which returns directly to our caller.
pop esi
pop ebx
jmp @JITutil_ChkCastInterface@8
ifdef FEATURE_PREJIT
ChkCastInterfaceIndir:
; esi has bit 0 set; [esi-1] strips the tag and loads the type from the cell
cmp ARGUMENT_REG1,[esi-1]
jne ChkCastInterfaceNext
; equal: fall through to Found
endif
ChkCastInterfaceFound:
pop esi
pop ebx
ChkCastInterfaceIsNullInst:
mov eax, ARGUMENT_REG2 ; either null, or the successful instance
ret
@JIT_ChkCastInterface@8 endp
; Note that the debugger skips this entirely when doing SetIP,
; since COMPlusCheckForAbort should always return 0. Excep.cpp:LeaveCatch
; asserts that to be true. If this ends up doing more work, then the
; debugger may need additional support.
; void __stdcall JIT_EndCatch();
JIT_EndCatch PROC stdcall public
; Calls _COMPlusEndCatch@20 with the current ebp, ebx, edi, esi and the address
; of a stack slot holding this routine's return address, then resumes at the
; eip/esp that call hands back. This routine does not return through its own
; return address: it switches esp and jumps.
;   Clobb: ecx, edx, eax, esp (plus whatever _COMPlusEndCatch@20 clobbers)
; make temp storage for return address, and push the address of that
; as the last arg to COMPlusEndCatch
mov ecx, [esp]
push ecx; temp slot, initialised with our return address
push esp; argument: address of the temp slot (esp value before this push)
; push the rest of COMPlusEndCatch's args, right-to-left
push esi
push edi
push ebx
push ebp
call _COMPlusEndCatch@20 ; returns old esp value in eax, stores jump address
; The callee is stdcall and pops its 20 bytes of arguments, so the temp slot
; is on top of the stack again.
; now eax = new esp, [esp] = new eip
pop edx ; edx = new eip
mov esp, eax ; esp = new esp
jmp edx ; eip = new eip
JIT_EndCatch ENDP
end