Diffstat (limited to 'src/vm/i386/jithelp.asm')
 -rw-r--r--  src/vm/i386/jithelp.asm | 2574 +
 1 file changed, 2574 insertions(+), 0 deletions(-)
diff --git a/src/vm/i386/jithelp.asm b/src/vm/i386/jithelp.asm new file mode 100644 index 0000000000..ac767287ee --- /dev/null +++ b/src/vm/i386/jithelp.asm @@ -0,0 +1,2574 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. +; See the LICENSE file in the project root for more information. + +; ==++== +; + +; +; ==--== +; *********************************************************************** +; File: JIThelp.asm +; +; *********************************************************************** +; +; *** NOTE: If you make changes to this file, propagate the changes to +; jithelp.s in this directory + +; This contains JITinterface routines that are 100% x86 assembly + + .586 + .model flat + + include asmconstants.inc + + option casemap:none + .code +; +; <TODO>@TODO Switch to g_ephemeral_low and g_ephemeral_high +; @TODO instead of g_lowest_address, g_highest address</TODO> +; + +ARGUMENT_REG1 equ ecx +ARGUMENT_REG2 equ edx +g_ephemeral_low TEXTEQU <_g_ephemeral_low> +g_ephemeral_high TEXTEQU <_g_ephemeral_high> +g_lowest_address TEXTEQU <_g_lowest_address> +g_highest_address TEXTEQU <_g_highest_address> +g_card_table TEXTEQU <_g_card_table> +WriteBarrierAssert TEXTEQU <_WriteBarrierAssert@8> +JIT_LLsh TEXTEQU <_JIT_LLsh@0> +JIT_LRsh TEXTEQU <_JIT_LRsh@0> +JIT_LRsz TEXTEQU <_JIT_LRsz@0> +JIT_LMul TEXTEQU <@JIT_LMul@16> +JIT_Dbl2LngOvf TEXTEQU <@JIT_Dbl2LngOvf@8> +JIT_Dbl2Lng TEXTEQU <@JIT_Dbl2Lng@8> +JIT_Dbl2IntSSE2 TEXTEQU <@JIT_Dbl2IntSSE2@8> +JIT_Dbl2LngP4x87 TEXTEQU <@JIT_Dbl2LngP4x87@8> +JIT_Dbl2LngSSE3 TEXTEQU <@JIT_Dbl2LngSSE3@8> +JIT_InternalThrowFromHelper TEXTEQU <@JIT_InternalThrowFromHelper@4> +JIT_WriteBarrierReg_PreGrow TEXTEQU <_JIT_WriteBarrierReg_PreGrow@0> +JIT_WriteBarrierReg_PostGrow TEXTEQU <_JIT_WriteBarrierReg_PostGrow@0> +JIT_TailCall TEXTEQU <_JIT_TailCall@0> +JIT_TailCallLeave TEXTEQU <_JIT_TailCallLeave@0> +JIT_TailCallVSDLeave TEXTEQU <_JIT_TailCallVSDLeave@0> +JIT_TailCallHelper TEXTEQU <_JIT_TailCallHelper@4> +JIT_TailCallReturnFromVSD TEXTEQU <_JIT_TailCallReturnFromVSD@0> + +EXTERN g_ephemeral_low:DWORD +EXTERN g_ephemeral_high:DWORD +EXTERN g_lowest_address:DWORD +EXTERN g_highest_address:DWORD +EXTERN g_card_table:DWORD +ifdef _DEBUG +EXTERN WriteBarrierAssert:PROC +endif ; _DEBUG +EXTERN JIT_InternalThrowFromHelper:PROC +ifdef FEATURE_HIJACK +EXTERN JIT_TailCallHelper:PROC +endif +EXTERN _g_TailCallFrameVptr:DWORD +EXTERN @JIT_FailFast@0:PROC +EXTERN _s_gsCookie:DWORD +EXTERN @JITutil_IsInstanceOfInterface@8:PROC +EXTERN @JITutil_ChkCastInterface@8:PROC +EXTERN @JITutil_IsInstanceOfAny@8:PROC +EXTERN @JITutil_ChkCastAny@8:PROC +ifdef FEATURE_IMPLICIT_TLS +EXTERN _GetThread@0:PROC +endif + +ifdef WRITE_BARRIER_CHECK +; Those global variables are always defined, but should be 0 for Server GC +g_GCShadow TEXTEQU <?g_GCShadow@@3PAEA> +g_GCShadowEnd TEXTEQU <?g_GCShadowEnd@@3PAEA> +EXTERN g_GCShadow:DWORD +EXTERN g_GCShadowEnd:DWORD +INVALIDGCVALUE equ 0CCCCCCCDh +endif + +ifdef FEATURE_REMOTING +EXTERN _TransparentProxyStub_CrossContext@0:PROC +EXTERN _InContextTPQuickDispatchAsmStub@0:PROC +endif + +.686P +.XMM +; The following macro is needed because of a MASM issue with the +; movsd mnemonic +; +$movsd MACRO op1, op2 + LOCAL begin_movsd, end_movsd +begin_movsd: + movupd op1, op2 +end_movsd: + org begin_movsd + db 0F2h + org end_movsd +ENDM +.586 + +; The following macro is used to match the JITs +; multi-byte NOP sequence +$nop3 MACRO + db 090h + db 090h + db 090h +ENDM + + + 
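A note on the $movsd macro above, since the trick is easy to misread: MASM resolves the movsd mnemonic as the legacy string move, so the macro assembles the operands as MOVUPD (encoded 66 0F 10 /r) and then backs up with ORG and rewrites the first byte, turning the 66h prefix into F2h, which is the encoding of the SSE2 scalar MOVSD (F2 0F 10 /r). A minimal C illustration of that one-byte patch (array names are ours; byte values per the Intel manuals):

    #include <stdio.h>
    #include <string.h>

    /* movupd xmm, xmm/m128 and the SSE2 scalar movsd xmm, m64 differ
       only in the mandatory prefix byte, so patching one byte suffices. */
    static const unsigned char movupd_load[3] = { 0x66, 0x0F, 0x10 };
    static const unsigned char movsd_load[3]  = { 0xF2, 0x0F, 0x10 };

    int main(void)
    {
        unsigned char patched[3];
        memcpy(patched, movupd_load, 3);
        patched[0] = 0xF2;   /* what the org/db pair in the macro does */
        printf("patched == movsd encoding: %d\n",
               memcmp(patched, movsd_load, 3) == 0);
        return 0;
    }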
+;***
+;JIT_WriteBarrier* - GC write barrier helper
+;
+;Purpose:
+; Helper calls in order to assign an object to a field
+; Enables book-keeping of the GC.
+;
+;Entry:
+; EDX - address of ref-field (assigned to)
+; the resp. other reg - RHS of assignment
+;
+;Exit:
+;
+;Uses:
+; EDX is destroyed.
+;
+;Exceptions:
+;
+;*******************************************************************************
+
+; The code here is tightly coupled with AdjustContextForWriteBarrier, if you change
+; anything here, you might need to change AdjustContextForWriteBarrier as well
+WriteBarrierHelper MACRO rg
+ ALIGN 4
+
+ ;; The entry point is the fully 'safe' one in which we check if EDX (the REF
+ ;; being updated) is actually in the GC heap
+
+PUBLIC _JIT_CheckedWriteBarrier&rg&@0
+_JIT_CheckedWriteBarrier&rg&@0 PROC
+ ;; check if the REF being updated is in the GC heap
+ cmp edx, g_lowest_address
+ jb WriteBarrier_NotInHeap_&rg
+ cmp edx, g_highest_address
+ jae WriteBarrier_NotInHeap_&rg
+
+ ;; fall through to unchecked routine
+ ;; note that its entry point also happens to be aligned
+
+ifdef WRITE_BARRIER_CHECK
+ ;; This entry point is used when you know the REF pointer being updated
+ ;; is in the GC heap
+PUBLIC _JIT_DebugWriteBarrier&rg&@0
+_JIT_DebugWriteBarrier&rg&@0:
+endif
+
+ifdef _DEBUG
+ push edx
+ push ecx
+ push eax
+
+ push rg
+ push edx
+ call WriteBarrierAssert
+
+ pop eax
+ pop ecx
+ pop edx
+endif ;_DEBUG
+
+ ; in the !WRITE_BARRIER_CHECK case this will be the move for all
+ ; addresses in the GCHeap, addresses outside the GCHeap will get
+ ; taken care of below at WriteBarrier_NotInHeap_&rg
+
+ifndef WRITE_BARRIER_CHECK
+ mov DWORD PTR [edx], rg
+endif
+
+ifdef WRITE_BARRIER_CHECK
+ ; Test dest here so that if it is bad, the AV happens before we change
+ ; register/stack state. This makes the job of AdjustContextForWriteBarrier easier.
+ cmp [edx], 0
+ ;; ALSO update the shadow GC heap if that is enabled
+ ; Make ebp into the temporary src register. We need to do this so that we can use ecx
+ ; in the calculation of the shadow GC address, but still have access to the src register
+ push ecx
+ push ebp
+ mov ebp, rg
+
+ ; if g_GCShadow is 0, don't perform the check
+ cmp g_GCShadow, 0
+ je WriteBarrier_NoShadow_&rg
+
+ mov ecx, edx
+ sub ecx, g_lowest_address ; U/V
+ jb WriteBarrier_NoShadow_&rg
+ add ecx, [g_GCShadow]
+ cmp ecx, [g_GCShadowEnd]
+ ja WriteBarrier_NoShadow_&rg
+
+ ; TODO: In Orcas timeframe if we move to P4+ only on X86 we should enable
+ ; mfence barriers on either side of these two writes to make sure that
+ ; they stay as close together as possible
+
+ ; edx contains address in GC
+ ; ecx contains address in ShadowGC
+ ; ebp temporarily becomes the src register
+
+ ;; When we're writing to the shadow GC heap we want to be careful to minimize
+ ;; the risk of a race that can occur here where the GC and ShadowGC don't match
+ mov DWORD PTR [edx], ebp
+ mov DWORD PTR [ecx], ebp
+
+ ;; We need a scratch register to verify the shadow heap. We also need to
+ ;; construct a memory barrier so that the write to the shadow heap happens
+ ;; before the read from the GC heap. We can do both by using SUB/XCHG
+ ;; rather than PUSH.
+ ;;
+ ;; TODO: Should be changed to a push if the mfence described above is added.
+ ;;
+ sub esp, 4
+ xchg [esp], eax
+
+ ;; As part of our race avoidance (see above) we will now check whether the values
+ ;; in the GC and ShadowGC match.
There is a small chance the check is wrong here (the race
+ ;; above), but erring on the aggressive side is the better trade-off: skipping the
+ ;; check would mask cases where someone updates a GC ref without going through a
+ ;; write barrier. Bugs like that are indeterminate by nature, and this check finds
+ ;; real ones, whereas a false mismatch only costs an investigation that concludes
+ ;; the check fired spuriously
+ mov eax, [edx]
+ cmp [ecx], eax
+ je WriteBarrier_CleanupShadowCheck_&rg
+ mov [ecx], INVALIDGCVALUE
+
+WriteBarrier_CleanupShadowCheck_&rg:
+ pop eax
+
+ jmp WriteBarrier_ShadowCheckEnd_&rg
+
+WriteBarrier_NoShadow_&rg:
+ ; If we come here then we haven't written the value to the GC and need to.
+ ; ebp contains rg
+ ; We restore ebp/ecx immediately after this, and if either of them is the src
+ ; register it will regain its value as the src register.
+ mov DWORD PTR [edx], ebp
+WriteBarrier_ShadowCheckEnd_&rg:
+ pop ebp
+ pop ecx
+endif
+ cmp rg, g_ephemeral_low
+ jb WriteBarrier_NotInEphemeral_&rg
+ cmp rg, g_ephemeral_high
+ jae WriteBarrier_NotInEphemeral_&rg
+
+ shr edx, 10
+ add edx, [g_card_table]
+ cmp BYTE PTR [edx], 0FFh
+ jne WriteBarrier_UpdateCardTable_&rg
+ ret
+
+WriteBarrier_UpdateCardTable_&rg:
+ mov BYTE PTR [edx], 0FFh
+ ret
+
+WriteBarrier_NotInHeap_&rg:
+ ; If it wasn't in the heap then we haven't updated the dst in memory yet
+ mov DWORD PTR [edx], rg
+WriteBarrier_NotInEphemeral_&rg:
+ ; If it is in the GC Heap but isn't in the ephemeral range we've already
+ ; updated the Heap with the Object*.
+ ret
+_JIT_CheckedWriteBarrier&rg&@0 ENDP
+
+ENDM
+
+
+;***
+;JIT_ByRefWriteBarrier* - GC write barrier helper
+;
+;Purpose:
+; Helper calls in order to assign an object to a byref field
+; Enables book-keeping of the GC.
+;
+;Entry:
+; EDI - address of ref-field (assigned to)
+; ESI - address of the data (source)
+; ECX can be trashed
+;
+;Exit:
+;
+;Uses:
+; EDI and ESI are incremented by a DWORD
+;
+;Exceptions:
+;
+;*******************************************************************************
+
+; The code here is tightly coupled with AdjustContextForWriteBarrier, if you change
+; anything here, you might need to change AdjustContextForWriteBarrier as well
+
+ByRefWriteBarrierHelper MACRO
+ ALIGN 4
+PUBLIC _JIT_ByRefWriteBarrier@0
+_JIT_ByRefWriteBarrier@0 PROC
+ ;;test for dest in range
+ mov ecx, [esi]
+ cmp edi, g_lowest_address
+ jb ByRefWriteBarrier_NotInHeap
+ cmp edi, g_highest_address
+ jae ByRefWriteBarrier_NotInHeap
+
+ifndef WRITE_BARRIER_CHECK
+ ;;write barrier
+ mov [edi],ecx
+endif
+
+ifdef WRITE_BARRIER_CHECK
+ ; Test dest here so that if it is bad, the AV happens before we change
+ ; register/stack state. This makes the job of AdjustContextForWriteBarrier easier.
+ cmp [edi], 0 + + ;; ALSO update the shadow GC heap if that is enabled + + ; use edx for address in GC Shadow, + push edx + + ;if g_GCShadow is 0, don't do the update + cmp g_GCShadow, 0 + je ByRefWriteBarrier_NoShadow + + mov edx, edi + sub edx, g_lowest_address ; U/V + jb ByRefWriteBarrier_NoShadow + add edx, [g_GCShadow] + cmp edx, [g_GCShadowEnd] + ja ByRefWriteBarrier_NoShadow + + ; TODO: In Orcas timeframe if we move to P4+ only on X86 we should enable + ; mfence barriers on either side of these two writes to make sure that + ; they stay as close together as possible + + ; edi contains address in GC + ; edx contains address in ShadowGC + ; ecx is the value to assign + + ;; When we're writing to the shadow GC heap we want to be careful to minimize + ;; the risk of a race that can occur here where the GC and ShadowGC don't match + mov DWORD PTR [edi], ecx + mov DWORD PTR [edx], ecx + + ;; We need a scratch register to verify the shadow heap. We also need to + ;; construct a memory barrier so that the write to the shadow heap happens + ;; before the read from the GC heap. We can do both by using SUB/XCHG + ;; rather than PUSH. + ;; + ;; TODO: Should be changed to a push if the mfence described above is added. + ;; + sub esp, 4 + xchg [esp], eax + + ;; As part of our race avoidance (see above) we will now check whether the values + ;; in the GC and ShadowGC match. There is a possibility that we're wrong here but + ;; being overaggressive means we might mask a case where someone updates GC refs + ;; without going to a write barrier, but by its nature it will be indeterminant + ;; and we will find real bugs whereas the current implementation is indeterminant + ;; but only leads to investigations that find that this code is fundamentally flawed + + mov eax, [edi] + cmp [edx], eax + je ByRefWriteBarrier_CleanupShadowCheck + mov [edx], INVALIDGCVALUE +ByRefWriteBarrier_CleanupShadowCheck: + pop eax + jmp ByRefWriteBarrier_ShadowCheckEnd + +ByRefWriteBarrier_NoShadow: + ; If we come here then we haven't written the value to the GC and need to. + mov DWORD PTR [edi], ecx + +ByRefWriteBarrier_ShadowCheckEnd: + pop edx +endif + ;;test for *src in ephemeral segement + cmp ecx, g_ephemeral_low + jb ByRefWriteBarrier_NotInEphemeral + cmp ecx, g_ephemeral_high + jae ByRefWriteBarrier_NotInEphemeral + + mov ecx, edi + add esi,4 + add edi,4 + + shr ecx, 10 + add ecx, [g_card_table] + cmp byte ptr [ecx], 0FFh + jne ByRefWriteBarrier_UpdateCardTable + ret +ByRefWriteBarrier_UpdateCardTable: + mov byte ptr [ecx], 0FFh + ret + +ByRefWriteBarrier_NotInHeap: + ; If it wasn't in the heap then we haven't updated the dst in memory yet + mov [edi],ecx +ByRefWriteBarrier_NotInEphemeral: + ; If it is in the GC Heap but isn't in the ephemeral range we've already + ; updated the Heap with the Object*. + add esi,4 + add edi,4 + ret +_JIT_ByRefWriteBarrier@0 ENDP +ENDM + +;******************************************************************************* +; Write barrier wrappers with fcall calling convention +; +UniversalWriteBarrierHelper MACRO name + ALIGN 4 +PUBLIC @JIT_&name&@8 +@JIT_&name&@8 PROC + mov eax,edx + mov edx,ecx + jmp _JIT_&name&EAX@0 +@JIT_&name&@8 ENDP +ENDM + +; WriteBarrierStart and WriteBarrierEnd are used to determine bounds of +; WriteBarrier functions so can determine if got AV in them. 
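Before the bounds markers that follow, here is a C sketch of the fast path the WriteBarrierHelper expansions above implement (store, ephemeral-range filter, then card marking). The globals mirror the asm symbols and the 1 KB card granularity comes from the shr edx, 10; this is an illustration of the logic, not code the runtime compiles:

    #include <stdint.h>

    /* Stand-ins for the runtime globals referenced by the asm. */
    uint8_t *g_lowest_address, *g_highest_address;
    uint8_t *g_ephemeral_low, *g_ephemeral_high;
    uint8_t *g_card_table;

    /* Sketch of _JIT_CheckedWriteBarrier<reg>@0: dst arrives in EDX,
       ref in the per-register RHS. */
    void checked_write_barrier(void **dst, void *ref)
    {
        if ((uint8_t *)dst < g_lowest_address ||
            (uint8_t *)dst >= g_highest_address) {
            *dst = ref;              /* not in the GC heap: plain store */
            return;
        }
        *dst = ref;
        /* Only stores of ephemeral (young) objects can create the
           cross-generation pointers the card table tracks. */
        if ((uint8_t *)ref < g_ephemeral_low ||
            (uint8_t *)ref >= g_ephemeral_high)
            return;
        /* One card byte covers 1 KB of heap; test before writing so an
           already-dirty cache line is not dirtied again. */
        uint8_t *card = g_card_table + ((uintptr_t)dst >> 10);
        if (*card != 0xFF)
            *card = 0xFF;
    }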
+; +PUBLIC _JIT_WriteBarrierStart@0 +_JIT_WriteBarrierStart@0 PROC +ret +_JIT_WriteBarrierStart@0 ENDP + +ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS +; Only define these if we're using the ASM GC write barriers; if this flag is not defined, +; we'll use C++ versions of these write barriers. +UniversalWriteBarrierHelper <CheckedWriteBarrier> +UniversalWriteBarrierHelper <WriteBarrier> +endif + +WriteBarrierHelper <EAX> +WriteBarrierHelper <EBX> +WriteBarrierHelper <ECX> +WriteBarrierHelper <ESI> +WriteBarrierHelper <EDI> +WriteBarrierHelper <EBP> + +ByRefWriteBarrierHelper + +PUBLIC _JIT_WriteBarrierLast@0 +_JIT_WriteBarrierLast@0 PROC +ret +_JIT_WriteBarrierLast@0 ENDP + +; This is the first function outside the "keep together range". Used by BBT scripts. +PUBLIC _JIT_WriteBarrierEnd@0 +_JIT_WriteBarrierEnd@0 PROC +ret +_JIT_WriteBarrierEnd@0 ENDP + +;*********************************************************************/ +; In cases where we support it we have an optimized GC Poll callback. Normall (when we're not trying to +; suspend for GC, the CORINFO_HELP_POLL_GC helper points to this nop routine. When we're ready to suspend +; for GC, we whack the Jit Helper table entry to point to the real helper. When we're done with GC we +; whack it back. +PUBLIC @JIT_PollGC_Nop@0 +@JIT_PollGC_Nop@0 PROC +ret +@JIT_PollGC_Nop@0 ENDP + +;*********************************************************************/ +;llshl - long shift left +; +;Purpose: +; Does a Long Shift Left (signed and unsigned are identical) +; Shifts a long left any number of bits. +; +; NOTE: This routine has been adapted from the Microsoft CRTs. +; +;Entry: +; EDX:EAX - long value to be shifted +; ECX - number of bits to shift by +; +;Exit: +; EDX:EAX - shifted value +; + ALIGN 16 +PUBLIC JIT_LLsh +JIT_LLsh PROC +; Handle shifts of between bits 0 and 31 + cmp ecx, 32 + jae short LLshMORE32 + shld edx,eax,cl + shl eax,cl + ret +; Handle shifts of between bits 32 and 63 +LLshMORE32: + ; The x86 shift instructions only use the lower 5 bits. + mov edx,eax + xor eax,eax + shl edx,cl + ret +JIT_LLsh ENDP + + +;*********************************************************************/ +;LRsh - long shift right +; +;Purpose: +; Does a signed Long Shift Right +; Shifts a long right any number of bits. +; +; NOTE: This routine has been adapted from the Microsoft CRTs. +; +;Entry: +; EDX:EAX - long value to be shifted +; ECX - number of bits to shift by +; +;Exit: +; EDX:EAX - shifted value +; + ALIGN 16 +PUBLIC JIT_LRsh +JIT_LRsh PROC +; Handle shifts of between bits 0 and 31 + cmp ecx, 32 + jae short LRshMORE32 + shrd eax,edx,cl + sar edx,cl + ret +; Handle shifts of between bits 32 and 63 +LRshMORE32: + ; The x86 shift instructions only use the lower 5 bits. + mov eax,edx + sar edx, 31 + sar eax,cl + ret +JIT_LRsh ENDP + + +;*********************************************************************/ +; LRsz: +;Purpose: +; Does a unsigned Long Shift Right +; Shifts a long right any number of bits. +; +; NOTE: This routine has been adapted from the Microsoft CRTs. +; +;Entry: +; EDX:EAX - long value to be shifted +; ECX - number of bits to shift by +; +;Exit: +; EDX:EAX - shifted value +; + ALIGN 16 +PUBLIC JIT_LRsz +JIT_LRsz PROC +; Handle shifts of between bits 0 and 31 + cmp ecx, 32 + jae short LRszMORE32 + shrd eax,edx,cl + shr edx,cl + ret +; Handle shifts of between bits 32 and 63 +LRszMORE32: + ; The x86 shift instructions only use the lower 5 bits. 
+ mov eax,edx
+ xor edx,edx
+ shr eax,cl
+ ret
+JIT_LRsz ENDP
+
+;*********************************************************************/
+; LMul:
+;Purpose:
+; Does a long multiply (same for signed/unsigned)
+;
+; NOTE: This routine has been adapted from the Microsoft CRTs.
+;
+;Entry:
+; Parameters are passed on the stack:
+; 1st pushed: multiplier (QWORD)
+; 2nd pushed: multiplicand (QWORD)
+;
+;Exit:
+; EDX:EAX - product of multiplier and multiplicand
+;
+ ALIGN 16
+PUBLIC JIT_LMul
+JIT_LMul PROC
+
+; AHI, BHI : upper 32 bits of A and B
+; ALO, BLO : lower 32 bits of A and B
+;
+; ALO * BLO
+; ALO * BHI
+; + BLO * AHI
+; ---------------------
+
+ mov eax,[esp + 8] ; AHI
+ mov ecx,[esp + 16] ; BHI
+ or ecx,eax ;test for both hiwords zero.
+ mov ecx,[esp + 12] ; BLO
+ jnz LMul_hard ;at least one hiword is non-zero - take the long path
+
+ ; both hiwords are zero - the product is just ALO * BLO
+ mov eax,[esp + 4]
+ mul ecx
+
+ ret 16 ; callee restores the stack
+
+LMul_hard:
+ push ebx
+
+ mul ecx ;eax has AHI, ecx has BLO, so AHI * BLO
+ mov ebx,eax ;save result
+
+ mov eax,[esp + 8] ; ALO
+ mul dword ptr [esp + 20] ;ALO * BHI
+ add ebx,eax ;ebx = ((ALO * BHI) + (AHI * BLO))
+
+ mov eax,[esp + 8] ; ALO ;ecx = BLO
+ mul ecx ;so edx:eax = ALO*BLO
+ add edx,ebx ;now edx has all the LO*HI stuff
+
+ pop ebx
+
+ ret 16 ; callee restores the stack
+
+JIT_LMul ENDP
+
+;*********************************************************************/
+; JIT_Dbl2LngOvf
+
+;Purpose:
+; converts a double to a long truncating toward zero (C semantics)
+; with check for overflow
+;
+; uses stdcall calling conventions
+;
+PUBLIC JIT_Dbl2LngOvf
+JIT_Dbl2LngOvf PROC
+ fnclex
+ fld qword ptr [esp+4]
+ push ecx
+ push ecx
+ fstp qword ptr [esp]
+ call JIT_Dbl2Lng
+ mov ecx,eax
+ fnstsw ax
+ test ax,01h
+ jnz Dbl2LngOvf_throw
+ mov eax,ecx
+ ret 8
+
+Dbl2LngOvf_throw:
+ mov ECX, CORINFO_OverflowException_ASM
+ call JIT_InternalThrowFromHelper
+ ret 8
+JIT_Dbl2LngOvf ENDP
+
+;*********************************************************************/
+; JIT_Dbl2Lng
+
+;Purpose:
+; converts a double to a long truncating toward zero (C semantics)
+;
+; uses stdcall calling conventions
+;
+; note that changing the rounding mode is very expensive. This
+; routine basically does the truncation semantics without changing
+; the rounding mode, resulting in a win.
+;
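An aside before the body of JIT_Dbl2Lng: JIT_LMul above relies on the identity that, modulo 2^64, the product needs only three 32x32 multiplies, because AHI*BHI lands entirely above bit 63, and the low 64 bits come out the same for signed and unsigned inputs. A C sketch of the identity (the function name is ours):

    #include <stdint.h>

    /* (AHI:ALO) * (BHI:BLO) mod 2^64
         = ALO*BLO + ((ALO*BHI + AHI*BLO) << 32)  */
    uint64_t lmul_sketch(uint64_t a, uint64_t b)
    {
        uint32_t alo = (uint32_t)a, ahi = (uint32_t)(a >> 32);
        uint32_t blo = (uint32_t)b, bhi = (uint32_t)(b >> 32);

        uint64_t prod = (uint64_t)alo * blo;          /* the final mul */
        prod += ((uint64_t)alo * bhi) << 32;          /* ALO * BHI     */
        prod += ((uint64_t)ahi * blo) << 32;          /* AHI * BLO     */
        return prod;   /* the same bits JIT_LMul returns in EDX:EAX */
    }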
+PUBLIC JIT_Dbl2Lng
+JIT_Dbl2Lng PROC
+ fld qword ptr[ESP+4] ; fetch arg
+ lea ecx,[esp-8]
+ sub esp,16 ; allocate frame
+ and ecx,-8 ; align pointer on boundary of 8
+ fld st(0) ; duplicate top of stack
+ fistp qword ptr[ecx] ; leave arg on stack, also save in temp
+ fild qword ptr[ecx] ; arg, round(arg) now on stack
+ mov edx,[ecx+4] ; high dword of integer
+ mov eax,[ecx] ; low dword of integer
+ test eax,eax
+ je integer_QNaN_or_zero
+
+arg_is_not_integer_QNaN:
+ fsubp st(1),st ; TOS=d-round(d),
+ ; { st(1)=st(1)-st & pop ST }
+ test edx,edx ; what's sign of integer
+ jns positive
+ ; number is negative
+ ; dead cycle
+ ; dead cycle
+ fstp dword ptr[ecx] ; result of subtraction
+ mov ecx,[ecx] ; dword of difference(single precision)
+ add esp,16
+ xor ecx,80000000h
+ add ecx,7fffffffh ; if difference>0 then increment integer
+ adc eax,0 ; inc eax (add CARRY flag)
+ adc edx,0 ; propagate carry flag to upper bits
+ ret 8
+
+positive:
+ fstp dword ptr[ecx] ; result of subtraction
+ mov ecx,[ecx] ; dword of difference (single precision)
+ add esp,16
+ add ecx,7fffffffh ; if difference<0 then decrement integer
+ sbb eax,0 ; dec eax (subtract CARRY flag)
+ sbb edx,0 ; propagate carry flag to upper bits
+ ret 8
+
+integer_QNaN_or_zero:
+ test edx,7fffffffh
+ jnz arg_is_not_integer_QNaN
+ fstp st(0) ;; pop round(arg)
+ fstp st(0) ;; arg
+ add esp,16
+ ret 8
+JIT_Dbl2Lng ENDP
+
+;*********************************************************************/
+; JIT_Dbl2LngP4x87
+
+;Purpose:
+; converts a double to a long truncating toward zero (C semantics)
+;
+; uses stdcall calling conventions
+;
+; This code is faster on a P4 than the Dbl2Lng code above, but is
+; slower on a PIII. Hence we choose this code when on a P4 or above.
+;
+PUBLIC JIT_Dbl2LngP4x87
+JIT_Dbl2LngP4x87 PROC
+arg1 equ <[esp+0Ch]>
+
+ sub esp, 8 ; get some local space
+
+ fld qword ptr arg1 ; fetch arg
+ fnstcw word ptr arg1 ; store FPCW
+ movzx eax, word ptr arg1 ; zero extend - wide
+ or ah, 0Ch ; set rounding control (FPCW bits 10-11) to 11b = truncate
+ mov dword ptr [esp], eax ; store new FPCW bits
+ fldcw word ptr [esp] ; reload FPCW with new bits
+ fistp qword ptr [esp] ; convert
+ mov eax, dword ptr [esp] ; reload FP result
+ mov edx, dword ptr [esp+4] ;
+ fldcw word ptr arg1 ; reload original FPCW value
+
+ add esp, 8 ; restore stack
+
+ ret 8
+JIT_Dbl2LngP4x87 ENDP
+
+;*********************************************************************/
+; JIT_Dbl2LngSSE3
+
+;Purpose:
+; converts a double to a long truncating toward zero (C semantics)
+;
+; uses stdcall calling conventions
+;
+; This code is faster than the P4 x87 code above on Intel processors
+; with SSE3 support (Core2, Atom, and later)
+;
+.686P
+.XMM
+PUBLIC JIT_Dbl2LngSSE3
+JIT_Dbl2LngSSE3 PROC
+arg1 equ <[esp+0Ch]>
+
+ sub esp, 8 ; get some local space
+
+ fld qword ptr arg1 ; fetch arg
+ fisttp qword ptr [esp] ; convert
+ mov eax, dword ptr [esp] ; reload FP result
+ mov edx, dword ptr [esp+4]
+
+ add esp, 8 ; restore stack
+
+ ret 8
+JIT_Dbl2LngSSE3 ENDP
+.586
+
+;*********************************************************************/
+; JIT_Dbl2IntSSE2
+
+;Purpose:
+; converts a double to a long truncating toward zero (C semantics)
+;
+; uses stdcall calling conventions
+;
+; This code is even faster than the P4 x87 code in JIT_Dbl2LngP4x87,
+; but only returns a 32 bit value (only good for int).
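All of the Dbl2Lng variants above compute the same result: the C-style cast (long long)d, i.e. truncation toward zero. A portable C analogy for the control-word dance in JIT_Dbl2LngP4x87, with fenv.h standing in for the fnstcw/fldcw pair (an analogy, not the runtime's code):

    #include <fenv.h>
    #include <math.h>
    #include <stdio.h>

    /* llrint() honors the current rounding mode, so temporarily selecting
       round-toward-zero reproduces C cast semantics - the same effect the
       asm gets by setting the x87 rounding-control bits. */
    long long dbl2lng_sketch(double d)
    {
        int old_mode = fegetround();
        fesetround(FE_TOWARDZERO);
        long long result = llrint(d);
        fesetround(old_mode);
        return result;
    }

    int main(void)
    {
        printf("%lld\n", dbl2lng_sketch(-3.9));   /* -3, not -4 */
        return 0;
    }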
+; +.686P +.XMM +PUBLIC JIT_Dbl2IntSSE2 +JIT_Dbl2IntSSE2 PROC + $movsd xmm0, [esp+4] + cvttsd2si eax, xmm0 + ret 8 +JIT_Dbl2IntSSE2 ENDP +.586 + + +;*********************************************************************/ +; This is the small write barrier thunk we use when we know the +; ephemeral generation is higher in memory than older generations. +; The 0x0F0F0F0F values are bashed by the two functions above. +; This the generic version - wherever the code says ECX, +; the specific register is patched later into a copy +; Note: do not replace ECX by EAX - there is a smaller encoding for +; the compares just for EAX, which won't work for other registers. +; +; READ THIS!!!!!! +; it is imperative that the addresses of of the values that we overwrite +; (card table, ephemeral region ranges, etc) are naturally aligned since +; there are codepaths that will overwrite these values while the EE is running. +; +PUBLIC JIT_WriteBarrierReg_PreGrow +JIT_WriteBarrierReg_PreGrow PROC + mov DWORD PTR [edx], ecx + cmp ecx, 0F0F0F0F0h + jb NoWriteBarrierPre + + shr edx, 10 + nop ; padding for alignment of constant + cmp byte ptr [edx+0F0F0F0F0h], 0FFh + jne WriteBarrierPre +NoWriteBarrierPre: + ret + nop ; padding for alignment of constant + nop ; padding for alignment of constant +WriteBarrierPre: + mov byte ptr [edx+0F0F0F0F0h], 0FFh + ret +JIT_WriteBarrierReg_PreGrow ENDP + +;*********************************************************************/ +; This is the larger write barrier thunk we use when we know that older +; generations may be higher in memory than the ephemeral generation +; The 0x0F0F0F0F values are bashed by the two functions above. +; This the generic version - wherever the code says ECX, +; the specific register is patched later into a copy +; Note: do not replace ECX by EAX - there is a smaller encoding for +; the compares just for EAX, which won't work for other registers. +; NOTE: we need this aligned for our validation to work properly + ALIGN 4 +PUBLIC JIT_WriteBarrierReg_PostGrow +JIT_WriteBarrierReg_PostGrow PROC + mov DWORD PTR [edx], ecx + cmp ecx, 0F0F0F0F0h + jb NoWriteBarrierPost + cmp ecx, 0F0F0F0F0h + jae NoWriteBarrierPost + + shr edx, 10 + nop ; padding for alignment of constant + cmp byte ptr [edx+0F0F0F0F0h], 0FFh + jne WriteBarrierPost +NoWriteBarrierPost: + ret + nop ; padding for alignment of constant + nop ; padding for alignment of constant +WriteBarrierPost: + mov byte ptr [edx+0F0F0F0F0h], 0FFh + ret +JIT_WriteBarrierReg_PostGrow ENDP + +;*********************************************************************/ +; + + ; a fake virtual stub dispatch register indirect callsite + $nop3 + call dword ptr [eax] + + +PUBLIC JIT_TailCallReturnFromVSD +JIT_TailCallReturnFromVSD: +ifdef _DEBUG + nop ; blessed callsite +endif + call VSDHelperLabel ; keep call-ret count balanced. +VSDHelperLabel: + +; Stack at this point : +; ... 
+; m_ReturnAddress +; m_regs +; m_CallerAddress +; m_pThread +; vtbl +; GSCookie +; &VSDHelperLabel +OffsetOfTailCallFrame = 8 + +; ebx = pThread + +ifdef _DEBUG + mov esi, _s_gsCookie ; GetProcessGSCookie() + cmp dword ptr [esp+OffsetOfTailCallFrame-SIZEOF_GSCookie], esi + je TailCallFrameGSCookieIsValid + call @JIT_FailFast@0 + TailCallFrameGSCookieIsValid: +endif + ; remove the padding frame from the chain + mov esi, dword ptr [esp+OffsetOfTailCallFrame+4] ; esi = TailCallFrame::m_Next + mov dword ptr [ebx + Thread_m_pFrame], esi + + ; skip the frame + add esp, 20 ; &VSDHelperLabel, GSCookie, vtbl, m_Next, m_CallerAddress + + pop edi ; restore callee saved registers + pop esi + pop ebx + pop ebp + + ret ; return to m_ReturnAddress + +;------------------------------------------------------------------------------ +; + +PUBLIC JIT_TailCall +JIT_TailCall PROC + +; the stack layout at this point is: +; +; ebp+8+4*nOldStackArgs <- end of argument destination +; ... ... +; ebp+8+ old args (size is nOldStackArgs) +; ... ... +; ebp+8 <- start of argument destination +; ebp+4 ret addr +; ebp+0 saved ebp +; ebp-c saved ebx, esi, edi (if have callee saved regs = 1) +; +; other stuff (local vars) in the jitted callers' frame +; +; esp+20+4*nNewStackArgs <- end of argument source +; ... ... +; esp+20+ new args (size is nNewStackArgs) to be passed to the target of the tail-call +; ... ... +; esp+20 <- start of argument source +; esp+16 nOldStackArgs +; esp+12 nNewStackArgs +; esp+8 flags (1 = have callee saved regs, 2 = virtual stub dispatch) +; esp+4 target addr +; esp+0 retaddr +; +; If you change this function, make sure you update code:TailCallStubManager as well. + +RetAddr equ 0 +TargetAddr equ 4 +nNewStackArgs equ 12 +nOldStackArgs equ 16 +NewArgs equ 20 + +; extra space is incremented as we push things on the stack along the way +ExtraSpace = 0 + + call _GetThread@0; eax = Thread* + push eax ; Thread* + + ; save ArgumentRegisters + push ecx + push edx + +ExtraSpace = 12 ; pThread, ecx, edx + +ifdef FEATURE_HIJACK + ; Make sure that the EE does have the return address patched. So we can move it around. 
+ test dword ptr [eax+Thread_m_State], TS_Hijacked_ASM + jz NoHijack + + ; JIT_TailCallHelper(Thread *) + push eax + call JIT_TailCallHelper ; this is __stdcall + +NoHijack: +endif + + mov edx, dword ptr [esp+ExtraSpace+JIT_TailCall_StackOffsetToFlags] ; edx = flags + + mov eax, dword ptr [esp+ExtraSpace+nOldStackArgs] ; eax = nOldStackArgs + mov ecx, dword ptr [esp+ExtraSpace+nNewStackArgs] ; ecx = nNewStackArgs + + ; restore callee saved registers + ; <TODO>@TODO : esp based - doesnt work with localloc</TODO> + test edx, 1 + jz NoCalleeSaveRegisters + + mov edi, dword ptr [ebp-4] ; restore edi + mov esi, dword ptr [ebp-8] ; restore esi + mov ebx, dword ptr [ebp-12] ; restore ebx + +NoCalleeSaveRegisters: + + push dword ptr [ebp+4] ; save the original return address for later + push edi + push esi + +ExtraSpace = 24 ; pThread, ecx, edx, orig retaddr, edi, esi +CallersEsi = 0 +CallersEdi = 4 +OrigRetAddr = 8 +pThread = 20 + + lea edi, [ebp+8+4*eax] ; edi = the end of argument destination + lea esi, [esp+ExtraSpace+NewArgs+4*ecx] ; esi = the end of argument source + + mov ebp, dword ptr [ebp] ; restore ebp (do not use ebp as scratch register to get a good stack trace in debugger) + + test edx, 2 + jnz VSDTailCall + + ; copy the arguments to the final destination + test ecx, ecx + jz ArgumentsCopied +ArgumentCopyLoop: + ; At this point, this is the value of the registers : + ; edi = end of argument dest + ; esi = end of argument source + ; ecx = nNewStackArgs + mov eax, dword ptr [esi-4] + sub edi, 4 + sub esi, 4 + mov dword ptr [edi], eax + dec ecx + jnz ArgumentCopyLoop +ArgumentsCopied: + + ; edi = the start of argument destination + + mov eax, dword ptr [esp+4+4] ; return address + mov ecx, dword ptr [esp+ExtraSpace+TargetAddr] ; target address + + mov dword ptr [edi-4], eax ; return address + mov dword ptr [edi-8], ecx ; target address + + lea eax, [edi-8] ; new value for esp + + pop esi + pop edi + pop ecx ; skip original return address + pop edx + pop ecx + + mov esp, eax + +PUBLIC JIT_TailCallLeave ; add a label here so that TailCallStubManager can access it +JIT_TailCallLeave: + retn ; Will branch to targetAddr. This matches the + ; "call" done by JITted code, keeping the + ; call-ret count balanced. + + ;---------------------------------------------------------------------- +VSDTailCall: + ;---------------------------------------------------------------------- + + ; For the Virtual Stub Dispatch, we create a fake callsite to fool + ; the callsite probes. In order to create the call site, we need to insert TailCallFrame + ; if we do not have one already. + ; + ; ecx = nNewStackArgs + ; esi = the end of argument source + ; edi = the end of argument destination + ; + ; The stub has pushed the following onto the stack at this point : + ; pThread, ecx, edx, orig retaddr, edi, esi + + + cmp dword ptr [esp+OrigRetAddr], JIT_TailCallReturnFromVSD + jz VSDTailCallFrameInserted_DoSlideUpArgs ; There is an exiting TailCallFrame that can be reused + + ; try to allocate space for the frame / check whether there is enough space + ; If there is sufficient space, we will setup the frame and then slide + ; the arguments up the stack. Else, we first need to slide the arguments + ; down the stack to make space for the TailCallFrame + sub edi, (SIZEOF_GSCookie + SIZEOF_TailCallFrame) + cmp edi, esi + jae VSDSpaceForFrameChecked + + ; There is not sufficient space to wedge in the TailCallFrame without + ; overwriting the new arguments. 
+ ; We need to allocate the extra space on the stack, + ; and slide down the new arguments + + mov eax, esi + sub eax, edi + sub esp, eax + + mov eax, ecx ; to subtract the size of arguments + mov edx, ecx ; for counter + + neg eax + + ; copy down the arguments to the final destination, need to copy all temporary storage as well + add edx, (ExtraSpace+NewArgs)/4 + + lea esi, [esi+4*eax-(ExtraSpace+NewArgs)] + lea edi, [edi+4*eax-(ExtraSpace+NewArgs)] + +VSDAllocFrameCopyLoop: + mov eax, dword ptr [esi] + mov dword ptr [edi], eax + add esi, 4 + add edi, 4 + dec edx + jnz VSDAllocFrameCopyLoop + + ; the argument source and destination are same now + mov esi, edi + +VSDSpaceForFrameChecked: + + ; At this point, we have enough space on the stack for the TailCallFrame, + ; and we may already have slided down the arguments + + mov eax, _s_gsCookie ; GetProcessGSCookie() + mov dword ptr [edi], eax ; set GSCookie + mov eax, _g_TailCallFrameVptr ; vptr + mov edx, dword ptr [esp+OrigRetAddr] ; orig return address + mov dword ptr [edi+SIZEOF_GSCookie], eax ; TailCallFrame::vptr + mov dword ptr [edi+SIZEOF_GSCookie+28], edx ; TailCallFrame::m_ReturnAddress + + mov eax, dword ptr [esp+CallersEdi] ; restored edi + mov edx, dword ptr [esp+CallersEsi] ; restored esi + mov dword ptr [edi+SIZEOF_GSCookie+12], eax ; TailCallFrame::m_regs::edi + mov dword ptr [edi+SIZEOF_GSCookie+16], edx ; TailCallFrame::m_regs::esi + mov dword ptr [edi+SIZEOF_GSCookie+20], ebx ; TailCallFrame::m_regs::ebx + mov dword ptr [edi+SIZEOF_GSCookie+24], ebp ; TailCallFrame::m_regs::ebp + + mov ebx, dword ptr [esp+pThread] ; ebx = pThread + + mov eax, dword ptr [ebx+Thread_m_pFrame] + lea edx, [edi+SIZEOF_GSCookie] + mov dword ptr [edi+SIZEOF_GSCookie+4], eax ; TailCallFrame::m_pNext + mov dword ptr [ebx+Thread_m_pFrame], edx ; hook the new frame into the chain + + ; setup ebp chain + lea ebp, [edi+SIZEOF_GSCookie+24] ; TailCallFrame::m_regs::ebp + + ; Do not copy arguments again if they are in place already + ; Otherwise, we will need to slide the new arguments up the stack + cmp esi, edi + jne VSDTailCallFrameInserted_DoSlideUpArgs + + ; At this point, we must have already previously slided down the new arguments, + ; or the TailCallFrame is a perfect fit + ; set the caller address + mov edx, dword ptr [esp+ExtraSpace+RetAddr] ; caller address + mov dword ptr [edi+SIZEOF_GSCookie+8], edx ; TailCallFrame::m_CallerAddress + + ; adjust edi as it would by copying + neg ecx + lea edi, [edi+4*ecx] + + jmp VSDArgumentsCopied + +VSDTailCallFrameInserted_DoSlideUpArgs: + ; set the caller address + mov edx, dword ptr [esp+ExtraSpace+RetAddr] ; caller address + mov dword ptr [edi+SIZEOF_GSCookie+8], edx ; TailCallFrame::m_CallerAddress + + ; copy the arguments to the final destination + test ecx, ecx + jz VSDArgumentsCopied +VSDArgumentCopyLoop: + mov eax, dword ptr [esi-4] + sub edi, 4 + sub esi, 4 + mov dword ptr [edi], eax + dec ecx + jnz VSDArgumentCopyLoop +VSDArgumentsCopied: + + ; edi = the start of argument destination + + mov ecx, dword ptr [esp+ExtraSpace+TargetAddr] ; target address + + mov dword ptr [edi-4], JIT_TailCallReturnFromVSD ; return address + mov dword ptr [edi-12], ecx ; address of indirection cell + mov ecx, [ecx] + mov dword ptr [edi-8], ecx ; target address + + ; skip original return address and saved esi, edi + add esp, 12 + + pop edx + pop ecx + + lea esp, [edi-12] ; new value for esp + pop eax + +PUBLIC JIT_TailCallVSDLeave ; add a label here so that TailCallStubManager can access it +JIT_TailCallVSDLeave: + 
retn ; Will branch to targetAddr. This matches the + ; "call" done by JITted code, keeping the + ; call-ret count balanced. + +JIT_TailCall ENDP + + +;------------------------------------------------------------------------------ + +; HCIMPL2_VV(float, JIT_FltRem, float dividend, float divisor) +@JIT_FltRem@8 proc public + fld dword ptr [esp+4] ; divisor + fld dword ptr [esp+8] ; dividend +fremloop: + fprem + fstsw ax + fwait + sahf + jp fremloop ; Continue while the FPU status bit C2 is set + fxch ; swap, so divisor is on top and result is in st(1) + fstp ST(0) ; Pop the divisor from the FP stack + retn 8 ; Return value is in st(0) +@JIT_FltRem@8 endp + +; HCIMPL2_VV(float, JIT_DblRem, float dividend, float divisor) +@JIT_DblRem@16 proc public + fld qword ptr [esp+4] ; divisor + fld qword ptr [esp+12] ; dividend +fremloopd: + fprem + fstsw ax + fwait + sahf + jp fremloopd ; Continue while the FPU status bit C2 is set + fxch ; swap, so divisor is on top and result is in st(1) + fstp ST(0) ; Pop the divisor from the FP stack + retn 16 ; Return value is in st(0) +@JIT_DblRem@16 endp + +;------------------------------------------------------------------------------ + +g_SystemInfo TEXTEQU <?g_SystemInfo@@3U_SYSTEM_INFO@@A> +g_SpinConstants TEXTEQU <?g_SpinConstants@@3USpinConstants@@A> +g_pSyncTable TEXTEQU <?g_pSyncTable@@3PAVSyncTableEntry@@A> +JITutil_MonEnterWorker TEXTEQU <@JITutil_MonEnterWorker@4> +JITutil_MonReliableEnter TEXTEQU <@JITutil_MonReliableEnter@8> +JITutil_MonTryEnter TEXTEQU <@JITutil_MonTryEnter@12> +JITutil_MonExitWorker TEXTEQU <@JITutil_MonExitWorker@4> +JITutil_MonContention TEXTEQU <@JITutil_MonContention@4> +JITutil_MonReliableContention TEXTEQU <@JITutil_MonReliableContention@8> +JITutil_MonSignal TEXTEQU <@JITutil_MonSignal@4> +JIT_InternalThrow TEXTEQU <@JIT_InternalThrow@4> +EXTRN g_SystemInfo:BYTE +EXTRN g_SpinConstants:BYTE +EXTRN g_pSyncTable:DWORD +EXTRN JITutil_MonEnterWorker:PROC +EXTRN JITutil_MonReliableEnter:PROC +EXTRN JITutil_MonTryEnter:PROC +EXTRN JITutil_MonExitWorker:PROC +EXTRN JITutil_MonContention:PROC +EXTRN JITutil_MonReliableContention:PROC +EXTRN JITutil_MonSignal:PROC +EXTRN JIT_InternalThrow:PROC + +ifdef MON_DEBUG +ifdef TRACK_SYNC +EnterSyncHelper TEXTEQU <_EnterSyncHelper@8> +LeaveSyncHelper TEXTEQU <_LeaveSyncHelper@8> +EXTRN EnterSyncHelper:PROC +EXTRN LeaveSyncHelper:PROC +endif ;TRACK_SYNC +endif ;MON_DEBUG + +; The following macro is needed because MASM returns +; "instruction prefix not allowed" error message for +; rep nop mnemonic +$repnop MACRO + db 0F3h + db 090h +ENDM + +; Safe ThreadAbort does not abort a thread if it is running finally or has lock counts. +; At the time we call Monitor.Enter, we initiate the abort if we can. +; We do not need to do the same for Monitor.Leave, since most of time, Monitor.Leave is called +; during finally. + +;********************************************************************** +; This is a frameless helper for entering a monitor on a object. +; The object is in ARGUMENT_REG1. This tries the normal case (no +; blocking or object allocation) in line and calls a framed helper +; for the other cases. +; ***** NOTE: if you make any changes to this routine, build with MON_DEBUG undefined +; to make sure you don't break the non-debug build. This is very fragile code. +; Also, propagate the changes to jithelp.s which contains the same helper and assembly code +; (in AT&T syntax) for gnu assembler. 
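One aside before the monitor helpers: JIT_FltRem and JIT_DblRem above loop because fprem produces only a partial remainder, reducing the exponent difference by at most 63 per iteration and setting status flag C2 while work remains; the fstsw/sahf/jp sequence tests C2 through the parity flag. What the loop converges on is C's fmod, the remainder of truncated division, which is what the IL rem opcode requires for floats. A tiny C statement of that contract (function name is ours):

    #include <math.h>
    #include <stdio.h>

    /* The value the fprem loops converge on: fmod semantics, keeping
       the sign of the dividend. */
    double dbl_rem_sketch(double dividend, double divisor)
    {
        return fmod(dividend, divisor);
    }

    int main(void)
    {
        printf("%g\n", dbl_rem_sketch(-7.0, 3.0));   /* -1, not 2 */
        return 0;
    }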
+@JIT_MonEnterWorker@4 proc public + ; Initialize delay value for retry with exponential backoff + push ebx + mov ebx, dword ptr g_SpinConstants+SpinConstants_dwInitialDuration + + ; We need yet another register to avoid refetching the thread object + push esi + + ; Check if the instance is NULL. + test ARGUMENT_REG1, ARGUMENT_REG1 + jz MonEnterFramedLockHelper + + call _GetThread@0 + mov esi,eax + + ; Check if we can abort here + mov eax, [esi+Thread_m_State] + and eax, TS_CatchAtSafePoint_ASM + jz MonEnterRetryThinLock + ; go through the slow code path to initiate ThreadAbort. + jmp MonEnterFramedLockHelper + +MonEnterRetryThinLock: + ; Fetch the object header dword + mov eax, [ARGUMENT_REG1-SyncBlockIndexOffset_ASM] + + ; Check whether we have the "thin lock" layout, the lock is free and the spin lock bit not set + ; SBLK_COMBINED_MASK_ASM = BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX + BIT_SBLK_SPIN_LOCK + SBLK_MASK_LOCK_THREADID + SBLK_MASK_LOCK_RECLEVEL + test eax, SBLK_COMBINED_MASK_ASM + jnz MonEnterNeedMoreTests + + ; Everything is fine - get the thread id to store in the lock + mov edx, [esi+Thread_m_ThreadId] + + ; If the thread id is too large, we need a syncblock for sure + cmp edx, SBLK_MASK_LOCK_THREADID_ASM + ja MonEnterFramedLockHelper + + ; We want to store a new value with the current thread id set in the low 10 bits + or edx,eax + lock cmpxchg dword ptr [ARGUMENT_REG1-SyncBlockIndexOffset_ASM], edx + jnz MonEnterPrepareToWaitThinLock + + ; Everything went fine and we're done + add [esi+Thread_m_dwLockCount],1 + pop esi + pop ebx + ret + +MonEnterNeedMoreTests: + ; Ok, it's not the simple case - find out which case it is + test eax, BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX_ASM + jnz MonEnterHaveHashOrSyncBlockIndex + + ; The header is transitioning or the lock - treat this as if the lock was taken + test eax, BIT_SBLK_SPIN_LOCK_ASM + jnz MonEnterPrepareToWaitThinLock + + ; Here we know we have the "thin lock" layout, but the lock is not free. + ; It could still be the recursion case - compare the thread id to check + mov edx,eax + and edx, SBLK_MASK_LOCK_THREADID_ASM + cmp edx, [esi+Thread_m_ThreadId] + jne MonEnterPrepareToWaitThinLock + + ; Ok, the thread id matches, it's the recursion case. + ; Bump up the recursion level and check for overflow + lea edx, [eax+SBLK_LOCK_RECLEVEL_INC_ASM] + test edx, SBLK_MASK_LOCK_RECLEVEL_ASM + jz MonEnterFramedLockHelper + + ; Try to put the new recursion level back. If the header was changed in the meantime, + ; we need a full retry, because the layout could have changed. 
+ lock cmpxchg [ARGUMENT_REG1-SyncBlockIndexOffset_ASM], edx + jnz MonEnterRetryHelperThinLock + + ; Everything went fine and we're done + pop esi + pop ebx + ret + +MonEnterPrepareToWaitThinLock: + ; If we are on an MP system, we try spinning for a certain number of iterations + cmp dword ptr g_SystemInfo+SYSTEM_INFO_dwNumberOfProcessors,1 + jle MonEnterFramedLockHelper + + ; exponential backoff: delay by approximately 2*ebx clock cycles (on a PIII) + mov eax, ebx +MonEnterdelayLoopThinLock: + $repnop ; indicate to the CPU that we are spin waiting (useful for some Intel P4 multiprocs) + dec eax + jnz MonEnterdelayLoopThinLock + + ; next time, wait a factor longer + imul ebx, dword ptr g_SpinConstants+SpinConstants_dwBackoffFactor + + cmp ebx, dword ptr g_SpinConstants+SpinConstants_dwMaximumDuration + jle MonEnterRetryHelperThinLock + + jmp MonEnterFramedLockHelper + +MonEnterRetryHelperThinLock: + jmp MonEnterRetryThinLock + +MonEnterHaveHashOrSyncBlockIndex: + ; If we have a hash code already, we need to create a sync block + test eax, BIT_SBLK_IS_HASHCODE_ASM + jnz MonEnterFramedLockHelper + + ; Ok, we have a sync block index - just and out the top bits and grab the syncblock index + and eax, MASK_SYNCBLOCKINDEX_ASM + + ; Get the sync block pointer. + mov ARGUMENT_REG2, dword ptr g_pSyncTable + mov ARGUMENT_REG2, [ARGUMENT_REG2+eax*SizeOfSyncTableEntry_ASM+SyncTableEntry_m_SyncBlock] + + ; Check if the sync block has been allocated. + test ARGUMENT_REG2, ARGUMENT_REG2 + jz MonEnterFramedLockHelper + + ; Get a pointer to the lock object. + lea ARGUMENT_REG2, [ARGUMENT_REG2+SyncBlock_m_Monitor] + + ; Attempt to acquire the lock. +MonEnterRetrySyncBlock: + mov eax, [ARGUMENT_REG2+AwareLock_m_MonitorHeld] + test eax,eax + jne MonEnterHaveWaiters + + ; Common case, lock isn't held and there are no waiters. Attempt to + ; gain ownership ourselves. + mov ARGUMENT_REG1,1 + lock cmpxchg [ARGUMENT_REG2+AwareLock_m_MonitorHeld], ARGUMENT_REG1 + jnz MonEnterRetryHelperSyncBlock + + ; Success. Save the thread object in the lock and increment the use count. + mov dword ptr [ARGUMENT_REG2+AwareLock_m_HoldingThread],esi + inc dword ptr [esi+Thread_m_dwLockCount] + inc dword ptr [ARGUMENT_REG2+AwareLock_m_Recursion] + +ifdef MON_DEBUG +ifdef TRACK_SYNC + push ARGUMENT_REG2 ; AwareLock + push [esp+4] ; return address + call EnterSyncHelper +endif ;TRACK_SYNC +endif ;MON_DEBUG + pop esi + pop ebx + ret + + ; It's possible to get here with waiters but no lock held, but in this + ; case a signal is about to be fired which will wake up a waiter. So + ; for fairness sake we should wait too. + ; Check first for recursive lock attempts on the same thread. +MonEnterHaveWaiters: + ; Is mutex already owned by current thread? + cmp [ARGUMENT_REG2+AwareLock_m_HoldingThread],esi + jne MonEnterPrepareToWait + + ; Yes, bump our use count. 
+ inc dword ptr [ARGUMENT_REG2+AwareLock_m_Recursion] +ifdef MON_DEBUG +ifdef TRACK_SYNC + push ARGUMENT_REG2 ; AwareLock + push [esp+4] ; return address + call EnterSyncHelper +endif ;TRACK_SYNC +endif ;MON_DEBUG + pop esi + pop ebx + ret + +MonEnterPrepareToWait: + ; If we are on an MP system, we try spinning for a certain number of iterations + cmp dword ptr g_SystemInfo+SYSTEM_INFO_dwNumberOfProcessors,1 + jle MonEnterHaveWaiters1 + + ; exponential backoff: delay by approximately 2*ebx clock cycles (on a PIII) + mov eax,ebx +MonEnterdelayLoop: + $repnop ; indicate to the CPU that we are spin waiting (useful for some Intel P4 multiprocs) + dec eax + jnz MonEnterdelayLoop + + ; next time, wait a factor longer + imul ebx, dword ptr g_SpinConstants+SpinConstants_dwBackoffFactor + + cmp ebx, dword ptr g_SpinConstants+SpinConstants_dwMaximumDuration + jle MonEnterRetrySyncBlock + +MonEnterHaveWaiters1: + + pop esi + pop ebx + + ; Place AwareLock in arg1 then call contention helper. + mov ARGUMENT_REG1, ARGUMENT_REG2 + jmp JITutil_MonContention + +MonEnterRetryHelperSyncBlock: + jmp MonEnterRetrySyncBlock + + ; ECX has the object to synchronize on +MonEnterFramedLockHelper: + pop esi + pop ebx + jmp JITutil_MonEnterWorker + +@JIT_MonEnterWorker@4 endp + +;********************************************************************** +; This is a frameless helper for entering a monitor on a object, and +; setting a flag to indicate that the lock was taken. +; The object is in ARGUMENT_REG1. The flag is in ARGUMENT_REG2. +; This tries the normal case (no blocking or object allocation) in line +; and calls a framed helper for the other cases. +; ***** NOTE: if you make any changes to this routine, build with MON_DEBUG undefined +; to make sure you don't break the non-debug build. This is very fragile code. +; Also, propagate the changes to jithelp.s which contains the same helper and assembly code +; (in AT&T syntax) for gnu assembler. +@JIT_MonReliableEnter@8 proc public + ; Initialize delay value for retry with exponential backoff + push ebx + mov ebx, dword ptr g_SpinConstants+SpinConstants_dwInitialDuration + + ; Put pbLockTaken in edi + push edi + mov edi, ARGUMENT_REG2 + + ; We need yet another register to avoid refetching the thread object + push esi + + ; Check if the instance is NULL. + test ARGUMENT_REG1, ARGUMENT_REG1 + jz MonReliableEnterFramedLockHelper + + call _GetThread@0 + mov esi,eax + + ; Check if we can abort here + mov eax, [esi+Thread_m_State] + and eax, TS_CatchAtSafePoint_ASM + jz MonReliableEnterRetryThinLock + ; go through the slow code path to initiate ThreadAbort. 
+ jmp MonReliableEnterFramedLockHelper + +MonReliableEnterRetryThinLock: + ; Fetch the object header dword + mov eax, [ARGUMENT_REG1-SyncBlockIndexOffset_ASM] + + ; Check whether we have the "thin lock" layout, the lock is free and the spin lock bit not set + ; SBLK_COMBINED_MASK_ASM = BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX + BIT_SBLK_SPIN_LOCK + SBLK_MASK_LOCK_THREADID + SBLK_MASK_LOCK_RECLEVEL + test eax, SBLK_COMBINED_MASK_ASM + jnz MonReliableEnterNeedMoreTests + + ; Everything is fine - get the thread id to store in the lock + mov edx, [esi+Thread_m_ThreadId] + + ; If the thread id is too large, we need a syncblock for sure + cmp edx, SBLK_MASK_LOCK_THREADID_ASM + ja MonReliableEnterFramedLockHelper + + ; We want to store a new value with the current thread id set in the low 10 bits + or edx,eax + lock cmpxchg dword ptr [ARGUMENT_REG1-SyncBlockIndexOffset_ASM], edx + jnz MonReliableEnterPrepareToWaitThinLock + + ; Everything went fine and we're done + add [esi+Thread_m_dwLockCount],1 + ; Set *pbLockTaken=true + mov byte ptr [edi],1 + pop esi + pop edi + pop ebx + ret + +MonReliableEnterNeedMoreTests: + ; Ok, it's not the simple case - find out which case it is + test eax, BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX_ASM + jnz MonReliableEnterHaveHashOrSyncBlockIndex + + ; The header is transitioning or the lock - treat this as if the lock was taken + test eax, BIT_SBLK_SPIN_LOCK_ASM + jnz MonReliableEnterPrepareToWaitThinLock + + ; Here we know we have the "thin lock" layout, but the lock is not free. + ; It could still be the recursion case - compare the thread id to check + mov edx,eax + and edx, SBLK_MASK_LOCK_THREADID_ASM + cmp edx, [esi+Thread_m_ThreadId] + jne MonReliableEnterPrepareToWaitThinLock + + ; Ok, the thread id matches, it's the recursion case. + ; Bump up the recursion level and check for overflow + lea edx, [eax+SBLK_LOCK_RECLEVEL_INC_ASM] + test edx, SBLK_MASK_LOCK_RECLEVEL_ASM + jz MonReliableEnterFramedLockHelper + + ; Try to put the new recursion level back. If the header was changed in the meantime, + ; we need a full retry, because the layout could have changed. + lock cmpxchg [ARGUMENT_REG1-SyncBlockIndexOffset_ASM], edx + jnz MonReliableEnterRetryHelperThinLock + + ; Everything went fine and we're done + ; Set *pbLockTaken=true + mov byte ptr [edi],1 + pop esi + pop edi + pop ebx + ret + +MonReliableEnterPrepareToWaitThinLock: + ; If we are on an MP system, we try spinning for a certain number of iterations + cmp dword ptr g_SystemInfo+SYSTEM_INFO_dwNumberOfProcessors,1 + jle MonReliableEnterFramedLockHelper + + ; exponential backoff: delay by approximately 2*ebx clock cycles (on a PIII) + mov eax, ebx +MonReliableEnterdelayLoopThinLock: + $repnop ; indicate to the CPU that we are spin waiting (useful for some Intel P4 multiprocs) + dec eax + jnz MonReliableEnterdelayLoopThinLock + + ; next time, wait a factor longer + imul ebx, dword ptr g_SpinConstants+SpinConstants_dwBackoffFactor + + cmp ebx, dword ptr g_SpinConstants+SpinConstants_dwMaximumDuration + jle MonReliableEnterRetryHelperThinLock + + jmp MonReliableEnterFramedLockHelper + +MonReliableEnterRetryHelperThinLock: + jmp MonReliableEnterRetryThinLock + +MonReliableEnterHaveHashOrSyncBlockIndex: + ; If we have a hash code already, we need to create a sync block + test eax, BIT_SBLK_IS_HASHCODE_ASM + jnz MonReliableEnterFramedLockHelper + + ; Ok, we have a sync block index - just and out the top bits and grab the syncblock index + and eax, MASK_SYNCBLOCKINDEX_ASM + + ; Get the sync block pointer. 
+ mov ARGUMENT_REG2, dword ptr g_pSyncTable + mov ARGUMENT_REG2, [ARGUMENT_REG2+eax*SizeOfSyncTableEntry_ASM+SyncTableEntry_m_SyncBlock] + + ; Check if the sync block has been allocated. + test ARGUMENT_REG2, ARGUMENT_REG2 + jz MonReliableEnterFramedLockHelper + + ; Get a pointer to the lock object. + lea ARGUMENT_REG2, [ARGUMENT_REG2+SyncBlock_m_Monitor] + + ; Attempt to acquire the lock. +MonReliableEnterRetrySyncBlock: + mov eax, [ARGUMENT_REG2+AwareLock_m_MonitorHeld] + test eax,eax + jne MonReliableEnterHaveWaiters + + ; Common case, lock isn't held and there are no waiters. Attempt to + ; gain ownership ourselves. + mov ARGUMENT_REG1,1 + lock cmpxchg [ARGUMENT_REG2+AwareLock_m_MonitorHeld], ARGUMENT_REG1 + jnz MonReliableEnterRetryHelperSyncBlock + + ; Success. Save the thread object in the lock and increment the use count. + mov dword ptr [ARGUMENT_REG2+AwareLock_m_HoldingThread],esi + inc dword ptr [esi+Thread_m_dwLockCount] + inc dword ptr [ARGUMENT_REG2+AwareLock_m_Recursion] + ; Set *pbLockTaken=true + mov byte ptr [edi],1 + +ifdef MON_DEBUG +ifdef TRACK_SYNC + push ARGUMENT_REG2 ; AwareLock + push [esp+4] ; return address + call EnterSyncHelper +endif ;TRACK_SYNC +endif ;MON_DEBUG + pop esi + pop edi + pop ebx + ret + + ; It's possible to get here with waiters but no lock held, but in this + ; case a signal is about to be fired which will wake up a waiter. So + ; for fairness sake we should wait too. + ; Check first for recursive lock attempts on the same thread. +MonReliableEnterHaveWaiters: + ; Is mutex already owned by current thread? + cmp [ARGUMENT_REG2+AwareLock_m_HoldingThread],esi + jne MonReliableEnterPrepareToWait + + ; Yes, bump our use count. + inc dword ptr [ARGUMENT_REG2+AwareLock_m_Recursion] + ; Set *pbLockTaken=true + mov byte ptr [edi],1 +ifdef MON_DEBUG +ifdef TRACK_SYNC + push ARGUMENT_REG2 ; AwareLock + push [esp+4] ; return address + call EnterSyncHelper +endif ;TRACK_SYNC +endif ;MON_DEBUG + pop esi + pop edi + pop ebx + ret + +MonReliableEnterPrepareToWait: + ; If we are on an MP system, we try spinning for a certain number of iterations + cmp dword ptr g_SystemInfo+SYSTEM_INFO_dwNumberOfProcessors,1 + jle MonReliableEnterHaveWaiters1 + + ; exponential backoff: delay by approximately 2*ebx clock cycles (on a PIII) + mov eax,ebx +MonReliableEnterdelayLoop: + $repnop ; indicate to the CPU that we are spin waiting (useful for some Intel P4 multiprocs) + dec eax + jnz MonReliableEnterdelayLoop + + ; next time, wait a factor longer + imul ebx, dword ptr g_SpinConstants+SpinConstants_dwBackoffFactor + + cmp ebx, dword ptr g_SpinConstants+SpinConstants_dwMaximumDuration + jle MonReliableEnterRetrySyncBlock + +MonReliableEnterHaveWaiters1: + + ; Place AwareLock in arg1, pbLockTaken in arg2, then call contention helper. + mov ARGUMENT_REG1, ARGUMENT_REG2 + mov ARGUMENT_REG2, edi + + pop esi + pop edi + pop ebx + + jmp JITutil_MonReliableContention + +MonReliableEnterRetryHelperSyncBlock: + jmp MonReliableEnterRetrySyncBlock + + ; ECX has the object to synchronize on +MonReliableEnterFramedLockHelper: + mov ARGUMENT_REG2, edi + pop esi + pop edi + pop ebx + jmp JITutil_MonReliableEnter + +@JIT_MonReliableEnter@8 endp + +;************************************************************************ +; This is a frameless helper for trying to enter a monitor on a object. +; The object is in ARGUMENT_REG1 and a timeout in ARGUMENT_REG2. This tries the +; normal case (no object allocation) in line and calls a framed helper for the +; other cases. 
+; ***** NOTE: if you make any changes to this routine, build with MON_DEBUG undefined +; to make sure you don't break the non-debug build. This is very fragile code. +; Also, propagate the changes to jithelp.s which contains the same helper and assembly code +; (in AT&T syntax) for gnu assembler. +@JIT_MonTryEnter@12 proc public + ; Save the timeout parameter. + push ARGUMENT_REG2 + + ; Initialize delay value for retry with exponential backoff + push ebx + mov ebx, dword ptr g_SpinConstants+SpinConstants_dwInitialDuration + + ; The thin lock logic needs another register to store the thread + push esi + + ; Check if the instance is NULL. + test ARGUMENT_REG1, ARGUMENT_REG1 + jz MonTryEnterFramedLockHelper + + ; Check if the timeout looks valid + cmp ARGUMENT_REG2,-1 + jl MonTryEnterFramedLockHelper + + ; Get the thread right away, we'll need it in any case + call _GetThread@0 + mov esi,eax + + ; Check if we can abort here + mov eax, [esi+Thread_m_State] + and eax, TS_CatchAtSafePoint_ASM + jz MonTryEnterRetryThinLock + ; go through the slow code path to initiate ThreadAbort. + jmp MonTryEnterFramedLockHelper + +MonTryEnterRetryThinLock: + ; Get the header dword and check its layout + mov eax, [ARGUMENT_REG1-SyncBlockIndexOffset_ASM] + + ; Check whether we have the "thin lock" layout, the lock is free and the spin lock bit not set + ; SBLK_COMBINED_MASK_ASM = BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX + BIT_SBLK_SPIN_LOCK + SBLK_MASK_LOCK_THREADID + SBLK_MASK_LOCK_RECLEVEL + test eax, SBLK_COMBINED_MASK_ASM + jnz MonTryEnterNeedMoreTests + + ; Ok, everything is fine. Fetch the thread id and make sure it's small enough for thin locks + mov edx, [esi+Thread_m_ThreadId] + cmp edx, SBLK_MASK_LOCK_THREADID_ASM + ja MonTryEnterFramedLockHelper + + ; Try to put our thread id in there + or edx,eax + lock cmpxchg [ARGUMENT_REG1-SyncBlockIndexOffset_ASM],edx + jnz MonTryEnterRetryHelperThinLock + + ; Got the lock - everything is fine" + add [esi+Thread_m_dwLockCount],1 + pop esi + + ; Delay value no longer needed + pop ebx + + ; Timeout parameter not needed, ditch it from the stack. + add esp,4 + + mov eax, [esp+4] + mov byte ptr [eax], 1 + ret 4 + +MonTryEnterNeedMoreTests: + ; Ok, it's not the simple case - find out which case it is + test eax, BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX_ASM + jnz MonTryEnterHaveSyncBlockIndexOrHash + + ; The header is transitioning or the lock is taken + test eax, BIT_SBLK_SPIN_LOCK_ASM + jnz MonTryEnterRetryHelperThinLock + + mov edx, eax + and edx, SBLK_MASK_LOCK_THREADID_ASM + cmp edx, [esi+Thread_m_ThreadId] + jne MonTryEnterPrepareToWaitThinLock + + ; Ok, the thread id matches, it's the recursion case. + ; Bump up the recursion level and check for overflow + lea edx, [eax+SBLK_LOCK_RECLEVEL_INC_ASM] + test edx, SBLK_MASK_LOCK_RECLEVEL_ASM + jz MonTryEnterFramedLockHelper + + ; Try to put the new recursion level back. If the header was changed in the meantime, + ; we need a full retry, because the layout could have changed. + lock cmpxchg [ARGUMENT_REG1-SyncBlockIndexOffset_ASM],edx + jnz MonTryEnterRetryHelperThinLock + + ; Everything went fine and we're done + pop esi + pop ebx + + ; Timeout parameter not needed, ditch it from the stack. 
+ add esp, 4 + mov eax, [esp+4] + mov byte ptr [eax], 1 + ret 4 + +MonTryEnterPrepareToWaitThinLock: + ; If we are on an MP system, we try spinning for a certain number of iterations + cmp dword ptr g_SystemInfo+SYSTEM_INFO_dwNumberOfProcessors,1 + jle MonTryEnterFramedLockHelper + + ; exponential backoff: delay by approximately 2*ebx clock cycles (on a PIII) + mov eax, ebx +MonTryEnterdelayLoopThinLock: + $repnop ; indicate to the CPU that we are spin waiting (useful for some Intel P4 multiprocs) + dec eax + jnz MonTryEnterdelayLoopThinLock + + ; next time, wait a factor longer + imul ebx, dword ptr g_SpinConstants+SpinConstants_dwBackoffFactor + + cmp ebx, dword ptr g_SpinConstants+SpinConstants_dwMaximumDuration + jle MonTryEnterRetryHelperThinLock + + jmp MonTryEnterWouldBlock + +MonTryEnterRetryHelperThinLock: + jmp MonTryEnterRetryThinLock + + +MonTryEnterHaveSyncBlockIndexOrHash: + ; If we have a hash code already, we need to create a sync block + test eax, BIT_SBLK_IS_HASHCODE_ASM + jnz MonTryEnterFramedLockHelper + + ; Just and out the top bits and grab the syncblock index + and eax, MASK_SYNCBLOCKINDEX_ASM + + ; Get the sync block pointer. + mov ARGUMENT_REG2, dword ptr g_pSyncTable + mov ARGUMENT_REG2, [ARGUMENT_REG2+eax*SizeOfSyncTableEntry_ASM+SyncTableEntry_m_SyncBlock] + + ; Check if the sync block has been allocated. + test ARGUMENT_REG2, ARGUMENT_REG2 + jz MonTryEnterFramedLockHelper + + ; Get a pointer to the lock object. + lea ARGUMENT_REG2, [ARGUMENT_REG2+SyncBlock_m_Monitor] + +MonTryEnterRetrySyncBlock: + ; Attempt to acquire the lock. + mov eax, [ARGUMENT_REG2+AwareLock_m_MonitorHeld] + test eax,eax + jne MonTryEnterHaveWaiters + + ; We need another scratch register for what follows, so save EBX now so" + ; we can use it for that purpose." + push ebx + + ; Common case, lock isn't held and there are no waiters. Attempt to + ; gain ownership ourselves. + mov ebx,1 + lock cmpxchg [ARGUMENT_REG2+AwareLock_m_MonitorHeld],ebx + + pop ebx + + jnz MonTryEnterRetryHelperSyncBlock + + ; Success. Save the thread object in the lock and increment the use count. + mov dword ptr [ARGUMENT_REG2+AwareLock_m_HoldingThread],esi + inc dword ptr [ARGUMENT_REG2+AwareLock_m_Recursion] + inc dword ptr [esi+Thread_m_dwLockCount] + +ifdef MON_DEBUG +ifdef TRACK_SYNC + push ARGUMENT_REG2 ; AwareLock + push [esp+4] ; return address + call EnterSyncHelper +endif ;TRACK_SYNC +endif ;MON_DEBUG + + pop esi + pop ebx + + ; Timeout parameter not needed, ditch it from the stack." + add esp,4 + + mov eax, [esp+4] + mov byte ptr [eax], 1 + ret 4 + + ; It's possible to get here with waiters but no lock held, but in this + ; case a signal is about to be fired which will wake up a waiter. So + ; for fairness sake we should wait too. + ; Check first for recursive lock attempts on the same thread. +MonTryEnterHaveWaiters: + ; Is mutex already owned by current thread? + cmp [ARGUMENT_REG2+AwareLock_m_HoldingThread],esi + jne MonTryEnterPrepareToWait + + ; Yes, bump our use count. + inc dword ptr [ARGUMENT_REG2+AwareLock_m_Recursion] +ifdef MON_DEBUG +ifdef TRACK_SYNC + push ARGUMENT_REG2 ; AwareLock + push [esp+4] ; return address + call EnterSyncHelper +endif ;TRACK_SYNC +endif ;MON_DEBUG + pop esi + pop ebx + + ; Timeout parameter not needed, ditch it from the stack. 
+        add     esp,4
+
+        mov     eax, [esp+4]
+        mov     byte ptr [eax], 1
+        ret     4
+
+MonTryEnterPrepareToWait:
+        ; If we are on an MP system, we try spinning for a certain number of iterations
+        cmp     dword ptr g_SystemInfo+SYSTEM_INFO_dwNumberOfProcessors,1
+        jle     MonTryEnterWouldBlock
+
+        ; exponential backoff: delay by approximately 2*ebx clock cycles (on a PIII)
+        mov     eax, ebx
+MonTryEnterdelayLoop:
+        $repnop ; indicate to the CPU that we are spin waiting (useful for some Intel P4 multiprocs)
+        dec     eax
+        jnz     MonTryEnterdelayLoop
+
+        ; next time, wait a factor longer
+        imul    ebx, dword ptr g_SpinConstants+SpinConstants_dwBackoffFactor
+
+        cmp     ebx, dword ptr g_SpinConstants+SpinConstants_dwMaximumDuration
+        jle     MonTryEnterRetrySyncBlock
+
+        ; We would need to block to enter the section. Return failure if
+        ; timeout is zero, else call the framed helper to do the blocking
+        ; form of TryEnter.
+MonTryEnterWouldBlock:
+        pop     esi
+        pop     ebx
+        pop     ARGUMENT_REG2
+        test    ARGUMENT_REG2, ARGUMENT_REG2
+        jnz     MonTryEnterBlock
+        mov     eax, [esp+4]
+        mov     byte ptr [eax], 0
+        ret     4
+
+MonTryEnterRetryHelperSyncBlock:
+        jmp     MonTryEnterRetrySyncBlock
+
+MonTryEnterFramedLockHelper:
+        ; ARGUMENT_REG1 has the object to synchronize on, must retrieve the
+        ; timeout parameter from the stack.
+        pop     esi
+        pop     ebx
+        pop     ARGUMENT_REG2
+MonTryEnterBlock:
+        jmp     JITutil_MonTryEnter
+
+@JIT_MonTryEnter@12 endp
+
+;**********************************************************************
+; This is a frameless helper for exiting a monitor on an object.
+; The object is in ARGUMENT_REG1. This tries the normal case (no
+; blocking or object allocation) in line and calls a framed helper
+; for the other cases.
+; ***** NOTE: if you make any changes to this routine, build with MON_DEBUG undefined
+; to make sure you don't break the non-debug build. This is very fragile code.
+; Also, propagate the changes to jithelp.s which contains the same helper and assembly code
+; (in AT&T syntax) for gnu assembler.
+@JIT_MonExitWorker@4 proc public
+        ; The thin lock logic needs an additional register to hold the thread, unfortunately
+        push    esi
+
+        ; Check if the instance is NULL.
+        test    ARGUMENT_REG1, ARGUMENT_REG1
+        jz      MonExitFramedLockHelper
+
+        call    _GetThread@0
+        mov     esi,eax
+
+MonExitRetryThinLock:
+        ; Fetch the header dword and check its layout and the spin lock bit
+        mov     eax, [ARGUMENT_REG1-SyncBlockIndexOffset_ASM]
+        ; BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX_SPIN_LOCK_ASM = BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX + BIT_SBLK_SPIN_LOCK
+        test    eax, BIT_SBLK_IS_HASH_OR_SYNCBLKINDEX_SPIN_LOCK_ASM
+        jnz     MonExitNeedMoreTests
+
+        ; Ok, we have a "thin lock" layout - check whether the thread id matches
+        mov     edx,eax
+        and     edx, SBLK_MASK_LOCK_THREADID_ASM
+        cmp     edx, [esi+Thread_m_ThreadId]
+        jne     MonExitFramedLockHelper
+
+        ; Check the recursion level
+        test    eax, SBLK_MASK_LOCK_RECLEVEL_ASM
+        jne     MonExitDecRecursionLevel
+
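+;---------------------------------------------------------------------
+; Editorial note (not part of the original source): releasing a
+; non-recursive thin lock is just a CAS that clears the thread-id bits
+; in the header, roughly:
+;
+;   DWORD header = obj->m_SyncBlockValue;
+;   if (InterlockedCompareExchange(&obj->m_SyncBlockValue,
+;                                  header & ~SBLK_MASK_LOCK_THREADID,
+;                                  header) == header)
+;   {
+;       thread->m_dwLockCount--;    // lock released
+;       return;
+;   }
+;---------------------------------------------------------------------
+
+        ; It's zero - we're leaving the lock.
+        ; So try to put back a zero thread id.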
+        ; edx and eax match in the thread id bits, and edx is zero elsewhere, so the xor is sufficient
+        xor     edx,eax
+        lock cmpxchg [ARGUMENT_REG1-SyncBlockIndexOffset_ASM],edx
+        jnz     MonExitRetryHelperThinLock
+
+        ; We're done
+        sub     [esi+Thread_m_dwLockCount],1
+        pop     esi
+        ret
+
+MonExitDecRecursionLevel:
+        lea     edx, [eax-SBLK_LOCK_RECLEVEL_INC_ASM]
+        lock cmpxchg [ARGUMENT_REG1-SyncBlockIndexOffset_ASM],edx
+        jnz     MonExitRetryHelperThinLock
+
+        ; We're done
+        pop     esi
+        ret
+
+MonExitNeedMoreTests:
+        ; Forward all special cases to the slow helper
+        ; BIT_SBLK_IS_HASHCODE_OR_SPIN_LOCK_ASM = BIT_SBLK_IS_HASHCODE + BIT_SBLK_SPIN_LOCK
+        test    eax, BIT_SBLK_IS_HASHCODE_OR_SPIN_LOCK_ASM
+        jnz     MonExitFramedLockHelper
+
+        ; Get the sync block index and use it to compute the sync block pointer
+        mov     ARGUMENT_REG2, dword ptr g_pSyncTable
+        and     eax, MASK_SYNCBLOCKINDEX_ASM
+        mov     ARGUMENT_REG2, [ARGUMENT_REG2+eax*SizeOfSyncTableEntry_ASM+SyncTableEntry_m_SyncBlock]
+
+        ; Was there a sync block?
+        test    ARGUMENT_REG2, ARGUMENT_REG2
+        jz      MonExitFramedLockHelper
+
+        ; Get a pointer to the lock object.
+        lea     ARGUMENT_REG2, [ARGUMENT_REG2+SyncBlock_m_Monitor]
+
+        ; Check if lock is held.
+        cmp     [ARGUMENT_REG2+AwareLock_m_HoldingThread],esi
+        jne     MonExitFramedLockHelper
+
+ifdef MON_DEBUG
+ifdef TRACK_SYNC
+        push    ARGUMENT_REG1 ; preserve regs
+        push    ARGUMENT_REG2
+
+        push    ARGUMENT_REG2 ; AwareLock
+        push    [esp+8]       ; return address
+        call    LeaveSyncHelper
+
+        pop     ARGUMENT_REG2 ; restore regs
+        pop     ARGUMENT_REG1
+endif ;TRACK_SYNC
+endif ;MON_DEBUG
+        ; Reduce our recursion count.
+        dec     dword ptr [ARGUMENT_REG2+AwareLock_m_Recursion]
+        jz      MonExitLastRecursion
+
+        pop     esi
+        ret
+
+MonExitRetryHelperThinLock:
+        jmp     MonExitRetryThinLock
+
+MonExitFramedLockHelper:
+        pop     esi
+        jmp     JITutil_MonExitWorker
+
+        ; This is the last count we held on this lock, so release the lock.
+MonExitLastRecursion:
+        dec     dword ptr [esi+Thread_m_dwLockCount]
+        mov     dword ptr [ARGUMENT_REG2+AwareLock_m_HoldingThread],0
+
+MonExitRetry:
+        mov     eax, [ARGUMENT_REG2+AwareLock_m_MonitorHeld]
+        lea     esi, [eax-1]
+        lock cmpxchg [ARGUMENT_REG2+AwareLock_m_MonitorHeld], esi
+        jne     MonExitRetryHelper
+        pop     esi
+        test    eax,0FFFFFFFEh
+        jne     MonExitMustSignal
+
+        ret
+
+MonExitMustSignal:
+        mov     ARGUMENT_REG1, ARGUMENT_REG2
+        jmp     JITutil_MonSignal
+
+MonExitRetryHelper:
+        jmp     MonExitRetry
+
+@JIT_MonExitWorker@4 endp
+
+;**********************************************************************
+; This is a frameless helper for entering a static monitor on a class.
+; The methoddesc is in ARGUMENT_REG1. This tries the normal case (no
+; blocking or object allocation) in line and calls a framed helper
+; for the other cases.
+; Note we are changing the methoddesc parameter to a pointer to the
+; AwareLock.
+; ***** NOTE: if you make any changes to this routine, build with MON_DEBUG undefined
+; to make sure you don't break the non-debug build. This is very fragile code.
+; Also, propagate the changes to jithelp.s which contains the same helper and assembly code
+; (in AT&T syntax) for gnu assembler.
+@JIT_MonEnterStatic@4 proc public
+        ; We need another scratch register for what follows, so save EBX now so
+        ; we can use it for that purpose.
+        push    ebx
+
+        ; Attempt to acquire the lock
+MonEnterStaticRetry:
+        mov     eax, [ARGUMENT_REG1+AwareLock_m_MonitorHeld]
+        test    eax,eax
+        jne     MonEnterStaticHaveWaiters
+
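+;---------------------------------------------------------------------
+; Editorial note (not part of the original source): for these static
+; (AwareLock-based) helpers the fast path is a CAS on m_MonitorHeld,
+; roughly:
+;
+;   if (InterlockedCompareExchange(&lock->m_MonitorHeld, 1, 0) == 0) {
+;       lock->m_HoldingThread = GetThread();   // uncontended acquire
+;       lock->m_Recursion++;
+;       GetThread()->m_dwLockCount++;
+;   }
+;---------------------------------------------------------------------
+
+        ; Common case, lock isn't held and there are no waiters. Attempt to
+        ; gain ownership ourselves.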
+        mov     ebx,1
+        lock cmpxchg [ARGUMENT_REG1+AwareLock_m_MonitorHeld],ebx
+        jnz     MonEnterStaticRetryHelper
+
+        pop     ebx
+
+        ; Success. Save the thread object in the lock and increment the use count.
+        call    _GetThread@0
+        mov     [ARGUMENT_REG1+AwareLock_m_HoldingThread], eax
+        inc     dword ptr [ARGUMENT_REG1+AwareLock_m_Recursion]
+        inc     dword ptr [eax+Thread_m_dwLockCount]
+
+ifdef MON_DEBUG
+ifdef TRACK_SYNC
+        push    ARGUMENT_REG1 ; AwareLock
+        push    [esp+4]       ; return address
+        call    EnterSyncHelper
+endif ;TRACK_SYNC
+endif ;MON_DEBUG
+        ret
+
+        ; It's possible to get here with waiters but no lock held, but in this
+        ; case a signal is about to be fired which will wake up a waiter. So
+        ; for fairness' sake we should wait too.
+        ; Check first for recursive lock attempts on the same thread.
+MonEnterStaticHaveWaiters:
+        ; Get thread but preserve EAX (contains cached contents of m_MonitorHeld).
+        push    eax
+        call    _GetThread@0
+        mov     ebx,eax
+        pop     eax
+
+        ; Is mutex already owned by current thread?
+        cmp     [ARGUMENT_REG1+AwareLock_m_HoldingThread],ebx
+        jne     MonEnterStaticPrepareToWait
+
+        ; Yes, bump our use count.
+        inc     dword ptr [ARGUMENT_REG1+AwareLock_m_Recursion]
+ifdef MON_DEBUG
+ifdef TRACK_SYNC
+        push    ARGUMENT_REG1 ; AwareLock
+        push    [esp+4]       ; return address
+        call    EnterSyncHelper
+endif ;TRACK_SYNC
+endif ;MON_DEBUG
+        pop     ebx
+        ret
+
+MonEnterStaticPrepareToWait:
+        pop     ebx
+
+        ; ARGUMENT_REG1 should have AwareLock. Call contention helper.
+        jmp     JITutil_MonContention
+
+MonEnterStaticRetryHelper:
+        jmp     MonEnterStaticRetry
+@JIT_MonEnterStatic@4 endp
+
+;**********************************************************************
+; A frameless helper for exiting a static monitor on a class.
+; The methoddesc is in ARGUMENT_REG1. This tries the normal case (no
+; blocking or object allocation) in line and calls a framed helper
+; for the other cases.
+; Note we are changing the methoddesc parameter to a pointer to the
+; AwareLock.
+; ***** NOTE: if you make any changes to this routine, build with MON_DEBUG undefined
+; to make sure you don't break the non-debug build. This is very fragile code.
+; Also, propagate the changes to jithelp.s which contains the same helper and assembly code
+; (in AT&T syntax) for gnu assembler.
+@JIT_MonExitStatic@4 proc public
+
+ifdef MON_DEBUG
+ifdef TRACK_SYNC
+        push    ARGUMENT_REG1 ; preserve regs
+
+        push    ARGUMENT_REG1 ; AwareLock
+        push    [esp+8]       ; return address
+        call    LeaveSyncHelper
+
+        pop     ARGUMENT_REG1 ; restore regs
+endif ;TRACK_SYNC
+endif ;MON_DEBUG
+
+        ; Check if lock is held.
+        call    _GetThread@0
+        cmp     [ARGUMENT_REG1+AwareLock_m_HoldingThread],eax
+        jne     MonExitStaticLockError
+
+        ; Reduce our recursion count.
+        dec     dword ptr [ARGUMENT_REG1+AwareLock_m_Recursion]
+        jz      MonExitStaticLastRecursion
+
+        ret
+
+        ; This is the last count we held on this lock, so release the lock.
+MonExitStaticLastRecursion:
+        ; eax must have the thread object
+        dec     dword ptr [eax+Thread_m_dwLockCount]
+        mov     dword ptr [ARGUMENT_REG1+AwareLock_m_HoldingThread],0
+        push    ebx
+
+MonExitStaticRetry:
+        mov     eax, [ARGUMENT_REG1+AwareLock_m_MonitorHeld]
+        lea     ebx, [eax-1]
+        lock cmpxchg [ARGUMENT_REG1+AwareLock_m_MonitorHeld],ebx
+        jne     MonExitStaticRetryHelper
+        pop     ebx
+        test    eax,0FFFFFFFEh
+        jne     MonExitStaticMustSignal
+
+        ret
+
+MonExitStaticMustSignal:
+        jmp     JITutil_MonSignal
+
+MonExitStaticRetryHelper:
+        jmp     MonExitStaticRetry
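+;---------------------------------------------------------------------
+; Editorial note (not part of the original source): this sketch assumes
+; the usual AwareLock encoding of m_MonitorHeld (bit 0 = lock held, the
+; remaining bits count waiters), so the release above is roughly:
+;
+;   DWORD old;
+;   do {
+;       old = lock->m_MonitorHeld;
+;   } while (InterlockedCompareExchange(&lock->m_MonitorHeld,
+;                                       old - 1, old) != old);
+;   if (old & ~1)                    // test eax,0FFFFFFFEh above
+;       JITutil_MonSignal(lock);     // wake a waiter
+;---------------------------------------------------------------------
+
+        ; Throw a synchronization lock exception.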
+MonExitStaticLockError:
+        mov     ARGUMENT_REG1, CORINFO_SynchronizationLockException_ASM
+        jmp     JIT_InternalThrow
+
+@JIT_MonExitStatic@4 endp
+
+; PatchedCodeStart and PatchedCodeEnd are used to determine bounds of patched code.
+;
+
+_JIT_PatchedCodeStart@0 proc public
+ret
+_JIT_PatchedCodeStart@0 endp
+
+;
+; Optimized TLS getters
+;
+
+        ALIGN 4
+
+ifndef FEATURE_IMPLICIT_TLS
+_GetThread@0 proc public
+        ; This will be overwritten at runtime with optimized GetThread implementation
+        jmp     short _GetTLSDummy@0
+        ; Just allocate space that will be filled in at runtime
+        db (TLS_GETTER_MAX_SIZE_ASM - 2) DUP (0CCh)
+_GetThread@0 endp
+
+        ALIGN 4
+
+_GetAppDomain@0 proc public
+        ; This will be overwritten at runtime with optimized GetAppDomain implementation
+        jmp     short _GetTLSDummy@0
+        ; Just allocate space that will be filled in at runtime
+        db (TLS_GETTER_MAX_SIZE_ASM - 2) DUP (0CCh)
+_GetAppDomain@0 endp
+
+_GetTLSDummy@0 proc public
+        xor     eax,eax
+        ret
+_GetTLSDummy@0 endp
+
+        ALIGN 4
+
+_ClrFlsGetBlock@0 proc public
+        ; This will be overwritten at runtime with optimized ClrFlsGetBlock implementation
+        jmp     short _GetTLSDummy@0
+        ; Just allocate space that will be filled in at runtime
+        db (TLS_GETTER_MAX_SIZE_ASM - 2) DUP (0CCh)
+_ClrFlsGetBlock@0 endp
+endif
+
+;**********************************************************************
+; Write barriers generated at runtime
+
+PUBLIC _JIT_PatchedWriteBarrierStart@0
+_JIT_PatchedWriteBarrierStart@0 PROC
+ret
+_JIT_PatchedWriteBarrierStart@0 ENDP
+
+PatchedWriteBarrierHelper MACRO rg
+        ALIGN 8
+PUBLIC _JIT_WriteBarrier&rg&@0
+_JIT_WriteBarrier&rg&@0 PROC
+        ; Just allocate space that will be filled in at runtime
+        db (48) DUP (0CCh)
+_JIT_WriteBarrier&rg&@0 ENDP
+
+ENDM
+
+PatchedWriteBarrierHelper <EAX>
+PatchedWriteBarrierHelper <EBX>
+PatchedWriteBarrierHelper <ECX>
+PatchedWriteBarrierHelper <ESI>
+PatchedWriteBarrierHelper <EDI>
+PatchedWriteBarrierHelper <EBP>
+
+PUBLIC _JIT_PatchedWriteBarrierLast@0
+_JIT_PatchedWriteBarrierLast@0 PROC
+ret
+_JIT_PatchedWriteBarrierLast@0 ENDP
+
+;**********************************************************************
+; PrecodeRemotingThunk is patched at runtime to activate it
+ifdef FEATURE_REMOTING
+        ALIGN 16
+_PrecodeRemotingThunk@0 proc public
+
+        ret     ; This is going to be patched to "test ecx,ecx"
+        nop
+
+        jz      RemotingDone ; predicted not taken
+
+        cmp     dword ptr [ecx],11111111h ; This is going to be patched to the address of the transparent proxy
+        je      RemotingCheck ; predicted not taken
+
+RemotingDone:
+        ret
+
+RemotingCheck:
+        push    eax ; save method desc
+        mov     eax, dword ptr [ecx + TransparentProxyObject___stubData]
+        call    [ecx + TransparentProxyObject___stub]
+        test    eax, eax
+        jnz     RemotingCtxMismatch
+        mov     eax, [esp]
+        mov     ax, [eax + MethodDesc_m_wFlags]
+        and     ax, MethodDesc_mdcClassification
+        cmp     ax, MethodDesc_mcComInterop
+        je      ComPlusCall
+        pop     eax ; throw away method desc
+        jmp     RemotingDone
+
+RemotingCtxMismatch:
+        pop     eax ; restore method desc
+        add     esp, 4 ; pop return address into the precode
+        jmp     _TransparentProxyStub_CrossContext@0
+
+ComPlusCall:
+        pop     eax ; restore method desc
+        mov     [esp],eax ; replace return address into the precode with method desc (argument for TP stub)
+        jmp     _InContextTPQuickDispatchAsmStub@0
+
+_PrecodeRemotingThunk@0 endp
+endif ; FEATURE_REMOTING
+
+_JIT_PatchedCodeLast@0 proc public
+ret
+_JIT_PatchedCodeLast@0 endp
+
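+;---------------------------------------------------------------------
+; Editorial note (not part of the original source): each
+; _JIT_WriteBarrier<REG>@0 slot above is 48 bytes of int 3 padding that
+; the runtime overwrites at startup with a register-specific barrier.
+; What gets patched in is roughly the following ephemeral-range check
+; plus card-table mark, using the g_* globals declared at the top of
+; this file; the 1KB card size is an assumption of this sketch:
+;
+;   void WriteBarrierREG(Object** dst, Object* ref) {
+;       *dst = ref;
+;       if (ref >= g_ephemeral_low && ref < g_ephemeral_high)
+;           g_card_table[(size_t)dst >> 10] = 0xFF;   // mark the card
+;   }
+;---------------------------------------------------------------------
+
+; This is the first function outside the "keep together range". Used by BBT scripts.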
+_JIT_PatchedCodeEnd@0 proc public
+ret
+_JIT_PatchedCodeEnd@0 endp
+
+; This is the ASM portion of JIT_IsInstanceOfInterface. For all the bizarre cases, it quickly
+; fails and falls back on the JITutil_IsInstanceOfInterface helper. So all failure cases take
+; the slow path, too.
+;
+; ARGUMENT_REG1 = array or interface to check for.
+; ARGUMENT_REG2 = instance to be cast.
+
+        ALIGN 16
+PUBLIC @JIT_IsInstanceOfInterface@8
+@JIT_IsInstanceOfInterface@8 PROC
+        test    ARGUMENT_REG2, ARGUMENT_REG2
+        jz      IsNullInst
+
+        mov     eax, [ARGUMENT_REG2]        ; get MethodTable
+
+        push    ebx
+        push    esi
+        movzx   ebx, word ptr [eax+MethodTable_m_wNumInterfaces]
+
+        ; check if this MT implements any interfaces
+        test    ebx, ebx
+        jz      IsInstanceOfInterfaceDoBizarre
+
+        ; move Interface map ptr into eax
+        mov     eax, [eax+MethodTable_m_pInterfaceMap]
+
+IsInstanceOfInterfaceTop:
+        ; eax -> current InterfaceInfo_t entry in interface map list
+ifdef FEATURE_PREJIT
+        mov     esi, [eax]
+        test    esi, 1
+        ; Move the dereference out of line so that this jump is correctly predicted for the case
+        ; when there is no indirection
+        jnz     IsInstanceOfInterfaceIndir
+        cmp     ARGUMENT_REG1, esi
+else
+        cmp     ARGUMENT_REG1, [eax]
+endif
+        je      IsInstanceOfInterfaceFound
+
+IsInstanceOfInterfaceNext:
+        add     eax, SIZEOF_InterfaceInfo_t
+        dec     ebx
+        jnz     IsInstanceOfInterfaceTop
+
+        ; fall through to DoBizarre
+
+IsInstanceOfInterfaceDoBizarre:
+        pop     esi
+        pop     ebx
+        mov     eax, [ARGUMENT_REG2]        ; get MethodTable
+        test    dword ptr [eax+MethodTable_m_dwFlags], NonTrivialInterfaceCastFlags
+        jnz     IsInstanceOfInterfaceNonTrivialCast
+
+IsNullInst:
+        xor     eax,eax
+        ret
+
+ifdef FEATURE_PREJIT
+IsInstanceOfInterfaceIndir:
+        cmp     ARGUMENT_REG1,[esi-1]
+        jne     IsInstanceOfInterfaceNext
+endif
+
+IsInstanceOfInterfaceFound:
+        pop     esi
+        pop     ebx
+        mov     eax, ARGUMENT_REG2          ; the successful instance
+        ret
+
+IsInstanceOfInterfaceNonTrivialCast:
+        jmp     @JITutil_IsInstanceOfInterface@8
+
+@JIT_IsInstanceOfInterface@8 endp
+
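+;---------------------------------------------------------------------
+; Editorial note (not part of the original source): ignoring the
+; FEATURE_PREJIT fixup-indirection case, the scan above is a simple
+; linear walk of the interface map, roughly:
+;
+;   MethodTable* mt = obj->GetMethodTable();
+;   InterfaceInfo_t* map = mt->m_pInterfaceMap;
+;   for (unsigned i = 0; i < mt->m_wNumInterfaces; i++)
+;       if (map[i].m_pMethodTable == pInterfaceMT)
+;           return obj;                    // cast succeeds
+;   return NULL;                           // or take the slow path
+;---------------------------------------------------------------------
+
+; This is the ASM portion of JIT_ChkCastInterface. For all the bizarre cases, it quickly
+; fails and falls back on the JITutil_ChkCastInterface helper. So all failure cases take
+; the slow path, too.
+;
+; ARGUMENT_REG1 = array or interface to check for.
+; ARGUMENT_REG2 = instance to be cast.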
+
+        ALIGN 16
+PUBLIC @JIT_ChkCastInterface@8
+@JIT_ChkCastInterface@8 PROC
+        test    ARGUMENT_REG2, ARGUMENT_REG2
+        jz      ChkCastInterfaceIsNullInst
+
+        mov     eax, [ARGUMENT_REG2]        ; get MethodTable
+
+        push    ebx
+        push    esi
+        movzx   ebx, word ptr [eax+MethodTable_m_wNumInterfaces]
+
+        ; speculatively move Interface map ptr into eax
+        mov     eax, [eax+MethodTable_m_pInterfaceMap]
+
+        ; check if this MT implements any interfaces
+        test    ebx, ebx
+        jz      ChkCastInterfaceDoBizarre
+
+ChkCastInterfaceTop:
+        ; eax -> current InterfaceInfo_t entry in interface map list
+ifdef FEATURE_PREJIT
+        mov     esi, [eax]
+        test    esi, 1
+        ; Move the dereference out of line so that this jump is correctly predicted for the case
+        ; when there is no indirection
+        jnz     ChkCastInterfaceIndir
+        cmp     ARGUMENT_REG1, esi
+else
+        cmp     ARGUMENT_REG1, [eax]
+endif
+        je      ChkCastInterfaceFound
+
+ChkCastInterfaceNext:
+        add     eax, SIZEOF_InterfaceInfo_t
+        dec     ebx
+        jnz     ChkCastInterfaceTop
+
+        ; fall through to DoBizarre
+
+ChkCastInterfaceDoBizarre:
+        pop     esi
+        pop     ebx
+        jmp     @JITutil_ChkCastInterface@8
+
+ifdef FEATURE_PREJIT
+ChkCastInterfaceIndir:
+        cmp     ARGUMENT_REG1,[esi-1]
+        jne     ChkCastInterfaceNext
+endif
+
+ChkCastInterfaceFound:
+        pop     esi
+        pop     ebx
+
+ChkCastInterfaceIsNullInst:
+        mov     eax, ARGUMENT_REG2          ; either null, or the successful instance
+        ret
+
+@JIT_ChkCastInterface@8 endp
+
+        end