diff options
Diffstat (limited to 'src/vm/arm/asmhelpers.S')
-rw-r--r-- | src/vm/arm/asmhelpers.S | 1474 |
1 file changed, 1474 insertions, 0 deletions
diff --git a/src/vm/arm/asmhelpers.S b/src/vm/arm/asmhelpers.S new file mode 100644 index 0000000000..65dc513cce --- /dev/null +++ b/src/vm/arm/asmhelpers.S @@ -0,0 +1,1474 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// ==++== +// + +// +// ==--== +#include "unixasmmacros.inc" +#include "asmconstants.h" + +.syntax unified +.thumb + +// LPVOID __stdcall GetCurrentIP(void)// + LEAF_ENTRY GetCurrentIP, _TEXT + mov r0, lr + bx lr + LEAF_END GetCurrentIP, _TEXT + +// LPVOID __stdcall GetCurrentSP(void)// + LEAF_ENTRY GetCurrentSP, _TEXT + mov r0, sp + bx lr + LEAF_END GetCurrentSP, _TEXT + +//----------------------------------------------------------------------------- +// This helper routine enregisters the appropriate arguments and makes the +// actual call. +//----------------------------------------------------------------------------- +//void CallDescrWorkerInternal(CallDescrData * pCallDescrData)// + NESTED_ENTRY CallDescrWorkerInternal,_TEXT,NoHandler + PROLOG_PUSH "{r4,r5,r7,lr}" + PROLOG_STACK_SAVE_OFFSET r7, #8 + + mov r5,r0 // save pCallDescrData in r5 + + ldr r1, [r5,#CallDescrData__numStackSlots] + cbz r1, LOCAL_LABEL(Ldonestack) + + // Add frame padding to ensure frame size is a multiple of 8 (a requirement of the OS ABI). + // We push four registers (above) and numStackSlots arguments (below). If this comes to an odd number + // of slots we must pad with another. This simplifies to "if the low bit of numStackSlots is set, + // extend the stack another four bytes". + lsls r2, r1, #2 + and r3, r2, #4 + sub sp, sp, r3 + + // This loop copies numStackSlots words + // from [pSrcEnd-4,pSrcEnd-8,...] to [sp-4,sp-8,...] + ldr r0, [r5,#CallDescrData__pSrc] + add r0,r0,r2 +LOCAL_LABEL(Lstackloop): + ldr r2, [r0,#-4]! + str r2, [sp,#-4]! 
+ subs r1, r1, #1 + bne LOCAL_LABEL(Lstackloop) +LOCAL_LABEL(Ldonestack): + + // If FP arguments are supplied in registers (r3 != NULL) then initialize all of them from the pointer + // given in r3. Do not use "it" since it faults in floating point even when the instruction is not executed. + ldr r3, [r5,#CallDescrData__pFloatArgumentRegisters] + cbz r3, LOCAL_LABEL(LNoFloatingPoint) + vldm r3, {s0-s15} +LOCAL_LABEL(LNoFloatingPoint): + + // Copy [pArgumentRegisters, ..., pArgumentRegisters + 12] + // into r0, ..., r3 + + ldr r4, [r5,#CallDescrData__pArgumentRegisters] + ldm r4, {r0-r3} + + CHECK_STACK_ALIGNMENT + + // call pTarget + // Note that remoting expect target in r4. + ldr r4, [r5,#CallDescrData__pTarget] + blx r4 + + ldr r3, [r5,#CallDescrData__fpReturnSize] + + // Save FP return value if appropriate + cbz r3, LOCAL_LABEL(LFloatingPointReturnDone) + + // Float return case + // Do not use "it" since it faults in floating point even when the instruction is not executed. + cmp r3, #4 + bne LOCAL_LABEL(LNoFloatReturn) + vmov r0, s0 + b LOCAL_LABEL(LFloatingPointReturnDone) +LOCAL_LABEL(LNoFloatReturn): + + // Double return case + // Do not use "it" since it faults in floating point even when the instruction is not executed. 
+ cmp r3, #8 + bne LOCAL_LABEL(LNoDoubleReturn) + vmov r0, r1, s0, s1 + b LOCAL_LABEL(LFloatingPointReturnDone) +LOCAL_LABEL(LNoDoubleReturn): + + add r2, r5, #CallDescrData__returnValue + + cmp r3, #16 + bne LOCAL_LABEL(LNoFloatHFAReturn) + vstm r2, {s0-s3} + b LOCAL_LABEL(LReturnDone) +LOCAL_LABEL(LNoFloatHFAReturn): + + cmp r3, #32 + bne LOCAL_LABEL(LNoDoubleHFAReturn) + vstm r2, {d0-d3} + b LOCAL_LABEL(LReturnDone) +LOCAL_LABEL(LNoDoubleHFAReturn): + + EMIT_BREAKPOINT // Unreachable + +LOCAL_LABEL(LFloatingPointReturnDone): + + // Save return value into retbuf + str r0, [r5, #(CallDescrData__returnValue + 0)] + str r1, [r5, #(CallDescrData__returnValue + 4)] + +LOCAL_LABEL(LReturnDone): + +#ifdef _DEBUG + // trash the floating point registers to ensure that the HFA return values + // won't survive by accident + vldm sp, {d0-d3} +#endif + + EPILOG_STACK_RESTORE_OFFSET r7, #8 + EPILOG_POP "{r4,r5,r7,pc}" + + NESTED_END CallDescrWorkerInternal,_TEXT + + +//----------------------------------------------------------------------------- +// This helper routine is where returns for irregular tail calls end up +// so they can dynamically pop their stack arguments. 
+//----------------------------------------------------------------------------- +// +// Stack Layout (stack grows up, 0 at the top, offsets relative to frame pointer, r7): +// +// sp -> callee stack arguments +// : +// : +// -0Ch gsCookie +// TailCallHelperFrame -> +// -08h __VFN_table +// -04h m_Next +// r7 -> +// +00h m_calleeSavedRgisters.r4 +// +04h .r5 +// +08h .r6 +// +0Ch .r7 +// +10h .r8 +// +14h .r9 +// +18h .r10 +// r11-> +// +1Ch .r11 +// +20h .r14 -or- m_ReturnAddress +// +// r6 -> GetThread() +// r5 -> r6->m_pFrame (old Frame chain head) +// r11 is used to preserve the ETW call stack + + NESTED_ENTRY TailCallHelperStub, _TEXT, NoHandler + // + // This prolog is never executed, but we keep it here for reference + // and for the unwind data it generates + // + + // Spill callee saved registers and return address. + PROLOG_PUSH "{r4-r11,lr}" + + PROLOG_STACK_SAVE_OFFSET r7, #12 + + // + // This is the code that would have to run to setup this frame + // like the C++ helper does before calling RtlRestoreContext + // + // Allocate space for the rest of the frame and GSCookie. 
+ // PROLOG_STACK_ALLOC 0x0C + // + // Set r11 for frame chain + //add r11, r7, 0x1C + // + // Set the vtable for TailCallFrame + //bl TCF_GETMETHODFRAMEVPTR + //str r0, [r7, #-8] + // + // Initialize the GSCookie within the Frame + //ldr r0, =s_gsCookie + //str r0, [r7, #-0x0C] + // + // Link the TailCallFrameinto the Frame chain + // and initialize r5 & r6 for unlinking later + //CALL_GETTHREAD + //mov r6, r0 + //ldr r5, [r6, #Thread__m_pFrame] + //str r5, [r7, #-4] + //sub r0, r7, 8 + //str r0, [r6, #Thread__m_pFrame] + // + // None of the previous stuff is ever executed, + // but we keep it here for reference + // + + // + // Here's the pretend call (make it real so the unwinder + // doesn't think we're in the prolog) + // + bl C_FUNC(TailCallHelperStub) + // + // with the real return address pointing to this real epilog + // +C_FUNC(JIT_TailCallHelperStub_ReturnAddress): +.global C_FUNC(JIT_TailCallHelperStub_ReturnAddress) + + // + // Our epilog (which also unlinks the StubHelperFrame) + // Be careful not to trash the return registers + // + +#ifdef _DEBUG + ldr r3, =s_gsCookie + ldr r3, [r3] + ldr r2, [r7, #-0x0C] + cmp r2, r3 + beq LOCAL_LABEL(GoodGSCookie) + bl C_FUNC(DoJITFailFast) +LOCAL_LABEL(GoodGSCookie): +#endif // _DEBUG + + // + // unlink the TailCallFrame + // + str r5, [r6, #Thread__m_pFrame] + + // + // epilog + // + EPILOG_STACK_RESTORE_OFFSET r7, #12 + EPILOG_POP "{r4-r11,lr}" + bx lr + + NESTED_END TailCallHelperStub, _TEXT + +// ------------------------------------------------------------------ + +// void LazyMachStateCaptureState(struct LazyMachState *pState)// + LEAF_ENTRY LazyMachStateCaptureState, _TEXT + + // marks that this is not yet valid + mov r1, #0 + str r1, [r0, #MachState__isValid] + + str lr, [r0, #LazyMachState_captureIp] + str sp, [r0, #LazyMachState_captureSp] + + add r1, r0, #LazyMachState_captureR4_R11 + stm r1, {r4-r11} + + mov pc, lr + + LEAF_END LazyMachStateCaptureState, _TEXT + +// void 
SinglecastDelegateInvokeStub(Delegate *pThis) + LEAF_ENTRY SinglecastDelegateInvokeStub, _TEXT + cmp r0, #0 + beq LOCAL_LABEL(LNullThis) + + ldr r12, [r0, #DelegateObject___methodPtr] + ldr r0, [r0, #DelegateObject___target] + + bx r12 + +LOCAL_LABEL(LNullThis): + mov r0, #CORINFO_NullReferenceException_ASM + b C_FUNC(JIT_InternalThrow) + + LEAF_END SinglecastDelegateInvokeStub, _TEXT + +// +// r12 = UMEntryThunk* +// + NESTED_ENTRY TheUMEntryPrestub,_TEXT,NoHandler + + PROLOG_PUSH "{r0-r4,r7,r8,lr}" // add r8 to make stack aligned by 8B + PROLOG_STACK_SAVE_OFFSET r7, #20 + vpush {d0-d7} + + CHECK_STACK_ALIGNMENT + + mov r0, r12 + bl C_FUNC(TheUMEntryPrestubWorker) + + // Record real target address in r12. + mov r12, r0 + + // Epilog + vpop {d0-d7} + pop {r0-r4,r7,r8,lr} + bx r12 + + NESTED_END TheUMEntryPrestub,_TEXT + +// +// r12 = UMEntryThunk* +// + NESTED_ENTRY UMThunkStub,_TEXT,UnhandledExceptionHandlerUnix + PROLOG_PUSH "{r4,r5,r7,r11,lr}" + PROLOG_STACK_SAVE_OFFSET r7, #8 + + alloc_stack 4 * 5 + stm sp, {r0-r3,r12} + + //GBLA UMThunkStub_HiddenArgOffest // offset of saved UMEntryThunk * + //GBLA UMThunkStub_StackArgsOffest // offset of original stack args + //GBLA UMThunkStub_StackArgsSize // total size of UMThunkStub frame +UMThunkStub_HiddenArgOffset = (-3)*4 +UMThunkStub_StackArgsOffset = 3*4 +UMThunkStub_StackArgsSize = 10*4 + + CHECK_STACK_ALIGNMENT + + bl C_FUNC(GetThread) + cbz r0, LOCAL_LABEL(UMThunkStub_DoThreadSetup) + +LOCAL_LABEL(UMThunkStub_HaveThread): + mov r5, r0 // r5 = Thread * + + ldr r2, =g_TrapReturningThreads + + mov r4, 1 + str r4, [r5, #Thread__m_fPreemptiveGCDisabled] + + ldr r3, [r2] + cbnz r3, LOCAL_LABEL(UMThunkStub_DoTrapReturningThreads) + +LOCAL_LABEL(UMThunkStub_InCooperativeMode): + ldr r12, [r7, #UMThunkStub_HiddenArgOffset] + + ldr r0, [r5, #Thread__m_pDomain] + ldr r1, [r12, #UMEntryThunk__m_dwDomainId] + ldr r0, [r0, #AppDomain__m_dwId] + ldr r3, [r12, #UMEntryThunk__m_pUMThunkMarshInfo] + cmp r0, r1 + bne 
LOCAL_LABEL(UMThunkStub_WrongAppDomain) + + ldr r2, [r3, #UMThunkMarshInfo__m_cbActualArgSize] + cbz r2, LOCAL_LABEL(UMThunkStub_ArgumentsSetup) + + add r0, r7, #UMThunkStub_StackArgsOffset // Source pointer + add r0, r0, r2 + lsr r1, r2, #2 // Count of stack slots to copy + + and r2, r2, #4 // Align the stack + sub sp, sp, r2 + +LOCAL_LABEL(UMThunkStub_StackLoop): + ldr r2, [r0,#-4]! + str r2, [sp,#-4]! + subs r1, r1, #1 + bne LOCAL_LABEL(UMThunkStub_StackLoop) + +LOCAL_LABEL(UMThunkStub_ArgumentsSetup): + ldr r4, [r3, #UMThunkMarshInfo__m_pILStub] + + // reload argument registers + sub r0, r7, #28 + ldm r0, {r0-r3} + + CHECK_STACK_ALIGNMENT + + blx r4 + +LOCAL_LABEL(UMThunkStub_PostCall): + mov r4, 0 + str r4, [r5, #Thread__m_fPreemptiveGCDisabled] + + EPILOG_STACK_RESTORE_OFFSET r7, #8 + EPILOG_POP "{r4,r5,r7,r11,pc}" + +LOCAL_LABEL(UMThunkStub_DoThreadSetup): + sub sp, #SIZEOF__FloatArgumentRegisters + vstm sp, {d0-d7} + bl C_FUNC(CreateThreadBlockThrow) + vldm sp, {d0-d7} + add sp, #SIZEOF__FloatArgumentRegisters + b LOCAL_LABEL(UMThunkStub_HaveThread) + +LOCAL_LABEL(UMThunkStub_DoTrapReturningThreads): + sub sp, #SIZEOF__FloatArgumentRegisters + vstm sp, {d0-d7} + mov r0, r5 // Thread* pThread + ldr r1, [r7, #UMThunkStub_HiddenArgOffset] // UMEntryThunk* pUMEntry + bl C_FUNC(UMThunkStubRareDisableWorker) + vldm sp, {d0-d7} + add sp, #SIZEOF__FloatArgumentRegisters + b LOCAL_LABEL(UMThunkStub_InCooperativeMode) + +LOCAL_LABEL(UMThunkStub_WrongAppDomain): + sub sp, #SIZEOF__FloatArgumentRegisters + vstm sp, {d0-d7} + + ldr r0, [r7, #UMThunkStub_HiddenArgOffset] // UMEntryThunk* pUMEntry + mov r2, r7 // void * pArgs + // remaining arguments are unused + bl C_FUNC(UM2MDoADCallBack) + + // Restore non-FP return value. + ldr r0, [r7, #0] + ldr r1, [r7, #4] + + // Restore FP return value or HFA. 
+ vldm sp, {d0-d3} + b LOCAL_LABEL(UMThunkStub_PostCall) + + NESTED_END UMThunkStub,_TEXT + +// UM2MThunk_WrapperHelper(void *pThunkArgs, // r0 +// int cbStackArgs, // r1 (unused) +// void *pAddr, // r2 (unused) +// UMEntryThunk *pEntryThunk, // r3 +// Thread *pThread) // [sp, #0] + + NESTED_ENTRY UM2MThunk_WrapperHelper, _TEXT, NoHandler + + PROLOG_PUSH "{r4-r7,r11,lr}" + PROLOG_STACK_SAVE_OFFSET r7, #12 + + CHECK_STACK_ALIGNMENT + + mov r12, r3 // r12 = UMEntryThunk * + + // + // Note that layout of the arguments is given by UMThunkStub frame + // + mov r5, r0 // r5 = pArgs + + ldr r3, [r12, #UMEntryThunk__m_pUMThunkMarshInfo] + + ldr r2, [r3, #UMThunkMarshInfo__m_cbActualArgSize] + cbz r2, LOCAL_LABEL(UM2MThunk_WrapperHelper_ArgumentsSetup) + + add r0, r5, #UMThunkStub_StackArgsSize // Source pointer + add r0, r0, r2 + lsr r1, r2, #2 // Count of stack slots to copy + + and r2, r2, #4 // Align the stack + sub sp, sp, r2 + +LOCAL_LABEL(UM2MThunk_WrapperHelper_StackLoop): + ldr r2, [r0,#-4]! + str r2, [sp,#-4]! + subs r1, r1, #1 + bne LOCAL_LABEL(UM2MThunk_WrapperHelper_StackLoop) + +LOCAL_LABEL(UM2MThunk_WrapperHelper_ArgumentsSetup): + ldr r4, [r3, #UMThunkMarshInfo__m_pILStub] + + // reload floating point registers + sub r6, r5, #SIZEOF__FloatArgumentRegisters + vldm r6, {d0-d7} + + // reload argument registers + ldm r5, {r0-r3} + + CHECK_STACK_ALIGNMENT + + blx r4 + + // Save non-floating point return + str r0, [r5, #0] + str r1, [r5, #4] + + // Save FP return value or HFA. 
+ vstm r6, {d0-d3} + +#ifdef _DEBUG + // trash the floating point registers to ensure that the HFA return values + // won't survive by accident + vldm sp, {d0-d3} +#endif + + EPILOG_STACK_RESTORE_OFFSET r7, #12 + EPILOG_POP "{r4-r7,r11,pc}" + + NESTED_END UM2MThunk_WrapperHelper, _TEXT + +// ------------------------------------------------------------------ + + NESTED_ENTRY ThePreStub, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK + + add r0, sp, #__PWTB_TransitionBlock // pTransitionBlock + mov r1, r12 // pMethodDesc + + bl C_FUNC(PreStubWorker) + + mov r12, r0 + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + bx r12 + + NESTED_END ThePreStub, _TEXT + +// ------------------------------------------------------------------ +// This method does nothing. It's just a fixed function for the debugger to put a breakpoint on. + LEAF_ENTRY ThePreStubPatch, _TEXT + nop +ThePreStubPatchLabel: + .global ThePreStubPatchLabel + bx lr + LEAF_END ThePreStubPatch, _TEXT + +// ------------------------------------------------------------------ +// The call in ndirect import precode points to this function. + NESTED_ENTRY NDirectImportThunk, _TEXT, NoHandler + + PROLOG_PUSH "{r0-r4,r7,r8,lr}" // Spill general argument registers, return address and + PROLOG_STACK_SAVE_OFFSET r7, #20 + // arbitrary register to keep stack aligned + vpush {d0-d7} // Spill floating point argument registers + + CHECK_STACK_ALIGNMENT + + mov r0, r12 + bl C_FUNC(NDirectImportWorker) + mov r12, r0 + + vpop {d0-d7} + pop {r0-r4,r7,r8,lr} + + // If we got back from NDirectImportWorker, the MD has been successfully + // linked. Proceed to execute the original DLL call. + bx r12 + + NESTED_END NDirectImportThunk, _TEXT + +// ------------------------------------------------------------------ +// The call in fixup precode initally points to this function. +// The pupose of this function is to load the MethodDesc and forward the call the prestub. 
+ NESTED_ENTRY PrecodeFixupThunk, _TEXT, NoHandler + + // r12 = FixupPrecode * + + PROLOG_PUSH "{r0-r1}" + + // Inline computation done by FixupPrecode::GetMethodDesc() + ldrb r0, [r12, #3] // m_PrecodeChunkIndex + ldrb r1, [r12, #2] // m_MethodDescChunkIndex + + add r12,r12,r0,lsl #3 + add r0,r12,r0,lsl #2 + ldr r0, [r0,#8] + add r12,r0,r1,lsl #2 + + EPILOG_POP "{r0-r1}" + b C_FUNC(ThePreStub) + + NESTED_END PrecodeFixupThunk, _TEXT + +// ------------------------------------------------------------------ +// void ResolveWorkerAsmStub(r0, r1, r2, r3, r4:IndirectionCellAndFlags, r12:DispatchToken) +// +// The stub dispatch thunk which transfers control to VSD_ResolveWorker. + NESTED_ENTRY ResolveWorkerAsmStub, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK + + add r0, sp, #__PWTB_TransitionBlock // pTransitionBlock + mov r2, r12 // token + + // indirection cell in r4 - should be consistent with REG_ARM_STUB_SPECIAL + bic r1, r4, #3 // indirection cell + and r3, r4, #3 // flags + + bl C_FUNC(VSD_ResolveWorker) + + mov r12, r0 + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + bx r12 + + NESTED_END ResolveWorkerAsmStub, _TEXT + +// ------------------------------------------------------------------ +// void ResolveWorkerChainLookupAsmStub(r0, r1, r2, r3, r4:IndirectionCellAndFlags, r12:DispatchToken) + NESTED_ENTRY ResolveWorkerChainLookupAsmStub, _TEXT, NoHandler + + // ARMSTUB TODO: implement chained lookup + b C_FUNC(ResolveWorkerAsmStub) + + NESTED_END ResolveWorkerChainLookupAsmStub, _TEXT + + // + // If a preserved register were pushed onto the stack between + // the managed caller and the H_M_F, _R4_R11 will point to its + // location on the stack and it would have been updated on the + // stack by the GC already and it will be popped back into the + // appropriate register when the appropriate epilog is run. 
+ // + // Otherwise, the register is preserved across all the code + // in this HCALL or FCALL, so we need to update those registers + // here because the GC will have updated our copies in the + // frame. + // + // So, if _R4_R11 points into the MachState, we need to update + // the register here. That's what this macro does. + // + + .macro RestoreRegMS regIndex, reg + + // Incoming: + // + // R0 = address of MachState + // + // $regIndex: Index of the register (R4-R11). For R4, index is 4. + // For R5, index is 5, and so on. + // + // $reg: Register name (e.g. R4, R5, etc) + // + // Get the address of the specified captured register from machine state + add r2, r0, #(MachState__captureR4_R11 + ((\regIndex-4)*4)) + + // Get the address of the specified preserved register from machine state + ldr r3, [r0, #(MachState___R4_R11 + ((\regIndex-4)*4))] + + cmp r2, r3 + bne 0f + ldr \reg, [r2] +0: + + .endm + +// +// EXTERN_C void ProfileEnterNaked(FunctionIDOrClientID functionIDOrClientID); +// +NESTED_ENTRY ProfileEnterNaked, _TEXT, NoHandler + PROLOG_PUSH "{r4, r5, r7, r11, lr}" + PROLOG_STACK_SAVE_OFFSET r7, #8 + + // fields of PLATFORM_SPECIFIC_DATA, in reverse order + + // UINT32 r0; // Keep r0 & r1 contiguous to make returning 64-bit results easier + // UINT32 r1; + // void *R11; + // void *Pc; + // union // Float arg registers as 32-bit (s0-s15) and 64-bit (d0-d7) + // { + // UINT32 s[16]; + // UINT64 d[8]; + // }; + // FunctionID functionId; + // void *probeSp; // stack pointer of managed function + // void *profiledSp; // location of arguments on stack + // LPVOID hiddenArg; + // UINT32 flags; + movw r4, #1 + push { /* flags */ r4 } + movw r4, #0 + push { /* hiddenArg */ r4 } + add r5, r11, #8 + push { /* profiledSp */ r5 } + add r5, sp, #32 + push { /* probeSp */ r5 } + push { /* functionId */ r0 } + vpush.64 { d0 - d7 } + push { lr } + push { r11 } + push { /* return value, r4 is NULL */ r4 } + push { /* return value, r4 is NULL */ r4 } + mov r1, sp + bl 
C_FUNC(ProfileEnter) + EPILOG_STACK_RESTORE_OFFSET r7, #8 + EPILOG_POP "{r4, r5, r7, r11, pc}" +NESTED_END ProfileEnterNaked, _TEXT + +// +// EXTERN_C void ProfileLeaveNaked(FunctionIDOrClientID functionIDOrClientID); +// +NESTED_ENTRY ProfileLeaveNaked, _TEXT, NoHandler + PROLOG_PUSH "{r1, r2, r4, r5, r7, r11, lr}" + PROLOG_STACK_SAVE_OFFSET r7, #16 + + // fields of PLATFORM_SPECIFIC_DATA, in reverse order + + // UINT32 r0; // Keep r0 & r1 contiguous to make returning 64-bit results easier + // UINT32 r1; + // void *R11; + // void *Pc; + // union // Float arg registers as 32-bit (s0-s15) and 64-bit (d0-d7) + // { + // UINT32 s[16]; + // UINT64 d[8]; + // }; + // FunctionID functionId; + // void *probeSp; // stack pointer of managed function + // void *profiledSp; // location of arguments on stack + // LPVOID hiddenArg; + // UINT32 flags; + movw r4, #2 + push { /* flags */ r4 } + movw r4, #0 + push { /* hiddenArg */ r4 } + add r5, r11, #8 + push { /* profiledSp */ r5 } + add r5, sp, #40 + push { /* probeSp */ r5 } + push { /* functionId */ r0 } + vpush.64 { d0 - d7 } + push { lr } + push { r11 } + push { r1 } + push { r2 } + mov r1, sp + bl C_FUNC(ProfileLeave) + EPILOG_STACK_RESTORE_OFFSET r7, #16 + EPILOG_POP "{r1, r2, r4, r5, r7, r11, pc}" +NESTED_END ProfileLeaveNaked, _TEXT + +// EXTERN_C int __fastcall HelperMethodFrameRestoreState( +// INDEBUG_COMMA(HelperMethodFrame *pFrame) +// MachState *pState +// ) + LEAF_ENTRY HelperMethodFrameRestoreState, _TEXT + +#ifdef _DEBUG + mov r0, r1 +#endif + + // If machine state is invalid, then simply exit + ldr r1, [r0, #MachState__isValid] + cmp r1, #0 + beq LOCAL_LABEL(Done) + + RestoreRegMS 4, R4 + RestoreRegMS 5, R5 + RestoreRegMS 6, R6 + RestoreRegMS 7, R7 + RestoreRegMS 8, R8 + RestoreRegMS 9, R9 + RestoreRegMS 10, R10 + RestoreRegMS 11, R11 +LOCAL_LABEL(Done): + // Its imperative that the return value of HelperMethodFrameRestoreState is zero + // as it is used in the state machine to loop until it becomes zero. 
+ // Refer to HELPER_METHOD_FRAME_END macro for details. + mov r0,#0 + bx lr + + LEAF_END HelperMethodFrameRestoreState, _TEXT + +#if 0 +// ------------------------------------------------------------------ +// Macro to generate Redirection Stubs +// +// $reason : reason for redirection +// Eg. GCThreadControl +// NOTE: If you edit this macro, make sure you update GetCONTEXTFromRedirectedStubStackFrame. +// This function is used by both the personality routine and the debugger to retrieve the original CONTEXT. + .macro GenerateRedirectedHandledJITCaseStub reason + + NESTED_ENTRY RedirectedHandledJITCaseFor\reason\()_Stub, _TEXT, NoHandler + + PROLOG_PUSH "{r7,lr}" // return address + PROLOG_STACK_SAVE r7 + alloc_stack 4 // stack slot to save the CONTEXT * + + //REDIRECTSTUB_SP_OFFSET_CONTEXT is defined in asmconstants.h + //If CONTEXT is not saved at 0 offset from SP it must be changed as well. + //ASSERT REDIRECTSTUB_SP_OFFSET_CONTEXT == 0 + + // Runtime check for 8-byte alignment. This check is necessary as this function can be + // entered before complete execution of the prolog of another function. + and r0, r7, #4 + sub sp, sp, r0 + + // stack must be 8 byte aligned + CHECK_STACK_ALIGNMENT + + // + // Save a copy of the redirect CONTEXT*. + // This is needed for the debugger to unwind the stack. + // + bl GetCurrentSavedRedirectContext + str r0, [r7] + + // + // Fetch the interrupted pc and save it as our return address. + // + ldr r1, [r0, #CONTEXT_Pc] + str r1, [r7, #8] + + // + // Call target, which will do whatever we needed to do in the context + // of the target thread, and will RtlRestoreContext when it is done. + // + bl _RedirectedHandledJITCaseFor\reason\()_Stub@Thread@@CAXXZ + + EMIT_BREAKPOINT // Unreachable + +// Put a label here to tell the debugger where the end of this function is. 
+RedirectedHandledJITCaseFor\reason\()_StubEnd: + .global RedirectedHandledJITCaseFor\reason\()_StubEnd + + NESTED_END RedirectedHandledJITCaseFor\reason\()_Stub, _TEXT + + .endm + +// ------------------------------------------------------------------ +// Redirection Stub for GC in fully interruptible method + GenerateRedirectedHandledJITCaseStub GCThreadControl +// ------------------------------------------------------------------ + GenerateRedirectedHandledJITCaseStub DbgThreadControl +// ------------------------------------------------------------------ + GenerateRedirectedHandledJITCaseStub UserSuspend +// ------------------------------------------------------------------ + GenerateRedirectedHandledJITCaseStub YieldTask + +#ifdef _DEBUG +// ------------------------------------------------------------------ +// Redirection Stub for GC Stress + GenerateRedirectedHandledJITCaseStub GCStress +#endif + +#endif + +// ------------------------------------------------------------------ +// Functions to probe for stack space +// Input reg r4 = amount of stack to probe for +// value of reg r4 is preserved on exit from function +// r12 is trashed +// The below two functions were copied from vctools\crt\crtw32\startup\arm\chkstk.asm + + NESTED_ENTRY checkStack, _TEXT, NoHandler + subs r12,sp,r4 + mrc p15,#0,r4,c13,c0,#2 // get TEB * + ldr r4,[r4,#8] // get Stack limit + bcc LOCAL_LABEL(checkStack_neg) // if r12 is less then 0 set it to 0 +LOCAL_LABEL(checkStack_label1): + cmp r12, r4 + bcc C_FUNC(stackProbe) // must probe to extend guardpage if r12 is beyond stackLimit + sub r4, sp, r12 // restore value of r4 + bx lr +LOCAL_LABEL(checkStack_neg): + mov r12, #0 + b LOCAL_LABEL(checkStack_label1) + NESTED_END checkStack, _TEXT + + NESTED_ENTRY stackProbe, _TEXT, NoHandler + PROLOG_PUSH "{r5,r6}" + mov r6, r12 + bfc r6, #0, #0xc // align down (4K) +LOCAL_LABEL(stackProbe_loop): + sub r4,r4,#0x1000 // dec stack Limit by 4K as page size is 4K + ldr r5,[r4] // try to read ... 
this should move the guard page + cmp r4,r6 + bne LOCAL_LABEL(stackProbe_loop) + EPILOG_POP "{r5,r6}" + sub r4,sp,r12 + bx lr + NESTED_END stackProbe, _TEXT + +//------------------------------------------------ +// VirtualMethodFixupStub +// +// In NGEN images, virtual slots inherited from cross-module dependencies +// point to a jump thunk that calls into the following function that will +// call into a VM helper. The VM helper is responsible for patching up +// thunk, upon executing the precode, so that all subsequent calls go directly +// to the actual method body. +// +// This is done lazily for performance reasons. +// +// On entry: +// +// R0 = "this" pointer +// R12 = Address of thunk + 4 + + NESTED_ENTRY VirtualMethodFixupStub, _TEXT, NoHandler + + // Save arguments and return address + PROLOG_PUSH "{r0-r3, r7,r8, lr}" // keep increase by 8B for alignment + PROLOG_STACK_SAVE_OFFSET r7, #20 + + // Align stack + alloc_stack SIZEOF__FloatArgumentRegisters + 4 + vstm sp, {d0-d7} + + + CHECK_STACK_ALIGNMENT + + // R12 contains an address that is 4 bytes ahead of + // where the thunk starts. Refer to ZapImportVirtualThunk::Save + // for details on this. + // + // Move the correct thunk start address in R1 + sub r1, r12, #4 + + // Call the helper in the VM to perform the actual fixup + // and tell us where to tail call. R0 already contains + // the this pointer. 
+ bl C_FUNC(VirtualMethodFixupWorker) + + // On return, R0 contains the target to tailcall to + mov r12, r0 + + // pop the stack and restore original register state + vldm sp, {d0-d7} + free_stack SIZEOF__FloatArgumentRegisters + 4 + pop {r0-r3, r7,r8, lr} + + PATCH_LABEL VirtualMethodFixupPatchLabel + + // and tailcall to the actual method + bx r12 + + NESTED_END VirtualMethodFixupStub, _TEXT + +//------------------------------------------------ +// ExternalMethodFixupStub +// +// In NGEN images, calls to cross-module external methods initially +// point to a jump thunk that calls into the following function that will +// call into a VM helper. The VM helper is responsible for patching up the +// thunk, upon executing the precode, so that all subsequent calls go directly +// to the actual method body. +// +// This is done lazily for performance reasons. +// +// On entry: +// +// R12 = Address of thunk + 4 + + NESTED_ENTRY ExternalMethodFixupStub, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK + + add r0, sp, #__PWTB_TransitionBlock // pTransitionBlock + + // Adjust (read comment above for details) and pass the address of the thunk + sub r1, r12, #4 // pThunk + + mov r2, #0 // sectionIndex + mov r3, #0 // pModule + bl C_FUNC(ExternalMethodFixupWorker) + + // mov the address we patched to in R12 so that we can tail call to it + mov r12, r0 + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + PATCH_LABEL ExternalMethodFixupPatchLabel + bx r12 + + NESTED_END ExternalMethodFixupStub, _TEXT + +//------------------------------------------------ +// StubDispatchFixupStub +// +// In NGEN images, calls to interface methods initially +// point to a jump thunk that calls into the following function that will +// call into a VM helper. The VM helper is responsible for patching up the +// thunk with actual stub dispatch stub. 
+// +// On entry: +// +// R4 = Address of indirection cell + + NESTED_ENTRY StubDispatchFixupStub, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK + + // address of StubDispatchFrame + add r0, sp, #__PWTB_TransitionBlock // pTransitionBlock + mov r1, r4 // siteAddrForRegisterIndirect + mov r2, #0 // sectionIndex + mov r3, #0 // pModule + + bl C_FUNC(StubDispatchFixupWorker) + + // mov the address we patched to in R12 so that we can tail call to it + mov r12, r0 + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + PATCH_LABEL StubDispatchFixupPatchLabel + bx r12 + + NESTED_END StubDispatchFixupStub, _TEXT + +//------------------------------------------------ +// JIT_RareDisableHelper +// +// The JIT expects this helper to preserve registers used for return values +// + NESTED_ENTRY JIT_RareDisableHelper, _TEXT, NoHandler + + PROLOG_PUSH "{r0-r1, r7,r8, r11, lr}" // save integer return value + PROLOG_STACK_SAVE_OFFSET r7, #8 + vpush {d0-d3} // floating point return value + + CHECK_STACK_ALIGNMENT + + bl C_FUNC(JIT_RareDisableHelperWorker) + + vpop {d0-d3} + EPILOG_POP "{r0-r1, r7,r8, r11, pc}" + + NESTED_END JIT_RareDisableHelper, _TEXT + + +#ifdef FEATURE_CORECLR +// +// JIT Static access helpers for single appdomain case +// + +// ------------------------------------------------------------------ +// void* JIT_GetSharedNonGCStaticBase(SIZE_T moduleDomainID, DWORD dwClassDomainID) + + LEAF_ENTRY JIT_GetSharedNonGCStaticBase_SingleAppDomain, _TEXT + + // If class is not initialized, bail to C++ helper + add r2, r0, #DomainLocalModule__m_pDataBlob + ldrb r2, [r2, r1] + tst r2, #1 + beq LOCAL_LABEL(CallCppHelper1) + + bx lr + +LOCAL_LABEL(CallCppHelper1): + // Tail call JIT_GetSharedNonGCStaticBase_Helper + b C_FUNC(JIT_GetSharedNonGCStaticBase_Helper) + LEAF_END JIT_GetSharedNonGCStaticBase_SingleAppDomain, _TEXT + + +// ------------------------------------------------------------------ +// void* JIT_GetSharedNonGCStaticBaseNoCtor(SIZE_T moduleDomainID, DWORD 
dwClassDomainID)

// Single-AppDomain fast path: no class-constructor check needed here, and the
// incoming r0 is returned unchanged as the non-GC statics base.
// NOTE(review): this assumes r0 already holds the correct statics base in the
// single-AppDomain configuration — confirm against the C++ helper it shadows.
        LEAF_ENTRY JIT_GetSharedNonGCStaticBaseNoCtor_SingleAppDomain, _TEXT

        bx lr                           // return r0 as passed in
        LEAF_END JIT_GetSharedNonGCStaticBaseNoCtor_SingleAppDomain, _TEXT


// ------------------------------------------------------------------
// void* JIT_GetSharedGCStaticBase(SIZE_T moduleDomainID, DWORD dwClassDomainID)
//
// In:  r0 = DomainLocalModule*, r1 = dwClassDomainID (byte offset into the
//      module's class-flags data blob)
// Out: r0 = GC statics base, or tail-calls the C++ helper when the class's
//      static constructor has not yet run.

        LEAF_ENTRY JIT_GetSharedGCStaticBase_SingleAppDomain, _TEXT

        // If class is not initialized, bail to C++ helper
        add r2, r0, #DomainLocalModule__m_pDataBlob
        ldrb r2, [r2, r1]               // r2 = this class's flags byte
        tst r2, #1                      // low bit clear => cctor not run yet
        beq LOCAL_LABEL(CallCppHelper3)

        ldr r0, [r0, #DomainLocalModule__m_pGCStatics]
        bx lr

LOCAL_LABEL(CallCppHelper3):
        // Tail call JIT_GetSharedGCStaticBase_Helper (runs the cctor path)
        b C_FUNC(JIT_GetSharedGCStaticBase_Helper)
        LEAF_END JIT_GetSharedGCStaticBase_SingleAppDomain, _TEXT


// ------------------------------------------------------------------
// void* JIT_GetSharedGCStaticBaseNoCtor(SIZE_T moduleDomainID, DWORD dwClassDomainID)
//
// Same as above but the JIT has already proven the cctor ran, so no flag check.

        LEAF_ENTRY JIT_GetSharedGCStaticBaseNoCtor_SingleAppDomain, _TEXT

        ldr r0, [r0, #DomainLocalModule__m_pGCStatics]
        bx lr
        LEAF_END JIT_GetSharedGCStaticBaseNoCtor_SingleAppDomain, _TEXT

#endif

// ------------------------------------------------------------------
// __declspec(naked) void F_CALL_CONV JIT_Stelem_Ref(PtrArray* array, unsigned idx, Object* val)
//
// Store a reference into an array of references, performing the null check,
// bounds check, and array-covariance type check that the CLR requires.
        LEAF_ENTRY JIT_Stelem_Ref, _TEXT

        // We retain arguments as they were passed and use r0 == array, r1 == idx, r2 == val

        // check for null array
        cbz r0, LOCAL_LABEL(ThrowNullReferenceException)

        // idx bounds check: unsigned compare rejects idx >= length
        ldr r3,[r0,#ArrayBase__m_NumComponents]
        cmp r3,r1
        bls LOCAL_LABEL(ThrowIndexOutOfRangeException)

        // fast path to null assignment (doesn't need any write-barriers)
        cbz r2, LOCAL_LABEL(AssigningNull)

        // Verify the array-type and val-type matches before writing
        ldr r12, [r0]                   // r12 = array MT
        ldr r3, [r2]                    // r3 = val->GetMethodTable()
        ldr r12, [r12, #MethodTable__m_ElementType] // array->GetArrayElementTypeHandle()
        cmp r3, r12
        beq C_FUNC(JIT_Stelem_DoWrite)  // exact element-type match: go write

        // Types didn't match, but writing into an array of objects is always allowed
        ldr r3, =g_pObjectClass
        ldr r3, [r3]                    // r3 = *g_pObjectClass
        cmp r3, r12                     // array element type is exactly Object?
        beq C_FUNC(JIT_Stelem_DoWrite)

        // array type and val type do not exactly match. Raise frame and do detailed match
        b C_FUNC(JIT_Stelem_Ref_NotExactMatch)

LOCAL_LABEL(AssigningNull):
        // Assigning null doesn't need write barrier
        adds r0, r1, LSL #2             // r0 = r0 + (r1 x 4) = &array->m_array[idx] - PtrArray__m_Array
        str r2, [r0, #PtrArray__m_Array] // array->m_array[idx] = val (null)
        bx lr

LOCAL_LABEL(ThrowNullReferenceException):
        // Tail call JIT_InternalThrow(NullReferenceException)
        ldr r0, =CORINFO_NullReferenceException_ASM
        b C_FUNC(JIT_InternalThrow)

LOCAL_LABEL(ThrowIndexOutOfRangeException):
        // Tail call JIT_InternalThrow(IndexOutOfRangeException)
        ldr r0, =CORINFO_IndexOutOfRangeException_ASM
        b C_FUNC(JIT_InternalThrow)

        LEAF_END JIT_Stelem_Ref, _TEXT

// ------------------------------------------------------------------
// __declspec(naked) void F_CALL_CONV JIT_Stelem_Ref_NotExactMatch(PtrArray* array,
//                                                       unsigned idx, Object* val)
// Slow path of JIT_Stelem_Ref: the element type and the value's type differ,
// so run the full cast machinery (and, if needed, a frame-raising check).
// In addition to the r0-r2 arguments:
//   r12 = array->GetArrayElementTypeHandle()
//
        NESTED_ENTRY JIT_Stelem_Ref_NotExactMatch, _TEXT, NoHandler
        push {lr}                       // 1 slot for return address...
        push {r0-r2}                    // ...plus 3 arg slots keeps sp 16-aligned

        CHECK_STACK_ALIGNMENT

        // allow in case val can be casted to array element type
        // call ObjIsInstanceOfNoGC(val, array->GetArrayElementTypeHandle())
        mov r1, r12                     // arg1 = element type handle
        mov r0, r2                      // arg0 = val
        bl C_FUNC(ObjIsInstanceOfNoGC)
        cmp r0, TypeHandle_CanCast
        beq LOCAL_LABEL(DoWrite)        // ObjIsInstance returned TypeHandle::CanCast

        // check via raising frame; the saved r0-r2 on the stack double as the
        // byref arguments (GC-protected roots) for ArrayStoreCheck
LOCAL_LABEL(NeedFrame):
        mov r1, sp                      // r1 = &array (saved r0 slot)
        adds r0, sp, #8                 // r0 = &val   (saved r2 slot)
        bl C_FUNC(ArrayStoreCheck)      // ArrayStoreCheck(&val, &array)

LOCAL_LABEL(DoWrite):
        pop {r0-r2}                     // restore original args
        pop {lr}
        b C_FUNC(JIT_Stelem_DoWrite)    // tail into the actual store + barrier

        NESTED_END JIT_Stelem_Ref_NotExactMatch, _TEXT

// ------------------------------------------------------------------
// __declspec(naked) void F_CALL_CONV JIT_Stelem_DoWrite(PtrArray* array, unsigned idx, Object* val)
//
// Performs the actual element store once all checks have passed, routing
// through the GC write barrier.
        LEAF_ENTRY JIT_Stelem_DoWrite, _TEXT

        // Setup args for JIT_WriteBarrier: r0 = &array->m_array[idx], r1 = val
        adds r0, #PtrArray__m_Array     // r0 = &array->m_array
        adds r0, r1, LSL #2             // r0 += idx * 4
        mov r1, r2                      // r1 = val

        // Branch to the write barrier (which is already correctly overwritten with
        // single or multi-proc code based on the current CPU)
        b C_FUNC(JIT_WriteBarrier)

        LEAF_END JIT_Stelem_DoWrite, _TEXT

// ------------------------------------------------------------------
// GC write barriers.
//
// Each barrier below embeds movw/movt immediate pairs (initially #0) that the
// runtime patches with the current values of the GC globals (g_lowest_address,
// g_card_table, ...). START_WRITE_BARRIER seeds per-barrier symbols holding the
// offset of each patch site; LOAD_GC_GLOBAL redefines them to the real offsets,
// and g_rgWriteBarrierDescriptors (below) exposes them to the patching code.

#define __wbScratch r3
#define pShadow r7

        // Initialize every patch-site offset for barrier \name to a 0xffff
        // sentinel; unused sites keep the sentinel so the patcher can skip them.
        .macro START_WRITE_BARRIER name
        __\name\()__g_lowest_address_offset = 0xffff
        __\name\()__g_highest_address_offset = 0xffff
        __\name\()__g_ephemeral_low_offset = 0xffff
        __\name\()__g_ephemeral_high_offset = 0xffff
        __\name\()__g_card_table_offset = 0xffff
        .endm

        // Emit a patchable movw/movt pair that will hold &\globalName, record
        // its offset from the start of barrier \name, and leave the (not yet
        // patched) value in \regName.
        .macro LOAD_GC_GLOBAL name, regName, globalName
\name\()__\globalName\()_offset:
        __\name\()__\globalName\()_offset = (\name\()__\globalName\()_offset - \name)
        movw \regName, #0
        movt \regName, #0
        .endm

        .macro UPDATE_GC_SHADOW name, ptrReg, valReg
        // Todo: implement, debugging helper
        .endm

        // Mark the card covering \ptrReg if \valReg points into the ephemeral
        // generation.  \mp selects an interlocked-friendly read-check-write;
        // \postGrow adds the upper ephemeral bound check.  Clobbers __wbScratch
        // and \tmpReg; falls through to local label 0 when no update is needed.
        .macro UPDATE_CARD_TABLE name, ptrReg, valReg, mp, postGrow, tmpReg

        LOAD_GC_GLOBAL \name, __wbScratch, g_ephemeral_low
        cmp \valReg, __wbScratch
        blo 0f                          // val below ephemeral range: no card

        .if(\postGrow)
        LOAD_GC_GLOBAL \name, __wbScratch, g_ephemeral_high
        cmp \valReg, __wbScratch
        bhs 0f                          // val above ephemeral range: no card
        .endif

        LOAD_GC_GLOBAL \name, __wbScratch, g_card_table
        add __wbScratch, __wbScratch, \ptrReg, lsr #10 // one card byte per 1KB

        .if(\mp)
        // MP: only write the card if it is not already set, to avoid
        // needless cache-line contention between processors.
        ldrb \tmpReg, [__wbScratch]
        cmp \tmpReg, #0xff
        itt ne
        movne \tmpReg, 0xff
        strbne \tmpReg, [__wbScratch]
        .else
        mov \tmpReg, #0xff
        strb \tmpReg, [__wbScratch]
        .endif

0:
        .endm

        // Branch to \label when \ptrReg is outside [g_lowest_address, g_highest_address),
        // i.e. the destination is not in the GC heap. Clobbers __wbScratch.
        .macro CHECK_GC_HEAP_RANGE name, ptrReg, label
        LOAD_GC_GLOBAL \name, __wbScratch, g_lowest_address
        cmp \ptrReg, __wbScratch
        blo \label
        LOAD_GC_GLOBAL \name, __wbScratch, g_highest_address
        cmp \ptrReg, __wbScratch
        bhs \label
        .endm

        // Unchecked barrier: destination is known to be in the GC heap.
        // *r0 = r1, then update shadow heap and card table.
        .macro JIT_WRITEBARRIER name, mp, post
        LEAF_ENTRY \name, _TEXT
        START_WRITE_BARRIER \name
        .if(\mp)
        dmb                             // order the store with prior accesses on MP
        .endif

        str r1, [r0]
        UPDATE_GC_SHADOW \name, r0, r1
        UPDATE_CARD_TABLE \name, r0, r1, \mp, \post, r0
        bx lr
        LEAF_END_MARKED \name, _TEXT
        .endm

        // Checked barrier, single-proc: r0 may point outside the GC heap
        // (e.g. to a stack location); skip the card update in that case.
        .macro JIT_CHECKEDWRITEBARRIER_SP name, post
        LEAF_ENTRY \name, _TEXT
        START_WRITE_BARRIER \name
        str r1, [r0]
        CHECK_GC_HEAP_RANGE \name, r0, 1f
        UPDATE_GC_SHADOW \name, r0, r1
        UPDATE_CARD_TABLE \name, r0, r1, 0, \post, r0
1:
        bx lr
        LEAF_END_MARKED \name, _TEXT
        .endm

        // Checked barrier, multi-proc: as above plus a dmb before the store.
        // NOTE(review): the non-heap path at 1: repeats the str already done
        // above; redundant but harmless.
        .macro JIT_CHECKEDWRITEBARRIER_MP name, post
        LEAF_ENTRY \name, _TEXT
        START_WRITE_BARRIER \name
        dmb
        str r1, [r0]
        CHECK_GC_HEAP_RANGE \name, r0, 1f
        UPDATE_GC_SHADOW \name, r0, r1
        UPDATE_CARD_TABLE \name, r0, r1, 1, \post, r0
        bx lr
1:
        str r1, [r0]
        bx lr
        LEAF_END_MARKED \name, _TEXT
        .endm

        // ByRef barrier: copies *r1 to *r0 and advances both pointers by 4
        // (used for copying runs of object references). The store happens
        // unconditionally; the barrier bookkeeping is skipped for non-heap
        // destinations via label 1.
        .macro JIT_BYREFWRITEBARRIER name, mp, post
        LEAF_ENTRY \name, _TEXT
        START_WRITE_BARRIER \name
        .if(\mp)
        dmb
        .endif

        ldr r2, [r1]                    // r2 = source object reference
        str r2, [r0]
        CHECK_GC_HEAP_RANGE \name, r0, 1f
        UPDATE_GC_SHADOW \name, r0, r2
        UPDATE_CARD_TABLE \name, r0, r2, \mp, \post, r2
1:
        add r0, #4                      // advance destination cursor
        add r1, #4                      // advance source cursor
        bx lr
        LEAF_END_MARKED \name, _TEXT
        .endm

        // One descriptor per barrier: [start, end) of the code to patch plus
        // the five patch-site offsets recorded by LOAD_GC_GLOBAL (0xffff where
        // the barrier has no such site). \name\()_End is emitted by
        // LEAF_END_MARKED.
        .macro JIT_WRITEBARRIER_DESCRIPTOR name
        .word \name
        .word \name\()_End
        .word __\name\()__g_lowest_address_offset
        .word __\name\()__g_highest_address_offset
        .word __\name\()__g_ephemeral_low_offset
        .word __\name\()__g_ephemeral_high_offset
        .word __\name\()__g_card_table_offset
        .endm

        // There are 4 versions of each write barrier: a 2x2 combination of
        // multi-proc/single-proc and pre/post grow version.
        JIT_WRITEBARRIER JIT_WriteBarrier_SP_Pre, 0, 0
        JIT_WRITEBARRIER JIT_WriteBarrier_SP_Post, 0, 1
        JIT_WRITEBARRIER JIT_WriteBarrier_MP_Pre, 1, 0
        JIT_WRITEBARRIER JIT_WriteBarrier_MP_Post, 1, 1

        JIT_CHECKEDWRITEBARRIER_SP JIT_CheckedWriteBarrier_SP_Pre, 0
        JIT_CHECKEDWRITEBARRIER_SP JIT_CheckedWriteBarrier_SP_Post, 1
        JIT_CHECKEDWRITEBARRIER_MP JIT_CheckedWriteBarrier_MP_Pre, 0
        JIT_CHECKEDWRITEBARRIER_MP JIT_CheckedWriteBarrier_MP_Post, 1

        JIT_BYREFWRITEBARRIER JIT_ByRefWriteBarrier_SP_Pre, 0, 0
        JIT_BYREFWRITEBARRIER JIT_ByRefWriteBarrier_SP_Post, 0, 1
        JIT_BYREFWRITEBARRIER JIT_ByRefWriteBarrier_MP_Pre, 1, 0
        JIT_BYREFWRITEBARRIER JIT_ByRefWriteBarrier_MP_Post, 1, 1

// .section .clrwb, "d"
// Table consumed by the runtime's barrier patching code; terminated by a
// zero sentinel word.
g_rgWriteBarrierDescriptors:

        JIT_WRITEBARRIER_DESCRIPTOR JIT_WriteBarrier_SP_Pre
        JIT_WRITEBARRIER_DESCRIPTOR JIT_WriteBarrier_SP_Post
        JIT_WRITEBARRIER_DESCRIPTOR JIT_WriteBarrier_MP_Pre
        JIT_WRITEBARRIER_DESCRIPTOR JIT_WriteBarrier_MP_Post

        JIT_WRITEBARRIER_DESCRIPTOR JIT_CheckedWriteBarrier_SP_Pre
        JIT_WRITEBARRIER_DESCRIPTOR JIT_CheckedWriteBarrier_SP_Post
        JIT_WRITEBARRIER_DESCRIPTOR JIT_CheckedWriteBarrier_MP_Pre
        JIT_WRITEBARRIER_DESCRIPTOR JIT_CheckedWriteBarrier_MP_Post

        JIT_WRITEBARRIER_DESCRIPTOR JIT_ByRefWriteBarrier_SP_Pre
        JIT_WRITEBARRIER_DESCRIPTOR JIT_ByRefWriteBarrier_SP_Post
        JIT_WRITEBARRIER_DESCRIPTOR JIT_ByRefWriteBarrier_MP_Pre
        JIT_WRITEBARRIER_DESCRIPTOR JIT_ByRefWriteBarrier_MP_Post

        // Sentinel value
        .word 0

// .text

        .global g_rgWriteBarrierDescriptors

#ifdef FEATURE_READYTORUN

// ------------------------------------------------------------------
// ReadyToRun external-method fixup stub. The lazy thunk jumps here with
// r0 already pushed conceptually pending; this "fake prolog" completes the
// register spill before the real entry point below.
        NESTED_ENTRY DelayLoad_MethodCall_FakeProlog, _TEXT, NoHandler

        // Match what the lazy thunk has pushed. The actual method arguments will be spilled later.
        push {r1-r3}

        // This is where execution really starts.
DelayLoad_MethodCall:
        .global DelayLoad_MethodCall

        push {r0}

        PROLOG_WITH_TRANSITION_BLOCK 0x0, 1, DoNotPushArgRegs

        // Load the helper arguments (pushed by the thunk into the slots that
        // normally hold the spilled argument registers)
        ldr r5, [sp,#(__PWTB_TransitionBlock+10*4)] // pModule
        ldr r6, [sp,#(__PWTB_TransitionBlock+11*4)] // sectionIndex
        ldr r7, [sp,#(__PWTB_TransitionBlock+12*4)] // indirection

        // Spill the actual method arguments over those same slots so the
        // transition block describes the real call
        str r1, [sp,#(__PWTB_TransitionBlock+10*4)]
        str r2, [sp,#(__PWTB_TransitionBlock+11*4)]
        str r3, [sp,#(__PWTB_TransitionBlock+12*4)]

        add r0, sp, #__PWTB_TransitionBlock // pTransitionBlock

        mov r1, r7                      // pIndirection
        mov r2, r6                      // sectionIndex
        mov r3, r5                      // pModule

        bl C_FUNC(ExternalMethodFixupWorker)

        // mov the address we patched to in R12 so that we can tail call to it
        mov r12, r0

        EPILOG_WITH_TRANSITION_BLOCK_TAILCALL

        // Share the patch label
        b C_FUNC(ExternalMethodFixupPatchLabel)

        NESTED_END DelayLoad_MethodCall_FakeProlog, _TEXT


        // Generates one delay-load helper stub per frame-flags variant.
        // \frameFlags is passed through to DynamicHelperWorker; \suffix
        // distinguishes the generated symbol names.
        .macro DynamicHelper frameFlags, suffix

__FakePrologName="DelayLoad_Helper\suffix\()_FakeProlog"

        NESTED_ENTRY DelayLoad_Helper\suffix\()_FakeProlog, _TEXT, NoHandler

        // Match what the lazy thunk has pushed. The actual method arguments will be spilled later.
        push {r1-r3}

        // This is where execution really starts.
DelayLoad_Helper\suffix:
        .global DelayLoad_Helper\suffix

        push {r0}

        PROLOG_WITH_TRANSITION_BLOCK 0x4, 0, DoNotPushArgRegs

        // Load the helper arguments (see DelayLoad_MethodCall above)
        ldr r5, [sp,#(__PWTB_TransitionBlock+10*4)] // pModule
        ldr r6, [sp,#(__PWTB_TransitionBlock+11*4)] // sectionIndex
        ldr r7, [sp,#(__PWTB_TransitionBlock+12*4)] // indirection

        // Spill the actual method arguments
        str r1, [sp,#(__PWTB_TransitionBlock+10*4)]
        str r2, [sp,#(__PWTB_TransitionBlock+11*4)]
        str r3, [sp,#(__PWTB_TransitionBlock+12*4)]

        add r0, sp, #__PWTB_TransitionBlock // pTransitionBlock

        mov r1, r7                      // pIndirection
        mov r2, r6                      // sectionIndex
        mov r3, r5                      // pModule

        mov r4, \frameFlags
        str r4, [sp,#0]                 // 5th arg (frame flags) on the stack

        bl C_FUNC(DynamicHelperWorker)

        // Worker returns NULL when the result was produced directly; otherwise
        // it returns a target address to tail-call.
        cbnz r0, 0f
        ldr r0, [sp,#(__PWTB_TransitionBlock+9*4)] // The result is stored in the argument area of the transition block

        EPILOG_WITH_TRANSITION_BLOCK_RETURN

0:
        mov r12, r0
        EPILOG_WITH_TRANSITION_BLOCK_TAILCALL
        bx r12

        NESTED_END DelayLoad_Helper\suffix\()_FakeProlog, _TEXT

        .endm

        DynamicHelper DynamicHelperFrameFlags_Default
        DynamicHelper DynamicHelperFrameFlags_ObjectArg, _Obj
        DynamicHelper DynamicHelperFrameFlags_ObjectArg | DynamicHelperFrameFlags_ObjectArg2, _ObjObj

#endif // FEATURE_READYTORUN

#ifdef FEATURE_HIJACK

// ------------------------------------------------------------------
// Hijack function for functions which return a value type.
// The thread's return address has been redirected here; every register that
// may carry (part of) a return value must be preserved around the call to
// OnHijackWorker.
        NESTED_ENTRY OnHijackTripThread, _TEXT, NoHandler
        PROLOG_PUSH "{r0,r4-r11,lr}"    // 40 bytes: r0 return value + callee-saved + lr

        PROLOG_VPUSH "{d0-d3}"          // saving as d0-d3 can have the floating point return value
        PROLOG_PUSH "{r1}"              // saving as r1 can have partial return value when return is > 32 bits
        alloc_stack 4                   // 8 byte align

        CHECK_STACK_ALIGNMENT

        add r0, sp, #40                 // 4 pad + 4 (r1) + 32 (d0-d3) => &saved {r0,r4-r11,lr}
        bl C_FUNC(OnHijackWorker)

        free_stack 4
        EPILOG_POP "{r1}"
        EPILOG_VPOP "{d0-d3}"

        EPILOG_POP "{r0,r4-r11,pc}"     // restore return value and resume
        NESTED_END OnHijackTripThread, _TEXT
#endif