1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
|
; Licensed to the .NET Foundation under one or more agreements.
; The .NET Foundation licenses this file to you under the MIT license.
; See the LICENSE file in the project root for more information.
; ==++==
;
;
; ==--==
include <AsmMacros.inc>
include AsmConstants.inc
gfHostConfig equ ?g_fHostConfig@@3KA
extern CreateThreadBlockThrow:proc
extern TheUMEntryPrestubWorker:proc
extern UMEntryPrestubUnwindFrameChainHandler:proc
extern UMThunkStubUnwindFrameChainHandler:proc
extern g_TrapReturningThreads:dword
extern UMThunkStubRareDisableWorker:proc
extern ReversePInvokeBadTransition:proc
;
; METHODDESC_REGISTER: UMEntryThunk*
;
NESTED_ENTRY TheUMEntryPrestub, _TEXT, UMEntryPrestubUnwindFrameChainHandler
TheUMEntryPrestub_STACK_FRAME_SIZE = SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES
; XMM save area
TheUMEntryPrestub_XMM_SAVE_OFFSET = TheUMEntryPrestub_STACK_FRAME_SIZE
TheUMEntryPrestub_STACK_FRAME_SIZE = TheUMEntryPrestub_STACK_FRAME_SIZE + SIZEOF_MAX_FP_ARG_SPILL
; Ensure that the new rsp will be 16-byte aligned. Note that the caller has
; already pushed the return address.
if ((TheUMEntryPrestub_STACK_FRAME_SIZE + 8) MOD 16) ne 0
TheUMEntryPrestub_STACK_FRAME_SIZE = TheUMEntryPrestub_STACK_FRAME_SIZE + 8
endif
alloc_stack TheUMEntryPrestub_STACK_FRAME_SIZE
save_reg_postrsp rcx, TheUMEntryPrestub_STACK_FRAME_SIZE + 8h
save_reg_postrsp rdx, TheUMEntryPrestub_STACK_FRAME_SIZE + 10h
save_reg_postrsp r8, TheUMEntryPrestub_STACK_FRAME_SIZE + 18h
save_reg_postrsp r9, TheUMEntryPrestub_STACK_FRAME_SIZE + 20h
save_xmm128_postrsp xmm0, TheUMEntryPrestub_XMM_SAVE_OFFSET
save_xmm128_postrsp xmm1, TheUMEntryPrestub_XMM_SAVE_OFFSET + 10h
save_xmm128_postrsp xmm2, TheUMEntryPrestub_XMM_SAVE_OFFSET + 20h
save_xmm128_postrsp xmm3, TheUMEntryPrestub_XMM_SAVE_OFFSET + 30h
END_PROLOGUE
;
; Do prestub-specific stuff
;
mov rcx, METHODDESC_REGISTER
call TheUMEntryPrestubWorker
;
; we're going to tail call to the exec stub that we just setup
;
mov rcx, [rsp + TheUMEntryPrestub_STACK_FRAME_SIZE + 8h]
mov rdx, [rsp + TheUMEntryPrestub_STACK_FRAME_SIZE + 10h]
mov r8, [rsp + TheUMEntryPrestub_STACK_FRAME_SIZE + 18h]
mov r9, [rsp + TheUMEntryPrestub_STACK_FRAME_SIZE + 20h]
movdqa xmm0, xmmword ptr [rsp + TheUMEntryPrestub_XMM_SAVE_OFFSET]
movdqa xmm1, xmmword ptr [rsp + TheUMEntryPrestub_XMM_SAVE_OFFSET + 10h]
movdqa xmm2, xmmword ptr [rsp + TheUMEntryPrestub_XMM_SAVE_OFFSET + 20h]
movdqa xmm3, xmmword ptr [rsp + TheUMEntryPrestub_XMM_SAVE_OFFSET + 30h]
;
; epilogue
;
add rsp, TheUMEntryPrestub_STACK_FRAME_SIZE
TAILJMP_RAX
NESTED_END TheUMEntryPrestub, _TEXT
;
; METHODDESC_REGISTER: UMEntryThunk*
;
NESTED_ENTRY UMThunkStub, _TEXT, UMThunkStubUnwindFrameChainHandler
UMThunkStubAMD64_STACK_FRAME_SIZE = 0
; number of integer registers saved in prologue
UMThunkStubAMD64_NUM_REG_PUSHES = 2
UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + (UMThunkStubAMD64_NUM_REG_PUSHES * 8)
; rare path spill area
UMThunkStubAMD64_RARE_PATH_SPILL_SIZE = 10h
UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + UMThunkStubAMD64_RARE_PATH_SPILL_SIZE
UMThunkStubAMD64_RARE_PATH_SPILL_NEGOFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE
; HOST_NOTIFY_FLAG
UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + 8
UMThunkStubAMD64_HOST_NOTIFY_FLAG_NEGOFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE
; XMM save area
UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + SIZEOF_MAX_FP_ARG_SPILL
; Ensure that the offset of the XMM save area will be 16-byte aligned.
if ((UMThunkStubAMD64_STACK_FRAME_SIZE + 8) MOD 16) ne 0 ; +8 for caller-pushed return address
UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + 8
endif
UMThunkStubAMD64_XMM_SAVE_NEGOFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE
; Add in the callee scratch area size.
UMThunkStubAMD64_CALLEE_SCRATCH_SIZE = SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES
UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + UMThunkStubAMD64_CALLEE_SCRATCH_SIZE
; Now we have the full size of the stack frame. The offsets have been computed relative to the
; top, so negate them to make them relative to the post-prologue rsp.
UMThunkStubAMD64_FRAME_OFFSET = UMThunkStubAMD64_CALLEE_SCRATCH_SIZE
UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE - UMThunkStubAMD64_FRAME_OFFSET - UMThunkStubAMD64_RARE_PATH_SPILL_NEGOFFSET
UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE - UMThunkStubAMD64_FRAME_OFFSET - UMThunkStubAMD64_HOST_NOTIFY_FLAG_NEGOFFSET
UMThunkStubAMD64_XMM_SAVE_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE - UMThunkStubAMD64_FRAME_OFFSET - UMThunkStubAMD64_XMM_SAVE_NEGOFFSET
UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE + 8 - UMThunkStubAMD64_FRAME_OFFSET ; +8 for return address
UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE - (UMThunkStubAMD64_NUM_REG_PUSHES * 8)
.errnz UMTHUNKSTUB_HOST_NOTIFY_FLAG_RBPOFFSET - UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET, update UMTHUNKSTUB_HOST_NOTIFY_FLAG_RBPOFFSET
;
; [ callee scratch ] <-- new RSP
; [ callee scratch ]
; [ callee scratch ]
; [ callee scratch ]
; {optional stack args passed to callee}
; xmm0 <-- RBP
; xmm1
; xmm2
; xmm3
; {optional padding to align xmm regs}
; HOST_NOTIFY_FLAG (needs to make ReverseLeaveRuntime call flag)
; [rare path spill area]
; [rare path spill area]
; rbp save
; r12 save
; return address <-- entry RSP
; [rcx home]
; [rdx home]
; [r8 home]
; [r9 home]
; stack arg 0
; stack arg 1
; ...
push_nonvol_reg r12
push_nonvol_reg rbp ; stack_args
alloc_stack UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE
set_frame rbp, UMThunkStubAMD64_FRAME_OFFSET ; stack_args
mov byte ptr [rbp + UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET], 0 ; hosted
END_PROLOGUE
;
; Call GetThread()
;
INLINE_GETTHREAD r12 ; will not trash r10
test r12, r12
jz DoThreadSetup
HaveThread:
;FailFast if a native callable method invoked via ldftn and calli.
cmp dword ptr [r12 + OFFSETOF__Thread__m_fPreemptiveGCDisabled], 1
jz InvalidTransition
;
; disable preemptive GC
;
mov dword ptr [r12 + OFFSETOF__Thread__m_fPreemptiveGCDisabled], 1
;
; catch returning thread here if a GC is in progress
;
cmp [g_TrapReturningThreads], 0
jnz DoTrapReturningThreadsTHROW
InCooperativeMode:
mov r11, [METHODDESC_REGISTER + OFFSETOF__UMEntryThunk__m_pUMThunkMarshInfo]
mov eax, [r11 + OFFSETOF__UMThunkMarshInfo__m_cbActualArgSize] ; stack_args
test rax, rax ; stack_args
jnz CopyStackArgs ; stack_args
ArgumentsSetup:
mov rax, [r11 + OFFSETOF__UMThunkMarshInfo__m_pILStub] ; rax <- Stub*
call rax
PostCall:
;
; enable preemptive GC
;
mov dword ptr [r12 + OFFSETOF__Thread__m_fPreemptiveGCDisabled], 0
; epilog
lea rsp, [rbp - UMThunkStubAMD64_FRAME_OFFSET + UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE]
pop rbp ; stack_args
pop r12
ret
DoThreadSetup:
mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h], rcx
mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h], rdx
mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8
mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h], r9
; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls
; initial measurements indidcate that this could be worth about a 5% savings in reverse
; pinvoke overhead.
movdqa xmmword ptr[rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0h], xmm0
movdqa xmmword ptr[rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h], xmm1
movdqa xmmword ptr[rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h], xmm2
movdqa xmmword ptr[rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h], xmm3
mov [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET], METHODDESC_REGISTER
call CreateThreadBlockThrow
mov METHODDESC_REGISTER, [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET]
mov rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h]
mov rdx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h]
mov r8, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h]
mov r9, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h]
; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls
movdqa xmm0, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0h]
movdqa xmm1, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h]
movdqa xmm2, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h]
movdqa xmm3, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h]
mov r12, rax
jmp HaveThread
InvalidTransition:
; ReversePInvokeBadTransition will failfast
call ReversePInvokeBadTransition
DoTrapReturningThreadsTHROW:
mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h], rcx
mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h], rdx
mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8
mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h], r9
; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls
; initial measurements indidcate that this could be worth about a 5% savings in reverse
; pinvoke overhead.
movdqa xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0h], xmm0
movdqa xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h], xmm1
movdqa xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h], xmm2
movdqa xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h], xmm3
mov [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET], METHODDESC_REGISTER
mov rcx, r12 ; Thread* pThread
mov rdx, METHODDESC_REGISTER ; UMEntryThunk* pUMEntry
call UMThunkStubRareDisableWorker
mov METHODDESC_REGISTER, [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET]
mov rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h]
mov rdx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h]
mov r8, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h]
mov r9, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h]
; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls
movdqa xmm0, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0h]
movdqa xmm1, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h]
movdqa xmm2, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h]
movdqa xmm3, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h]
jmp InCooperativeMode
CopyStackArgs:
; rax = cbStackArgs (with 20h for register args subtracted out already)
sub rsp, rax
and rsp, -16
mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h], rcx
mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h], rdx
mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8
; rax = number of bytes
lea rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES]
lea rdx, [rsp + UMThunkStubAMD64_CALLEE_SCRATCH_SIZE]
CopyLoop:
; rax = number of bytes
; rcx = src
; rdx = dest
; r8 = sratch
add rax, -8
mov r8, [rcx + rax]
mov [rdx + rax], r8
jnz CopyLoop
mov rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h]
mov rdx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h]
mov r8, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h]
jmp ArgumentsSetup
NESTED_END UMThunkStub, _TEXT
end
|