summaryrefslogtreecommitdiff
path: root/src/vm/amd64/jithelpers_fast.S
blob: 0ec63f7c5ca2e917d1a9edafe2ff4adf9dfbf2b0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

.intel_syntax noprefix
#include "unixasmmacros.inc"
#include "asmconstants.h"

// Mark start of the code region that we patch at runtime.
// The body is a single `ret`; what matters is the JIT_PatchedCodeStart symbol,
// which precedes the patchable helpers in this file. The counterpart placed
// after those helpers is JIT_PatchedCodeLast.
LEAF_ENTRY JIT_PatchedCodeStart, _TEXT
        ret
LEAF_END JIT_PatchedCodeStart, _TEXT


// There is an even more optimized version of these helpers possible which takes
// advantage of knowledge of which way the ephemeral heap is growing to only do 1/2
// that check (this is more significant in the JIT_WriteBarrier case).
//
// Additionally we can look into providing helpers which will take the src/dest from
// specific registers (like x86) which _could_ (??) make for easier register allocation
// for the JIT64, however it might lead to having to have some nasty code that treats
// these guys really special like... :(.
//
// Version that does the move, checks whether or not it's in the GC and whether or not
// it needs to have its card updated
//
// void JIT_CheckedWriteBarrier(Object** dst, Object* src)
//
// Entry:
//   RDI - dst, address of the reference slot being written
//   RSI - src, the object reference to store
//
//   RAX is trashed (scratch for the two heap-bound loads)
//
// If dst is below g_lowest_address or at/above g_highest_address the reference is
// stored directly at NotInHeap and the helper returns. Otherwise control leaves
// through the short jmp that the original comment associates with JIT_WriteBarrier.
//
// All three branches are emitted as raw bytes (the rel8 encodings of jb, jnb and
// jmp), so their displacements are fixed numbers and are NOT recomputed by the
// assembler.
// NOTE(review): any change to the instructions between a branch and its target,
// or to the size of what PREPARE_EXTERNAL_VAR expands to, requires these
// displacement bytes to be recomputed by hand.
LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT

        // NOTE(review): an earlier comment here said that NotInHeap only writes the
        // reference when WRITE_BARRIER_CHECK is defined. There is no such conditional
        // in this function: NotInHeap always performs the store.
        //
        // See if this is in GCHeap
        // (PREPARE_EXTERNAL_VAR is used to get the address of the global into rax;
        // the bound itself is read by the memory operand of the cmp.)
        PREPARE_EXTERNAL_VAR g_lowest_address, rax
        cmp     rdi, [rax]
        // jb      NotInHeap          (0x72 = jb rel8, displacement 0x0e)
        .byte 0x72, 0x0e
        PREPARE_EXTERNAL_VAR g_highest_address, rax
        cmp     rdi, [rax]
        // jnb     NotInHeap          (0x73 = jnb rel8; displacement 0x02 steps over
        //                             the 2-byte jmp that follows)
        .byte 0x73, 0x02
        
        // jmp     JIT_WriteBarrier   (0xeb = jmp rel8, i.e. a jump and not a call)
        // NOTE(review): displacement 0x05 is one byte more than the mov + ret at
        // NotInHeap (3 + 1 bytes), so where this lands depends on what
        // LEAF_END_MARKED and the following .balign 16 emit ahead of
        // JIT_WriteBarrier - confirm with a disassembly.
        .byte 0xeb, 0x05

    NotInHeap:
        // Plain store, no card marking. See the comment at the first store in
        // JIT_WriteBarrier about a possible AV on this instruction.
        mov     [rdi], rsi
        ret
LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT


// This is used by the mechanism to hold either the JIT_WriteBarrier_PreGrow
// or JIT_WriteBarrier_PostGrow code (depending on the state of the GC). It _WILL_
// change at runtime as the GC changes. Initially it should simply be a copy of the
// larger of the two functions (JIT_WriteBarrier_PostGrow) to ensure we have created
// enough space to copy that code in.
//
// void JIT_WriteBarrier(Object** dst, Object* src)
//
// Entry:
//   RDI - dst, address of the reference slot being written
//   RSI - src, the object reference to store
//
//   RAX is trashed; RDI is shifted in place and therefore not preserved.
//   The write-watch variant additionally uses R10 and R11, the other variant R8.
//
// Every `movabs reg, 0xF0F0F0F0F0F0F0F0` below is a placeholder immediate whose
// value is patched at runtime; the NOP padding exists only to keep each of those
// 8-byte immediates 8-byte aligned.
//
// The conditional branches are emitted as raw bytes (rel8 encodings), so their
// displacements are fixed numbers rather than assembler-computed label distances.
// NOTE(review): the Exit label sits behind a `.balign 16`, the _DEBUG build
// prepends a jmp, and FEATURE_MANUALLY_MANAGED_CARD_BUNDLES inserts further
// instructions between the branches and Exit. One fixed displacement cannot reach
// Exit both with and without the card-bundle code, so these bytes should be
// checked against a disassembly for every build configuration - or it should be
// confirmed that this placeholder body is always overwritten before it can run.
.balign 16
LEAF_ENTRY JIT_WriteBarrier, _TEXT
#ifdef _DEBUG
        // In debug builds, this just contains jump to the debug version of the write barrier by default
        jmp C_FUNC(JIT_WriteBarrier_Debug)
#endif

#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
        // JIT_WriteBarrier_WriteWatch_PostGrow64

        // Regarding patchable constants:
        // - 64-bit constants have to be loaded into a register
        // - The constants have to be aligned to 8 bytes so that they can be patched easily
        // - The constant loads have been located to minimize NOP padding required to align the constants
        // - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special
        //   non-volatile calling convention, this should be changed to use just one register.

        // Do the move into the GC heap. It is correct to take an AV here, the EH code
        // figures out that this came from a WriteBarrier and correctly maps it back
        // to the managed method which called the WriteBarrier (see setup in
        // InitializeExceptionHandling, vm\exceptionhandling.cpp).
        mov     [rdi], rsi

        // Update the write watch table if necessary
        // rax = dst >> 0Ch, then offset by the patched constant in r10; the byte at
        // that address is set to 0FFh only if it is currently zero.
        mov     rax, rdi
        movabs  r10, 0xF0F0F0F0F0F0F0F0
        shr     rax, 0Ch // SoftwareWriteWatch::AddressToTableByteIndexShift
        NOP_2_BYTE // padding for alignment of constant
        // r11 = patched constant used below as the lower ephemeral bound
        movabs  r11, 0xF0F0F0F0F0F0F0F0
        add     rax, r10
        cmp     byte ptr [rax], 0h
        // 0x75 = jne rel8; displacement 0x06 covers the 3-byte store plus NOP_3_BYTE
        .byte 0x75, 0x06
        // jne     CheckCardTable
        mov     byte ptr [rax], 0FFh

        NOP_3_BYTE // padding for alignment of constant

        // Check the lower and upper ephemeral region bounds
    CheckCardTable:
        // src below the lower bound: nothing more to do
        cmp     rsi, r11
        .byte 0x72,0x3D
        // jb      Exit

        NOP_3_BYTE // padding for alignment of constant

        // r10 = patched constant used as the upper ephemeral bound
        movabs  r10, 0xF0F0F0F0F0F0F0F0

        // src at or above the upper bound: nothing more to do
        cmp     rsi, r10
        .byte 0x73,0x2B
        // jae     Exit

        nop // padding for alignment of constant

        // rax = patched constant used as the card table base
        movabs  rax, 0xF0F0F0F0F0F0F0F0

        // Touch the card table entry, if not already dirty.
        // rdi = dst >> 0Bh is the card byte index.
        shr     rdi, 0x0B
        cmp     byte ptr [rdi + rax], 0FFh
        // 0x75 = jne rel8; displacement 0x02 steps over the 2-byte REPRET
        .byte 0x75, 0x02
        // jne     UpdateCardTable
        REPRET

    UpdateCardTable:
        mov     byte ptr [rdi + rax], 0FFh

#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
        NOP_2_BYTE // padding for alignment of constant
        // rdi was already shifted by 0x0B; shift by 0x0A more for the bundle index
        shr     rdi, 0x0A

        // rax = patched constant used as the card bundle table base
        movabs  rax, 0xF0F0F0F0F0F0F0F0
        cmp     byte ptr [rdi + rax], 0FFh

        .byte 0x75, 0x02
        // jne     UpdateCardBundle_WriteWatch_PostGrow64
        REPRET

    UpdateCardBundle_WriteWatch_PostGrow64:
        mov     byte ptr [rdi + rax], 0FFh
#endif

        ret

    .balign 16
    Exit:
        REPRET
#else
        // JIT_WriteBarrier_PostGrow64

        // Do the move into the GC heap. It is correct to take an AV here, the EH code
        // figures out that this came from a WriteBarrier and correctly maps it back
        // to the managed method which called the WriteBarrier (see setup in
        // InitializeExceptionHandling, vm\exceptionhandling.cpp).
        mov     [rdi], rsi

        NOP_3_BYTE // padding for alignment of constant

        // Can't compare a 64 bit immediate, so we have to move them into a
        // register.  Values of these immediates will be patched at runtime.
        // By using two registers we can pipeline better.  Should we decide to use
        // a special non-volatile calling convention, this should be changed to
        // just one.

        // rax = patched constant used as the lower ephemeral bound
        movabs  rax, 0xF0F0F0F0F0F0F0F0

        // Check the lower and upper ephemeral region bounds
        cmp     rsi, rax
        // jb      Exit               (0x72 = jb rel8)
        .byte 0x72, 0x36

        nop // padding for alignment of constant

        // r8 = patched constant used as the upper ephemeral bound
        movabs  r8, 0xF0F0F0F0F0F0F0F0

        cmp     rsi, r8
        // jae     Exit               (0x73 = jae rel8)
        .byte 0x73, 0x26

        nop // padding for alignment of constant

        // rax = patched constant used as the card table base
        movabs  rax, 0xF0F0F0F0F0F0F0F0

        // Touch the card table entry, if not already dirty.
        // rdi = dst >> 0Bh is the card byte index.
        shr     rdi, 0Bh
        cmp     byte ptr [rdi + rax], 0FFh
        // 0x75 = jne rel8; displacement 0x02 steps over the 2-byte REPRET
        .byte 0x75, 0x02
        // jne     UpdateCardTable
        REPRET

    UpdateCardTable:
        mov     byte ptr [rdi + rax], 0FFh

#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
        NOP_6_BYTE // padding for alignment of constant

        // rax = patched constant used as the card bundle table base
        movabs  rax, 0xF0F0F0F0F0F0F0F0

        // Touch the card bundle, if not already dirty.
        // rdi is already shifted by 0xB, so shift by 0xA more
        shr     rdi, 0x0A
        cmp     byte ptr [rdi + rax], 0FFh

        .byte 0x75, 0x02 
        // jne     UpdateCardBundle
        REPRET

    UpdateCardBundle:
        mov     byte ptr [rdi + rax], 0FFh
#endif

        ret

    .balign 16
    Exit:
        REPRET
#endif

    // make sure this guy is bigger than any of the other guys
    // (pads to the next 16-byte boundary and then emits one more byte, so the
    // symbol placed by LEAF_END_MARKED lies past that boundary)
    .balign 16
        nop
LEAF_END_MARKED JIT_WriteBarrier, _TEXT

// Mark end of the code region that we patch at runtime.
// Counterpart of JIT_PatchedCodeStart: the patchable helpers
// (JIT_CheckedWriteBarrier, JIT_WriteBarrier) are located between the two
// symbols. The body is a single `ret`; only the symbol's address matters.
LEAF_ENTRY JIT_PatchedCodeLast, _TEXT
        ret
LEAF_END JIT_PatchedCodeLast, _TEXT

// JIT_ByRefWriteBarrier has weird semantics, see usage in StubLinkerX86.cpp
//
// Copies the object reference at [RSI] into [RDI], performs the write-barrier
// bookkeeping for the destination slot, and advances both pointers by 8.
//
// Entry:
//   RDI - address of ref-field (assigned to)
//   RSI - address of the data  (source)
//
//   Note: RyuJIT assumes that all volatile registers can be trashed by
//   the CORINFO_HELP_ASSIGN_BYREF helper (i.e. JIT_ByRefWriteBarrier).
//   The precise set is defined by RBM_CALLEE_TRASH.
//
//   RCX is trashed
//   RAX is trashed
//   R10 is trashed (used by the WRITE_BARRIER_CHECK and
//       FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP code paths)
//   R11 is trashed when WRITE_BARRIER_CHECK is defined (an earlier version of
//       this comment said "on Debug build")
// Exit:
//   RDI, RSI are incremented by SIZEOF(LPVOID)
LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT
        // rcx = the reference being copied
        mov     rcx, [rsi]

// If !WRITE_BARRIER_CHECK do the write first, otherwise we might have to do some ShadowGC stuff
#ifndef WRITE_BARRIER_CHECK
        // rcx is [rsi]
        mov     [rdi], rcx
#endif

        // When WRITE_BARRIER_CHECK is defined _NotInHeap will write the reference
        // but if it isn't then it will just return.
        //
        // See if this is in GCHeap
        // (PREPARE_EXTERNAL_VAR is used to get the address of the global into the
        // given register; the value is read through that register afterwards.)
        PREPARE_EXTERNAL_VAR g_lowest_address, rax
        cmp     rdi, [rax]
        jb      NotInHeap_ByRefWriteBarrier
        PREPARE_EXTERNAL_VAR g_highest_address, rax
        cmp     rdi, [rax]
        jnb     NotInHeap_ByRefWriteBarrier

#ifdef WRITE_BARRIER_CHECK
        // **ALSO update the shadow GC heap if that is enabled**
        // Do not perform the work if g_GCShadow is 0
        PREPARE_EXTERNAL_VAR g_GCShadow, rax
        cmp     qword ptr [rax], 0
        je      NoShadow_ByRefWriteBarrier

        // If we end up outside of the heap don't corrupt random memory
        // r10 = dst - g_lowest_address (offset of the slot within the heap range)
        mov     r10, rdi
        PREPARE_EXTERNAL_VAR g_lowest_address, rax
        sub     r10, [rax]
        jb      NoShadow_ByRefWriteBarrier

        // Check that our adjusted destination is somewhere in the shadow gc
        // r10 = g_GCShadow + offset, i.e. the shadow copy of the slot
        PREPARE_EXTERNAL_VAR g_GCShadow, rax
        add     r10, [rax]
        PREPARE_EXTERNAL_VAR g_GCShadowEnd, rax
        cmp     r10, [rax]
        ja      NoShadow_ByRefWriteBarrier

        // Write ref into real GC
        mov     [rdi], rcx
        // Write ref into shadow GC
        mov     [r10], rcx

        // Ensure that the write to the shadow heap occurs before the read from
        // the GC heap so that race conditions are caught by INVALIDGCVALUE
        mfence

        // Check that GC/ShadowGC values match
        mov     r11, [rdi]
        mov     rax, [r10]
        cmp     rax, r11
        je      DoneShadow_ByRefWriteBarrier
        // Mismatch: overwrite the shadow slot with INVALIDGCVALUE
        movabs  r11, INVALIDGCVALUE
        mov     [r10], r11

        jmp     DoneShadow_ByRefWriteBarrier

    // If we don't have a shadow GC we won't have done the write yet
    NoShadow_ByRefWriteBarrier:
        mov     [rdi], rcx

    // If we had a shadow GC then we already wrote to the real GC at the same time
    // as the shadow GC so we want to jump over the real write immediately above.
    // Additionally we know for sure that we are inside the heap and therefore don't
    // need to replicate the above checks.
    DoneShadow_ByRefWriteBarrier:
#endif

#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
        // Update the write watch table if necessary
        // Skipped entirely when g_sw_ww_enabled_for_gc_heap is zero.
        PREPARE_EXTERNAL_VAR g_sw_ww_enabled_for_gc_heap, rax
        cmp     byte ptr [rax], 0h
        je      CheckCardTable_ByRefWriteBarrier
        // rax = g_sw_ww_table + (dst >> 0Ch); set that byte to 0FFh if it is zero
        mov     rax, rdi
        shr     rax, 0Ch // SoftwareWriteWatch::AddressToTableByteIndexShift
        PREPARE_EXTERNAL_VAR g_sw_ww_table, r10
        add     rax, qword ptr [r10]
        cmp     byte ptr [rax], 0h
        jne     CheckCardTable_ByRefWriteBarrier
        mov     byte ptr [rax], 0FFh
#endif

    CheckCardTable_ByRefWriteBarrier:
        // See if we can just quick out
        // No card marking unless g_ephemeral_low <= stored reference < g_ephemeral_high
        PREPARE_EXTERNAL_VAR g_ephemeral_low, rax
        cmp     rcx, [rax]
        jb      Exit_ByRefWriteBarrier
        PREPARE_EXTERNAL_VAR g_ephemeral_high, rax
        cmp     rcx, [rax]
        jnb     Exit_ByRefWriteBarrier

        // move current rdi value into rcx and then increment the pointers
        // (the increment happens here because this path returns without passing
        // through Exit_ByRefWriteBarrier)
        mov     rcx, rdi
        add     rsi, 8h
        add     rdi, 8h

        // Check if we need to update the card table
        // Calc pCardByte
        shr     rcx, 0x0B

        PREPARE_EXTERNAL_VAR g_card_table, rax
        mov     rax, [rax]

        // Check if this card is dirty
        cmp     byte ptr [rcx + rax], 0FFh

        jne     UpdateCardTable_ByRefWriteBarrier
        REPRET

    UpdateCardTable_ByRefWriteBarrier:
        mov     byte ptr [rcx + rax], 0FFh

#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
        // Shift rcx by 0x0A more to get the card bundle byte (we shifted by 0x0B already)
        shr     rcx, 0x0A

        // rcx = address of the bundle byte (index + table base)
        PREPARE_EXTERNAL_VAR g_card_bundle_table, rax
        add     rcx, [rax]

        // Check if this bundle byte is dirty
        cmp     byte ptr [rcx], 0FFh

        jne     UpdateCardBundle_ByRefWriteBarrier
        REPRET

    UpdateCardBundle_ByRefWriteBarrier:
        mov     byte ptr [rcx], 0FFh
#endif

        ret

    .balign 16
    NotInHeap_ByRefWriteBarrier:
// If WRITE_BARRIER_CHECK then we won't have already done the mov and should do it here
// If !WRITE_BARRIER_CHECK we want _NotInHeap and _Leave to be the same and have both
// 16 byte aligned. ("_Leave" is the Exit_ByRefWriteBarrier label below.)
#ifdef WRITE_BARRIER_CHECK
        // rcx is [rsi]
        mov     [rdi], rcx
#endif
    Exit_ByRefWriteBarrier:
        // Increment the pointers before leaving
        add     rdi, 8h
        add     rsi, 8h
        ret
LEAF_END JIT_ByRefWriteBarrier, _TEXT

// TODO: put definition for this in asmconstants.h
#define CanCast 1

// void JIT_Stelem_Ref(PtrArray* array, unsigned idx, Object* val)
//   (originally declared as: __declspec(naked) void F_CALL_CONV JIT_Stelem_Ref(...))
//
// Stores val into array[idx].
//
// Entry:
//   RDI - array
//   ESI - idx (upper half of RSI may hold garbage)
//   RDX - val
//
// Fast paths handled here: null array, index out of range, storing null,
// element type identical to val's type, and element type equal to
// g_pObjectClass. Everything else is forwarded, with RCX holding the array's
// element type, to JIT_Stelem_Ref__ObjIsInstanceOfNoGC_Helper.
//
// Uses R10, RCX and R11 as scratch.
.balign 16
LEAF_ENTRY JIT_Stelem_Ref, _TEXT
        // A null array is reported as NullReferenceException.
        test    rdi, rdi
        jz      LOCAL_LABEL(StelemRef_NullArray)

        // Only the low 32 bits of rsi are meaningful; a 32-bit register write
        // clears the upper half so rsi can be used as a 64-bit index below.
        mov     esi, esi

        // Unsigned bounds check against the element count
        cmp     esi, dword ptr [rdi + OFFSETOF__PtrArray__m_NumComponents] // 8h -> array size offset
        jnb     LOCAL_LABEL(StelemRef_IndexOutOfRange)

        // r10 = MethodTable of the array
        mov     r10, [rdi]

        // Storing null requires neither a type check nor a write barrier.
        test    rdx, rdx
        je      LOCAL_LABEL(StelemRef_StoreNull)

#ifdef CHECK_APP_DOMAIN_LEAKS
        // NOTE(review): ArrayStoreCheck_Helper and EEClassFlags are not defined
        // in this file - confirm they exist before enabling this configuration.

        // rcx = element TypeHandle of the array
        mov     rcx, [r10 + OFFSETOF__MethodTable__m_ElementType]   // 10h -> typehandle offset
        // bit 1 set means the handle is not a MethodTable: skip the flag test
        test    rcx, 2
        jne     LOCAL_LABEL(StelemRef_SkipLeakCheck)

        // Inspect the aux flags of the element type's EEClass
        mov     rcx, [rcx + OFFSETOF__MethodTable__m_pEEClass]
        mov     ecx, dword ptr [rcx + OFFSETOF__EEClass__m_wAuxFlags]
        test    ecx, EEClassFlags
        jne     C_FUNC(ArrayStoreCheck_Helper)

    LOCAL_LABEL(StelemRef_SkipLeakCheck):
#endif

        // rcx = element TypeHandle of the array
        mov     rcx, [r10 + OFFSETOF__MethodTable__m_ElementType]   // 10h -> typehandle offset

        // Exact type match: the first pointer-sized field of val equals rcx
        cmp     rcx, [rdx]
        jnz     LOCAL_LABEL(StelemRef_TypeMismatch)

    LOCAL_LABEL(StelemRef_StoreWithBarrier):
        // rdi = &array->m_Array[idx], rsi = val, then tail-call
        // JIT_WriteBarrier(Object** dst, Object* src)
        lea     rdi, [rdi + 8*rsi]
        add     rdi, OFFSETOF__PtrArray__m_Array
        mov     rsi, rdx
        jmp     C_FUNC(JIT_WriteBarrier)

    LOCAL_LABEL(StelemRef_TypeMismatch):
        // An array whose element type is g_pObjectClass accepts any reference.
        PREPARE_EXTERNAL_VAR g_pObjectClass, r11
        cmp     rcx, [r11]
        jz      LOCAL_LABEL(StelemRef_StoreWithBarrier)

        // Otherwise let the helper decide (rcx still holds the element type).
        jmp     C_FUNC(JIT_Stelem_Ref__ObjIsInstanceOfNoGC_Helper)

    LOCAL_LABEL(StelemRef_StoreNull):
        // write barrier is not needed for assignment of NULL references
        mov     [rdi + 8*rsi + OFFSETOF__PtrArray__m_Array], rdx
        ret

    LOCAL_LABEL(StelemRef_IndexOutOfRange):
        mov     rdi, CORINFO_IndexOutOfRangeException_ASM
        jmp     C_FUNC(JIT_InternalThrow)

    LOCAL_LABEL(StelemRef_NullArray):
        mov     rdi, CORINFO_NullReferenceException_ASM
        jmp     C_FUNC(JIT_InternalThrow)
LEAF_END JIT_Stelem_Ref, _TEXT

// Slow-path continuation of JIT_Stelem_Ref.
//
// Entry (as left by JIT_Stelem_Ref):
//   RDI - array, RSI - idx (already zero-extended), RDX - val,
//   RCX - element TypeHandle of the array
//
// Calls ObjIsInstanceOfNoGC(val, elementType). If the result equals CanCast the
// store is completed through JIT_WriteBarrier; any other result is forwarded to
// JIT_Stelem_Ref__ArrayStoreCheck_Helper with RDI/RSI/RDX restored.
LEAF_ENTRY JIT_Stelem_Ref__ObjIsInstanceOfNoGC_Helper, _TEXT
        push_nonvol_reg rbp
        mov     rbp, rsp
        set_cfa_register rbp, 16

        // Spill the three incoming arguments across the call:
        //   [rbp - 0x08] = array, [rbp - 0x10] = idx, [rbp - 0x18] = val
        sub     rsp, 0x20
        mov     [rbp - 0x18], rdx
        mov     [rbp - 0x10], rsi
        mov     [rbp - 0x08], rdi

        // TypeHandle::CastResult ObjIsInstanceOfNoGC(Object *pElement, TypeHandle toTypeHnd)
        //   rdi = pElement  (val)
        //   rsi = toTypeHnd (element type handed over in rcx)
        mov     rdi, rdx
        mov     rsi, rcx
        call    C_FUNC(ObjIsInstanceOfNoGC)

        // Bring the original arguments back and tear the frame down; the cast
        // result stays in eax.
        mov     rdx, [rbp - 0x18]
        mov     rsi, [rbp - 0x10]
        mov     rdi, [rbp - 0x08]

        RESET_FRAME_WITH_RBP

        cmp     eax, CanCast
        je      LOCAL_LABEL(StelemRef_CastSucceeded)

        // Anything other than CanCast goes through the full store check.
        jmp     C_FUNC(JIT_Stelem_Ref__ArrayStoreCheck_Helper)

    LOCAL_LABEL(StelemRef_CastSucceeded):
        // rdi = &array->m_Array[idx], rsi = val, then tail-call
        // JIT_WriteBarrier(Object** dst, Object* src)
        lea     rdi, [rdi + 8*rsi]
        add     rdi, OFFSETOF__PtrArray__m_Array
        mov     rsi, rdx
        jmp     C_FUNC(JIT_WriteBarrier)
LEAF_END JIT_Stelem_Ref__ObjIsInstanceOfNoGC_Helper, _TEXT

// Final slow path of JIT_Stelem_Ref.
//
// Entry:
//   RDI - array, RSI - idx (already zero-extended), RDX - val
//
// ArrayStoreCheck takes the ADDRESSES of the object and array references, so
// both are spilled to the stack to give them an address. After the call the
// values are reloaded from those slots (not from the old registers) and the
// store is completed through JIT_WriteBarrier.
LEAF_ENTRY JIT_Stelem_Ref__ArrayStoreCheck_Helper, _TEXT
        push_nonvol_reg rbp
        mov     rbp, rsp
        set_cfa_register rbp, 16

        // Stack slots:
        //   [rbp - 0x10] = array, [rbp - 0x18] = idx, [rbp - 0x20] = val
        sub     rsp, 0x20
        mov     [rbp - 0x20], rdx
        mov     [rbp - 0x18], rsi
        mov     [rbp - 0x10], rdi

        // HCIMPL2(FC_INNER_RET, ArrayStoreCheck, Object** pElement, PtrArray** pArray)
        //   rdi = &val   ([rbp - 0x20], which is also the current rsp)
        //   rsi = &array ([rbp - 0x10])
        lea     rsi, [rbp - 0x10]
        lea     rdi, [rbp - 0x20]
        call    C_FUNC(ArrayStoreCheck)

        // Reload all three values from their slots.
        mov     rdx, [rbp - 0x20]
        mov     rsi, [rbp - 0x18]
        mov     rdi, [rbp - 0x10]

        // rdi = &array->m_Array[idx], rsi = val
        lea     rdi, [rdi + 8*rsi]
        add     rdi, OFFSETOF__PtrArray__m_Array
        mov     rsi, rdx

        RESET_FRAME_WITH_RBP

        // JIT_WriteBarrier(Object** dst, Object* src)
        jmp     C_FUNC(JIT_WriteBarrier)
LEAF_END JIT_Stelem_Ref__ArrayStoreCheck_Helper, _TEXT