summaryrefslogtreecommitdiff
path: root/src/vm/arm64/asmhelpers.asm
blob: 27e284c0cdf355127d5e510d52be149951f58d58 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
; Licensed to the .NET Foundation under one or more agreements.
; The .NET Foundation licenses this file to you under the MIT license.
; See the LICENSE file in the project root for more information.

;; ==++==
;;

;;
;; ==--==
#include "ksarm64.h"
#include "asmconstants.h"
#include "asmmacros.h"

#ifdef FEATURE_PREJIT
    IMPORT VirtualMethodFixupWorker
    IMPORT StubDispatchFixupWorker
#endif
    IMPORT ExternalMethodFixupWorker
    IMPORT PreStubWorker
    IMPORT NDirectImportWorker
    IMPORT VSD_ResolveWorker
    IMPORT JIT_InternalThrow
    IMPORT ComPreStubWorker
    IMPORT COMToCLRWorker
    IMPORT CallDescrWorkerUnwindFrameChainHandler
    IMPORT UMEntryPrestubUnwindFrameChainHandler
    IMPORT UMThunkStubUnwindFrameChainHandler
    IMPORT TheUMEntryPrestubWorker
    IMPORT CreateThreadBlockThrow
    IMPORT UMThunkStubRareDisableWorker
    IMPORT GetCurrentSavedRedirectContext
    IMPORT LinkFrameAndThrow
    IMPORT FixContextHandler
    IMPORT OnHijackWorker
#ifdef FEATURE_READYTORUN
    IMPORT DynamicHelperWorker
#endif

    IMPORT ObjIsInstanceOfNoGC
    IMPORT ArrayStoreCheck
    SETALIAS g_pObjectClass,  ?g_pObjectClass@@3PEAVMethodTable@@EA 
    IMPORT  $g_pObjectClass

#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
    IMPORT  g_sw_ww_table
#endif

    IMPORT  g_ephemeral_low
    IMPORT  g_ephemeral_high
    IMPORT  g_lowest_address
    IMPORT  g_highest_address
    IMPORT  g_card_table
    IMPORT  g_TrapReturningThreads
    IMPORT  g_dispatch_cache_chain_success_counter
#ifdef WRITE_BARRIER_CHECK
    SETALIAS g_GCShadow, ?g_GCShadow@@3PEAEEA
    SETALIAS g_GCShadowEnd, ?g_GCShadowEnd@@3PEAEEA

    IMPORT g_lowest_address
    IMPORT $g_GCShadow
    IMPORT $g_GCShadowEnd
#endif // WRITE_BARRIER_CHECK

    IMPORT JIT_GetSharedNonGCStaticBase_Helper
    IMPORT JIT_GetSharedGCStaticBase_Helper

#ifdef FEATURE_COMINTEROP
    IMPORT CLRToCOMWorker
#endif // FEATURE_COMINTEROP
    TEXTAREA

;; LPVOID __stdcall GetCurrentIP(void);
;;
;; Out: x0 = value of the link register on entry, i.e. the address this
;;           routine returns to.
    LEAF_ENTRY GetCurrentIP
        mov     x0, x30                 ; x30 is the link register (lr)
        ret                             ; returns through x30
    LEAF_END

;; LPVOID __stdcall GetCurrentSP(void);
;;
;; Out: x0 = stack pointer as seen inside this leaf routine (no stack
;;           adjustment is made before it is read).
    LEAF_ENTRY GetCurrentSP
        mov     x0, sp
        ret                             ; returns through x30
    LEAF_END

;;-----------------------------------------------------------------------------
;; void LazyMachStateCaptureState(struct LazyMachState *pState);
;;
;; Records the current machine state into *pState. It is used by helper
;; method frames.
;;
;; In:      x0 = pState
;; Trashed: x1
;;-----------------------------------------------------------------------------
        LEAF_ENTRY LazyMachStateCaptureState

        ;; flag the state as not yet valid
        str     wzr, [x0, #MachState__isValid]

        ;; sp cannot be the data operand of a str, so stage it in x1 first
        mov     x1, sp
        str     x1, [x0, #LazyMachState_captureSp]

        ;; the link register is recorded as the capture IP
        str     x30, [x0, #LazyMachState_captureIp]

        ;; x1 = base of the capture area for the non-volatile registers
        ;; (x19-x29), which can contain object references
        add     x1, x0, #LazyMachState_captureX19_X29
        str     x29, [x1, #(16*5)]
        stp     x27, x28, [x1, #(16*4)]
        stp     x25, x26, [x1, #(16*3)]
        stp     x23, x24, [x1, #(16*2)]
        stp     x21, x22, [x1, #(16*1)]
        stp     x19, x20, [x1, #(16*0)]

        ret
        LEAF_END

        ;
        ; If a preserved register were pushed onto the stack between
        ; the managed caller and the H_M_F, ptrX19_X29 will point to its
        ; location on the stack and it would have been updated on the
        ; stack by the GC already and it will be popped back into the
        ; appropriate register when the appropriate epilog is run.
        ;
        ; Otherwise, the register is preserved across all the code
        ; in this HCALL or FCALL, so we need to update those registers
        ; here because the GC will have updated our copies in the
        ; frame.
        ;
        ; So, if ptrX19_X29 points into the MachState, we need to update
        ; the register here.  That's what this macro does.
        ;

        MACRO
            RestoreRegMS $regIndex, $reg

        ; Conditionally reloads one preserved register from the MachState.
        ;
        ; Incoming:
        ;
        ; x0 = address of MachState
        ;
        ; $regIndex: Index of the register (x19-x29). For x19, index is 19.
        ;            For x20, index is 20, and so on.
        ;
        ; $reg: Register name (e.g. x19, x20, etc)
        ;
        ; Trashes: x2, x3 and the condition flags.
        ;
        ; Get the address of the specified captured register from machine state
        add     x2, x0, #(MachState__captureX19_X29 + (($regIndex-19)*8))

        ; Get the content of specified preserved register pointer from machine state
        ldr     x3, [x0, #(MachState__ptrX19_X29 + (($regIndex-19)*8))]

        ; Reload $reg only when the saved pointer refers to the capture slot inside
        ; the MachState itself; otherwise branch forward to the local label and
        ; leave $reg untouched.
        cmp     x2, x3
        bne     %FT0
        ldr     $reg, [x2]
0

        MEND

; EXTERN_C int __fastcall HelperMethodFrameRestoreState(
;         INDEBUG_COMMA(HelperMethodFrame *pFrame)
;         MachState *pState
;         )
;
; Reloads x19-x29 from the MachState where the RestoreRegMS condition holds,
; then returns 0. Nothing is reloaded if the state is not marked valid.
;
; In:      x0 = pState (x1 = pState in _DEBUG builds, where x0 = pFrame)
; Out:     x0 = 0
; Trashed: x1, x2, x3, condition flags
        LEAF_ENTRY HelperMethodFrameRestoreState

#ifdef _DEBUG
        ; debug builds carry the extra pFrame argument first; move pState into x0
        mov     x0, x1
#endif

        ; an invalid machine state means there is nothing to restore
        ldr     w1, [x0, #MachState__isValid]
        cmp     w1, #0
        beq     Done

        RestoreRegMS 19, x19
        RestoreRegMS 20, x20
        RestoreRegMS 21, x21
        RestoreRegMS 22, x22
        RestoreRegMS 23, x23
        RestoreRegMS 24, x24
        RestoreRegMS 25, x25
        RestoreRegMS 26, x26
        RestoreRegMS 27, x27
        RestoreRegMS 28, x28
        RestoreRegMS 29, x29

Done
        ; The return value must be zero: it is used in the state machine to loop
        ; until it becomes zero. Refer to the HELPER_METHOD_FRAME_END macro for
        ; details.
        mov     x0, xzr
        ret

        LEAF_END

; ------------------------------------------------------------------
; NDirectImportThunk
;
; Target of the call in the ndirect import precode.
;
; In:  x12 = value handed to NDirectImportWorker as its argument,
;      plus the caller's argument registers, which are preserved.
; Out: tail-branches to the address NDirectImportWorker returned.
        NESTED_ENTRY NDirectImportThunk

        ; spill fp/lr and every integer / floating-point argument register
        PROLOG_SAVE_REG_PAIR           fp, lr, #-224!
        SAVE_ARGUMENT_REGISTERS        sp, 16
        SAVE_FLOAT_ARGUMENT_REGISTERS  sp, 96

        mov     x0, x12
        bl      NDirectImportWorker
        mov     x12, x0                 ; keep the result clear of the argument registers

        ; reload the caller's arguments and pop the frame
        RESTORE_ARGUMENT_REGISTERS        sp, 16
        RESTORE_FLOAT_ARGUMENT_REGISTERS  sp, 96
        EPILOG_RESTORE_REG_PAIR           fp, lr, #224!

        ; Returning from NDirectImportWorker means the MD has been linked
        ; successfully, so continue into the original DLL call.
        EPILOG_BRANCH_REG x12

        NESTED_END

; ------------------------------------------------------------------
; The call in fixup precode initially points to this function.
; The purpose of this function is to load the MethodDesc and forward the call to prestub.
        NESTED_ENTRY PrecodeFixupThunk

        ; x12 = FixupPrecode *
        ; On Exit
        ; x12 = MethodDesc*
        ; x13, x14 Trashed
        ; Inline computation done by FixupPrecode::GetMethodDesc()
        ldrb    w13, [x12, #Offset_FixupPrecodeChunkIndex]            ; m_PrecodeChunkIndex
        ldrb    w14, [x12, #Offset_FixupPrecodeMethodDescChunkIndex]  ; m_MethodDescChunkIndex

        ; x13 = x12 + (chunkIndex << SHIFT_1) + (chunkIndex << SHIFT_2); note that
        ; the first add also updates x12 itself. The pointer stored at
        ; SIZEOF__FixupPrecode past that address is then loaded, and the
        ; MethodDesc chunk index scaled by MethodDesc_ALIGNMENT_SHIFT is added
        ; to it to form the MethodDesc*.
        add     x12,x12,w13,uxtw #FixupPrecode_ALIGNMENT_SHIFT_1
        add     x13,x12,w13,uxtw #FixupPrecode_ALIGNMENT_SHIFT_2
        ldr     x13, [x13,#SIZEOF__FixupPrecode]
        add     x12,x13,w14,uxtw #MethodDesc_ALIGNMENT_SHIFT

        ; No frame was set up here; continue in ThePreStub with x12 = MethodDesc*
        b ThePreStub

        NESTED_END

; ------------------------------------------------------------------
; The call in fixup precode initially points to this function.
; The purpose of this function is to load the MethodDesc and forward the call to prestub.
        NESTED_ENTRY PrecodeRelativeFixupThunk

        ; x11 = RelativeFixupPrecode *
        ; On Exit
        ; x12 = MethodDesc*
        ; x11, x13 Trashed
        ; Inline computation done by RelativeFixupPrecode::GetMethodDesc()
        ldrb    w12, [x11, #Offset_RelativeFixupPrecodeChunkIndex]            ; m_PrecodeChunkIndex
        ldrb    w13, [x11, #Offset_RelativeFixupPrecodeMethodDescChunkIndex]  ; m_MethodDescChunkIndex

        ; x11 += 2 * (chunkIndex << RelativeFixupPrecode_ALIGNMENT_SHIFT)
        ; NOTE(review): both adds use the same shift constant, whereas
        ; PrecodeFixupThunk uses a distinct _SHIFT_1 / _SHIFT_2 pair. Confirm
        ; against RelativeFixupPrecode::GetMethodDesc() that this is intended.
        add     x11, x11, w12, uxtw #RelativeFixupPrecode_ALIGNMENT_SHIFT
        add     x11, x11, w12, uxtw #RelativeFixupPrecode_ALIGNMENT_SHIFT
        add     x12, x11, #SIZEOF__RelativeFixupPrecode                    ; GetBase()
        ldr     x11, [x12]                                                 ; base
        ; the loaded value is added to the address it was loaded from
        add     x12, x12, x11
        add     x12, x12, w13, uxtw #MethodDesc_ALIGNMENT_SHIFT

        ; No frame was set up here; continue in ThePreStub with x12 = MethodDesc*
        b ThePreStub

        NESTED_END
; ------------------------------------------------------------------

        NESTED_ENTRY ThePreStub

        ; In:  METHODDESC_REGISTER = MethodDesc*, plus the caller's arguments
        ; Out: tail-branches to the address returned by PreStubWorker

        PROLOG_WITH_TRANSITION_BLOCK

        ; PreStubWorker(pTransitionBlock, pMethodDesc)
        mov         x1, METHODDESC_REGISTER         ; pMethodDesc
        add         x0, sp, #__PWTB_TransitionBlock ; pTransitionBlock

        bl          PreStubWorker

        ; hold the returned target in x9 while the epilog runs
        mov         x9, x0

        EPILOG_WITH_TRANSITION_BLOCK_TAILCALL
        EPILOG_BRANCH_REG  x9

        NESTED_END

;; ------------------------------------------------------------------
;; ThePreStubPatch()
;;
;; A nop followed by a return. ThePreStubPatchLabel marks the return
;; instruction and is exported so code outside this file can refer to it.

        LEAF_ENTRY ThePreStubPatch
        nop
ThePreStubPatchLabel
        EXPORT          ThePreStubPatchLabel
        ret             lr
        LEAF_END

;-----------------------------------------------------------------------------
; The following Macros help in WRITE_BARRIER Implementations
    ; WRITE_BARRIER_ENTRY
    ;
    ; Declare the start of a write barrier function. Use similarly to NESTED_ENTRY. This is the only legal way
    ; to declare a write barrier function.
    ;
    ; Expands to LEAF_ENTRY $name.
    ;
    MACRO
      WRITE_BARRIER_ENTRY $name

      LEAF_ENTRY $name
    MEND

    ; WRITE_BARRIER_END
    ;
    ; The partner to WRITE_BARRIER_ENTRY, used like NESTED_END.
    ;
    ; Expands to LEAF_END_MARKED with the name of the barrier being closed.
    ;
    MACRO
      WRITE_BARRIER_END $__write_barrier_name 
      
      LEAF_END_MARKED $__write_barrier_name

    MEND

; ------------------------------------------------------------------
; Start of the writeable code region
;
; Marker routine: its address denotes where the region begins. The body is
; a bare return.
    LEAF_ENTRY JIT_PatchedCodeStart
        ret      lr
    LEAF_END

;-----------------------------------------------------------------------------
; void JIT_UpdateWriteBarrierState(bool skipEphemeralCheck)
;
; Update shadow copies of the various state info required for barrier
;
; State info is contained in a literal pool at the end of the function
; Placed in text section so that it is close enough to use ldr literal and still
; be relocatable. Eliminates need for PREPARE_EXTERNAL_VAR in hot code.
;
; Align and group state info together so it fits in a single cache line
; and each entry can be written atomically
;
; On entry:
;   x0   : skipEphemeralCheck
;
; On exit:
;   x0-x8, x12 : trashed
;
    WRITE_BARRIER_ENTRY JIT_UpdateWriteBarrierState
        PROLOG_SAVE_REG_PAIR   fp, lr, #-16!

        ; x0-x7 will contain intended new state
        ; x8 will preserve skipEphemeralCheck
        ; x12 will be used for pointers

        mov      x8, x0

        adrp     x12, g_card_table
        ldr      x0, [x12, g_card_table]

        ; x1, x2 and x3 are only loaded when the matching feature is compiled in.
        ; Otherwise the corresponding pool slot below receives whatever value the
        ; register held on entry.
#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
        adrp     x12, g_card_bundle_table
        ldr      x1, [x12, g_card_bundle_table]
#endif

#ifdef WRITE_BARRIER_CHECK
        adrp     x12, $g_GCShadow
        ldr      x2, [x12, $g_GCShadow]
#endif

#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
        adrp     x12, g_sw_ww_table
        ldr      x3, [x12, g_sw_ww_table]
#endif

        adrp     x12, g_ephemeral_low
        ldr      x4, [x12, g_ephemeral_low]

        adrp     x12, g_ephemeral_high
        ldr      x5, [x12, g_ephemeral_high]

        ; Check skipEphemeralCheck
        ; When it is non-zero the range is replaced by low = 0, high = all ones.
        ; JIT_WriteBarrier treats a zero wbs_ephemeral_low as "skip the range check".
        cbz      x8, EphemeralCheckEnabled
        movz     x4, #0
        movn     x5, #0

EphemeralCheckEnabled
        adrp     x12, g_lowest_address
        ldr      x6, [x12, g_lowest_address]

        adrp     x12, g_highest_address
        ldr      x7, [x12, g_highest_address]

        ; Update wbs state
        ; Four post-indexed pair stores walk the pool; the register order here
        ; must match the order of the DCQ slots declared below.
        adr      x12, wbs_begin
        stp      x0, x1, [x12], 16
        stp      x2, x3, [x12], 16
        stp      x4, x5, [x12], 16
        stp      x6, x7, [x12], 16

        EPILOG_RESTORE_REG_PAIR fp, lr, #16!
        EPILOG_RETURN

        ; Begin patchable literal pool
        ALIGN 64  ; Align to power of two at least as big as patchable literal pool so that it fits optimally in cache line

        ; Eight 8-byte slots, 64 bytes in total
wbs_begin
wbs_card_table
        DCQ 0
wbs_card_bundle_table
        DCQ 0
wbs_GCShadow
        DCQ 0
wbs_sw_ww_table
        DCQ 0
wbs_ephemeral_low
        DCQ 0
wbs_ephemeral_high
        DCQ 0
wbs_lowest_address
        DCQ 0
wbs_highest_address
        DCQ 0

    WRITE_BARRIER_END JIT_UpdateWriteBarrierState

; ------------------------------------------------------------------
; End of the writeable code region
;
; Marker routine: its address denotes where the region ends. The body is
; a bare return.
    LEAF_ENTRY JIT_PatchedCodeLast
        ret      lr
    LEAF_END

; void JIT_ByRefWriteBarrier
;
; Loads the object reference from [x13], advances x13, and continues in
; JIT_CheckedWriteBarrier, which performs the store through x14.
;
; On entry:
;   x13  : the source address (points to object reference to write)
;   x14  : the destination address (object reference written here)
;
; On exit:
;   x12  : trashed
;   x13  : incremented by 8
;   x14  : incremented by 8
;   x15  : trashed
;   x16  : trashed when the store is forwarded to JIT_WriteBarrier
;   x17  : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
;
    WRITE_BARRIER_ENTRY JIT_ByRefWriteBarrier

        ; x15 = *x13, then x13 += 8 (post-indexed load)
        ldr      x15, [x13], 8
        b        JIT_CheckedWriteBarrier

    WRITE_BARRIER_END JIT_ByRefWriteBarrier 

;-----------------------------------------------------------------------------
; Simple WriteBarriers
; void JIT_CheckedWriteBarrier(Object** dst, Object* src)
;
; Forwards to JIT_WriteBarrier when dst lies in
; [wbs_lowest_address, wbs_highest_address); otherwise performs a plain store.
;
; On entry:
;   x14  : the destination address (LHS of the assignment)
;   x15  : the object reference (RHS of the assignment)
;
; On exit:
;   x12  : trashed
;   x14  : incremented by 8
;   x15  : trashed
;   x16  : trashed when the store is forwarded to JIT_WriteBarrier
;   x17  : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
;
    WRITE_BARRIER_ENTRY JIT_CheckedWriteBarrier
        ldr      x12,  wbs_lowest_address
        cmp      x14,  x12

        ; If x14 >= lowest (HS), the flags come from comparing x14 with highest.
        ; Otherwise the flags are forced to NZCV = 0x2 (carry set), which reads
        ; as "not lower", so the blo below is not taken.
        ldr      x12,  wbs_highest_address
        ccmphs   x14,  x12, #0x2
        blo      JIT_WriteBarrier

NotInHeap
        ; plain store, then advance the destination (post-indexed)
        str      x15, [x14], 8
        ret      lr
    WRITE_BARRIER_END JIT_CheckedWriteBarrier

; void JIT_WriteBarrier(Object** dst, Object* src)
;
; Stores src to *dst, then updates the optional shadow heap, the optional
; software write watch table, and the card table.
;
; On entry:
;   x14  : the destination address (LHS of the assignment)
;   x15  : the object reference (RHS of the assignment)
;
; On exit:
;   x12  : trashed
;   x14  : incremented by 8
;   x15  : trashed
;   x16  : trashed (ephemeral range check / card table update)
;   x17  : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
;
    WRITE_BARRIER_ENTRY JIT_WriteBarrier
        ; store-release of the reference into the destination
        stlr     x15, [x14]

#ifdef WRITE_BARRIER_CHECK
        ; Update GC Shadow Heap 

        ; Do not perform the work if g_GCShadow is 0
        ldr      x12, wbs_GCShadow
        cbz      x12, ShadowUpdateDisabled 

        ; need temporary register. Save before using.
        ; (16 bytes are reserved for the single register; it is reloaded at
        ; ShadowUpdateEnd)
        str      x13, [sp, #-16]!

        ; Compute address of shadow heap location:
        ;   pShadow = $g_GCShadow + (x14 - g_lowest_address)
        ldr      x13, wbs_lowest_address
        sub      x13, x14, x13
        add      x12, x13, x12

        ; if (pShadow >= $g_GCShadowEnd) goto end
        adrp     x13, $g_GCShadowEnd
        ldr      x13, [x13, $g_GCShadowEnd]
        cmp      x12, x13
        bhs      ShadowUpdateEnd

        ; *pShadow = x15
        str      x15, [x12]

        ; Ensure that the write to the shadow heap occurs before the read from the GC heap so that race
        ; conditions are caught by INVALIDGCVALUE.
        dmb      ish

        ; if ([x14] == x15) goto end
        ldr      x13, [x14]
        cmp      x13, x15
        beq ShadowUpdateEnd

        ; *pShadow = INVALIDGCVALUE (0xcccccccd)
        ; (built from two 16-bit immediates and stored as a 64-bit value)
        movz     x13, #0xcccd
        movk     x13, #0xcccc, LSL #16
        str      x13, [x12]

ShadowUpdateEnd
        ldr      x13, [sp], #16
ShadowUpdateDisabled
#endif

#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
        ; Update the write watch table if necessary
        ; The table is indexed by (dst >> 12); the byte is set to 0xFF only when
        ; it is currently zero.
        ldr      x12,  wbs_sw_ww_table
        cbz      x12,  CheckCardTable
        add      x12,  x12, x14, LSR #0xC  // SoftwareWriteWatch::AddressToTableByteIndexShift
        ldrb     w17,  [x12]
        cbnz     x17,  CheckCardTable
        mov      w17,  0xFF
        strb     w17,  [x12]
#endif

CheckCardTable
        ; Branch to Exit if the reference is not in the Gen0 heap
        ;
        ; x12 = wbs_ephemeral_low, x16 = wbs_ephemeral_high (adjacent pool slots).
        ; A zero low bound means the range check is skipped.
        adr      x12,  wbs_ephemeral_low
        ldp      x12,  x16, [x12]
        cbz      x12,  SkipEphemeralCheck

        cmp      x15,  x12
        blo      Exit

        cmp      x15,  x16
        bhi      Exit

SkipEphemeralCheck
        ; Check if we need to update the card table
        ldr      x12, wbs_card_table

        ; x15 := offset within card table
        ; (dst >> 11; the object reference in x15 is no longer needed)
        lsr      x15, x14, #11

        ; skip the store when the card byte is already 0xFF
        ldrb     w16, [x12, x15]
        cmp      w16, 0xFF
        beq      Exit

UpdateCardTable
        mov      x16, 0xFF
        strb     w16, [x12, x15]

#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
#error Need to implement for ARM64
#endif

Exit
        ; every path leaves with the destination advanced past the slot just written
        add      x14, x14, 8
        ret      lr
    WRITE_BARRIER_END JIT_WriteBarrier

#ifdef FEATURE_PREJIT
;------------------------------------------------
; VirtualMethodFixupStub
;
; In NGEN images, virtual slots inherited from cross-module dependencies
; start out pointing at a jump thunk that lands here. This stub calls a VM
; helper, which patches the thunk when the precode is executed so that
; later calls go straight to the actual method body.
;
; The fixup is done lazily for performance reasons.
;
; On entry:
;   x0  = "this" pointer
;   x12 = Address of thunk
;
; On exit:
;   tail-branches to the address returned by VirtualMethodFixupWorker with
;   the argument registers restored

    NESTED_ENTRY VirtualMethodFixupStub

    ; spill fp/lr plus the integer and floating-point argument registers
    PROLOG_SAVE_REG_PAIR           fp, lr, #-224!
    SAVE_ARGUMENT_REGISTERS        sp, 16
    SAVE_FLOAT_ARGUMENT_REGISTERS  sp, 96

    ; VirtualMethodFixupWorker is entered with x0 = this (already in place)
    ; and x1 = thunk start address. See ZapImportVirtualThunk::Save for
    ; details.
    mov         x1, x12
    bl          VirtualMethodFixupWorker

    ; x0 now holds the tail-call target; keep it in x12 so the argument
    ; registers can be reloaded
    mov         x12, x0

    ; reload the caller's arguments and pop the frame
    RESTORE_FLOAT_ARGUMENT_REGISTERS  sp, 96
    RESTORE_ARGUMENT_REGISTERS        sp, 16
    EPILOG_RESTORE_REG_PAIR           fp, lr, #224!

    PATCH_LABEL VirtualMethodFixupPatchLabel

    ; continue in the actual method
    EPILOG_BRANCH_REG x12

    NESTED_END
#endif // FEATURE_PREJIT

;------------------------------------------------
; ExternalMethodFixupStub
;
; In NGEN images, calls to cross-module external methods start out pointing
; at a jump thunk that lands here. This stub calls a VM helper, which patches
; the thunk when the precode is executed so that later calls go straight to
; the actual method body.
;
; The fixup is done lazily for performance reasons.
;
; On entry:
;   x12 = Address of thunk
;
; On exit:
;   tail-branches to the address returned by ExternalMethodFixupWorker

    NESTED_ENTRY ExternalMethodFixupStub

    PROLOG_WITH_TRANSITION_BLOCK

    ; ExternalMethodFixupWorker(pTransitionBlock, pThunk)
    mov         x1, x12                         ; pThunk
    add         x0, sp, #__PWTB_TransitionBlock ; pTransitionBlock

    bl          ExternalMethodFixupWorker

    ; x0 is the patched-to address; hold it in x12 for the tail call
    mov         x12, x0

    EPILOG_WITH_TRANSITION_BLOCK_TAILCALL
    PATCH_LABEL ExternalMethodFixupPatchLabel
    EPILOG_BRANCH_REG   x12

    NESTED_END

; void SinglecastDelegateInvokeStub(Delegate *pThis)
;
; On entry:
;   x0 : delegate object; the remaining argument registers are passed through
;        untouched
;
; Replaces x0 with the delegate's target and branches to its method pointer.
; A null delegate branches to JIT_InternalThrow with
; CORINFO_NullReferenceException_ASM in x0.
;
; Trashes x16 and the condition flags.
    LEAF_ENTRY SinglecastDelegateInvokeStub
        cmp     x0, #0
        beq     LNullThis

        ; the method pointer must be read first: the second load overwrites x0
        ldr     x16, [x0, #DelegateObject___methodPtr]
        ldr     x0, [x0, #DelegateObject___target]

        br      x16

LNullThis
        mov     x0, #CORINFO_NullReferenceException_ASM
        b       JIT_InternalThrow

    LEAF_END

#ifdef FEATURE_COMINTEROP

; ------------------------------------------------------------------
; setStubReturnValue
;
; Moves a return value from the stub's return buffer into the registers
; the caller expects it in.
;
; On entry:
;   w0 - size of floating point return value (MetaSig::GetFPReturnSize()).
;        Only the low 32 bits are examined; the upper half of x0 is ignored.
;   x1 - pointer to the return buffer in the stub frame
;
; On exit (selected by w0):
;    0 : x0, x1 loaded from the buffer (non floating point return)
;    4 : s0          (float)
;    8 : d0          (double)
;   16 : s0-s3       (float HFA)
;   32 : d0-d3       (double HFA)
;   64 : q0-q3       (vector HVA)
;   any other size reaches EMIT_BREAKPOINT
;
; Trashes the condition flags.
    LEAF_ENTRY setStubReturnValue

        cbz     w0, NoFloatingPointRetVal

        ;; Float return case
        ;; (compare the 32-bit size, consistent with every other case below)
        cmp     w0, #4
        bne     LNoFloatRetVal
        ldr     s0, [x1]
        ret
LNoFloatRetVal

        ;; Double return case
        cmp     w0, #8
        bne     LNoDoubleRetVal
        ldr     d0, [x1]
        ret
LNoDoubleRetVal

        ;; Float HFA return case
        cmp     w0, #16
        bne     LNoFloatHFARetVal
        ldp     s0, s1, [x1]
        ldp     s2, s3, [x1, #8]
        ret
LNoFloatHFARetVal

        ;;Double HFA return case
        cmp     w0, #32
        bne     LNoDoubleHFARetVal
        ldp     d0, d1, [x1]
        ldp     d2, d3, [x1, #16]
        ret
LNoDoubleHFARetVal

        ;;Vector HVA return case
        ;; (the size lives in w0; no other register is an input to this routine)
        cmp     w0, #64
        bne     LNoVectorHVARetVal
        ldp     q0, q1, [x1]
        ldp     q2, q3, [x1, #32]
        ret
LNoVectorHVARetVal

        EMIT_BREAKPOINT ; Unreachable

NoFloatingPointRetVal

        ;; Restore the return value from retbuf
        ;; (x1 is the base register, so it is loaded last)
        ldr     x0, [x1]
        ldr     x1, [x1, #8]
        ret

    LEAF_END

; ------------------------------------------------------------------
; GenericComPlusCallStub that erects a ComPlusMethodFrame and calls into the runtime
; (CLRToCOMWorker) to dispatch rare cases of the interface call.
;
; On entry:
;   x0          : 'this' object
;   x12         : Interface MethodDesc*
;   plus user arguments in registers and on the stack
;
; On exit:
;   x0/x1/s0-s3/d0-d3 set to return value of the call as appropriate
;
    NESTED_ENTRY GenericComPlusCallStub

        ; The macro argument asks for ASM_ENREGISTERED_RETURNTYPE_MAXSIZE extra
        ; bytes, used below as the return value buffer.
        PROLOG_WITH_TRANSITION_BLOCK ASM_ENREGISTERED_RETURNTYPE_MAXSIZE

        add         x0, sp, #__PWTB_TransitionBlock ; pTransitionBlock
        mov         x1, x12                         ; pMethodDesc

        ; Call CLRToCOMWorker(TransitionBlock *, ComPlusCallMethodDesc *). 
        ; This call will set up the rest of the frame (including the vfptr, the GS cookie and
        ; linking to the thread), make the client call and return with correct registers set 
        ; (x0/x1/s0-s3/d0-d3 as appropriate).

        bl          CLRToCOMWorker

        ; x0 = fpRetSize

        ; The return value is stored before float argument registers
        ; x1 = address of that buffer; setStubReturnValue takes the size in w0
        ; and the buffer pointer in x1.
        add         x1, sp, #(__PWTB_FloatArgumentRegisters - ASM_ENREGISTERED_RETURNTYPE_MAXSIZE)
        bl          setStubReturnValue

        EPILOG_WITH_TRANSITION_BLOCK_RETURN

    NESTED_END

; ------------------------------------------------------------------
; COM to CLR stub called the first time a particular method is invoked.
;
; On entry:
;   x12         : ComCallMethodDesc* provided by prepad thunk
;   plus user arguments in registers and on the stack
;
; On exit:
;   tail calls to real method
;   (if ComPreStubWorker returns 0, returns to the caller instead with
;   x0 = the ErrorReturn value the worker stored)
;
    NESTED_ENTRY ComCallPreStub

    GBLA ComCallPreStub_FrameSize
    GBLA ComCallPreStub_StackAlloc
    GBLA ComCallPreStub_FrameOffset
    GBLA ComCallPreStub_ErrorReturnOffset 
    GBLA ComCallPreStub_FirstStackAdjust

    ; Assemble-time layout arithmetic. The stack is grown in two steps:
    ; FirstStackAdjust (fp/lr pair plus the argument register area) and then
    ; StackAlloc (rest of the frame, float argument registers and the
    ; ErrorReturn slot), each padded to a multiple of 16.
ComCallPreStub_FrameSize         SETA (SIZEOF__GSCookie + SIZEOF__ComMethodFrame)
ComCallPreStub_FirstStackAdjust  SETA (8 + SIZEOF__ArgumentRegisters + 2 * 8) ; x8, reg args , fp & lr already pushed
ComCallPreStub_StackAlloc        SETA ComCallPreStub_FrameSize - ComCallPreStub_FirstStackAdjust 
ComCallPreStub_StackAlloc        SETA ComCallPreStub_StackAlloc + SIZEOF__FloatArgumentRegisters + 8; 8 for ErrorReturn
    IF ComCallPreStub_StackAlloc:MOD:16 != 0
ComCallPreStub_StackAlloc     SETA ComCallPreStub_StackAlloc + 8
    ENDIF

    ; FrameOffset is computed from the unpadded FirstStackAdjust, which is only
    ; rounded up afterwards.
ComCallPreStub_FrameOffset       SETA (ComCallPreStub_StackAlloc - (SIZEOF__ComMethodFrame - ComCallPreStub_FirstStackAdjust))
ComCallPreStub_ErrorReturnOffset SETA SIZEOF__FloatArgumentRegisters

    IF (ComCallPreStub_FirstStackAdjust):MOD:16 != 0
ComCallPreStub_FirstStackAdjust     SETA ComCallPreStub_FirstStackAdjust + 8
    ENDIF

    ; Save arguments and return address
    PROLOG_SAVE_REG_PAIR           fp, lr, #-ComCallPreStub_FirstStackAdjust!
    PROLOG_STACK_ALLOC  ComCallPreStub_StackAlloc 

    ; integer argument registers go just above the saved fp/lr pair
    SAVE_ARGUMENT_REGISTERS        sp, (16+ComCallPreStub_StackAlloc)

    ; float argument registers sit at the bottom of the allocation
    SAVE_FLOAT_ARGUMENT_REGISTERS  sp, 0

    ; Store the ComCallMethodDesc* into the frame's m_pvDatum field, then call
    ; ComPreStubWorker with x0 = frame address and x1 = address of the
    ; ErrorReturn slot.
    str x12, [sp, #(ComCallPreStub_FrameOffset + UnmanagedToManagedFrame__m_pvDatum)]
    add x0, sp, #(ComCallPreStub_FrameOffset)
    add x1, sp, #(ComCallPreStub_ErrorReturnOffset)
    bl ComPreStubWorker

    ; a zero result selects the error path
    cbz x0, ComCallPreStub_ErrorExit

    mov x12, x0

    ; pop the stack and restore original register state
    RESTORE_FLOAT_ARGUMENT_REGISTERS  sp, 0
    RESTORE_ARGUMENT_REGISTERS        sp, (16+ComCallPreStub_StackAlloc)

    EPILOG_STACK_FREE ComCallPreStub_StackAlloc
    EPILOG_RESTORE_REG_PAIR           fp, lr, #ComCallPreStub_FirstStackAdjust!

    ; and tailcall to the actual method
    EPILOG_BRANCH_REG x12
    
ComCallPreStub_ErrorExit
    ; the argument registers are not reloaded on this path
    ldr x0, [sp, #(ComCallPreStub_ErrorReturnOffset)] ; ErrorReturn
    
    ; pop the stack
    EPILOG_STACK_FREE ComCallPreStub_StackAlloc
    EPILOG_RESTORE_REG_PAIR           fp, lr, #ComCallPreStub_FirstStackAdjust!

    EPILOG_RETURN

    NESTED_END

; ------------------------------------------------------------------
; COM to CLR stub which sets up a ComMethodFrame and calls COMToCLRWorker.
;
; On entry:
;   x12         : ComCallMethodDesc*  provided by prepad thunk
;   plus user arguments in registers and on the stack
;
; On exit:
;   Result in x0/d0 as per the real method being called
;
    NESTED_ENTRY GenericComCallStub

    ; Assembly-time constants describing the stack frame built below (byte counts/offsets).
    GBLA GenericComCallStub_FrameSize
    GBLA GenericComCallStub_StackAlloc
    GBLA GenericComCallStub_FrameOffset
    GBLA GenericComCallStub_FirstStackAdjust

    ; FrameSize        : GS cookie plus the ComMethodFrame.
    ; FirstStackAdjust : bytes covered by the initial fp/lr pre-indexed store (x8, x0-x7, fp, lr).
    ; StackAlloc       : remainder of the frame plus the float argument save area,
    ;                    padded below to a multiple of 16.
GenericComCallStub_FrameSize         SETA (SIZEOF__GSCookie + SIZEOF__ComMethodFrame)
GenericComCallStub_FirstStackAdjust  SETA (8 + SIZEOF__ArgumentRegisters + 2 * 8)
GenericComCallStub_StackAlloc        SETA GenericComCallStub_FrameSize - GenericComCallStub_FirstStackAdjust
GenericComCallStub_StackAlloc        SETA GenericComCallStub_StackAlloc + SIZEOF__FloatArgumentRegisters

    IF (GenericComCallStub_StackAlloc):MOD:16 != 0
GenericComCallStub_StackAlloc     SETA GenericComCallStub_StackAlloc + 8
    ENDIF

    ; Offset from sp of the frame address passed to COMToCLRWorker. Computed from the
    ; un-rounded FirstStackAdjust; that value is rounded to 16 bytes only afterwards.
    ; COMToCLRDispatchHelper subtracts this same constant from the frame address to
    ; find the float argument save area at the bottom of this allocation.
GenericComCallStub_FrameOffset       SETA (GenericComCallStub_StackAlloc - (SIZEOF__ComMethodFrame - GenericComCallStub_FirstStackAdjust))

    IF (GenericComCallStub_FirstStackAdjust):MOD:16 != 0
GenericComCallStub_FirstStackAdjust     SETA GenericComCallStub_FirstStackAdjust + 8
    ENDIF


    ; Save arguments and return address
    PROLOG_SAVE_REG_PAIR           fp, lr, #-GenericComCallStub_FirstStackAdjust!
    PROLOG_STACK_ALLOC  GenericComCallStub_StackAlloc 

    ; Integer argument registers sit just above the saved fp/lr pair,
    ; float argument registers at the bottom of the allocation (sp+0)
    SAVE_ARGUMENT_REGISTERS        sp, (16+GenericComCallStub_StackAlloc)
    SAVE_FLOAT_ARGUMENT_REGISTERS  sp, 0

    ; Record the ComCallMethodDesc* in the frame and pass the frame address in x1.
    ; NOTE(review): x0 is not written by any instruction visible here before the call;
    ; confirm what COMToCLRWorker expects in its first parameter.
    str x12, [sp, #(GenericComCallStub_FrameOffset + UnmanagedToManagedFrame__m_pvDatum)]
    add x1, sp, #GenericComCallStub_FrameOffset
    bl COMToCLRWorker
    
    ; pop the stack
    EPILOG_STACK_FREE GenericComCallStub_StackAlloc
    EPILOG_RESTORE_REG_PAIR           fp, lr, #GenericComCallStub_FirstStackAdjust!

    EPILOG_RETURN

    NESTED_END

; ------------------------------------------------------------------
; COM to CLR stub called from COMToCLRWorker that actually dispatches to the real managed method.
;
; On entry:
;   x0          : dwStackSlots, count of argument stack slots to copy
;   x1          : pFrame, ComMethodFrame pushed by GenericComCallStub above
;   x2          : pTarget, address of code to call
;   x3          : pSecretArg, hidden argument passed to target above in x12
;   x4          : pDangerousThis, managed 'this' reference
;
; On exit:
;   Result in x0/d0 as per the real method being called
;
    NESTED_ENTRY COMToCLRDispatchHelper,,CallDescrWorkerUnwindFrameChainHandler

    ; The prolog leaves fp pointing at the saved fp/lr pair. The stack argument copy
    ; below has a variable size, so sp is later recovered via EPILOG_STACK_RESTORE
    ; (presumably from fp) rather than by a fixed adjustment.
    PROLOG_SAVE_REG_PAIR           fp, lr, #-16!

    ; No stack arguments: skip the copy entirely
    cbz x0, COMToCLRDispatchHelper_RegSetup

    ; x9 = address immediately past the ComMethodFrame
    add x9, x1, #SIZEOF__ComMethodFrame

    ; Compute number of 8 bytes slots to copy. This is done by rounding up the
    ; dwStackSlots value to the nearest even value
    add x0, x0, #1
    bic x0, x0, #1

    ; Compute how many slots to adjust the address to copy from. Since we
    ; are copying 16 bytes at a time, adjust by -1 from the rounded value
    sub x6, x0, #1
    add x9, x9, x6, LSL #3

    ; Each iteration copies the 16 bytes at x9 onto the stack (pushing sp down by 16),
    ; then steps x9 down by 16, so the highest-addressed pair is copied first.
    ; x0 is even and non-zero on entry to the loop, so it reaches exactly zero.
COMToCLRDispatchHelper_StackLoop
    ldp     x7, x8, [x9], #-16  ; post-index
    stp     x7, x8, [sp, #-16]! ; pre-index
    subs    x0, x0, #2
    bne     COMToCLRDispatchHelper_StackLoop
    
COMToCLRDispatchHelper_RegSetup

    ; We need an aligned offset for restoring float args, so do the subtraction into
    ; a scratch register
    ; (x5 = frame address - GenericComCallStub_FrameOffset, i.e. the sp value at which
    ; GenericComCallStub saved the float argument registers)
    sub     x5, x1, GenericComCallStub_FrameOffset
    RESTORE_FLOAT_ARGUMENT_REGISTERS x5, 0

    ; lr temporarily holds the call target; the real return address is in the
    ; fp/lr pair saved by the prolog and is reloaded in the epilog
    mov lr, x2
    mov x12, x3                 ; hidden argument for the target

    mov x0, x4                  ; first argument = managed this reference

    ; Reload the remaining integer argument registers from the slots saved in the frame.
    ; x1 is the base register for these loads, so it is reloaded last.
    ldp x2, x3, [x1, #(SIZEOF__ComMethodFrame - SIZEOF__ArgumentRegisters + 16)]
    ldp x4, x5, [x1, #(SIZEOF__ComMethodFrame - SIZEOF__ArgumentRegisters + 32)]
    ldp x6, x7, [x1, #(SIZEOF__ComMethodFrame - SIZEOF__ArgumentRegisters + 48)]
    ldr x8, [x1, #(SIZEOF__ComMethodFrame - SIZEOF__ArgumentRegisters - 8)]

    ldr x1, [x1, #(SIZEOF__ComMethodFrame - SIZEOF__ArgumentRegisters + 8)]

    blr lr
    
    ; Discard the copied stack arguments and return with the target's x0/d0 untouched
    EPILOG_STACK_RESTORE
    EPILOG_RESTORE_REG_PAIR           fp, lr, #16!
    EPILOG_RETURN
    
    NESTED_END

#endif ; FEATURE_COMINTEROP

;
; x12 = UMEntryThunk*
;
    NESTED_ENTRY TheUMEntryPrestub,,UMEntryPrestubUnwindFrameChainHandler

    ; Calls TheUMEntryPrestubWorker(UMEntryThunk*) and tail calls the address it
    ; returns, with all incoming argument registers preserved.
    ;
    ; 224-byte frame: fp/lr at sp+0, integer argument registers from sp+16,
    ; float argument registers from sp+96.

    ; Save arguments and return address
    PROLOG_SAVE_REG_PAIR           fp, lr, #-224!
    SAVE_ARGUMENT_REGISTERS        sp, 16
    SAVE_FLOAT_ARGUMENT_REGISTERS  sp, 96

    ; x0 = UMEntryThunk* (the single argument of the worker)
    mov x0, x12
    bl  TheUMEntryPrestubWorker

    ; save real target address in x12.
    mov x12, x0

    ; pop the stack and restore original register state
    RESTORE_ARGUMENT_REGISTERS        sp, 16
    RESTORE_FLOAT_ARGUMENT_REGISTERS  sp, 96
    EPILOG_RESTORE_REG_PAIR           fp, lr, #224!

    ; and tailcall to the actual method
    EPILOG_BRANCH_REG x12

    NESTED_END

;
; x12 = UMEntryThunk*
;
    NESTED_ENTRY UMThunkStub,,UMThunkStubUnwindFrameChainHandler

    ; Overview of the code below:
    ;   1. save the argument registers and the UMEntryThunk*,
    ;   2. obtain the Thread* (creating it on the slow path) and set
    ;      m_fPreemptiveGCDisabled to 1,
    ;   3. copy the stack arguments and reload the register arguments,
    ;   4. call the IL stub taken from the thunk marshalling info,
    ;   5. clear m_fPreemptiveGCDisabled and return.

    ; Save arguments and return address
    PROLOG_SAVE_REG_PAIR           fp, lr, #-112! ; 72 for regArgs, 8 for x19 & 8 for x12 & 8 for 16-byte align
    ; save callee saved reg x19. x19 is used in the method to store thread*
    PROLOG_SAVE_REG                x19, #96

    SAVE_ARGUMENT_REGISTERS        sp, 16

    GBLA UMThunkStub_HiddenArg ; offset of saved UMEntryThunk *
    GBLA UMThunkStub_StackArgs ; offset of original stack args (total size of UMThunkStub frame)
UMThunkStub_HiddenArg SETA 88
UMThunkStub_StackArgs SETA 112

    ; save UMEntryThunk*
    str                 x12, [sp, #UMThunkStub_HiddenArg]

    ; x0 = GetThread(). Trashes x19
    INLINE_GETTHREAD    x0, x19
    cbz                 x0, UMThunkStub_DoThreadSetup

    ; Reached by fall-through, or from UMThunkStub_DoThreadSetup with x0 holding the
    ; value returned by CreateThreadBlockThrow
UMThunkStub_HaveThread
    mov                 x19, x0                  ; x19 = Thread *

    mov                 x9, 1
    ; m_fPreemptiveGCDisabled is 4 byte field so using 32-bit variant
    str                 w9, [x19, #Thread__m_fPreemptiveGCDisabled]

    ; Take the slow path if g_TrapReturningThreads is non-zero.
    ; NOTE(review): this is a 64-bit load; confirm the variable is 8 bytes wide.
    ldr                 x2, =g_TrapReturningThreads
    ldr                 x3, [x2]
    ; assuming x0 contains Thread* before jumping to UMThunkStub_DoTrapReturningThreads
    cbnz                x3, UMThunkStub_DoTrapReturningThreads

UMThunkStub_InCooperativeMode
    ldr                 x12, [fp, #UMThunkStub_HiddenArg] ; x12 = UMEntryThunk*
    ldr                 x3, [x12, #UMEntryThunk__m_pUMThunkMarshInfo] ; x3 = m_pUMThunkMarshInfo

    ; m_cbActualArgSize is UINT32 and hence occupies 4 bytes
    ldr                 w2, [x3, #UMThunkMarshInfo__m_cbActualArgSize] ; w2 = Stack arg bytes
    cbz                 w2, UMThunkStub_RegArgumentsSetup

    ; NOTE(review): the pair count computed below is only non-zero when the byte
    ; count is at least 8, and the loop does not terminate normally on a zero count;
    ; this appears to assume a multiple of 8 - confirm.

    ; extend to 64-bits
    uxtw                x2, w2

    ; Source pointer
    add                 x0, fp, #UMThunkStub_StackArgs

    ; move source pointer to end of Stack Args
    add                 x0, x0, x2 

    ; Count of stack slot pairs to copy (divide by 16)
    lsr                 x1, x2, #4

    ; Is there an extra stack slot (can happen when stack arg bytes not multiple of 16)
    and                 x2, x2, #8

    ; If yes then start source pointer from 16 byte aligned stack slot
    add                 x0, x0, x2      

    ; increment stack slot pair count by 1 if x2 is not zero
    add                 x1, x1, x2, LSR #3 

    ; Copy 16 bytes per iteration from the highest address downwards,
    ; pushing each pair onto the stack
UMThunkStub_StackLoop
    ldp                 x4, x5, [x0, #-16]! ; pre-Index
    stp                 x4, x5, [sp, #-16]! ; pre-Index
    subs                x1, x1, #1
    bne                 UMThunkStub_StackLoop

UMThunkStub_RegArgumentsSetup
    ; x16 = IL stub entry point; x3 still holds m_pUMThunkMarshInfo here
    ldr                 x16, [x3, #UMThunkMarshInfo__m_pILStub]

    ; Reload the argument registers saved in the prolog (addressed via fp because
    ; sp may have moved during the stack argument copy)
    RESTORE_ARGUMENT_REGISTERS        fp, 16
    
    blr                 x16

UMThunkStub_PostCall
    mov                 x4, 0
    ; m_fPreemptiveGCDisabled is 4 byte field so using 32-bit variant
    str                 w4, [x19, #Thread__m_fPreemptiveGCDisabled]

    EPILOG_STACK_RESTORE
    EPILOG_RESTORE_REG                x19, #96
    EPILOG_RESTORE_REG_PAIR           fp, lr, #112!

    EPILOG_RETURN

    ; Slow path: no Thread yet. The float argument registers are preserved around
    ; the call in a temporary stack area; the integer argument registers were
    ; already saved in the frame by the prolog.
UMThunkStub_DoThreadSetup
    sub                 sp, sp, #SIZEOF__FloatArgumentRegisters
    SAVE_FLOAT_ARGUMENT_REGISTERS  sp, 0
    bl                  CreateThreadBlockThrow
    RESTORE_FLOAT_ARGUMENT_REGISTERS  sp, 0
    add                 sp, sp, #SIZEOF__FloatArgumentRegisters
    b                   UMThunkStub_HaveThread

    ; Slow path: g_TrapReturningThreads was non-zero. Calls
    ; UMThunkStubRareDisableWorker(x0 = Thread*, x1 = UMEntryThunk*) with the float
    ; argument registers preserved around the call.
UMThunkStub_DoTrapReturningThreads
    sub                 sp, sp, #SIZEOF__FloatArgumentRegisters
    SAVE_FLOAT_ARGUMENT_REGISTERS  sp, 0
    ; x0 already contains Thread* pThread
    ; UMEntryThunk* pUMEntry
    ldr                 x1, [fp, #UMThunkStub_HiddenArg]
    bl                  UMThunkStubRareDisableWorker
    RESTORE_FLOAT_ARGUMENT_REGISTERS  sp, 0
    add                 sp, sp, #SIZEOF__FloatArgumentRegisters
    b                   UMThunkStub_InCooperativeMode

    NESTED_END

    ; Emits the literal data needed by the INLINE_GETTHREAD expansion above
    ; (presumably; the macro is defined outside this section)
    INLINE_GETTHREAD_CONSTANT_POOL
    
#ifdef FEATURE_HIJACK
; ------------------------------------------------------------------
; Hijack function for functions which return a scalar type or a struct (value type)
    NESTED_ENTRY OnHijackTripThread
    ; 176-byte frame layout (offsets from sp):
    ;     0 : fp, lr
    ;    16 : x19-x28
    ;    96 : x0, x1
    ;   112 : q0-q3
    PROLOG_SAVE_REG_PAIR   fp, lr, #-176!
    ; Spill callee saved registers 
    PROLOG_SAVE_REG_PAIR   x19, x20, #16
    PROLOG_SAVE_REG_PAIR   x21, x22, #32
    PROLOG_SAVE_REG_PAIR   x23, x24, #48
    PROLOG_SAVE_REG_PAIR   x25, x26, #64
    PROLOG_SAVE_REG_PAIR   x27, x28, #80

    ; save any integral return value(s)
    stp x0, x1, [sp, #96]

    ; save any FP/HFA/HVA return value(s)
    stp q0, q1, [sp, #112]
    stp q2, q3, [sp, #144]

    ; OnHijackWorker receives the address of the register block saved above
    mov x0, sp
    bl OnHijackWorker
	
    ; restore any integral return value(s)
    ldp x0, x1, [sp, #96]

    ; restore any FP/HFA/HVA return value(s)
    ldp q0, q1, [sp, #112]
    ldp q2, q3, [sp, #144]

    ; Registers are reloaded from the frame, so any changes the worker made to the
    ; saved values (including the saved lr) take effect on return
    EPILOG_RESTORE_REG_PAIR   x19, x20, #16
    EPILOG_RESTORE_REG_PAIR   x21, x22, #32
    EPILOG_RESTORE_REG_PAIR   x23, x24, #48
    EPILOG_RESTORE_REG_PAIR   x25, x26, #64
    EPILOG_RESTORE_REG_PAIR   x27, x28, #80
    EPILOG_RESTORE_REG_PAIR   fp, lr,   #176!
    EPILOG_RETURN
    NESTED_END

#endif ; FEATURE_HIJACK

;; ------------------------------------------------------------------
;; Each line below instantiates the GenerateRedirectedHandledJITCaseStub macro
;; (defined outside this section) for one redirection reason.
;; ------------------------------------------------------------------
;; Redirection Stub for GC in fully interruptible method
        GenerateRedirectedHandledJITCaseStub GCThreadControl
;; ------------------------------------------------------------------
;; Redirection Stub for the DbgThreadControl case
        GenerateRedirectedHandledJITCaseStub DbgThreadControl
;; ------------------------------------------------------------------
;; Redirection Stub for the UserSuspend case
        GenerateRedirectedHandledJITCaseStub UserSuspend

#ifdef _DEBUG
; ------------------------------------------------------------------
; Redirection Stub for GC Stress
; (only generated in _DEBUG builds)
        GenerateRedirectedHandledJITCaseStub GCStress
#endif


; ------------------------------------------------------------------

        ; This helper enables us to call into a funclet after restoring Fp register
        NESTED_ENTRY CallEHFunclet
        ; On entry:
        ;
        ; X0 = throwable        
        ; X1 = PC to invoke
        ; X2 = address of X19 register in CONTEXT record; used to restore the non-volatile registers of CrawlFrame
        ; X3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved.
        ;
        ; On exit:
        ;
        ; X0 is whatever the funclet left there; this helper does not touch it after the call.
        ;

        ; Using below prolog instead of PROLOG_SAVE_REG_PAIR fp,lr, #-16!
        ; is intentional. Above statement would also emit instruction to save
        ; sp in fp. If sp is saved in fp in prolog then it is not expected that fp can change in the body
        ; of method. However, this method needs to be able to change fp before calling funclet.
        ; This is required to access locals in funclet.
        PROLOG_SAVE_REG_PAIR_NO_FP fp,lr, #-96!

        ; Spill callee saved registers
        PROLOG_SAVE_REG_PAIR   x19, x20, 16
        PROLOG_SAVE_REG_PAIR   x21, x22, 32
        PROLOG_SAVE_REG_PAIR   x23, x24, 48
        PROLOG_SAVE_REG_PAIR   x25, x26, 64
        PROLOG_SAVE_REG_PAIR   x27, x28, 80

        ; Save the SP of this function. We cannot store SP directly.
        ; (fp is only a temporary here; it is overwritten from the CONTEXT just below)
        mov fp, sp
        str fp, [x3]

        ; Load x19-x28 and fp from the CONTEXT record so the funclet runs with the
        ; non-volatile register values recorded there
        ldp x19, x20, [x2, #0]
        ldp x21, x22, [x2, #16]
        ldp x23, x24, [x2, #32]
        ldp x25, x26, [x2, #48]
        ldp x27, x28, [x2, #64]
        ldr fp, [x2, #80] ; offset of fp in CONTEXT relative to X19

        ; Invoke the funclet
        blr x1
        nop

        ; Restore this helper's own callee saved registers, fp and lr from its frame
        EPILOG_RESTORE_REG_PAIR   x19, x20, 16
        EPILOG_RESTORE_REG_PAIR   x21, x22, 32
        EPILOG_RESTORE_REG_PAIR   x23, x24, 48
        EPILOG_RESTORE_REG_PAIR   x25, x26, 64
        EPILOG_RESTORE_REG_PAIR   x27, x28, 80
        EPILOG_RESTORE_REG_PAIR   fp, lr, #96!
        EPILOG_RETURN

        NESTED_END CallEHFunclet

        ; This helper enables us to call into a filter funclet by passing it the CallerSP to lookup the 
        ; frame pointer for accessing the locals in the parent method.
        NESTED_ENTRY CallEHFilterFunclet

        ; This prolog also copies sp into fp, so fp holds this helper's SP below
        PROLOG_SAVE_REG_PAIR   fp, lr, #-16!

        ; On entry:
        ;
        ; X0 = throwable        
        ; X1 = SP of the caller of the method/funclet containing the filter
        ; X2 = PC to invoke
        ; X3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved.
        ;
        ; X0 and X1 are not modified before the call, so the filter funclet receives them as passed in.
        ;
        ; Save the SP of this function
        str fp, [x3]
        ; Invoke the filter funclet
        blr x2

        ; X0 is left as the filter funclet set it
        EPILOG_RESTORE_REG_PAIR   fp, lr,   #16!
        EPILOG_RETURN

        NESTED_END CallEHFilterFunclet


        GBLA FaultingExceptionFrame_StackAlloc
        GBLA FaultingExceptionFrame_FrameOffset

        ; StackAlloc  : bytes reserved for a GS cookie followed by a FaultingExceptionFrame.
        ; FrameOffset : offset of the FaultingExceptionFrame within that allocation
        ;               (it starts right after the GS cookie).
FaultingExceptionFrame_StackAlloc         SETA (SIZEOF__GSCookie + SIZEOF__FaultingExceptionFrame)
FaultingExceptionFrame_FrameOffset        SETA  SIZEOF__GSCookie

        ; Macro parameters:
        ;   STUB   - name of the stub function to generate
        ;   TARGET - function called with the address of the stack-allocated
        ;            FaultingExceptionFrame in x0; it must not return
        MACRO
        GenerateRedirectedStubWithFrame $STUB, $TARGET

        ; 
        ; This is the primary function to which execution will be redirected to.
        ;
        NESTED_ENTRY $STUB

        ;
        ; IN: lr: original IP before redirect
        ;

        PROLOG_SAVE_REG_PAIR    fp, lr, #-16!
        PROLOG_STACK_ALLOC  FaultingExceptionFrame_StackAlloc

        ; At this point, the stack maybe misaligned if the thread abort was asynchronously
        ; triggered in the prolog or epilog of the managed method. For such a case, we must
        ; align the stack before calling into the VM.
        ;
        ; Runtime check for 16-byte alignment. 
        ; (subtracts sp MOD 16 from sp, i.e. rounds sp down to a 16-byte boundary)
        mov x0, sp
        and x0, x0, #15
        sub sp, sp, x0

        ; Save pointer to FEF for GetFrameFromRedirectedStubStackFrame
        ; (x19 is overwritten without being saved; the stub never returns to its caller)
        add x19, sp, #FaultingExceptionFrame_FrameOffset

        ; Prepare to initialize to NULL
        mov x1,#0
        str x1, [x19]                                                        ; Initialize vtbl (it is not strictly necessary)
        str x1, [x19, #FaultingExceptionFrame__m_fFilterExecuted]            ; Initialize BOOL for personality routine

        mov x0, x19       ; move the ptr to FEF in X0

        bl            $TARGET

        ; Target should not return.
        EMIT_BREAKPOINT

        NESTED_END $STUB

        MEND


; ------------------------------------------------------------------
;
; Helpers for async (NullRef, AccessViolation) exceptions
;

        NESTED_ENTRY NakedThrowHelper2,,FixContextHandler
        PROLOG_SAVE_REG_PAIR fp,lr, #-16!

        ; On entry:
        ;
        ; X0 = Address of FaultingExceptionFrame
        ;
        ; X0 is passed straight through to LinkFrameAndThrow.
        bl LinkFrameAndThrow

        ; Target should not return.
        EMIT_BREAKPOINT

        NESTED_END NakedThrowHelper2


        ; NakedThrowHelper: redirect stub that builds a FaultingExceptionFrame on the
        ; stack and calls NakedThrowHelper2 with its address in x0
        GenerateRedirectedStubWithFrame NakedThrowHelper, NakedThrowHelper2

; ------------------------------------------------------------------
; ResolveWorkerChainLookupAsmStub
;
; This method will perform a quick chained lookup of the entry if the 
;  initial cache lookup fails.
; 
; On Entry:  
;   x9        contains the pointer to the current ResolveCacheElem
;   x11       contains the address of the indirection (and the flags in the low two bits)
;   x12       contains our contract (the DispatchToken)
; Must be preserved:
;   x0        contains the instance object ref that we are making an interface call on
;   x9        Must point to a ResolveCacheElem [For Sanity]
;  [x1-x7]    contains any additional register arguments for the interface method
;
; Loaded from x0 
;   x13       contains our type     the MethodTable  (from object ref in x0)
; 
; On Exit:
;   x0, [x1-x7] arguments for the interface implementation target
; 
; On Exit (to ResolveWorkerAsmStub):  
;   x11       contains the address of the indirection and the flags in the low two bits.
;   x12       contains our contract (DispatchToken)
;   x16,x17   will be trashed
; 
    GBLA BACKPATCH_FLAG      ; two low bit flags used by ResolveWorkerAsmStub
    GBLA PROMOTE_CHAIN_FLAG  ; two low bit flags used by ResolveWorkerAsmStub
BACKPATCH_FLAG      SETA  1
PROMOTE_CHAIN_FLAG  SETA  2
        
    NESTED_ENTRY ResolveWorkerChainLookupAsmStub

        ; No prolog is needed: every exit from this stub is a branch (tail call),
        ; and only x9, x11-x13, x16 and x17 are written.

        tst     x11, #BACKPATCH_FLAG    ; First we check if x11 has the BACKPATCH_FLAG set
        bne     Fail                    ; If the BACKPATCH_FLAGS is set we will go directly to the ResolveWorkerAsmStub
        
        ldr     x13, [x0]         ; retrieve the MethodTable from the object ref in x0
MainLoop 
        ldr     x9, [x9, #ResolveCacheElem__pNext]     ; x9 <= the next entry in the chain
        cmp     x9, #0
        beq     Fail              ; end of chain reached without a match

        ldp     x16, x17, [x9]    ; x16, x17 = first two 8-byte fields of the element (MT and token, per the compares below)
        cmp     x16, x13          ; compare our MT with the one in the ResolveCacheElem
        bne     MainLoop
        
        cmp     x17, x12          ; compare our DispatchToken with one in the ResolveCacheElem
        bne     MainLoop
        
Success         
        ; Decrement the global success counter; once it goes negative, take the
        ; Promote path instead of dispatching directly
        ldr     x13, =g_dispatch_cache_chain_success_counter
        ldr     x16, [x13]
        subs    x16, x16, #1
        str     x16, [x13]
        blt     Promote

        ldr     x16, [x9, #ResolveCacheElem__target]    ; get the ImplTarget
        br      x16               ; branch to interface implementation target
        
Promote
                                  ; Move this entry to head position of the chain
        mov     x16, #256
        str     x16, [x13]        ; be quick to reset the counter so we don't get a bunch of contending threads
        orr     x11, x11, #PROMOTE_CHAIN_FLAG   ; set PROMOTE_CHAIN_FLAG 
        mov     x12, x9           ; We pass the ResolveCacheElem to ResolveWorkerAsmStub instead of the DispatchToken

        ; Promote falls through into Fail
Fail           
        b       ResolveWorkerAsmStub ; call the ResolveWorkerAsmStub method to transition into the VM
    
    NESTED_END ResolveWorkerChainLookupAsmStub

;; ------------------------------------------------------------------
;; void ResolveWorkerAsmStub(args in regs x0-x7 & stack and possibly retbuf arg in x8, x11:IndirectionCellAndFlags, x12:DispatchToken)
;;
;; The stub dispatch thunk which transfers control to VSD_ResolveWorker.
        NESTED_ENTRY ResolveWorkerAsmStub

        PROLOG_WITH_TRANSITION_BLOCK

        ; Arguments for VSD_ResolveWorker. x11 carries both the indirection cell
        ; address and two flag bits, which are separated here.
        add x0, sp, #__PWTB_TransitionBlock ; pTransitionBlock
        and x1, x11, #-4 ; Indirection cell
        mov x2, x12 ; DispatchToken
        and x3, x11, #3 ; flag
        bl VSD_ResolveWorker
        mov x9, x0      ; x9 = address to dispatch to (x0 is an argument register and gets restored)
       
        EPILOG_WITH_TRANSITION_BLOCK_TAILCALL

        EPILOG_BRANCH_REG  x9

        NESTED_END

#ifdef FEATURE_READYTORUN

    NESTED_ENTRY DelayLoad_MethodCall
    ; On entry (as consumed below):
    ;   x9  : sectionIndex
    ;   x10 : Module*
    ;   x11 : indirection cell
    PROLOG_WITH_TRANSITION_BLOCK

    add x0, sp, #__PWTB_TransitionBlock ; pTransitionBlock
    mov x1, x11 ; Indirection cell
    mov x2, x9 ; sectionIndex
    mov x3, x10 ; Module*
    bl ExternalMethodFixupWorker
    mov x12, x0     ; x12 = value returned by the worker, left there for the shared code branched to below
    
    EPILOG_WITH_TRANSITION_BLOCK_TAILCALL
    ; Share patch label
    b ExternalMethodFixupPatchLabel
    NESTED_END

    MACRO
        DynamicHelper $frameFlags, $suffix

        ; Generates one DelayLoad_Helper variant. The first macro parameter is passed
        ; to DynamicHelperWorker as its fifth argument; the second is appended to the
        ; generated function name.
        ;
        ; On entry (as consumed below):
        ;   x9  : sectionIndex
        ;   x10 : Module*
        ;   x11 : indirection cell
        NESTED_ENTRY DelayLoad_Helper$suffix
        
        PROLOG_WITH_TRANSITION_BLOCK

        add x0, sp, #__PWTB_TransitionBlock ; pTransitionBlock
        mov x1, x11 ; Indirection cell
        mov x2, x9 ; sectionIndex
        mov x3, x10 ; Module*		
        mov x4, $frameFlags
        bl DynamicHelperWorker
        ; Non-zero result: tail call it (label 0 below).
        ; Zero result: return the value found in the first-argument slot of the transition block.
        cbnz x0, %FT0
        ldr x0, [sp, #__PWTB_ArgumentRegister_FirstArg]
        EPILOG_WITH_TRANSITION_BLOCK_RETURN
0
        mov x12, x0
        EPILOG_WITH_TRANSITION_BLOCK_TAILCALL
        EPILOG_BRANCH_REG  x12
        NESTED_END
    MEND

    ; Instantiations: DelayLoad_Helper, DelayLoad_Helper_Obj, DelayLoad_Helper_ObjObj
    DynamicHelper DynamicHelperFrameFlags_Default
    DynamicHelper DynamicHelperFrameFlags_ObjectArg, _Obj
    DynamicHelper DynamicHelperFrameFlags_ObjectArg | DynamicHelperFrameFlags_ObjectArg2, _ObjObj
#endif // FEATURE_READYTORUN		
        
#ifdef FEATURE_PREJIT
;; ------------------------------------------------------------------
;; void StubDispatchFixupStub(args in regs x0-x7 & stack and possibly retbuff arg in x8, x11:IndirectionCellAndFlags)
;;
;; The stub dispatch thunk which transfers control to StubDispatchFixupWorker.
        NESTED_ENTRY StubDispatchFixupStub

        PROLOG_WITH_TRANSITION_BLOCK

        ; Arguments for StubDispatchFixupWorker; the two low flag bits of x11 are
        ; masked off to obtain the indirection cell address
        add x0, sp, #__PWTB_TransitionBlock ; pTransitionBlock
        and x1, x11, #-4 ; Indirection cell
        mov x2, #0 ; sectionIndex
        mov x3, #0 ; pModule
        bl StubDispatchFixupWorker
        mov x12, x0     ; x12 = address to tail call (x0 is an argument register and gets restored)

        EPILOG_WITH_TRANSITION_BLOCK_TAILCALL
        PATCH_LABEL StubDispatchFixupPatchLabel
        EPILOG_BRANCH_REG  x12

        NESTED_END
#endif

#ifdef FEATURE_COMINTEROP
; ------------------------------------------------------------------
; Function used by COM interop to get floating point return value (since it's not in the same
; register(s) as non-floating point values).
;
; On entry:
;   x0          : size of the FP result (4 or 8 bytes)
;   x1          : pointer to 64-bit buffer to receive result
;
; On exit:
;   buffer pointed to by x1 on entry contains the float or double argument as appropriate
;
    LEAF_ENTRY getFPReturn
    ; The size in x0 is not consulted: the full 64-bit d0 is always stored, which
    ; also covers the 32-bit case because s0 is the low half of d0.
    str d0, [x1]
    ; Explicit return: without it execution would fall through into whatever
    ; code follows this function.
    ret lr
    LEAF_END

; ------------------------------------------------------------------
; Function used by COM interop to set floating point return value (since it's not in the same
; register(s) as non-floating point values).
;
; On entry:
;   x0          : size of the FP result (4 or 8 bytes)
;   x1          : 32-bit or 64-bit FP result
;
; On exit:
;   s0          : float result if x0 == 4
;   d0          : double result if x0 == 8
;
    LEAF_ENTRY setFPReturn
    ; The size in x0 is not consulted: the 64 bits of x1 are moved into d0, and
    ; s0 (the low half of d0) then holds the value for the 32-bit case.
    fmov d0, x1
    ; Explicit return: without it execution would fall through into whatever
    ; code follows this function.
    ret lr
    LEAF_END
#endif

;
; JIT Static access helpers when coreclr host specifies single appdomain flag 
;

; ------------------------------------------------------------------
; void* JIT_GetSharedNonGCStaticBase(SIZE_T moduleDomainID, DWORD dwClassDomainID)

    LEAF_ENTRY JIT_GetSharedNonGCStaticBase_SingleAppDomain
    ; x0 = moduleDomainID, x1 = dwClassDomainID.
    ; Fetch the per-class byte at m_pDataBlob[dwClassDomainID]; bit 0 clear means
    ; the class is not initialized yet and the C++ helper has to take over.
    add x2, x0, #DomainLocalModule__m_pDataBlob
    ldrb w2, [x2, w1]
    tbz w2, #0, CallHelper1

    ; Fast path: x0 is returned exactly as it was passed in
    ret

CallHelper1
    ; Slow path: tail call with x0/x1 still holding the original arguments
    b JIT_GetSharedNonGCStaticBase_Helper
    LEAF_END


; ------------------------------------------------------------------
; void* JIT_GetSharedNonGCStaticBaseNoCtor(SIZE_T moduleDomainID, DWORD dwClassDomainID)

    LEAF_ENTRY JIT_GetSharedNonGCStaticBaseNoCtor_SingleAppDomain
    ; Nothing to compute: x0 (moduleDomainID) is returned unchanged
    ret
    LEAF_END


; ------------------------------------------------------------------
; void* JIT_GetSharedGCStaticBase(SIZE_T moduleDomainID, DWORD dwClassDomainID)

    LEAF_ENTRY JIT_GetSharedGCStaticBase_SingleAppDomain
    ; x0 = moduleDomainID, x1 = dwClassDomainID.
    ; Fetch the per-class byte at m_pDataBlob[dwClassDomainID]; bit 0 clear means
    ; the class is not initialized yet and the C++ helper has to take over.
    add x2, x0, #DomainLocalModule__m_pDataBlob
    ldrb w2, [x2, w1]
    tbz w2, #0, CallHelper2

    ; Fast path: return the m_pGCStatics field
    ldr x0, [x0, #DomainLocalModule__m_pGCStatics]
    ret

CallHelper2
    ; Slow path: tail call with x0/x1 still holding the original arguments
    b JIT_GetSharedGCStaticBase_Helper
    LEAF_END


; ------------------------------------------------------------------
; void* JIT_GetSharedGCStaticBaseNoCtor(SIZE_T moduleDomainID, DWORD dwClassDomainID)

    LEAF_ENTRY JIT_GetSharedGCStaticBaseNoCtor_SingleAppDomain
    ; No initialization check in this variant: return the m_pGCStatics field directly
    ldr x0, [x0, #DomainLocalModule__m_pGCStatics]
    ret
    LEAF_END

; ------------------------------------------------------------------
;__declspec(naked) void F_CALL_CONV JIT_Stelem_Ref(PtrArray* array, unsigned idx, Object* val)
    LEAF_ENTRY JIT_Stelem_Ref
    ; We retain arguments as they were passed and use x0 == array x1 == idx x2 == val
    ;
    ; Exits (all are branches or a plain return, no frame is created here):
    ;   - null store                      : written inline, no write barrier
    ;   - element type matches val type   : JIT_Stelem_DoWrite
    ;   - element type is Object          : JIT_Stelem_DoWrite
    ;   - anything else                   : JIT_Stelem_Ref_NotExactMatch (x12 = element type)
    ;   - null array / index out of range : JIT_InternalThrow

    ; check for null array
    cbz     x0, ThrowNullReferenceException

    ; idx bounds check
    ; (unsigned compare: throws when m_NumComponents <= idx)
    ; NOTE(review): this is a 64-bit load and a 64-bit compare against x1 although idx
    ; is declared unsigned; confirm the field width and that callers zero-extend idx.
    ldr     x3,[x0,#ArrayBase__m_NumComponents]
    cmp     x3, x1
    bls     ThrowIndexOutOfRangeException

    ; fast path to null assignment (doesn't need any write-barriers)
    cbz     x2, AssigningNull

    ; Verify the array-type and val-type matches before writing
    ldr     x12, [x0] ; x12 = array MT
    ldr     x3, [x2] ; x3 = val->GetMethodTable()
    ldr     x12, [x12, #MethodTable__m_ElementType] ; array->GetArrayElementTypeHandle()
    cmp     x3, x12
    beq     JIT_Stelem_DoWrite

    ; Types didnt match but allow writing into an array of objects
    ldr     x3, =$g_pObjectClass
    ldr     x3, [x3]  ; x3 = *g_pObjectClass
    cmp     x3, x12   ; array type matches with Object*
    beq     JIT_Stelem_DoWrite

    ; array type and val type do not exactly match. Raise frame and do detailed match
    b       JIT_Stelem_Ref_NotExactMatch

AssigningNull
    ; Assigning null doesn't need write barrier
    add     x0, x0, x1, LSL #3           ; x0 = x0 + (x1 x 8) = array->m_array[idx]
    str     x2, [x0, #PtrArray__m_Array] ; array->m_array[idx] = val
    ret

ThrowNullReferenceException
    ; Tail call JIT_InternalThrow(NullReferenceException)
    ldr     x0, =CORINFO_NullReferenceException_ASM
    b       JIT_InternalThrow

ThrowIndexOutOfRangeException
    ; Tail call JIT_InternalThrow(IndexOutOfRangeException)
    ldr     x0, =CORINFO_IndexOutOfRangeException_ASM
    b       JIT_InternalThrow

   LEAF_END 

; ------------------------------------------------------------------
; __declspec(naked) void F_CALL_CONV JIT_Stelem_Ref_NotExactMatch(PtrArray* array,
;                                                       unsigned idx, Object* val)
;   x12 = array->GetArrayElementTypeHandle()
;
    NESTED_ENTRY JIT_Stelem_Ref_NotExactMatch
    PROLOG_SAVE_REG_PAIR           fp, lr, #-48!

    ; Spill the three arguments so they survive the calls below.
    ; Frame slots: [sp,#16] = array, [sp,#24] = idx, [sp,#32] = val
    str     x2, [sp, #32]
    stp     x0, x1, [sp, #16]

    ; Cheap check first: ObjIsInstanceOfNoGC(val, element type handle).
    ; A result of TypeHandle_CanCast means the store is allowed as is.
    mov     x0, x2
    mov     x1, x12
    bl      ObjIsInstanceOfNoGC
    cmp     x0, TypeHandle_CanCast
    beq     DoWrite

    ; Otherwise run the full check, ArrayStoreCheck(&val, &array), which works on
    ; the spilled copies through their addresses
NeedFrame
    add     x0, sp, #32             ; x0 = &val
    add     x1, sp, #16             ; x1 = &array

    bl      ArrayStoreCheck

    ; Reload the arguments from the frame, drop the frame and continue with the
    ; actual store in JIT_Stelem_DoWrite
DoWrite
    ldr     x2, [sp, #32]
    ldp     x0, x1, [sp, #16]
    EPILOG_RESTORE_REG_PAIR           fp, lr, #48!
    EPILOG_BRANCH JIT_Stelem_DoWrite
    NESTED_END 

; ------------------------------------------------------------------
; __declspec(naked) void F_CALL_CONV JIT_Stelem_DoWrite(PtrArray* array, unsigned idx, Object* val)
    LEAF_ENTRY  JIT_Stelem_DoWrite

    ; Prepare the two JIT_WriteBarrier inputs:
    ;   x15 = val
    ;   x14 = &array->m_array[idx]
    mov     x15, x2
    add     x14, x0, x1, LSL #3             ; array + idx * 8
    add     x14, x14, #PtrArray__m_Array    ; plus the offset of m_array

    ; Tail call the write barrier, which performs the store itself
    b       JIT_WriteBarrier
    LEAF_END

#ifdef PROFILING_SUPPORTED

; ------------------------------------------------------------------
; void JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle)
   LEAF_ENTRY  JIT_ProfilerEnterLeaveTailcallStub
   ; Intentionally empty: returns immediately without touching any register
   ret
   LEAF_END

 #define PROFILE_ENTER    1
 #define PROFILE_LEAVE    2
 #define PROFILE_TAILCALL 4
 #define SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA 256

; The three flag values above identify which hook is being generated; the value is
; stored in the flags field of the data block built by the macro below. The last
; define is the size in bytes of that block, which is also the size of the stack
; frame allocated by each generated helper.
;
; Block layout as written by the macro (offsets from sp):
;     0 : fp, lr
;    16 : x8 and x0-x7
;    88 : functionId (cleared)
;    96 : q0-q7
;   224 : probeSp
;   232 : profiledSp
;   240 : hiddenArg (cleared)
;   248 : flags (32 bits), followed by 32 cleared bits

; ------------------------------------------------------------------
    MACRO
    GenerateProfileHelper $helper, $flags

    ; The generated function is named after the first macro parameter with "Naked"
    ; appended, and calls the imported function named by that parameter.
    LCLS __HelperNakedFuncName
__HelperNakedFuncName SETS "$helper":CC:"Naked"
    IMPORT $helper

    NESTED_ENTRY $__HelperNakedFuncName
        ; On entry:
        ;   x10 = functionIDOrClientID
        ;   x11 = profiledSp
        ;   x12 = throwable
        ;
        ; On exit:
        ;   Values of x0-x8, q0-q7, fp are preserved.
        ;   Values of other volatile registers are not preserved.
        ;
        ; NOTE(review): x12 is overwritten below without being read, so the
        ; "throwable" entry above looks stale - confirm.

        PROLOG_SAVE_REG_PAIR fp, lr, -SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA! ; Allocate space and save Fp, Pc.
        SAVE_ARGUMENT_REGISTERS sp, 16          ; Save x8 and argument registers (x0-x7).
        str     xzr, [sp, #88]                  ; Clear functionId.
        SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96    ; Save floating-point/SIMD registers (q0-q7).
        add     x12, fp, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA ; Compute probeSp - initial value of Sp on entry to the helper.
        stp     x12, x11, [sp, #224]            ; Save probeSp, profiledSp.
        str     xzr, [sp, #240]                 ; Clear hiddenArg.
        mov     w12, $flags
        stp     w12, wzr, [sp, #248]            ; Save flags and clear unused field.

        ; Call the imported helper with (x0 = functionIDOrClientID, x1 = address of the block built above)
        mov     x0, x10
        mov     x1, sp
        bl $helper

        RESTORE_ARGUMENT_REGISTERS sp, 16       ; Restore x8 and argument registers.
        RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96 ; Restore floating-point/SIMD registers.

        EPILOG_RESTORE_REG_PAIR fp, lr, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA!
        EPILOG_RETURN

    NESTED_END
0

    MEND

    ; Instantiations: ProfileEnterNaked, ProfileLeaveNaked, ProfileTailcallNaked
    GenerateProfileHelper ProfileEnter, PROFILE_ENTER
    GenerateProfileHelper ProfileLeave, PROFILE_LEAVE
    GenerateProfileHelper ProfileTailcall, PROFILE_TAILCALL

#endif

; Must be at very end of file
    END