//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
//
//
// File: VirtualCallStub.h
//
//
// See code:VirtualCallStubManager for details
//
// ============================================================================
#ifndef _VIRTUAL_CALL_STUB_H
#define _VIRTUAL_CALL_STUB_H
#define CHAIN_LOOKUP
#if defined(_TARGET_X86_)
// If this is uncommented, leaves a file "StubLog_<pid>.log" with statistics on the behavior
// of stub-based interface dispatch.
//#define STUB_LOGGING
#endif
#include "stubmgr.h"
/////////////////////////////////////////////////////////////////////////////////////
// Forward class declarations
class FastTable;
class BucketTable;
class Entry;
class Prober;
class VirtualCallStubManager;
class VirtualCallStubManagerManager;
struct LookupHolder;
struct DispatchHolder;
struct ResolveHolder;
/////////////////////////////////////////////////////////////////////////////////////
// Forward function declarations
extern "C" void InContextTPQuickDispatchAsmStub();
extern "C" PCODE STDCALL StubDispatchFixupWorker(TransitionBlock * pTransitionBlock,
TADDR siteAddrForRegisterIndirect,
DWORD sectionIndex,
Module * pModule);
extern "C" PCODE STDCALL VSD_ResolveWorker(TransitionBlock * pTransitionBlock,
TADDR siteAddrForRegisterIndirect,
size_t token
#ifndef _TARGET_X86_
, UINT_PTR flags
#endif
);
#ifdef FEATURE_REMOTING
// This is used by TransparentProxyWorkerStub to take a stub address (token) and a
// MethodTable and return the target. It will look in the cache first, and if not found
// will call the resolver and then put the result into the cache.
extern "C" PCODE STDCALL VSD_GetTargetForTPWorkerQuick(TransparentProxyObject * orTP, size_t token);
extern "C" PCODE STDCALL VSD_GetTargetForTPWorker(TransitionBlock * pTransitionBlock, size_t token);
#endif
/////////////////////////////////////////////////////////////////////////////////////
#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
typedef INT32 DISPL;
#endif
/////////////////////////////////////////////////////////////////////////////////////
// Represents the struct that is added to the resolve cache
// NOTE: If you change the layout of this struct, you'll need to update various
// ASM helpers in VirtualCallStubCpu that rely on offsets of members.
//
struct ResolveCacheElem
{
void *pMT;
size_t token; // DispatchToken
void *target;
// These are used for chaining
ResolveCacheElem *pNext;
ResolveCacheElem *Next()
{ LIMITED_METHOD_CONTRACT; return VolatileLoad(&pNext); }
#ifdef _DEBUG
UINT16 debug_hash;
UINT16 debug_index;
#endif // _DEBUG
BOOL Equals(size_t token, void *pMT)
{ LIMITED_METHOD_CONTRACT; return (this->pMT == pMT && this->token == token); }
BOOL Equals(ResolveCacheElem *pElem)
{ WRAPPER_NO_CONTRACT; return Equals(pElem->token, pElem->pMT); }
};
enum
{
e_resolveCacheElem_sizeof_mt = sizeof(void *),
e_resolveCacheElem_sizeof_token = sizeof(size_t),
e_resolveCacheElem_sizeof_target = sizeof(void *),
e_resolveCacheElem_sizeof_next = sizeof(ResolveCacheElem *),
e_resolveCacheElem_offset_mt = 0,
e_resolveCacheElem_offset_token = e_resolveCacheElem_offset_mt + e_resolveCacheElem_sizeof_mt,
e_resolveCacheElem_offset_target = e_resolveCacheElem_offset_token + e_resolveCacheElem_sizeof_token,
e_resolveCacheElem_offset_next = e_resolveCacheElem_offset_target + e_resolveCacheElem_sizeof_target,
};
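// A minimal layout-check sketch (not part of the original header): the enum above is meant to
// mirror the field order of ResolveCacheElem, so checks of the following shape should hold:
//
//   static_assert(offsetof(ResolveCacheElem, pMT)    == e_resolveCacheElem_offset_mt,     "layout");
//   static_assert(offsetof(ResolveCacheElem, token)  == e_resolveCacheElem_offset_token,  "layout");
//   static_assert(offsetof(ResolveCacheElem, target) == e_resolveCacheElem_offset_target, "layout");
//   static_assert(offsetof(ResolveCacheElem, pNext)  == e_resolveCacheElem_offset_next,   "layout");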
/////////////////////////////////////////////////////////////////////////////////////
// A utility class to help manipulate a call site
struct StubCallSite
{
friend class VirtualCallStubManager;
private:
// On x86 there are four possible kinds of callsites when you take into account all features
// Relative: direct call, e.g. "call addr". Not used currently.
// RelativeIndirect (JmpRel): indirect call through a relative address, e.g. "call [addr]"
// RegisterIndirect: indirect call through a register, e.g. "call [eax]"
// DelegateCallSite: anything else, tail called through a register by shuffle thunk, e.g. "jmp [eax]"
//
// On all other platforms we always use an indirect call through an indirection cell
// In these cases all calls are made by the platform equivalent of "call [addr]".
//
// DelegateCallSites are particular in that they can come in a variety of forms:
// a direct delegate call has a sequence defined by the jit, but multicast or secure delegates
// are defined in a stub and have a different shape
//
PTR_PCODE m_siteAddr; // Stores the address of an indirection cell
PCODE m_returnAddr;
public:
#if defined(_TARGET_X86_)
StubCallSite(TADDR siteAddrForRegisterIndirect, PCODE returnAddr);
PCODE GetCallerAddress();
#else // !defined(_TARGET_X86_)
// On platforms where we always use an indirection cell things
// are much simpler - the siteAddr always stores a pointer to a
// value that in turn points to the indirection cell.
StubCallSite(TADDR siteAddr, PCODE returnAddr)
{ LIMITED_METHOD_CONTRACT; m_siteAddr = dac_cast<PTR_PCODE>(siteAddr); m_returnAddr = returnAddr; }
PCODE GetCallerAddress() { LIMITED_METHOD_CONTRACT; return m_returnAddr; }
#endif // !defined(_TARGET_X86_)
PCODE GetSiteTarget() { WRAPPER_NO_CONTRACT; return *(GetIndirectCell()); }
void SetSiteTarget(PCODE newTarget);
PTR_PCODE GetIndirectCell() { LIMITED_METHOD_CONTRACT; return dac_cast<PTR_PCODE>(m_siteAddr); }
PTR_PCODE * GetIndirectCellAddress() { LIMITED_METHOD_CONTRACT; return &m_siteAddr; }
PCODE GetReturnAddress() { LIMITED_METHOD_CONTRACT; return m_returnAddr; }
};
#ifdef FEATURE_PREJIT
extern "C" void StubDispatchFixupStub(); // for lazy fixup of ngen call sites
#endif
// These are the assembly language entry points that the stubs use when they want to go into the EE
extern "C" void ResolveWorkerAsmStub(); // resolve a token and transfer control to that method
extern "C" void ResolveWorkerChainLookupAsmStub(); // for chaining of entries in the cache
#ifdef _TARGET_X86_
extern "C" void BackPatchWorkerAsmStub(); // backpatch a call site to point to a different stub
#endif // _TARGET_X86_
typedef VPTR(class VirtualCallStubManager) PTR_VirtualCallStubManager;
// VirtualCallStubManager is the heart of the stub dispatch logic. See the book of the runtime entry
//
// file:../../doc/BookOfTheRuntime/ClassLoader/VirtualStubDispatchDesign.doc
//
// The basic idea is that a call to an interface (it could also be used for virtual calls in general, but we
// do not do this) is simply the code
//
// call [DispatchCell]
//
// Where we make sure 'DispatchCell' points at stubs that will do the right thing. DispatchCell is writable
// so we can update the code over time. There are three basic types of stubs that the dispatch cell can point
// to.
// * Lookup: The initial stub that has no 'fast path' and simply pushes an ID for the interface being called
// and calls into the runtime at code:VirtualCallStubManager.ResolveWorkerStatic.
// * Dispatch: Lookup stubs are patched to this stub which has a fast path that checks for a particular
// Method Table and if that fails jumps to code that
// * Decrements a 'missCount' (starts out as code:STUB_MISS_COUNT_VALUE). If this count goes to zero
// code:VirtualCallStubManager.BackPatchWorkerStatic is called, morphs it into a resolve stub
// (however, since this decrementing logic is SHARED among all dispatch stubs, it may take
// multiples of code:STUB_MISS_COUNT_VALUE if multiple call sites are actively polymorphic; this
// seems unlikely).
// * Calls a resolve stub (whenever a dispatch stub is created, it always has a corresponding resolve
// stub, but the resolve stubs are shared among many dispatch stubs).
// * Resolve: see code:ResolveStub. This looks up the method table in a process-wide cache (see
// code:ResolveCacheElem) and, if found, jumps to the target. This code path is about 17 instructions long (so
// pretty fast, but certainly much slower than a normal call). If the method table is not found in
// the cache, it calls into the runtime code:VirtualCallStubManager.ResolveWorkerStatic, which
// populates it.
// So the general progression is: call sites' cells
// * start out life pointing to a lookup stub
// * On first call they get updated into a dispatch stub. When this misses, it calls a resolve stub,
// which populates the resolve stub's cache, but does not update the call site's cell (thus it is still
// pointing at the dispatch stub).
// * After code:STUB_MISS_COUNT_VALUE misses, we update the call site's cell to point directly at the
// resolve stub (thus avoiding the overhead of the quick check that always seems to be failing and
// the miss count update).
//
// QUESTION: What are the lifetimes of the various stubs and hash table entries?
//
// QUESTION: There does not seem to be any logic that will change a call site's cell once it becomes a
// Resolve stub. Thus once a particular call site becomes a Resolve stub we live with the Resolve stub's
// (in)efficiency forever.
//
// see code:#StubDispatchNotes for more
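//
// Illustrative sketch (pseudo-code, not taken from the sources): the three stub kinds behave
// roughly as follows, where the call site indirects through the writable DispatchCell:
//
//   PCODE LookupStub(obj)    { return ResolveWorkerStatic(token, obj); }              // no fast path
//
//   PCODE DispatchStub(obj)  { return (obj->GetMethodTable() == expectedMT)
//                                       ? implTarget                                  // monomorphic hit
//                                       : failTarget; }                               // counted miss -> resolve stub
//
//   PCODE ResolveStub(obj)   { elem = cache[Hash(obj->GetMethodTable(), token)];      // process-wide cache
//                              return (elem matches) ? elem->target
//                                                    : ResolveWorkerStatic(token, obj); }
//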
class VirtualCallStubManager : public StubManager
{
friend class ClrDataAccess;
friend class VirtualCallStubManagerManager;
friend class VirtualCallStubManagerIterator;
VPTR_VTABLE_CLASS(VirtualCallStubManager, StubManager)
public:
#ifdef _DEBUG
virtual const char * DbgGetName() { LIMITED_METHOD_CONTRACT; return "VirtualCallStubManager"; }
#endif
// The reason for our existence, return a callstub for type id and slot number
// where type id = 0 for the class contract (i.e. a virtual call), and type id > 0 for an
// interface invoke where the id indicates which interface it is.
//
// The function is idempotent, i.e.
// you'll get the same callstub twice if you call it with identical inputs.
PCODE GetCallStub(TypeHandle ownerType, MethodDesc *pMD);
PCODE GetCallStub(TypeHandle ownerType, DWORD slot);
// Generate a fresh indirection cell.
BYTE* GenerateStubIndirection(PCODE stub, BOOL fUseRecycledCell = FALSE);
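// An illustrative usage sketch (hypothetical caller, not part of this header): given the manager
// for the owning loader allocator, a call site is typically wired up roughly as
//
//   PCODE stub  = pMgr->GetCallStub(ownerType, pMD);     // idempotent for identical inputs
//   BYTE *pCell = pMgr->GenerateStubIndirection(stub);   // the call site then performs "call [pCell]"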
// Set up static data structures - called during EEStartup
static void InitStatic();
static void UninitStatic();
// Per instance initialization - called during AppDomain::Init and ::Uninit and for collectible loader allocators
void Init(BaseDomain* pDomain, LoaderAllocator *pLoaderAllocator);
void Uninit();
//@TODO: the logging should be tied into the VM's normal logging mechanisms,
//@TODO: for now we just always write a short log file called "StubLog_<pid>.log"
static void StartupLogging();
static void LoggingDump();
static void FinishLogging();
static void ResetCache();
// Reclaim/rearrange any structures that can only be done during a gc sync point.
// This is the mechanism we are using to avoid synchronization of a lot of our
// cache and hash table accesses. We are requiring that during a gc sync point we are not
// executing any stub code at all, hence at this time we are serialized on a single thread (gc)
// and no other thread is accessing the data structures.
static void ReclaimAll();
void Reclaim();
#ifndef DACCESS_COMPILE
VirtualCallStubManager()
: StubManager(),
lookup_rangeList(),
resolve_rangeList(),
dispatch_rangeList(),
cache_entry_rangeList(),
parentDomain(NULL),
isCollectible(false),
m_initialReservedMemForHeaps(NULL),
m_FreeIndCellList(NULL),
m_RecycledIndCellList(NULL),
indcell_heap(NULL),
cache_entry_heap(NULL),
lookup_heap(NULL),
dispatch_heap(NULL),
resolve_heap(NULL),
#ifdef _TARGET_AMD64_
m_fShouldAllocateLongJumpDispatchStubs(FALSE),
#endif
lookups(NULL),
cache_entries(NULL),
dispatchers(NULL),
resolvers(NULL),
m_counters(NULL),
m_cur_counter_block(NULL),
m_cur_counter_block_for_reclaim(NULL),
m_cur_counter_block_for_reclaim_index(NULL),
m_pNext(NULL)
{
LIMITED_METHOD_CONTRACT;
ZeroMemory(&stats, sizeof(stats));
}
~VirtualCallStubManager();
#endif // !DACCESS_COMPILE
enum StubKind {
SK_UNKNOWN,
SK_LOOKUP, // Lookup Stubs are SLOW stubs that simply call into the runtime to do all work.
SK_DISPATCH, // Dispatch Stubs have a fast check for one type, otherwise jump to the runtime. Works for monomorphic sites.
SK_RESOLVE, // Resolve Stubs do a hash lookup before falling back to the runtime. Works for polymorphic sites.
SK_BREAKPOINT
};
// peek at the assembly code and predict which kind of a stub we have
StubKind predictStubKind(PCODE stubStartAddress);
/* know thine own stubs. It is possible that when multiple
virtualcallstub managers are built that these may need to become
non-static, and the callers modified accordingly */
StubKind getStubKind(PCODE stubStartAddress)
{
WRAPPER_NO_CONTRACT;
SUPPORTS_DAC;
// This method can be called with stubStartAddress==NULL, e.g. when handling null reference exceptions
// caused by IP=0. Early out for this case to avoid confusing handled access violations inside predictStubKind.
if (PCODEToPINSTR(stubStartAddress) == NULL)
return SK_UNKNOWN;
// Rather than calling IsInRange(stubStartAddress) for each possible stub kind
// we can peek at the assembly code and predict which kind of a stub we have
StubKind predictedKind = predictStubKind(stubStartAddress);
if (predictedKind == SK_DISPATCH)
{
if (isDispatchingStub(stubStartAddress))
return SK_DISPATCH;
}
else if (predictedKind == SK_LOOKUP)
{
if (isLookupStub(stubStartAddress))
return SK_LOOKUP;
}
else if (predictedKind == SK_RESOLVE)
{
if (isResolvingStub(stubStartAddress))
return SK_RESOLVE;
}
// This is the slow case. If the predict returned SK_UNKNOWN, SK_BREAKPOINT,
// or the predict was found to be incorrect when checked against the RangeLists
// (isXXXStub), then we'll check each stub heap in sequence.
if (isDispatchingStub(stubStartAddress))
return SK_DISPATCH;
else if (isLookupStub(stubStartAddress))
return SK_LOOKUP;
else if (isResolvingStub(stubStartAddress))
return SK_RESOLVE;
return SK_UNKNOWN;
}
inline BOOL isStub(PCODE stubStartAddress)
{
WRAPPER_NO_CONTRACT;
SUPPORTS_DAC;
return (getStubKind(stubStartAddress) != SK_UNKNOWN);
}
BOOL isDispatchingStub(PCODE stubStartAddress)
{
WRAPPER_NO_CONTRACT;
SUPPORTS_DAC;
return GetDispatchRangeList()->IsInRange(stubStartAddress);
}
BOOL isResolvingStub(PCODE stubStartAddress)
{
WRAPPER_NO_CONTRACT;
SUPPORTS_DAC;
return GetResolveRangeList()->IsInRange(stubStartAddress);
}
BOOL isLookupStub(PCODE stubStartAddress)
{
WRAPPER_NO_CONTRACT;
SUPPORTS_DAC;
return GetLookupRangeList()->IsInRange(stubStartAddress);
}
static BOOL isDispatchingStubStatic(PCODE addr)
{
WRAPPER_NO_CONTRACT;
StubKind stubKind;
FindStubManager(addr, &stubKind);
return stubKind == SK_DISPATCH;
}
static BOOL isResolvingStubStatic(PCODE addr)
{
WRAPPER_NO_CONTRACT;
StubKind stubKind;
FindStubManager(addr, &stubKind);
return stubKind == SK_RESOLVE;
}
static BOOL isLookupStubStatic(PCODE addr)
{
WRAPPER_NO_CONTRACT;
StubKind stubKind;
FindStubManager(addr, &stubKind);
return stubKind == SK_LOOKUP;
}
//use range lists to track the chunks of memory that are part of each heap
LockedRangeList lookup_rangeList;
LockedRangeList resolve_rangeList;
LockedRangeList dispatch_rangeList;
LockedRangeList cache_entry_rangeList;
// Get dac-ized pointers to rangelist.
RangeList* GetLookupRangeList()
{
SUPPORTS_DAC;
TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, lookup_rangeList);
return PTR_RangeList(addr);
}
RangeList* GetResolveRangeList()
{
SUPPORTS_DAC;
TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, resolve_rangeList);
return PTR_RangeList(addr);
}
RangeList* GetDispatchRangeList()
{
SUPPORTS_DAC;
TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, dispatch_rangeList);
return PTR_RangeList(addr);
}
RangeList* GetCacheEntryRangeList()
{
SUPPORTS_DAC;
TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, cache_entry_rangeList);
return PTR_RangeList(addr);
}
private:
//allocate and initialize a stub of the desired kind
DispatchHolder *GenerateDispatchStub(PCODE addrOfCode,
PCODE addrOfFail,
void *pMTExpected,
size_t dispatchToken);
#ifdef _TARGET_AMD64_
// Used to allocate a long jump dispatch stub. See comment around
// m_fShouldAllocateLongJumpDispatchStubs for explanation.
DispatchHolder *GenerateDispatchStubLong(PCODE addrOfCode,
PCODE addrOfFail,
void *pMTExpected,
size_t dispatchToken);
#endif
ResolveHolder *GenerateResolveStub(PCODE addrOfResolver,
PCODE addrOfPatcher,
size_t dispatchToken);
LookupHolder *GenerateLookupStub(PCODE addrOfResolver,
size_t dispatchToken);
template <typename STUB_HOLDER>
void AddToCollectibleVSDRangeList(STUB_HOLDER *holder)
{
if (isCollectible)
{
parentDomain->GetCollectibleVSDRanges()->AddRange(reinterpret_cast<BYTE *>(holder->stub()),
reinterpret_cast<BYTE *>(holder->stub()) + holder->stub()->size(),
this);
}
}
// The resolve cache is static across all AppDomains
ResolveCacheElem *GenerateResolveCacheElem(void *addrOfCode,
void *pMTExpected,
size_t token);
ResolveCacheElem *GetResolveCacheElem(void *pMT,
size_t token,
void *target);
//Given a dispatch token, an object and a method table, determine the
//target address to go to. The return value (BOOL) states whether this address
//is cacheable or not.
static BOOL Resolver(MethodTable * pMT,
DispatchToken token,
OBJECTREF * protectedObj,
PCODE * ppTarget);
// This can be used to find a target without needing the ability to throw
static BOOL TraceResolver(Object *pObj, DispatchToken token, TraceDestination *trace);
public:
// Return the MethodDesc corresponding to this token.
static MethodDesc *GetRepresentativeMethodDescFromToken(DispatchToken token, MethodTable *pMT);
static MethodDesc *GetInterfaceMethodDescFromToken(DispatchToken token);
static MethodTable *GetTypeFromToken(DispatchToken token);
//This is used to get the token out of a stub
static size_t GetTokenFromStub(PCODE stub);
//This is used to get the token out of a stub when we already know the stub manager and stub kind
static size_t GetTokenFromStubQuick(VirtualCallStubManager * pMgr, PCODE stub, StubKind kind);
// General utility functions
// Quick lookup in the cache. NOTHROW, GC_NOTRIGGER
static PCODE CacheLookup(size_t token, UINT16 tokenHash, MethodTable *pMT);
// Full exhaustive lookup. THROWS, GC_TRIGGERS
static PCODE GetTarget(DispatchToken token, MethodTable *pMT);
private:
// Given a dispatch token, return true if the token represents an interface, false if just a slot.
static BOOL IsInterfaceToken(DispatchToken token);
// Given a dispatch token, return true if the token represents a slot on the target.
static BOOL IsClassToken(DispatchToken token);
#ifdef CHAIN_LOOKUP
static ResolveCacheElem* __fastcall PromoteChainEntry(ResolveCacheElem *pElem);
#endif
// Flags used by the non-x86 versions of VSD_ResolveWorker
#define SDF_ResolveBackPatch (0x01)
#define SDF_ResolvePromoteChain (0x02)
#define SDF_ResolveFlags (0x03)
// This method needs to call the instance methods.
friend PCODE VSD_ResolveWorker(TransitionBlock * pTransitionBlock,
TADDR siteAddrForRegisterIndirect,
size_t token
#ifndef _TARGET_X86_
, UINT_PTR flags
#endif
);
//These are the entrypoints that the stubs actually end up calling via the
// xxxAsmStub methods above
static void STDCALL BackPatchWorkerStatic(PCODE returnAddr, TADDR siteAddrForRegisterIndirect);
public:
PCODE ResolveWorker(StubCallSite* pCallSite, OBJECTREF *protectedObj, DispatchToken token, StubKind stubKind);
void BackPatchWorker(StubCallSite* pCallSite);
//Change the callsite to point to stub
void BackPatchSite(StubCallSite* pCallSite, PCODE stub);
public:
/* the following two public functions are to support tracing or stepping thru
stubs via the debugger. */
virtual BOOL CheckIsStub_Internal(PCODE stubStartAddress);
virtual BOOL TraceManager(Thread *thread,
TraceDestination *trace,
T_CONTEXT *pContext,
BYTE **pRetAddr);
size_t GetSize()
{
LIMITED_METHOD_CONTRACT;
size_t retval=0;
if(indcell_heap)
retval+=indcell_heap->GetSize();
if(cache_entry_heap)
retval+=cache_entry_heap->GetSize();
if(lookup_heap)
retval+=lookup_heap->GetSize();
if(dispatch_heap)
retval+=dispatch_heap->GetSize();
if(resolve_heap)
retval+=resolve_heap->GetSize();
return retval;
};
private:
/* the following two private functions are to support tracing or stepping thru
stubs via the debugger. */
virtual BOOL DoTraceStub(PCODE stubStartAddress,
TraceDestination *trace);
private:
// The parent domain of this manager
PTR_BaseDomain parentDomain;
bool isCollectible;
BYTE * m_initialReservedMemForHeaps;
static const UINT32 INDCELLS_PER_BLOCK = 32; // 32 indirection cells per block.
CrstExplicitInit m_indCellLock;
// List of free indirection cells. The cells were directly allocated from the loader heap
// (code:VirtualCallStubManager::GenerateStubIndirection)
BYTE * m_FreeIndCellList;
// List of recycled indirection cells. The cells were recycled from finalized dynamic methods
// (code:LCGMethodResolver::RecycleIndCells).
BYTE * m_RecycledIndCellList;
#ifndef DACCESS_COMPILE
// This method returns a free cell from m_FreeIndCellList. It returns NULL if the list is empty.
BYTE * GetOneFreeIndCell()
{
WRAPPER_NO_CONTRACT;
return GetOneIndCell(&m_FreeIndCellList);
}
// This method returns a recycled cell from m_RecycledIndCellList. It returns NULL if the list is empty.
BYTE * GetOneRecycledIndCell()
{
WRAPPER_NO_CONTRACT;
return GetOneIndCell(&m_RecycledIndCellList);
}
// This method returns a cell from ppList. It returns NULL if the list is empty.
BYTE * GetOneIndCell(BYTE ** ppList)
{
CONTRACT (BYTE*) {
NOTHROW;
GC_NOTRIGGER;
MODE_ANY;
PRECONDITION(CheckPointer(ppList));
PRECONDITION(m_indCellLock.OwnedByCurrentThread());
} CONTRACT_END;
BYTE * temp = *ppList;
if (temp)
{
BYTE * pNext = *((BYTE **)temp);
*ppList = pNext;
RETURN temp;
}
RETURN NULL;
}
// insert a linked list of indirection cells at the beginning of m_FreeIndCellList
void InsertIntoFreeIndCellList(BYTE * head, BYTE * tail)
{
WRAPPER_NO_CONTRACT;
InsertIntoIndCellList(&m_FreeIndCellList, head, tail);
}
// insert a linked list of indirection cells at the beginning of ppList
void InsertIntoIndCellList(BYTE ** ppList, BYTE * head, BYTE * tail)
{
CONTRACTL {
NOTHROW;
GC_NOTRIGGER;
PRECONDITION(CheckPointer(ppList));
PRECONDITION(CheckPointer(head));
PRECONDITION(CheckPointer(tail));
PRECONDITION(m_indCellLock.OwnedByCurrentThread());
} CONTRACTL_END;
BYTE * temphead = *ppList;
*((BYTE**)tail) = temphead;
*ppList = head;
}
#endif // !DACCESS_COMPILE
PTR_LoaderHeap indcell_heap; // indirection cells go here
PTR_LoaderHeap cache_entry_heap; // resolve cache elem entries go here
PTR_LoaderHeap lookup_heap; // lookup stubs go here
PTR_LoaderHeap dispatch_heap; // dispatch stubs go here
PTR_LoaderHeap resolve_heap; // resolve stubs go here
#ifdef _TARGET_AMD64_
// When we layout the stub heaps, we put them close together in a sequential order
// so that we maximize performance with respect to branch predictions. On AMD64,
// dispatch stubs use a rel32 jump on failure to the resolve stub. This works for
// a while because of the ordering, but as soon as we have to start allocating more
// memory for either the dispatch or resolve heaps we have a chance that we'll be
// further away than a rel32 jump can reach, because we're in a 64-bit address
// space. As such, this flag will indicate when we allocate the first dispatch stub
// that cannot reach a resolve stub, and when this happens we'll switch over to
// allocating the larger version of the dispatch stub which contains an abs64 jump.
//@TODO: This is a bit of a workaround, but the limitations of LoaderHeap require that we
//@TODO: take this approach. Hopefully in Orcas we'll have a chance to rewrite LoaderHeap.
BOOL m_fShouldAllocateLongJumpDispatchStubs; // Defaults to FALSE.
#endif
BucketTable * lookups; // hash table of lookups keyed by tokens
BucketTable * cache_entries; // hash table of dispatch token/target structs for dispatch cache
BucketTable * dispatchers; // hash table of dispatching stubs keyed by tokens/actualtype
BucketTable * resolvers; // hash table of resolvers keyed by tokens/resolverstub
// This structure is used to keep track of the fail counters.
// We only need one fail counter per ResolveStub,
// and most programs use less than 250 ResolveStubs
// We allocate these on the main heap using "new counter block"
struct counter_block
{
static const UINT32 MAX_COUNTER_ENTRIES = 256-2; // 254 counters should be enough for most cases.
counter_block * next; // the next block
UINT32 used; // the index of the next free entry
INT32 block[MAX_COUNTER_ENTRIES]; // the counters
};
counter_block *m_counters; // linked list of counter blocks of failure counters
counter_block *m_cur_counter_block; // current block for updating counts
counter_block *m_cur_counter_block_for_reclaim; // current block for updating
UINT32 m_cur_counter_block_for_reclaim_index; // index into the current block for updating
// Used to keep track of all the VCSManager objects in the system.
PTR_VirtualCallStubManager m_pNext; // Linked list pointer
public:
// Given a stub address, find the VCSManager that owns it.
static VirtualCallStubManager *FindStubManager(PCODE addr,
StubKind* wbStubKind = NULL);
#ifndef DACCESS_COMPILE
// insert a linked list of indirection cells at the beginning of m_RecycledIndCellList
void InsertIntoRecycledIndCellList_Locked(BYTE * head, BYTE * tail)
{
CONTRACTL {
NOTHROW;
GC_TRIGGERS;
MODE_ANY;
} CONTRACTL_END;
CrstHolder lh(&m_indCellLock);
InsertIntoIndCellList(&m_RecycledIndCellList, head, tail);
}
#endif // !DACCESS_COMPILE
// These are the counters for keeping statistics
struct
{
UINT32 site_counter; //# of call sites
UINT32 stub_lookup_counter; //# of lookup stubs
UINT32 stub_poly_counter; //# of resolve stubs
UINT32 stub_mono_counter; //# of dispatch stubs
UINT32 site_write; //# of call site backpatch writes
UINT32 site_write_poly; //# of call site backpatch writes to point to resolve stubs
UINT32 site_write_mono; //# of call site backpatch writes to point to dispatch stubs
UINT32 worker_call; //# of calls into ResolveWorker
UINT32 worker_call_no_patch; //# of times call_worker resulted in no patch
UINT32 worker_collide_to_mono; //# of times we converted a poly stub to a mono stub instead of writing the cache entry
UINT32 stub_space; //# of bytes of stubs
UINT32 cache_entry_counter; //# of cache structs
UINT32 cache_entry_space; //# of bytes used by cache lookup structs
} stats;
void LogStats();
#ifdef DACCESS_COMPILE
protected:
virtual void DoEnumMemoryRegions(CLRDataEnumMemoryFlags flags);
virtual LPCWSTR GetStubManagerName(PCODE addr)
{
WRAPPER_NO_CONTRACT;
CONSISTENCY_CHECK(isStub(addr));
if (isLookupStub(addr))
{
return W("VSD_LookupStub");
}
else if (isDispatchingStub(addr))
{
return W("VSD_DispatchStub");
}
else
{
CONSISTENCY_CHECK(isResolvingStub(addr));
return W("VSD_ResolveStub");
}
}
#endif
};
/********************************************************************************************************
********************************************************************************************************/
typedef VPTR(class VirtualCallStubManagerManager) PTR_VirtualCallStubManagerManager;
class VirtualCallStubManagerIterator;
class VirtualCallStubManagerManager : public StubManager
{
VPTR_VTABLE_CLASS(VirtualCallStubManagerManager, StubManager)
friend class StubManager;
friend class VirtualCallStubManager;
friend class VirtualCallStubManagerIterator;
friend class StubManagerIterator;
public:
virtual BOOL TraceManager(Thread *thread, TraceDestination *trace,
T_CONTEXT *pContext, BYTE **pRetAddr);
virtual BOOL CheckIsStub_Internal(PCODE stubStartAddress);
virtual BOOL DoTraceStub(PCODE stubStartAddress, TraceDestination *trace);
static MethodDesc *Entry2MethodDesc(PCODE stubStartAddress, MethodTable *pMT);
#ifdef DACCESS_COMPILE
virtual void DoEnumMemoryRegions(CLRDataEnumMemoryFlags flags);
virtual LPCWSTR GetStubManagerName(PCODE addr)
{ WRAPPER_NO_CONTRACT; return FindVirtualCallStubManager(addr)->GetStubManagerName(addr); }
#endif
private:
// Used to keep track of all the VCSManager objects in the system.
PTR_VirtualCallStubManager m_pManagers; // Head of the linked list
#ifndef DACCESS_COMPILE
// Ctor. This is only used by StaticInit.
VirtualCallStubManagerManager();
#endif
// A cache element to quickly check the last matched manager.
Volatile<VirtualCallStubManager*> m_pCacheElem;
// RW lock for reading entries and removing them.
SimpleRWLock m_RWLock;
// This will look through all the managers in an intelligent fashion to
// find the manager that owns the address.
VirtualCallStubManager *FindVirtualCallStubManager(PCODE stubAddress);
protected:
// Add a VCSManager to the linked list.
void AddStubManager(VirtualCallStubManager *pMgr);
// Remove a VCSManager from the linked list.
void RemoveStubManager(VirtualCallStubManager *pMgr);
VirtualCallStubManager *FirstManager()
{ WRAPPER_NO_CONTRACT; return m_pManagers; }
#ifndef DACCESS_COMPILE
static void InitStatic();
#endif
public:
SPTR_DECL(VirtualCallStubManagerManager, g_pManager);
static VirtualCallStubManagerManager *GlobalManager()
{ LIMITED_METHOD_DAC_CONTRACT; CONSISTENCY_CHECK(CheckPointer(g_pManager)); return g_pManager; }
VirtualCallStubManagerIterator IterateVirtualCallStubManagers();
#ifdef _DEBUG
// Debug helper to help identify stub-managers.
virtual const char * DbgGetName() { LIMITED_METHOD_CONTRACT; return "VirtualCallStubManagerManager"; }
#endif
};
/********************************************************************************************************
********************************************************************************************************/
class VirtualCallStubManagerIterator
{
friend class VirtualCallStubManagerManager;
public:
BOOL Next();
VirtualCallStubManager *Current();
// Copy ctor
inline VirtualCallStubManagerIterator(const VirtualCallStubManagerIterator &it);
protected:
inline VirtualCallStubManagerIterator(VirtualCallStubManagerManager *pMgr);
BOOL m_fIsStart;
VirtualCallStubManager *m_pCurMgr;
};
/////////////////////////////////////////////////////////////////////////////////////////////
// Ctor
inline VirtualCallStubManagerIterator::VirtualCallStubManagerIterator(VirtualCallStubManagerManager *pMgr)
: m_fIsStart(TRUE), m_pCurMgr(pMgr->m_pManagers)
{
LIMITED_METHOD_DAC_CONTRACT;
CONSISTENCY_CHECK(CheckPointer(pMgr));
}
/////////////////////////////////////////////////////////////////////////////////////////////
// Copy ctor
inline VirtualCallStubManagerIterator::VirtualCallStubManagerIterator(const VirtualCallStubManagerIterator &it)
: m_fIsStart(it.m_fIsStart), m_pCurMgr(it.m_pCurMgr)
{
LIMITED_METHOD_DAC_CONTRACT;
}
/********************************************************************************************************
#StubDispatchNotes
A note on approach. The cache and hash tables used by the stub and lookup mechanism
are designed with an eye to minimizing interlocking and/or syncing and/or locking operations.
They are intended to run in a highly concurrent environment. Since there is no magic,
some tradeoffs and some implementation constraints are required. The basic notion
is that if all reads and writes are atomic and if all functions and operations operate
correctly in the face of commutative reorderings of the visibility of all reads and writes
across threads, then we don't have to interlock, sync, or serialize. Our approximation of
this is:
1. All reads and all writes to tables must be atomic. This effectively limits the actual entry
size in a table to be a pointer or a pointer sized thing.
2. All functions, like comparisons for equality or computation of hash values, must function
correctly in the face of concurrent updating of the underlying table. This is accomplished
by making the underlying structures/entries effectively immutable, if concurrency is in any way possible.
By effectively immutable, we mean that the stub or token structure is either immutable or that
if it is ever written, all possibly concurrent writes are attempting to write the same value (atomically),
or that the competing (atomic) values do not affect correctness, and that the function operates correctly whether
or not any of the writes have taken place (are visible yet). The constraint we maintain is that all competing
updates (and their visibility or lack thereof) do not alter the correctness of the program.
3. All tables are inexact. The counts they hold (e.g. number of contained entries) may be inaccurate,
but that inaccuracy cannot affect their correctness. Table modifications, such as insertion of
a new entry, may not succeed, but such failures cannot affect correctness. This implies that just
because a stub/entry is not present in a table, e.g. has been removed, that does not mean that
it is not in use. It also implies that internal table structures, such as discarded hash table buckets,
cannot be freely recycled since another concurrent thread may still be walking through them.
4. Occasionally it is necessary to pick up the pieces that have been dropped on the floor,
so to speak, e.g. actually recycle hash buckets that aren't in use. Since we have a natural
sync point already in the GC, we use that to provide cleanup points. We need to make sure that code that
is walking our structures is not at a GC safe point. Hence if the GC calls back into us inside the GC
sync point, we know that nobody is inside our structures and we can safely rearrange and recycle things.
********************************************************************************************************/
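// A minimal reader-side sketch implied by the rules above (illustrative, not quoted from the
// sources): a cache probe needs only atomic pointer-sized reads and effectively immutable elements:
//
//   ResolveCacheElem *pElem = VolatileLoad(&cache[idx]);   // rule 1: atomic, pointer-sized read
//   if (pElem->Equals(token, (void *) pMT))                // rule 2: fields never change once published
//       return (PCODE) pElem->target;
//   // rule 3: a miss only means this reader did not observe a matching entry; the caller falls
//   // back to the slow path, which may insert a duplicate without harming correctness.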
//initial and increment value for fail stub counters
#ifdef STUB_LOGGING
extern UINT32 STUB_MISS_COUNT_VALUE;
extern UINT32 STUB_COLLIDE_WRITE_PCT;
extern UINT32 STUB_COLLIDE_MONO_PCT;
#else // !STUB_LOGGING
#define STUB_MISS_COUNT_VALUE 100
#define STUB_COLLIDE_WRITE_PCT 100
#define STUB_COLLIDE_MONO_PCT 0
#endif // !STUB_LOGGING
//size and mask of the cache used by resolve stubs
// CALL_STUB_CACHE_SIZE must be equal to 2^CALL_STUB_CACHE_NUM_BITS
#define CALL_STUB_CACHE_NUM_BITS 12 //10
#define CALL_STUB_CACHE_SIZE 4096 //1024
#define CALL_STUB_CACHE_MASK (CALL_STUB_CACHE_SIZE-1)
#define CALL_STUB_CACHE_PROBES 5
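// A compile-time check of the comment above would look like the following sketch (not present in
// the original header):
//   static_assert(CALL_STUB_CACHE_SIZE == (1 << CALL_STUB_CACHE_NUM_BITS), "size must be 2^bits");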
//min sizes for BucketTable and buckets and the growth and hashing constants
#define CALL_STUB_MIN_BUCKETS 32
#define CALL_STUB_MIN_ENTRIES 4
//this is so that the very first growth will jump from 4 to 32 entries, then double from there.
#define CALL_STUB_SECONDARY_ENTRIES 8
#define CALL_STUB_GROWTH_FACTOR 2
#define CALL_STUB_LOAD_FACTOR 90
#define CALL_STUB_HASH_CONST1 1327
#define CALL_STUB_HASH_CONST2 43627
#define LARGE_PRIME 7199369
//internal layout of buckets=size-1,count,entries....
#define CALL_STUB_MASK_INDEX 0
#define CALL_STUB_COUNT_INDEX 1
#define CALL_STUB_DEAD_LINK 2
#define CALL_STUB_FIRST_INDEX 3
//marker entries in cache and hash tables
#define CALL_STUB_EMPTY_ENTRY 0
// number of successes for a chained element before it gets moved to the front
#define CALL_STUB_CACHE_INITIAL_SUCCESS_COUNT (0x100)
/*******************************************************************************************************
Entry is an abstract class. We will make specific subclasses for each kind of
entry. Entries hold references to stubs or tokens. The principal thing they provide
is a virtual Equals function that is used by the caching and hashing tables within which
the stubs and tokens are stored. Entries are typically stack allocated by the routines
that call into the hash and caching functions, and the functions stuff stubs into the entry
to do the comparisons. Essentially specific entry subclasses supply a vtable to a stub
as and when needed. This means we don't have to have vtables attached to stubs.
Summarizing so far, there is a struct for each kind of stub or token of the form XXXXStub.
They provide the actual storage layouts.
There is a struct in which each stub that has code is contained, of the form XXXXHolder.
They provide alignment and ancillary storage for the stub code.
There is a subclass of Entry for each kind of stub or token, of the form XXXXEntry.
They provide the specific implementations of the virtual functions declared in Entry. */
class Entry
{
public:
//access and compare the keys of the entry
virtual BOOL Equals(size_t keyA, size_t keyB)=0;
virtual size_t KeyA()=0;
virtual size_t KeyB()=0;
//contents is the struct or token that the entry exposes
virtual void SetContents(size_t contents)=0;
};
/* define the platform specific Stubs and stub holders */
#include <virtualcallstubcpu.hpp>
#if USES_LOOKUP_STUBS
/**********************************************************************************************
LookupEntry wraps LookupStubs and provides the concrete implementation of the abstract class Entry.
Virtual and interface call sites point to LookupStubs when they are first jitted. The hash table
that contains lookup stubs is keyed by token, hence the Equals function uses the embedded token in
the stub for comparison purposes. Since we are willing to allow duplicates in the hash table (as
long as they are relatively rare) we do use direct comparison of the tokens rather than extracting
the fields from within the tokens, for perf reasons. */
class LookupEntry : public Entry
{
public:
//Creates an entry that wraps lookup stub s
LookupEntry(size_t s)
{
LIMITED_METHOD_CONTRACT;
_ASSERTE(VirtualCallStubManager::isLookupStubStatic((PCODE)s));
stub = (LookupStub*) s;
}
//default constructor to allow stack and inline allocation of lookup entries
LookupEntry() {LIMITED_METHOD_CONTRACT; stub = NULL;}
//implementations of abstract class Entry
BOOL Equals(size_t keyA, size_t keyB)
{ WRAPPER_NO_CONTRACT; return stub && (keyA == KeyA()) && (keyB == KeyB()); }
size_t KeyA() { WRAPPER_NO_CONTRACT; return Token(); }
size_t KeyB() { WRAPPER_NO_CONTRACT; return (size_t)0; }
void SetContents(size_t contents)
{
LIMITED_METHOD_CONTRACT;
_ASSERTE(VirtualCallStubManager::isLookupStubStatic((PCODE)contents));
stub = LookupHolder::FromLookupEntry((PCODE)contents)->stub();
}
//extract the token of the underlying lookup stub
inline size_t Token() { LIMITED_METHOD_CONTRACT; return stub ? stub->token() : 0; }
private:
LookupStub* stub; //the stub the entry is wrapping
};
#endif // USES_LOOKUP_STUBS
/**********************************************************************************************
ResolveCacheEntry wraps a ResolveCacheElem and provides lookup functionality for entries that
may be added to the ResolveCache.
*/
class ResolveCacheEntry : public Entry
{
public:
ResolveCacheEntry(size_t elem)
{
LIMITED_METHOD_CONTRACT;
_ASSERTE(elem != 0);
pElem = (ResolveCacheElem*) elem;
}
//default constructor to allow stack and inline allocation of lookup entries
ResolveCacheEntry() { LIMITED_METHOD_CONTRACT; pElem = NULL; }
//access and compare the keys of the entry
virtual BOOL Equals(size_t keyA, size_t keyB)
{ WRAPPER_NO_CONTRACT; return pElem && (keyA == KeyA()) && (keyB == KeyB()); }
virtual size_t KeyA()
{ LIMITED_METHOD_CONTRACT; return pElem != NULL ? pElem->token : 0; }
virtual size_t KeyB()
{ LIMITED_METHOD_CONTRACT; return pElem != NULL ? (size_t) pElem->pMT : 0; }
//contents is the struct or token that the entry exposes
virtual void SetContents(size_t contents)
{
LIMITED_METHOD_CONTRACT;
pElem = (ResolveCacheElem*) contents;
}
inline const BYTE *Target()
{
LIMITED_METHOD_CONTRACT;
return pElem != NULL ? (const BYTE *)pElem->target : NULL;
}
private:
ResolveCacheElem *pElem;
};
/**********************************************************************************************
ResolveEntry wraps ResolveStubs and provides the concrete implementation of the abstract class Entry.
Polymorphic call sites and monomorphic calls that fail end up in a ResolveStub. Resolve stubs
are stored in hash tables keyed by token, hence the Equals function uses the embedded token in
the stub for comparison purposes. Since we are willing to allow duplicates in the hash table (as
long as they are relatively rare) we do use direct comparison of the tokens rather than extracting
the fields from within the tokens, for perf reasons. */
class ResolveEntry : public Entry
{
public:
//Creates an entry that wraps resolve stub s
ResolveEntry (size_t s)
{
LIMITED_METHOD_CONTRACT;
_ASSERTE(VirtualCallStubManager::isResolvingStubStatic((PCODE)s));
stub = (ResolveStub*) s;
}
//default constructor to allow stack and inline allocation of resolver entries
ResolveEntry() { LIMITED_METHOD_CONTRACT; stub = CALL_STUB_EMPTY_ENTRY; }
//implementations of abstract class Entry
inline BOOL Equals(size_t keyA, size_t keyB)
{ WRAPPER_NO_CONTRACT; return stub && (keyA == KeyA()) && (keyB == KeyB()); }
inline size_t KeyA() { WRAPPER_NO_CONTRACT; return Token(); }
inline size_t KeyB() { WRAPPER_NO_CONTRACT; return (size_t)0; }
void SetContents(size_t contents)
{
LIMITED_METHOD_CONTRACT;
_ASSERTE(VirtualCallStubManager::isResolvingStubStatic((PCODE)contents));
stub = ResolveHolder::FromResolveEntry((PCODE)contents)->stub();
}
//extract the token of the underlying resolve stub
inline size_t Token() { WRAPPER_NO_CONTRACT; return stub ? (size_t)(stub->token()) : 0; }
private:
ResolveStub* stub; //the stub the entry is wrapping
};
/**********************************************************************************************
DispatchEntry wraps DispatchStubs and provides the concrete implementation of the abstract class Entry.
Monomorphic and mostly monomorphic call sites eventually point to DispatchStubs. Dispatch stubs
are placed in hash and cache tables keyed by the expected Method Table and token they are built for.
Since we are willing to allow duplicates in the hash table (as long as they are relatively rare)
we do use direct comparison of the tokens rather than extracting the fields from within the tokens,
for perf reasons.*/
class DispatchEntry : public Entry
{
public:
//Creates an entry that wraps dispatch stub s
DispatchEntry (size_t s)
{
LIMITED_METHOD_CONTRACT;
_ASSERTE(VirtualCallStubManager::isDispatchingStubStatic((PCODE)s));
stub = (DispatchStub*) s;
}
//default constructor to allow stack and inline allocation of dispatch entries
DispatchEntry() { LIMITED_METHOD_CONTRACT; stub = CALL_STUB_EMPTY_ENTRY; }
//implementations of abstract class Entry
inline BOOL Equals(size_t keyA, size_t keyB)
{ WRAPPER_NO_CONTRACT; return stub && (keyA == KeyA()) && (keyB == KeyB()); }
inline size_t KeyA() { WRAPPER_NO_CONTRACT; return Token(); }
inline size_t KeyB() { WRAPPER_NO_CONTRACT; return ExpectedMT();}
void SetContents(size_t contents)
{
LIMITED_METHOD_CONTRACT;
_ASSERTE(VirtualCallStubManager::isDispatchingStubStatic((PCODE)contents));
stub = DispatchHolder::FromDispatchEntry((PCODE)contents)->stub();
}
//extract the fields of the underlying dispatch stub
inline size_t ExpectedMT()
{ WRAPPER_NO_CONTRACT; return stub ? (size_t)(stub->expectedMT()) : 0; }
size_t Token()
{
WRAPPER_NO_CONTRACT;
if (stub)
{
ResolveHolder * resolveHolder = ResolveHolder::FromFailEntry(stub->failTarget());
size_t token = resolveHolder->stub()->token();
_ASSERTE(token == VirtualCallStubManager::GetTokenFromStub((PCODE)stub));
return token;
}
else
{
return 0;
}
}
inline PCODE Target()
{ WRAPPER_NO_CONTRACT; return stub ? stub->implTarget() : 0; }
private:
DispatchStub* stub;
};
/*************************************************************************************************
DispatchCache is the cache table that the resolve stubs use for inline polymorphic resolution
of a call. The cache entry is logically a triplet of (method table, token, impl address) where method table
is the type of the calling frame's <this>, token identifies the method being invoked,
i.e. is a (type id,slot #) pair, and impl address is the address of the method implementation.
*/
class DispatchCache
{
public:
static const UINT16 INVALID_HASH = (UINT16)(-1);
DispatchCache();
//read and write the cache keyed by (method table,token) pair.
inline ResolveCacheElem* Lookup(size_t token, void* mt)
{ WRAPPER_NO_CONTRACT; return Lookup(token, INVALID_HASH, mt);}
ResolveCacheElem* Lookup(size_t token, UINT16 tokenHash, void* mt);
enum InsertKind {IK_NONE, IK_DISPATCH, IK_RESOLVE, IK_SHARED, IK_EXTERNAL};
BOOL Insert(ResolveCacheElem* elem, InsertKind insertKind);
#ifdef CHAIN_LOOKUP
void PromoteChainEntry(ResolveCacheElem* elem);
#endif
// This is the heavyweight hashing algorithm. Use sparingly.
static UINT16 HashToken(size_t token);
inline void GetLoadFactor(size_t *total, size_t *used)
{
LIMITED_METHOD_CONTRACT;
*total = CALL_STUB_CACHE_SIZE;
size_t count = 0;
for (size_t i = 0; i < CALL_STUB_CACHE_SIZE; i++)
if (cache[i] != empty)
count++;
*used = count;
}
inline void *GetCacheBaseAddr()
{ LIMITED_METHOD_CONTRACT; return &cache[0]; }
inline size_t GetCacheCount()
{ LIMITED_METHOD_CONTRACT; return CALL_STUB_CACHE_SIZE; }
inline ResolveCacheElem *GetCacheEntry(size_t idx)
{ LIMITED_METHOD_CONTRACT; return VolatileLoad(&cache[idx]); }
inline BOOL IsCacheEntryEmpty(size_t idx)
{ LIMITED_METHOD_CONTRACT; return cache[idx] == empty; }
inline void SetCacheEntry(size_t idx, ResolveCacheElem *elem)
{
LIMITED_METHOD_CONTRACT;
#ifdef STUB_LOGGING
cacheData[idx].numWrites++;
#endif
#ifdef CHAIN_LOOKUP
CONSISTENCY_CHECK(m_writeLock.OwnedByCurrentThread());
#endif
cache[idx] = elem;
}
inline void ClearCacheEntry(size_t idx)
{
LIMITED_METHOD_CONTRACT;
#ifdef STUB_LOGGING
cacheData[idx].numClears++;
#endif
cache[idx] = empty;
}
struct
{
UINT32 insert_cache_external; //# of times Insert was called for IK_EXTERNAL
UINT32 insert_cache_shared; //# of times Insert was called for IK_SHARED
UINT32 insert_cache_dispatch; //# of times Insert was called for IK_DISPATCH
UINT32 insert_cache_resolve; //# of times Insert was called for IK_RESOLVE
UINT32 insert_cache_hit; //# of times Insert found an empty cache entry
UINT32 insert_cache_miss; //# of times Insert already had a matching cache entry
UINT32 insert_cache_collide; //# of times Insert found a used cache entry
UINT32 insert_cache_write; //# of times Insert wrote a cache entry
} stats;
void LogStats();
// Unlocked iterator of entries. Use only when read/write access to the cache
// is safe. This would typically be at GC sync points, currently needed during
// appdomain unloading.
class Iterator
{
public:
Iterator(DispatchCache *pCache);
inline BOOL IsValid()
{ WRAPPER_NO_CONTRACT; return (m_curBucket < (INT32)m_pCache->GetCacheCount()); }
void Next();
// Unlink the current entry.
// **NOTE** Using this method implicitly performs a call to Next to move
// past the unlinked entry. Thus, one could accidentally skip
// entries unless you take this into consideration.
ResolveCacheElem *UnlinkEntry();
inline ResolveCacheElem *Entry()
{ LIMITED_METHOD_CONTRACT; CONSISTENCY_CHECK(IsValid()); return *m_ppCurElem; }
private:
void NextValidBucket();
inline void NextBucket()
{ LIMITED_METHOD_CONTRACT; m_curBucket++; m_ppCurElem = &m_pCache->cache[m_curBucket]; }
DispatchCache *m_pCache;
INT32 m_curBucket;
ResolveCacheElem **m_ppCurElem;
};
private:
#ifdef CHAIN_LOOKUP
Crst m_writeLock;
#endif
//the following hash computation is also inlined in the resolve stub in asm (SO NO TOUCHIE)
inline static UINT16 HashMT(UINT16 tokenHash, void* mt)
{
LIMITED_METHOD_CONTRACT;
UINT16 hash;
size_t mtHash = (size_t) mt;
mtHash = (((mtHash >> CALL_STUB_CACHE_NUM_BITS) + mtHash) >> LOG2_PTRSIZE) & CALL_STUB_CACHE_MASK;
hash = (UINT16) mtHash;
hash ^= (tokenHash & CALL_STUB_CACHE_MASK);
return hash;
}
ResolveCacheElem* cache[CALL_STUB_CACHE_SIZE]; //must be first
ResolveCacheElem* empty; //empty entry, initialized to fail all comparisons
#ifdef STUB_LOGGING
public:
struct CacheEntryData {
UINT32 numWrites;
UINT16 numClears;
};
CacheEntryData cacheData[CALL_STUB_CACHE_SIZE];
#endif // STUB_LOGGING
};
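// A minimal lookup sketch (hypothetical 'pCache' pointer, not from this header): a caller that
// holds a DispatchCache probes it roughly as
//
//   UINT16 hash = DispatchCache::HashToken(token);            // heavyweight; compute once and reuse
//   ResolveCacheElem *pElem = pCache->Lookup(token, hash, (void *) pMT);
//   // on a hit, pElem->target is the code address cached for the (pMT, token) pair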
/**************************************************************************************************
The hash tables are accessed via instances of the Prober. Prober is a probe into a bucket
of the hash table, and therefore has an index which is the current probe position.
It includes a count of the number of probes done in that bucket so far and a stride
to step thru the bucket with. To do comparisons, it has a reference to an entry with which
it can do comparisons (Equals(...)) of the entries (stubs) inside the hash table. It also has
the key pair (keyA, keyB) that it is looking for.
Typically, an entry of the appropriate type is created on the stack and then the prober is created passing
in a reference to the entry. The prober is used for a complete operation, such as look for and find an
entry (stub), creating and inserting it as necessary.
The initial index and the stride are orthogonal hashes of the key pair, i.e. we are doing a variant of
double hashing. When we initialize the prober (see FormHash below) we set the initial probe based on
one hash. The stride (used as a modulo addition of the probe position) is based on a different hash and
is such that it will visit every location in the bucket before repeating. Hence it is imperative that
the bucket size and the stride be relatively prime with respect to each other. We have chosen to make bucket sizes
a power of 2, so we force the stride to be odd.
Note -- it must be assumed that multiple probers are walking the same tables and buckets at the same time.
Additionally, the counts may not be accurate, and there may be duplicates in the tables. Since the tables
do not allow concurrent deletion, some of the concurrency issues are ameliorated.
*/
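// A small standalone sketch (an assumption, not from the original sources) of why forcing the
// stride to be odd suffices: an odd stride is relatively prime to any power-of-2 bucket size, so
// the sequence (start + k*stride) & mask hits every slot once before repeating (uses std::vector):
//
//   bool VisitsEverySlot(size_t start, size_t stride, size_t mask)  // mask == bucketSize-1, stride odd
//   {
//       std::vector<bool> seen(mask + 1, false);
//       size_t index = start & mask;
//       for (size_t k = 0; k <= mask; k++)
//       {
//           seen[index] = true;
//           index = (index + stride) & mask;
//       }
//       for (bool b : seen) { if (!b) return false; }
//       return true;
//   }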
class Prober
{
friend class FastTable;
friend class BucketTable;
public:
Prober(Entry* e) {LIMITED_METHOD_CONTRACT; comparer = e;}
//find the requested entry, if not there return CALL_STUB_EMPTY_ENTRY
size_t Find();
//add the entry into the bucket, if it is not already in the bucket.
//return the entry actually in the bucket (existing or added)
size_t Add(size_t entry);
private:
//return the bucket (FastTable*) that the prober is currently walking
inline size_t* items() {LIMITED_METHOD_CONTRACT; return &base[-CALL_STUB_FIRST_INDEX];}
//are there more probes possible, or have we probed everything in the bucket
inline BOOL NoMore() {LIMITED_METHOD_CONTRACT; return probes>mask;} //both probes and mask are (-1)
//advance the probe to a new place in the bucket
inline BOOL Next()
{
WRAPPER_NO_CONTRACT;
index = (index + stride) & mask;
probes++;
return !NoMore();
}
inline size_t Read()
{
LIMITED_METHOD_CONTRACT;
_ASSERTE(base);
return VolatileLoad(&base[index]);
}
//initialize a prober across a bucket (table) for the specified keys.
void InitProber(size_t key1, size_t key2, size_t* table);
//set up the initial index and stride and probe count
inline void FormHash()
{
LIMITED_METHOD_CONTRACT;
probes = 0;
//these two hash functions have not been formally measured for effectiveness
//but they are at least orthogonal
size_t a = ((keyA>>16) + keyA);
size_t b = ((keyB>>16) ^ keyB);
index = (((a*CALL_STUB_HASH_CONST1)>>4)+((b*CALL_STUB_HASH_CONST2)>>4)+CALL_STUB_HASH_CONST1) & mask;
stride = ((a+(b*CALL_STUB_HASH_CONST1)+CALL_STUB_HASH_CONST2) | 1) & mask;
}
//atomically grab an empty slot so we can insert a new entry into the bucket
BOOL GrabEntry(size_t entryValue);
size_t keyA; //key pair we are looking for
size_t keyB;
size_t* base; //we have our own pointer to the bucket, so races don't matter.
// We won't care if we do the lookup in an
// outdated bucket (has grown out from under us).
// All that will happen is possibly dropping an entry
// on the floor or adding a duplicate.
size_t index; //current probe point in the bucket
size_t stride; //amount to step on each successive probe, must be relatively prime wrt the bucket size
size_t mask; //size of bucket - 1
size_t probes; //number of probes - 1
Entry* comparer;//used to compare an entry against the sought after key pair
};
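// The following is an illustrative, self-contained sketch of the double-hashing scheme described
// above; it is not part of this header's build. The names and constants (SKETCH_*, SketchProbe) are
// made up for illustration, and keys are stored directly in the table rather than as entry pointers
// compared via Entry::Equals(). Because the table size is a power of 2 and the stride is forced odd,
// gcd(stride, size) == 1, so the probe sequence visits every slot before repeating.
#if 0 // Illustrative sketch only -- not compiled as part of this header.
#include <cstddef>
#include <cassert>

// Hypothetical stand-ins for the real hash constants.
static const size_t SKETCH_HASH_CONST1 = 0x9E3779B9;
static const size_t SKETCH_HASH_CONST2 = 0x7F4A7C15;

// Probe a power-of-2 sized table for keyA using double hashing: one hash picks the
// starting index, an orthogonal hash picks the (odd) stride. Returns the index of the
// key or of the first empty slot, or 'size' if every slot was probed without success.
static size_t SketchProbe(const size_t* table, size_t size, size_t keyA, size_t keyB, size_t emptyValue)
{
    assert(size >= 2 && (size & (size - 1)) == 0);   // power of 2, as required above
    size_t mask   = size - 1;
    size_t a      = (keyA >> 16) + keyA;
    size_t b      = (keyB >> 16) ^ keyB;
    size_t index  = (((a * SKETCH_HASH_CONST1) >> 4) + ((b * SKETCH_HASH_CONST2) >> 4)) & mask;
    size_t stride = ((a + b * SKETCH_HASH_CONST1) | 1) & mask;   // "| 1" forces an odd stride
    for (size_t probes = 0; probes <= mask; probes++)
    {
        if (table[index] == keyA || table[index] == emptyValue)
            return index;                    // found the key or a free slot to claim
        index = (index + stride) & mask;     // step by the stride, modulo the table size
    }
    return size;                             // visited every slot (guaranteed by the odd stride)
}
#endif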
/********************************************************************************************************
FastTable is used to implement the buckets of a BucketTable, a bucketized hash table. A FastTable is
an array of entries (contents). The first two slots of contents store the size-1 and count of entries
actually in the FastTable. Note that the count may be inaccurate and there may be duplicates. Careful
attention must be paid to eliminate the need for interlocked, serialized, or locked operations in the
face of concurrency. An illustrative sketch of this single-allocation bucket layout appears after the
class declaration below.
*/
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4200) // disable zero-sized array warning
#endif // _MSC_VER
class FastTable
{
friend class BucketTable;
public:
private:
FastTable() { LIMITED_METHOD_CONTRACT; }
~FastTable() { LIMITED_METHOD_CONTRACT; }
//initialize a prober for the specified keys.
inline BOOL SetUpProber(size_t keyA, size_t keyB, Prober* probe)
{
CONTRACTL {
NOTHROW;
GC_NOTRIGGER;
FORBID_FAULT;
} CONTRACTL_END;
_ASSERTE(probe);
_ASSERTE(contents);
probe->InitProber(keyA, keyB, &contents[0]);
return TRUE;
}
//find the requested entry (keys of prober), if not there return CALL_STUB_EMPTY_ENTRY
size_t Find(Prober* probe);
//add the entry, if it is not already there. Probe is used to search.
//Return the entry actually contained (existing or added)
size_t Add(size_t entry, Prober* probe);
void IncrementCount();
// Create a FastTable with space for numberOfEntries. Note that the allocation below uses a
// throwing operator new (see the THROWS/INJECT_FAULT contract), so this method throws on OOM
// rather than returning NULL.
static FastTable* MakeTable(size_t numberOfEntries)
{
CONTRACTL {
THROWS;
GC_TRIGGERS;
INJECT_FAULT(COMPlusThrowOM(););
} CONTRACTL_END;
size_t size = CALL_STUB_MIN_ENTRIES;
while (size < numberOfEntries) {size = size<<1;}
// if (size == CALL_STUB_MIN_ENTRIES)
// size += 3;
size_t* bucket = new size_t[(sizeof(FastTable)/sizeof(size_t))+size+CALL_STUB_FIRST_INDEX];
FastTable* table = new (bucket) FastTable();
table->InitializeContents(size);
return table;
}
//Initialize as empty
void InitializeContents(size_t size)
{
LIMITED_METHOD_CONTRACT;
memset(&contents[0], CALL_STUB_EMPTY_ENTRY, (size+CALL_STUB_FIRST_INDEX)*sizeof(BYTE*));
contents[CALL_STUB_MASK_INDEX] = size-1;
}
inline size_t tableMask() {LIMITED_METHOD_CONTRACT; return (size_t) (contents[CALL_STUB_MASK_INDEX]);}
inline size_t tableSize() {LIMITED_METHOD_CONTRACT; return tableMask()+1;}
inline size_t tableCount() {LIMITED_METHOD_CONTRACT; return (size_t) (contents[CALL_STUB_COUNT_INDEX]);}
inline BOOL isFull()
{
LIMITED_METHOD_CONTRACT;
return (tableCount()+1) * 100 / CALL_STUB_LOAD_FACTOR >= tableSize();
}
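// For example, assuming CALL_STUB_LOAD_FACTOR is 90 (the ~90% load factor mentioned in the
// BucketTable comment below): with tableSize() == 32 the table reports full once tableCount()
// reaches 28, since (28+1)*100/90 == 32 >= 32, while (27+1)*100/90 == 31 < 32.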
//we store (size-1) in bucket[CALL_STUB_MASK_INDEX==0],
//we store the used count in bucket[CALL_STUB_COUNT_INDEX==1],
//we have an unused cell to use as a temp at bucket[CALL_STUB_DEAD_LINK==2],
//and the table starts at bucket[CALL_STUB_FIRST_INDEX==3],
size_t contents[];
};
#ifdef _MSC_VER
#pragma warning(pop)
#endif
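// The following is an illustrative sketch (not part of this header's build) of the single-allocation
// bucket layout that FastTable::MakeTable and InitializeContents set up above. The SK_* names are
// hypothetical stand-ins for the CALL_STUB_*_INDEX constants defined earlier in this file.
#if 0 // Illustrative sketch only -- not compiled as part of this header.
#include <cstddef>

// Hypothetical stand-ins for the real layout constants.
enum { SK_MASK_INDEX = 0, SK_COUNT_INDEX = 1, SK_DEAD_LINK = 2, SK_FIRST_INDEX = 3 };
static const size_t SK_EMPTY_ENTRY = 0;

// Allocate the header slots plus the entry slots in a single size_t block and initialize
// the header, mirroring MakeTable/InitializeContents above:
//   block[0] = size-1 (mask)    block[1] = count    block[2] = spare/temp
//   block[3] .. block[3+size-1] = the probe-able entries, initially empty
static size_t* MakeSketchBucket(size_t size /* power of 2 */)
{
    size_t* bucket = new size_t[size + SK_FIRST_INDEX];
    for (size_t i = 0; i < size + SK_FIRST_INDEX; i++)
        bucket[i] = SK_EMPTY_ENTRY;        // every slot starts empty (and the count starts at 0)
    bucket[SK_MASK_INDEX] = size - 1;      // mask, used for "& mask" probing inside the bucket
    return bucket;
}

// Header accessors, analogous to tableMask()/tableSize()/tableCount().
static size_t SketchMask(const size_t* bucket)  { return bucket[SK_MASK_INDEX]; }
static size_t SketchSize(const size_t* bucket)  { return SketchMask(bucket) + 1; }
static size_t SketchCount(const size_t* bucket) { return bucket[SK_COUNT_INDEX]; }
#endif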
/******************************************************************************************************
BucketTable is a bucketized hash table. It uses FastTables for its buckets. The hash tables
used by the VirtualCallStubManager are BucketTables. The number of buckets is fixed at the time
the table is created. The actual buckets are allocated as needed, and grow as necessary. The reason
for using buckets is primarily to reduce the cost of growing, since only a single bucket is actually
grown at any given time. Since the hash tables are accessed infrequently, the load factor that
controls growth is quite high (90%). Since we use hashing to pick the bucket, and we use hashing to
look up inside the bucket, it is important that the hashing function used here is orthogonal to the ones
used in the buckets themselves (see Prober::FormHash). A miniature, self-contained sketch of this
two-level lookup appears after the class declaration below.
*/
class BucketTable
{
public:
BucketTable(size_t numberOfBuckets)
{
WRAPPER_NO_CONTRACT;
size_t size = CALL_STUB_MIN_BUCKETS;
while (size < numberOfBuckets) {size = size<<1;}
buckets = AllocateBuckets(size);
// Initialize statistics counters
memset(&stats, 0, sizeof(stats));
}
~BucketTable()
{
LIMITED_METHOD_CONTRACT;
if(buckets != NULL)
{
size_t size = bucketCount()+CALL_STUB_FIRST_INDEX;
for(size_t ix = CALL_STUB_FIRST_INDEX; ix < size; ix++) delete (FastTable*)(buckets[ix]);
delete[] buckets;
}
}
//initialize a prober for the specified keys.
BOOL SetUpProber(size_t keyA, size_t keyB, Prober *prober);
//find the requested entry (keys of prober), if not there return CALL_STUB_EMPTY_ENTRY
inline size_t Find(Prober* probe) {WRAPPER_NO_CONTRACT; return probe->Find();}
//add the entry, if it is not already there. Probe is used to search.
size_t Add(size_t entry, Prober* probe);
//reclaim abandoned buckets. Buckets are abandoned when they need to grow.
//needs to be called inside a gc sync point.
static void Reclaim();
struct
{
UINT32 bucket_space; //# of bytes in caches and tables, not including the stubs themselves
UINT32 bucket_space_dead; //# of bytes of abandoned buckets not yet recycled.
} stats;
void LogStats();
private:
inline size_t bucketMask() {LIMITED_METHOD_CONTRACT; return (size_t) (buckets[CALL_STUB_MASK_INDEX]);}
inline size_t bucketCount() {LIMITED_METHOD_CONTRACT; return bucketMask()+1;}
inline size_t ComputeBucketIndex(size_t keyA, size_t keyB)
{
LIMITED_METHOD_CONTRACT;
size_t a = ((keyA>>16) + keyA);
size_t b = ((keyB>>16) ^ keyB);
return CALL_STUB_FIRST_INDEX+(((((a*CALL_STUB_HASH_CONST2)>>5)^((b*CALL_STUB_HASH_CONST1)>>5))+CALL_STUB_HASH_CONST2) & bucketMask());
}
//grows the bucket referenced by probe.
BOOL GetMoreSpace(const Prober* probe);
//creates storage in which to store references to the buckets
static size_t* AllocateBuckets(size_t size)
{
LIMITED_METHOD_CONTRACT;
size_t* buckets = new size_t[size+CALL_STUB_FIRST_INDEX];
if (buckets != NULL)
{
memset(&buckets[0], CALL_STUB_EMPTY_ENTRY, (size+CALL_STUB_FIRST_INDEX)*sizeof(void*));
buckets[CALL_STUB_MASK_INDEX] = size-1;
}
return buckets;
}
inline size_t Read(size_t index)
{
LIMITED_METHOD_CONTRACT;
CONSISTENCY_CHECK(index <= bucketMask()+CALL_STUB_FIRST_INDEX);
return VolatileLoad(&buckets[index]);
}
#ifdef _MSC_VER
#pragma warning(disable: 4267) //C4267: conversion from 'size_t' to a smaller type, possible loss of data
#endif
inline void Write(size_t index, size_t value)
{
LIMITED_METHOD_CONTRACT;
CONSISTENCY_CHECK(index <= bucketMask()+CALL_STUB_FIRST_INDEX);
VolatileStore(&buckets[index], value);
}
#ifdef _MSC_VER
#pragma warning(default: 4267)
#endif
// We store (#buckets-1) in bucket[CALL_STUB_MASK_INDEX ==0]
// We have two unused cells at bucket[CALL_STUB_COUNT_INDEX ==1]
// and bucket[CALL_STUB_DEAD_LINK ==2]
// and the table starts at bucket[CALL_STUB_FIRST_INDEX ==3]
// the number of elements is bucket[CALL_STUB_MASK_INDEX]+CALL_STUB_FIRST_INDEX
size_t* buckets;
static FastTable* dead; //linked list head of to be deleted (abandoned) buckets
};
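// The following is an illustrative sketch (not part of this header's build) of the two-level lookup
// described above: one hash picks the bucket, and an orthogonal odd-stride double hash probes within
// it. All names here are hypothetical; the real code stores FastTable* buckets and goes through
// SetUpProber/Find/Add with an Entry-derived comparer.
#if 0 // Illustrative sketch only -- not compiled as part of this header.
#include <cstddef>
#include <vector>

struct SketchBucketTable
{
    std::vector< std::vector<size_t> > buckets;   // each inner vector: one power-of-2 bucket

    SketchBucketTable(size_t numBuckets /* power of 2 */, size_t bucketSize /* power of 2 */)
        : buckets(numBuckets, std::vector<size_t>(bucketSize, 0)) {}

    std::vector<size_t>& PickBucket(size_t keyA, size_t keyB)
    {
        size_t a = (keyA >> 16) + keyA;
        size_t b = (keyB >> 16) ^ keyB;
        // Different shifts and an XOR combiner keep this (informally) orthogonal to the
        // in-bucket hash, as the comment above requires.
        return buckets[((a >> 5) ^ (b >> 5)) & (buckets.size() - 1)];
    }

    // Find keyA in its bucket, or claim the first empty slot for it.
    // Returns false if the bucket is full (the real code would grow it instead).
    bool FindOrAdd(size_t keyA, size_t keyB)
    {
        std::vector<size_t>& bucket = PickBucket(keyA, keyB);
        size_t mask   = bucket.size() - 1;
        size_t index  = keyA & mask;
        size_t stride = (keyB | 1) & mask;                 // odd stride => visits every slot
        for (size_t probes = 0; probes <= mask; probes++)
        {
            if (bucket[index] == keyA) return true;        // already present
            if (bucket[index] == 0)  { bucket[index] = keyA; return true; }
            index = (index + stride) & mask;
        }
        return false;                                      // bucket full; real code: GetMoreSpace()
    }
};
#endif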
#endif // !_VIRTUAL_CALL_STUB_H