//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
//
//
// File: virtualcallstubcpu.hpp
//


//

//
// ============================================================================

#ifndef _VIRTUAL_CALL_STUB_X86_H
#define _VIRTUAL_CALL_STUB_X86_H

#ifdef DECLARE_DATA
#include "asmconstants.h"
#ifdef FEATURE_REMOTING
#include "remoting.h"
#endif
#endif

#include <pshpack1.h>  // Since we are placing code, we want byte packing of the structs

#define USES_LOOKUP_STUBS	1

/*********************************************************************************************
Stubs that contain code are all part of larger structs called Holders.  There is a
Holder for each kind of stub, i.e. XXXStub is contained within XXXHolder.  Holders are
essentially an implementation trick that allows the code sequences to be rearranged
easily while trying out different alternatives, and that deals with any alignment
issues in a way that is mostly immune to the actual code sequences.  These Holders
should be revisited when the stub code sequences are fixed, since in many cases they
add extra space to a stub that is not really needed.

Stubs are placed in caches and hash tables.  Since unaligned access of data in memory
is very slow, the keys used in those tables should be aligned.  The things used as keys
typically also occur in the generated code, e.g. a token as an immediate part of an instruction.
For now, to avoid alignment computations as different code strategies are tried out, the key
fields are all kept in the Holders.  Eventually, many of these fields should be dropped, and the
instruction streams aligned so that the immediate fields fall on aligned boundaries.
*/
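
/* Illustrative sketch only (not generated code; fieldOffset and stubSize are hypothetical names):
   the Holder padding used below follows the usual alignment arithmetic.  Given a key field at byte
   offset fieldOffset inside a stub of stubSize bytes, the leading and trailing pads work out
   roughly as:

       size_t align = (sizeof(void*) - (fieldOffset % sizeof(void*))) % sizeof(void*); // bytes before the stub so the key lands on a pointer boundary
       size_t pad   = (sizeof(void*) - ((align + stubSize) % sizeof(void*))) % sizeof(void*); // bytes after the stub so the whole holder is a multiple of sizeof(void*)

   The holders below express the same idea directly with offsetof/sizeof, and the static_asserts in
   the InitializeStatic() routines verify the resulting alignment.
*/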

#if USES_LOOKUP_STUBS

struct LookupStub;
struct LookupHolder;

/*LookupStub**************************************************************************************
Virtual and interface call sites are initially set up to point at LookupStubs.
This is because the runtime type of the <this> pointer is not yet known,
so the target cannot be resolved.  Note: if the jit is able to determine the runtime type
of the <this> pointer, it should be generating a direct call, not a virtual or interface call.
This stub pushes a lookup token onto the stack to identify the sought-after method, and then
jumps into the EE (VirtualCallStubManager::ResolveWorkerStub) to effect the lookup and
transfer of control to the appropriate target method implementation, perhaps patching the call site
along the way to point to a more appropriate stub.  Hence call sites that point to LookupStubs
quickly get changed to point to another kind of stub.
*/
struct LookupStub
{
    inline PCODE entryPoint()           { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; }
    inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; }
    inline size_t       size()          { LIMITED_METHOD_CONTRACT; return sizeof(LookupStub); }

private:
    friend struct LookupHolder;

    // LookupStub::_entryPoint expects:
    //       ecx: object (the "this" pointer)
    //       eax: siteAddrForRegisterIndirect if this is a RegisterIndirect dispatch call
    BYTE    _entryPoint [2];    // 50           push    eax             ;save siteAddrForRegisterIndirect - this may be an indirect call
                                // 68           push
    size_t  _token;             // xx xx xx xx          32-bit constant
#ifdef STUB_LOGGING
    BYTE cntr2[2];              // ff 05        inc
    size_t* c_lookup;           // xx xx xx xx          [call_lookup_counter]
#endif //STUB_LOGGING 
    BYTE part2 [1];             // e9           jmp
    DISPL   _resolveWorkerDispl;// xx xx xx xx          pc-rel displ
};

/* LookupHolders are the containers for LookupStubs; they provide for any alignment of 
stubs as necessary.  In the case of LookupStubs, alignment is necessary since
LookupStubs are placed in a hash table keyed by token. */
struct LookupHolder
{
    static void InitializeStatic();

    void  Initialize(PCODE resolveWorkerTarget, size_t dispatchToken);

    LookupStub*    stub()               { LIMITED_METHOD_CONTRACT;  return &_stub;    }

    static LookupHolder*  FromLookupEntry(PCODE lookupEntry);

private:
    friend struct LookupStub;

    BYTE align[(sizeof(void*)-(offsetof(LookupStub,_token)%sizeof(void*)))%sizeof(void*)];
    LookupStub _stub;
    BYTE pad[sizeof(void*) -
             ((sizeof(void*)-(offsetof(LookupStub,_token)%sizeof(void*))) +
              (sizeof(LookupStub))
             ) % sizeof(void*)];    //complete DWORD

    static_assert_no_msg((sizeof(void*) -
             ((sizeof(void*)-(offsetof(LookupStub,_token)%sizeof(void*))) +
              (sizeof(LookupStub))
             ) % sizeof(void*)) != 0);
};

#endif // USES_LOOKUP_STUBS

struct DispatchStub;
struct DispatchHolder;

/*DispatchStub**************************************************************************************
Monomorphic and mostly monomorphic call sites eventually point to DispatchStubs.
A dispatch stub has an expected type (expectedMT), a target address (target) and a fail address (failure).
If the <this> pointer in the calling frame is in fact of the expected type, then
control is transferred to the target address, the method implementation.  If not,
then control is transferred to the fail address, a fail stub (see below) where a polymorphic
lookup is done to find the correct address to go to.

Implementation note: Order, choice of instructions, and branch directions
should be carefully tuned since they can have an inordinate effect on performance.  Particular
attention needs to be paid to the effects on the BTB and branch prediction, both in the small
and in the large, i.e. it needs to run well in the face of BTB overflow--using static predictions.
Note that since this stub is only used for mostly monomorphic call sites (ones that are not mostly
monomorphic get patched to something else), the conditional jump "jne failure" is mostly not taken,
and hence it is important that branch prediction statically predict this, which means it must be a
forward jump.  The alternative is to reverse the order of the jumps and make sure that the resulting
conditional jump "je implTarget" is statically predicted as taken, i.e. a backward jump.  The current
choice was made since it was easier to control the placement of the stubs than the placement of the
jitted code relative to the stubs. */
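
/* Logically (illustrative C-like sketch only, not the generated code; pThis is a hypothetical name
   for the object in ecx), a DispatchStub behaves as:

       if (*(size_t*)pThis == expectedMT)    // cmp [ecx], expectedMT -- the object's MethodTable pointer
           goto implTarget;                  // the common, monomorphic case
       else
           goto failTarget;                  // "jne failure" -- forward jump, statically predicted not taken
*/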
struct DispatchStub 
{
    inline PCODE        entryPoint()  { LIMITED_METHOD_CONTRACT;  return (PCODE)&_entryPoint[0]; }

    inline size_t       expectedMT()  { LIMITED_METHOD_CONTRACT;  return _expectedMT;     }
    inline PCODE        implTarget()  { LIMITED_METHOD_CONTRACT;  return (PCODE) &_implDispl + sizeof(DISPL) + _implDispl; }
    inline PCODE        failTarget()  { LIMITED_METHOD_CONTRACT;  return (PCODE) &_failDispl + sizeof(DISPL) + _failDispl; }
    inline size_t       size()        { LIMITED_METHOD_CONTRACT;  return sizeof(DispatchStub); }

private:
    friend struct DispatchHolder;

    // DispatchStub:: _entryPoint expects:
    //       ecx: object (the "this" pointer)
    //       eax: siteAddrForRegisterIndirect if this is a RegisterIndirect dispatch call
#ifndef STUB_LOGGING
    BYTE    _entryPoint [2];    // 81 39        cmp  [ecx],                   ; This is the place where we are going to fault on null this.
    size_t  _expectedMT;        // xx xx xx xx              expectedMT        ; If you change it, change also AdjustContextForVirtualStub in excep.cpp!!!
    BYTE    jmpOp1[2];          // 0f 85        jne                 
    DISPL   _failDispl;         // xx xx xx xx              failEntry         ;must be forward jmp for perf reasons
    BYTE jmpOp2;                // e9           jmp     
    DISPL   _implDispl;         // xx xx xx xx              implTarget
#else //STUB_LOGGING
    BYTE    _entryPoint [2];    // ff 05        inc
    size_t* d_call;             // xx xx xx xx              [call_mono_counter]
    BYTE cmpOp [2];             // 81 39        cmp  [ecx],
    size_t  _expectedMT;        // xx xx xx xx              expectedMT
    BYTE jmpOp1[2];             // 0f 84        je 
    DISPL   _implDispl;         // xx xx xx xx              implTarget        ;during logging, perf is not so important               
    BYTE fail [2];              // ff 05        inc 
    size_t* d_miss;             // xx xx xx xx      [miss_mono_counter]
    BYTE jmpFail;               // e9           jmp     
    DISPL   _failDispl;         // xx xx xx xx              failEntry 
#endif //STUB_LOGGING 
};

/* DispatchHolders are the containers for DispatchStubs; they provide for any alignment of
stubs as necessary.  DispatchStubs are placed in a hashtable and in a cache.  The keys for both
are the pair expectedMT and token.  Efficiency of the hash table is not a big issue,
since lookups in it are fairly rare.  Efficiency of the cache is paramount since it is accessed frequently
(see ResolveStub below).  Currently we are storing both of these fields in the DispatchHolder to simplify
alignment issues.  If inlineMT in the stub itself were aligned, then it could be the expectedMT field.
While the token field could logically be obtained by following the failure target to the failEntryPoint
of the ResolveStub and then to the token over there, for perf reasons of cache access it is duplicated here.
This allows us to use DispatchStubs in the cache.  The alternative is to provide some other immutable struct
for the cache composed of the triplet (expectedMT, token, target) and some sort of reclamation scheme when
they are thrown out of the cache via overwrites (since concurrency will make the obvious approaches invalid).
*/

/* @workaround for ee resolution - Since the EE does not currently have a resolver function that
does what we want (see notes in the implementation of VirtualCallStubManager::Resolver), we are
using dispatch stubs to simulate what we want.  That means that inlineTarget, which should be immutable,
is in fact written.  Hence we have moved target out into the holder and aligned it so we can
atomically update it.  When we get a resolver function that does what we want, we can drop this field
and live with just the inlineTarget field in the stub itself, since immutability will hold. */
struct DispatchHolder
{
    static void InitializeStatic();

    void  Initialize(PCODE implTarget, PCODE failTarget, size_t expectedMT);

    DispatchStub* stub()      { LIMITED_METHOD_CONTRACT;  return &_stub; }

    static DispatchHolder*  FromDispatchEntry(PCODE dispatchEntry);

private:
    //force expectedMT to be aligned since used as key in hash tables.
#ifndef STUB_LOGGING
    BYTE align[(sizeof(void*)-(offsetof(DispatchStub,_expectedMT)%sizeof(void*)))%sizeof(void*)];
#endif
    DispatchStub _stub;
    BYTE pad[(sizeof(void*)-(sizeof(DispatchStub)%sizeof(void*))+offsetof(DispatchStub,_expectedMT))%sizeof(void*)];	//complete DWORD
};

struct ResolveStub;
struct ResolveHolder;

/*ResolveStub**************************************************************************************
Polymorphic call sites and monomorphic calls that fail end up in a ResolveStub.  There is only
one resolve stub built for any given token, even though there may be many call sites that
use that token and many distinct <this> types that are used in the calling call frames.  A resolve stub
actually has two entry points, one for polymorphic call sites and one for dispatch stubs that fail on their
expectedMT test.  There is a third part of the resolve stub that enters the ee when a decision should
be made about changing the call site.  Therefore, we have defined the resolve stub as three distinct pieces,
even though they are actually allocated as a single contiguous block of memory.  These pieces are:

A ResolveStub has two entry points:

FailEntry - where the dispatch stub goes if the expectedMT test fails.  This piece of the stub does
a check to see how often we are actually failing.  If failures are frequent, control transfers to the
patch piece to cause the call site to be changed from a mostly monomorphic call site
(calls dispatch stub) to a polymorphic call site (calls resolve stub).  If failures are rare, control
transfers to the resolve piece (see ResolveStub).  The failEntryPoint decrements a counter
every time it is entered.  The ee at various times will add a large chunk to the counter.

ResolveEntry - does a lookup in a cache by hashing the actual type of the calling frame's
<this> and the token identifying the (contract,method) pair desired.  If found, control is transferred
to the method implementation.  If not found in the cache, the token is pushed and the ee is entered via
the ResolveWorkerStub to do a full lookup and eventual transfer to the correct method implementation.  Since
there is a different resolve stub for every token, the token can be inlined and the token can be pre-hashed.
The effectiveness of this approach is highly sensitive to the effectiveness of the hashing algorithm used,
as well as its speed.  It turns out it is very important to make the hash function sensitive to all
of the bits of the method table, as method tables are laid out in memory in a very non-random way.  Before
making any changes to the code sequences here, it is very important to measure and tune them, as perf
can vary greatly, in unexpected ways, with seemingly minor changes.

Implementation note - Order, choice of instructions, and branch directions
should be carefully tuned since they can have an inordinate effect on performance.  Particular
attention needs to be paid to the effects on the BTB and branch prediction, both in the small
and in the large, i.e. it needs to run well in the face of BTB overflow--using static predictions.
Note that this stub is called in highly polymorphic cases, but the cache should have been sized
and the hash function chosen to maximize the cache hit case.  Hence the cmp/jcc instructions should
mostly be going down the cache hit route, and it is important that this be statically predicted as such.
Hence the 3 jcc instrs need to be forward jumps.  As structured, there is only one jmp/jcc that typically
gets put in the BTB since all the others typically fall straight through.  Minimizing potential BTB entries
is important. */
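
/* Illustrative C-like sketch only (pThis and e are hypothetical local names, not generated code)
   of what the resolve entry point described above does:

       size_t mt   = *(size_t*)pThis;                                         // mov eax,[ecx]
       size_t hash = ((mt >> CALL_STUB_CACHE_NUM_BITS) + mt) ^ _hashedToken;  // shr/add/xor; _hashedToken and mask are pre-shifted byte offsets
       ResolveCacheElem* e = *(ResolveCacheElem**)(_cacheAddress + (hash & mask)); // and/mov
       if (e->pMT == mt && e->token == _token)
           return e->target;   // cache hit: the stub actually tail-jumps to the method implementation
       // cache miss: fall through to _slowEntryPoint, which pushes _token and enters the EE
*/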

struct ResolveStub 
{
    inline PCODE failEntryPoint()           { LIMITED_METHOD_CONTRACT; return (PCODE)&_failEntryPoint[0];    }
    inline PCODE resolveEntryPoint()        { LIMITED_METHOD_CONTRACT; return (PCODE)&_resolveEntryPoint[0]; }
    inline PCODE slowEntryPoint()           { LIMITED_METHOD_CONTRACT; return (PCODE)&_slowEntryPoint[0]; }

    inline INT32* pCounter()                { LIMITED_METHOD_CONTRACT; return _pCounter; }
    inline UINT32 hashedToken()             { LIMITED_METHOD_CONTRACT; return _hashedToken >> LOG2_PTRSIZE;    }
    inline size_t cacheAddress()            { LIMITED_METHOD_CONTRACT; return _cacheAddress;   }
    inline size_t token()                   { LIMITED_METHOD_CONTRACT; return _token;          }
    inline size_t size()                    { LIMITED_METHOD_CONTRACT; return sizeof(ResolveStub); }

private:
    friend struct ResolveHolder;

    // ResolveStub::_failEntryPoint expects:
    //       ecx: object (the "this" pointer)
    //       eax: siteAddrForRegisterIndirect if this is a RegisterIndirect dispatch call
    BYTE   _failEntryPoint [2];     // 83 2d        sub
    INT32* _pCounter;               // xx xx xx xx          [counter],
    BYTE   part0 [2];               // 01                   01
                                    // 7c           jl
    BYTE toPatcher;                 // xx                   backpatcher     ;must be forward jump, for perf reasons
                                    //                                      ;fall into the resolver stub

    // ResolveStub::_resolveEntryPoint expects:
    //       ecx: object (the "this" pointer)
    //       eax: siteAddrForRegisterIndirect if this is a RegisterIndirect dispatch call
    BYTE    _resolveEntryPoint[6];  // 50           push    eax             ;save siteAddrForRegisterIndirect - this may be an indirect call
                                    // 8b 01        mov     eax,[ecx]       ;get the method table from the "this" pointer. This is the place
                                    //                                      ;    where we are going to fault on null this. If you change it,
                                    //                                      ;    change also AdjustContextForVirtualStub in excep.cpp!!!
                                    // 52           push    edx            
                                    // 8b d0        mov     edx, eax
    BYTE    part1 [6];              // c1 e8 0C     shr     eax,12          ;we are adding upper bits into lower bits of mt
                                    // 03 c2        add     eax,edx
                                    // 35           xor     eax,
    UINT32  _hashedToken;           // xx xx xx xx              hashedToken ;along with pre-hashed token
    BYTE    part2 [1];              // 25           and     eax,
    size_t mask;                    // xx xx xx xx              cache_mask
    BYTE part3 [2];                 // 8b 80        mov     eax, [eax+
    size_t  _cacheAddress;          // xx xx xx xx                lookupCache]
#ifdef STUB_LOGGING
    BYTE cntr1[2];                  // ff 05        inc
    size_t* c_call;                 // xx xx xx xx          [call_cache_counter]
#endif //STUB_LOGGING 
    BYTE part4 [2];                 // 3b 10        cmp     edx,[eax+
    // BYTE mtOffset;               //                          ResolveCacheElem.pMT]
    BYTE part5 [1];                 // 75           jne
    BYTE toMiss1;                   // xx                   miss            ;must be forward jump, for perf reasons
    BYTE part6 [2];                 // 81 78        cmp     [eax+
    BYTE tokenOffset;               // xx                        ResolveCacheElem.token],
    size_t  _token;                 // xx xx xx xx              token
    BYTE part7 [1];                 // 75           jne
    BYTE toMiss2;                   // xx                   miss            ;must be forward jump, for perf reasons
    BYTE part8 [2];                 // 8B 40 xx     mov     eax,[eax+
    BYTE targetOffset;              //                          ResolveCacheElem.target]
    BYTE part9 [6];                 // 5a           pop     edx
                                    // 83 c4 04     add     esp,4           ;throw away siteAddrForRegisterIndirect - we don't need it now
                                    // ff e0        jmp     eax
                                    //         miss:
    BYTE    miss [1];               // 5a           pop     edx             ; don't pop siteAddrForRegisterIndirect - leave it on the stack for use by ResolveWorkerChainLookupAsmStub and/or ResolveWorkerAsmStub
    BYTE    _slowEntryPoint[1];     // 68           push
    size_t  _tokenPush;             // xx xx xx xx          token
#ifdef STUB_LOGGING
    BYTE cntr2[2];                  // ff 05        inc
    size_t* c_miss;                 // xx xx xx xx          [miss_cache_counter]
#endif //STUB_LOGGING
    BYTE part10 [1];                // e9           jmp
    DISPL   _resolveWorkerDispl;    // xx xx xx xx          resolveWorker == ResolveWorkerChainLookupAsmStub or ResolveWorkerAsmStub
    BYTE  patch[1];                 // e8           call
    DISPL _backpatcherDispl;        // xx xx xx xx          backpatcherWorker  == BackPatchWorkerAsmStub
    BYTE  part11 [1];               // eb           jmp
    BYTE toResolveStub;             // xx                   resolveStub, i.e. go back to _resolveEntryPoint
};

/* ResolveHolders are the containers for ResolveStubs.  They provide
for any alignment of the stubs as necessary.  The stubs are placed in a hash table keyed by
the token for which they are built.  Efficiency of access requires that this token be aligned.
For now, we have copied that field into the ResolveHolder itself; if the resolve stub is arranged such that
any of its inlined tokens (non-prehashed) is aligned, then the token field in the ResolveHolder
is not needed. */
struct ResolveHolder
{
    static void  InitializeStatic();

    void  Initialize(PCODE resolveWorkerTarget, PCODE patcherTarget, 
                     size_t dispatchToken, UINT32 hashedToken,
                     void * cacheAddr, INT32 * counterAddr);

    ResolveStub* stub()      { LIMITED_METHOD_CONTRACT;  return &_stub; }

    static ResolveHolder*  FromFailEntry(PCODE failEntry);
    static ResolveHolder*  FromResolveEntry(PCODE resolveEntry);

private:
    //align _token in resolve stub

    BYTE align[(sizeof(void*)-((offsetof(ResolveStub,_token))%sizeof(void*)))%sizeof(void*)
#ifdef STUB_LOGGING // This turns out to be zero-sized in stub_logging case, and is an error. So round up.
               +sizeof(void*)
#endif
              ];

    ResolveStub _stub;

//#ifdef STUB_LOGGING // This turns out to be zero-sized in non stub_logging case, and is an error. So remove
    BYTE pad[(sizeof(void*)-((sizeof(ResolveStub))%sizeof(void*))+offsetof(ResolveStub,_token))%sizeof(void*)];	//fill out DWORD
//#endif
};
#include <poppack.h>


#ifdef DECLARE_DATA

#ifndef DACCESS_COMPILE

#ifdef _MSC_VER

#ifdef CHAIN_LOOKUP
/* This will perform a chained lookup of the entry if the initial cache lookup fails

   Entry stack:
            dispatch token
            siteAddrForRegisterIndirect (used only if this is a RegisterIndirect dispatch call)
            return address of caller to stub
        Also, EAX contains the pointer to the first ResolveCacheElem pointer for the calculated
        bucket in the cache table.
*/
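/* Illustrative C-like sketch only (assumed field names) of the chained lookup performed below,
   where firstElem is the ResolveCacheElem* passed in EAX:

       for (ResolveCacheElem* e = firstElem->pNext; e != NULL; e = e->pNext)
       {
           if (e->pMT == mt && e->token == token)
           {
               // hit: decrement the success counter and, if it went negative, reset it and
               // promote this entry to the front of the chain, then tail-jump to e->target
               return e->target;
           }
       }
       // no match anywhere in the chain: fall back to ResolveWorkerAsmStub
*/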
__declspec (naked) void ResolveWorkerChainLookupAsmStub()
{
    enum
    {
        e_token_size                = 4,
        e_indirect_addr_size        = 4,
        e_caller_ret_addr_size      = 4,
    };
    enum
    {
        // this is the part of the stack that is present as we enter this function:
        e_token                     = 0,
        e_indirect_addr             = e_token + e_token_size,
        e_caller_ret_addr           = e_indirect_addr + e_indirect_addr_size,
        e_ret_esp                   = e_caller_ret_addr + e_caller_ret_addr_size,
    };
    enum
    {
        e_spilled_reg_size          = 8,
    };

    // main loop setup
    __asm {
#ifdef STUB_LOGGING
        inc     g_chained_lookup_call_counter
#endif
        // spill regs
        push    edx
        push    ecx
        // move the token into edx
        mov     edx,[esp+e_spilled_reg_size+e_token]
        // move the MT into ecx
        mov     ecx,[ecx]
    }
    main_loop:
    __asm {
        // get the next entry in the chain (don't bother checking the first entry again)
        mov     eax,[eax+e_resolveCacheElem_offset_next]
        // test if we hit a terminating NULL
        test    eax,eax
        jz      fail
        // compare the MT of the ResolveCacheElem
        cmp     ecx,[eax+e_resolveCacheElem_offset_mt]
        jne     main_loop
        // compare the token of the ResolveCacheElem
        cmp     edx,[eax+e_resolveCacheElem_offset_token]
        jne     main_loop
        // success
        // decrement success counter and move entry to start if necessary
        sub     g_dispatch_cache_chain_success_counter,1
        //@TODO: Perhaps this should be a jl for better branch prediction?
        jge     nopromote
        // be quick to reset the counter so we don't get a bunch of contending threads
        add     g_dispatch_cache_chain_success_counter,CALL_STUB_CACHE_INITIAL_SUCCESS_COUNT
        // promote the entry to the beginning of the chain
        mov     ecx,eax
        call    VirtualCallStubManager::PromoteChainEntry
    }
    nopromote:
    __asm {
        // clean up the stack and jump to the target
        pop     ecx
        pop     edx
        add     esp,(e_caller_ret_addr - e_token)
        mov     eax,[eax+e_resolveCacheElem_offset_target]
        jmp     eax
    }
    fail:
    __asm {
#ifdef STUB_LOGGING
        inc     g_chained_lookup_miss_counter
#endif
        // restore registers
        pop     ecx
        pop     edx
        jmp     ResolveWorkerAsmStub
    }
}
#endif 

/* Call the resolver, it will return where we are supposed to go.
   There is a little stack magic here, in that we are entered with one
   of the arguments for the resolver (the token) on the stack already.
   We just push the other arguments, <this> in the call frame and the call site pointer, 
   and call the resolver.
   
   On return we have the stack frame restored to the way it was when the ResolveStub
   was called, i.e. as it was at the actual call site.  The return value from
   the resolver is the address we need to transfer control to, simulating a direct
   call from the original call site.  If we get passed back NULL, it means that the
   resolution failed: an unimplemented method is being called.

   Entry stack:
            dispatch token
            siteAddrForRegisterIndirect (used only if this is a RegisterIndirect dispatch call)
            return address of caller to stub

   Call stack:
            pointer to TransitionBlock
            call site
            dispatch token
            TransitionBlock
                ArgumentRegisters (ecx, edx)
                CalleeSavedRegisters (ebp, ebx, esi, edi)
            return address of caller to stub
   */
__declspec (naked) void ResolveWorkerAsmStub()
{
    CANNOT_HAVE_CONTRACT;

    __asm {
        //
        // The stub arguments are where we want to setup the TransitionBlock. We will
        // setup the TransitionBlock later once we can trash them
        //
        // push ebp-frame
        // push      ebp
        // mov       ebp,esp

        // save CalleeSavedRegisters
        // push      ebx

        push        esi
        push        edi

        // push ArgumentRegisters
        push        ecx
        push        edx

        mov         esi, esp

        push        [esi + 4*4]     // dispatch token
        push        [esi + 5*4]     // siteAddrForRegisterIndirect
        push        esi             // pTransitionBlock

        // Setup up proper EBP frame now that the stub arguments can be trashed
        mov         [esi + 4*4],ebx
        mov         [esi + 5*4],ebp
        lea         ebp, [esi + 5*4]

        // Make the call
        call        VSD_ResolveWorker

        // From here on, mustn't trash eax
        
        // pop ArgumentRegisters
        pop     edx
        pop     ecx

        // pop CalleeSavedRegisters
        pop edi
        pop esi
        pop ebx
        pop ebp
        
        // Now jump to the target
        jmp     eax             // continue on into the method
    }
}

#ifdef FEATURE_REMOTING
/*  For an in-context dispatch, we will find the target. This
    is the slow path; it erects a MachState structure for
    creating a HelperMethodFrame.

    Entry stack:
            dispatch token
            return address of caller to stub

   Call stack:
            pointer to StubDispatchFrame
            call site
            dispatch token
            StubDispatchFrame
                GSCookie
                negspace
                vptr
                datum
                ArgumentRegisters (ecx, edx)
                CalleeSavedRegisters (ebp, ebx, esi, edi)
            return address of caller to stub
*/    
__declspec (naked) void InContextTPDispatchAsmStub()
{
    CANNOT_HAVE_CONTRACT;

    __asm {
        // Pop dispatch token
        pop         eax

        // push ebp-frame
        push        ebp
        mov         ebp,esp

        // save CalleeSavedRegisters
        push        ebx
        push        esi
        push        edi

        // push ArgumentRegisters
        push        ecx
        push        edx

        mov         esi, esp

        push        eax                     // token
        push        esi                     // pTransitionContext

        // Make the call
        call    VSD_GetTargetForTPWorker

        // From here on, mustn't trash eax
        
        // pop ArgumentRegisters
        pop     edx
        pop     ecx

        // pop CalleeSavedRegisters
        pop edi
        pop esi
        pop ebx
        pop ebp

        // Now jump to the target
        jmp     eax             // continue on into the method
    }
}

/*  For an in-context dispatch, we will try to find the target in
    the resolve cache. If this fails, we will jump to the full
    version of InContextTPDispatchAsmStub
    
    Entry stack:
        dispatch slot number of interface MD
        caller return address
    ECX: this object
*/    
__declspec (naked) void InContextTPQuickDispatchAsmStub()
{
    CANNOT_HAVE_CONTRACT;

    __asm {
        // Spill registers
        push        ecx
        push        edx

        // Arg 2 -  token
        mov         eax, [esp + 8]
        push        eax

        // Arg 1 - this
        push        ecx

        // Make the call
        call        VSD_GetTargetForTPWorkerQuick

        // Restore registers
        pop         edx
        pop         ecx

        // Test to see if we found a target
        test        eax, eax
        jnz         TargetFound

        // If no target, jump to the slow worker
        jmp         InContextTPDispatchAsmStub

    TargetFound:
        // We got a target, so pop off the token and jump to it
        add         esp,4
        jmp         eax
    }
}
#endif // FEATURE_REMOTING

/* Call the call site back patcher.  The fail stub piece of the resolver is being
called too often, i.e. dispatch stubs are failing the expectedMT test too often.
This stub wraps the call to the BackPatchWorker to take care of any stack magic
needed.
*/
__declspec (naked) void BackPatchWorkerAsmStub()
{
    CANNOT_HAVE_CONTRACT;

    __asm {
        push EBP
        mov ebp,esp
        push EAX        // it may contain siteAddrForRegisterIndirect
        push ECX
        push EDX
        push EAX        //  push any indirect call address as the second arg to BackPatchWorker
        push [EBP+8]    //  and push return address as the first arg to BackPatchWorker
        call VirtualCallStubManager::BackPatchWorkerStatic
        pop EDX
        pop ECX
        pop EAX
        mov esp,ebp
        pop ebp
        ret
    }
}

#endif // _MSC_VER

#ifdef _DEBUG
//
// This function verifies that a pointer to an indirection cell lives inside a delegate object.
// In the delegate case the indirection cell is held by the delegate itself in _methodPtrAux; when the delegate's Invoke is
// called, the shuffle thunk is invoked first and that will call into the virtual dispatch stub.
// Before control is given to the virtual dispatch stub, a pointer to the indirection cell (thus an interior pointer to the delegate)
// is pushed in EAX.
//
BOOL isDelegateCall(BYTE *interiorPtr)
{
    LIMITED_METHOD_CONTRACT;

    if (GCHeap::GetGCHeap()->IsHeapPointer((void*)interiorPtr))
    {
        Object *delegate = (Object*)(interiorPtr - DelegateObject::GetOffsetOfMethodPtrAux());
        VALIDATEOBJECTREF(ObjectToOBJECTREF(delegate));
        _ASSERTE(delegate->GetMethodTable()->IsDelegate());

        return TRUE;
    }
    return FALSE;
}
#endif

StubCallSite::StubCallSite(TADDR siteAddrForRegisterIndirect, PCODE returnAddr)
{
    LIMITED_METHOD_CONTRACT;

    // Not used
    // if (isCallRelative(returnAddr))
    // {
    //      m_siteAddr = returnAddr - sizeof(DISPL);
    // }
    // else
    if (isCallRelativeIndirect((BYTE *)returnAddr))
    {
        m_siteAddr = *dac_cast<PTR_PTR_PCODE>(returnAddr - sizeof(PCODE));
    }
    else
    {
        _ASSERTE(isCallRegisterIndirect((BYTE *)returnAddr) || isDelegateCall((BYTE *)siteAddrForRegisterIndirect));
        m_siteAddr = dac_cast<PTR_PCODE>(siteAddrForRegisterIndirect);
    }
}

// the special return address for VSD tailcalls
extern "C" void STDCALL JIT_TailCallReturnFromVSD();

PCODE StubCallSite::GetCallerAddress()
{
    LIMITED_METHOD_CONTRACT; 
    if (m_returnAddr != (PCODE)JIT_TailCallReturnFromVSD)
        return m_returnAddr;

    // Find the tailcallframe in the frame chain and get the actual caller from the first TailCallFrame
    return TailCallFrame::FindTailCallFrame(GetThread()->GetFrame())->GetCallerAddress();
}

#ifdef STUB_LOGGING
extern size_t g_lookup_inline_counter;
extern size_t g_mono_call_counter;
extern size_t g_mono_miss_counter;
extern size_t g_poly_call_counter;
extern size_t g_poly_miss_counter;
#endif

/* Template used to generate the stub.  We generate a stub by allocating a block of
   memory, copying the template over it, and then updating the specific fields that need
   to be changed.
*/
LookupStub lookupInit;

void LookupHolder::InitializeStatic()
{
    static_assert_no_msg(((offsetof(LookupStub, _token)+offsetof(LookupHolder, _stub)) % sizeof(void*)) == 0);
    static_assert_no_msg((sizeof(LookupHolder) % sizeof(void*)) == 0);

    lookupInit._entryPoint [0]     = 0x50;
    lookupInit._entryPoint [1]     = 0x68;
    static_assert_no_msg(sizeof(lookupInit._entryPoint) == 2);
    lookupInit._token              = 0xcccccccc;
#ifdef STUB_LOGGING
    lookupInit.cntr2 [0]           = 0xff;
    lookupInit.cntr2 [1]           = 0x05;
    static_assert_no_msg(sizeof(lookupInit.cntr2) == 2);
    lookupInit.c_lookup            = &g_call_lookup_counter;
#endif //STUB_LOGGING 
    lookupInit.part2 [0]           = 0xe9;
    static_assert_no_msg(sizeof(lookupInit.part2) == 1);
    lookupInit._resolveWorkerDispl = 0xcccccccc;
}

void  LookupHolder::Initialize(PCODE resolveWorkerTarget, size_t dispatchToken)
{
    _stub = lookupInit;

    //fill in the stub specific fields
    //@TODO: Get rid of this duplication of data.
    _stub._token              = dispatchToken;
    _stub._resolveWorkerDispl = resolveWorkerTarget - ((PCODE) &_stub._resolveWorkerDispl + sizeof(DISPL));
}
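
/* Illustrative note: a rel32 jmp/call encodes its target relative to the end of the displacement
   field (i.e. the address of the next instruction).  Assuming a holder at some address 'holder',
   the value written above therefore satisfies:

       resolveWorkerTarget == (PCODE)&holder->_stub._resolveWorkerDispl + sizeof(DISPL) + holder->_stub._resolveWorkerDispl;

   which is the same relationship DispatchStub::implTarget()/failTarget() use to recover their
   targets from the stored displacements.
*/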

LookupHolder* LookupHolder::FromLookupEntry(PCODE lookupEntry)
{ 
    LIMITED_METHOD_CONTRACT;
    LookupHolder* lookupHolder = (LookupHolder*) ( lookupEntry - offsetof(LookupHolder, _stub) - offsetof(LookupStub, _entryPoint)  );
    //    _ASSERTE(lookupHolder->_stub._entryPoint[0] == lookupInit._entryPoint[0]);
    return lookupHolder;
}


/* Template used to generate the stub.  We generate a stub by allocating a block of
   memory, copying the template over it, and then updating the specific fields that need
   to be changed.
*/
DispatchStub dispatchInit;

void DispatchHolder::InitializeStatic()
{
    // Check that _expectedMT is aligned in the DispatchHolder
    static_assert_no_msg(((offsetof(DispatchHolder, _stub) + offsetof(DispatchStub,_expectedMT)) % sizeof(void*)) == 0);
    static_assert_no_msg((sizeof(DispatchHolder) % sizeof(void*)) == 0);

#ifndef STUB_LOGGING
    dispatchInit._entryPoint [0] = 0x81;
    dispatchInit._entryPoint [1] = 0x39;
    static_assert_no_msg(sizeof(dispatchInit._entryPoint) == 2);

    dispatchInit._expectedMT     = 0xcccccccc;
    dispatchInit.jmpOp1 [0]      = 0x0f;
    dispatchInit.jmpOp1 [1]      = 0x85;
    static_assert_no_msg(sizeof(dispatchInit.jmpOp1) == 2);

    dispatchInit._failDispl      = 0xcccccccc;
    dispatchInit.jmpOp2          = 0xe9;
    dispatchInit._implDispl      = 0xcccccccc;
#else //STUB_LOGGING
    dispatchInit._entryPoint [0] = 0xff;
    dispatchInit._entryPoint [1] = 0x05;
    static_assert_no_msg(sizeof(dispatchInit._entryPoint) == 2);

    dispatchInit.d_call          = &g_mono_call_counter;
    dispatchInit.cmpOp [0]       = 0x81;
    dispatchInit.cmpOp [1]       = 0x39;              
    static_assert_no_msg(sizeof(dispatchInit.cmpOp) == 2);

    dispatchInit._expectedMT     = 0xcccccccc;
    dispatchInit.jmpOp1 [0]      = 0x0f;
    dispatchInit.jmpOp1 [1]      = 0x84;
    static_assert_no_msg(sizeof(dispatchInit.jmpOp1) == 2);

    dispatchInit._implDispl      = 0xcccccccc;
    dispatchInit.fail [0]        = 0xff;
    dispatchInit.fail [1]        = 0x05;
    static_assert_no_msg(sizeof(dispatchInit.fail) == 2);

    dispatchInit.d_miss          = &g_mono_miss_counter;
    dispatchInit.jmpFail         = 0xe9;
    dispatchInit._failDispl      = 0xcccccccc;
#endif //STUB_LOGGING 
}

void  DispatchHolder::Initialize(PCODE implTarget, PCODE failTarget, size_t expectedMT)
{
    _stub = dispatchInit;

    //fill in the stub specific fields
    _stub._expectedMT  = (size_t) expectedMT;
    _stub._failDispl   = failTarget - ((PCODE) &_stub._failDispl + sizeof(DISPL));
    _stub._implDispl   = implTarget - ((PCODE) &_stub._implDispl + sizeof(DISPL));
}

DispatchHolder* DispatchHolder::FromDispatchEntry(PCODE dispatchEntry)
{ 
    LIMITED_METHOD_CONTRACT;
    DispatchHolder* dispatchHolder = (DispatchHolder*) ( dispatchEntry - offsetof(DispatchHolder, _stub) - offsetof(DispatchStub, _entryPoint) );
    //    _ASSERTE(dispatchHolder->_stub._entryPoint[0] == dispatchInit._entryPoint[0]);
    return dispatchHolder;
}


/* Template used to generate the stub.  We generate a stub by allocating a block of
   memory, copying the template over it, and then updating the specific fields that need
   to be changed.
*/

ResolveStub resolveInit;

void ResolveHolder::InitializeStatic()
{
    //Check that _token is aligned in ResolveHolder
    static_assert_no_msg(((offsetof(ResolveHolder, _stub) + offsetof(ResolveStub, _token)) % sizeof(void*)) == 0);
    static_assert_no_msg((sizeof(ResolveHolder) % sizeof(void*)) == 0);

    resolveInit._failEntryPoint [0]    = 0x83;
    resolveInit._failEntryPoint [1]    = 0x2d;
    static_assert_no_msg(sizeof(resolveInit._failEntryPoint) == 2);

    resolveInit._pCounter              = (INT32 *) (size_t) 0xcccccccc;
    resolveInit.part0 [0]              = 0x01;
    resolveInit.part0 [1]              = 0x7c;
    static_assert_no_msg(sizeof(resolveInit.part0) == 2);

    resolveInit.toPatcher              = (offsetof(ResolveStub, patch) - (offsetof(ResolveStub, toPatcher) + 1)) & 0xFF;
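    // Note: a rel8 short jump is relative to the byte following its 1-byte displacement, hence the
    // "+ 1" above; toMiss1, toMiss2 and toResolveStub below are computed the same way.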

    resolveInit._resolveEntryPoint [0] = 0x50;
    resolveInit._resolveEntryPoint [1] = 0x8b;
    resolveInit._resolveEntryPoint [2] = 0x01;
    resolveInit._resolveEntryPoint [3] = 0x52;
    resolveInit._resolveEntryPoint [4] = 0x8b;
    resolveInit._resolveEntryPoint [5] = 0xd0;
    static_assert_no_msg(sizeof(resolveInit._resolveEntryPoint) == 6);

    resolveInit.part1 [0]              = 0xc1;
    resolveInit.part1 [1]              = 0xe8;
    resolveInit.part1 [2]              = CALL_STUB_CACHE_NUM_BITS;
    resolveInit.part1 [3]              = 0x03;
    resolveInit.part1 [4]              = 0xc2;
    resolveInit.part1 [5]              = 0x35;
    static_assert_no_msg(sizeof(resolveInit.part1) == 6);

    resolveInit._hashedToken           = 0xcccccccc;
    resolveInit.part2 [0]              = 0x25;
    static_assert_no_msg(sizeof(resolveInit.part2) == 1);

    resolveInit.mask                   = (CALL_STUB_CACHE_MASK << LOG2_PTRSIZE);
    resolveInit.part3 [0]              = 0x8b;
    resolveInit.part3 [1]              = 0x80;
    static_assert_no_msg(sizeof(resolveInit.part3) == 2);

    resolveInit._cacheAddress          = 0xcccccccc;
#ifdef STUB_LOGGING
    resolveInit.cntr1 [0]              = 0xff;
    resolveInit.cntr1 [1]              = 0x05;
    static_assert_no_msg(sizeof(resolveInit.cntr1) == 2);

    resolveInit.c_call                 = &g_poly_call_counter;
#endif //STUB_LOGGING 
    resolveInit.part4 [0]              = 0x3b;
    resolveInit.part4 [1]              = 0x10;
    static_assert_no_msg(sizeof(resolveInit.part4) == 2);

    // resolveInit.mtOffset               = offsetof(ResolveCacheElem,pMT) & 0xFF;
    static_assert_no_msg(offsetof(ResolveCacheElem,pMT) == 0);

    resolveInit.part5 [0]              = 0x75;
    static_assert_no_msg(sizeof(resolveInit.part5) == 1);

    resolveInit.toMiss1                = offsetof(ResolveStub,miss)-(offsetof(ResolveStub,toMiss1)+1);

    resolveInit.part6 [0]              = 0x81;
    resolveInit.part6 [1]              = 0x78;
    static_assert_no_msg(sizeof(resolveInit.part6) == 2);

    resolveInit.tokenOffset            = offsetof(ResolveCacheElem,token) & 0xFF;    

    resolveInit._token                 = 0xcccccccc;

    resolveInit.part7 [0]              = 0x75;
    static_assert_no_msg(sizeof(resolveInit.part7) == 1);

    resolveInit.part8 [0]              = 0x8b;
    resolveInit.part8 [1]              = 0x40;
    static_assert_no_msg(sizeof(resolveInit.part8) == 2);

    resolveInit.targetOffset           = offsetof(ResolveCacheElem,target) & 0xFF;

    resolveInit.toMiss2                = offsetof(ResolveStub,miss)-(offsetof(ResolveStub,toMiss2)+1);

    resolveInit.part9 [0]              = 0x5a;
    resolveInit.part9 [1]              = 0x83;
    resolveInit.part9 [2]              = 0xc4;
    resolveInit.part9 [3]              = 0x04;
    resolveInit.part9 [4]              = 0xff;
    resolveInit.part9 [5]              = 0xe0;
    static_assert_no_msg(sizeof(resolveInit.part9) == 6);

    resolveInit.miss [0]               = 0x5a;
//    resolveInit.miss [1]               = 0xb8;
//    resolveInit._hashedTokenMov        = 0xcccccccc;
    resolveInit._slowEntryPoint [0]    = 0x68;
    resolveInit._tokenPush             = 0xcccccccc;
#ifdef STUB_LOGGING
    resolveInit.cntr2 [0]              = 0xff;
    resolveInit.cntr2 [1]              = 0x05;
    resolveInit.c_miss                 = &g_poly_miss_counter;
#endif //STUB_LOGGING 
    resolveInit.part10 [0]             = 0xe9;
    resolveInit._resolveWorkerDispl    = 0xcccccccc;

    resolveInit.patch [0]              = 0xe8;
    resolveInit._backpatcherDispl      = 0xcccccccc;
    resolveInit.part11 [0]             = 0xeb;
    resolveInit.toResolveStub          = (offsetof(ResolveStub, _resolveEntryPoint) - (offsetof(ResolveStub, toResolveStub) + 1)) & 0xFF;
}

void  ResolveHolder::Initialize(PCODE resolveWorkerTarget, PCODE patcherTarget, 
                                size_t dispatchToken, UINT32 hashedToken,
                                void * cacheAddr, INT32 * counterAddr)
{
    _stub = resolveInit;

    //fill in the stub specific fields
    _stub._pCounter           = counterAddr;
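    // Pre-shift the hashed token by LOG2_PTRSIZE: the stub xors it directly into what becomes a
    // byte offset into the cache (resolveInit.mask is CALL_STUB_CACHE_MASK << LOG2_PTRSIZE, and
    // hashedToken() shifts it back down when reading the field).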
    _stub._hashedToken        = hashedToken << LOG2_PTRSIZE;
    _stub._cacheAddress       = (size_t) cacheAddr;
    _stub._token              = dispatchToken;
//    _stub._hashedTokenMov     = hashedToken;
    _stub._tokenPush          = dispatchToken;
    _stub._resolveWorkerDispl = resolveWorkerTarget - ((PCODE) &_stub._resolveWorkerDispl + sizeof(DISPL));
    _stub._backpatcherDispl   = patcherTarget       - ((PCODE) &_stub._backpatcherDispl   + sizeof(DISPL));
}

ResolveHolder* ResolveHolder::FromFailEntry(PCODE failEntry)
{ 
    LIMITED_METHOD_CONTRACT;
    ResolveHolder* resolveHolder = (ResolveHolder*) ( failEntry - offsetof(ResolveHolder, _stub) - offsetof(ResolveStub, _failEntryPoint) );
    //    _ASSERTE(resolveHolder->_stub._resolveEntryPoint[0] == resolveInit._resolveEntryPoint[0]);
    return resolveHolder;
}

ResolveHolder* ResolveHolder::FromResolveEntry(PCODE resolveEntry)
{ 
    LIMITED_METHOD_CONTRACT;
    ResolveHolder* resolveHolder = (ResolveHolder*) ( resolveEntry - offsetof(ResolveHolder, _stub) - offsetof(ResolveStub, _resolveEntryPoint) );
    //    _ASSERTE(resolveHolder->_stub._resolveEntryPoint[0] == resolveInit._resolveEntryPoint[0]);
    return resolveHolder;
}

#endif // DACCESS_COMPILE

VirtualCallStubManager::StubKind VirtualCallStubManager::predictStubKind(PCODE stubStartAddress)
{
    SUPPORTS_DAC;
#ifdef DACCESS_COMPILE

    return SK_BREAKPOINT;  // Dac always uses the slower lookup

#else

    StubKind stubKind = SK_UNKNOWN;

    EX_TRY
    {
        // If stubStartAddress is completely bogus, then this might AV,
        // so we protect it with SEH. An AV here is OK.
        AVInRuntimeImplOkayHolder AVOkay;

        WORD firstWord = *((WORD*) stubStartAddress);
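
        // The first two code bytes identify the stub kind (WORD read little-endian; see the stub layouts above):
        //   DispatchStub begins 81 39 (cmp [ecx], imm32)         -> firstWord == 0x3981
        //   LookupStub   begins 50 68 (push eax / push imm32)    -> firstWord == 0x6850
        //   ResolveStub  begins 50 8b (push eax / mov eax,[ecx]) -> firstWord == 0x8b50
        //   With STUB_LOGGING, DispatchStub begins ff 05 (inc [counter]) -> firstWord == 0x05ff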

#ifndef STUB_LOGGING
        if (firstWord == 0x3981)
#else //STUB_LOGGING
        if (firstWord == 0x05ff)
#endif
        {
            stubKind = SK_DISPATCH;
        }
        else if (firstWord == 0x6850)
        {
            stubKind = SK_LOOKUP;
        }
        else if (firstWord == 0x8b50)
        {
            stubKind = SK_RESOLVE;
        }
        else
        {
            BYTE firstByte  = ((BYTE*) stubStartAddress)[0];
            BYTE secondByte = ((BYTE*) stubStartAddress)[1];

            if ((firstByte  == X86_INSTR_INT3) ||
                (secondByte == X86_INSTR_INT3))
            {
                stubKind = SK_BREAKPOINT;
            }
        }
    }
    EX_CATCH
    {
        stubKind = SK_UNKNOWN;
    }
    EX_END_CATCH(SwallowAllExceptions);        

    return stubKind;

#endif // DACCESS_COMPILE
}

#endif //DECLARE_DATA

#endif // _VIRTUAL_CALL_STUB_X86_H