summaryrefslogtreecommitdiff
path: root/src/vm/threadsuspend.h
blob: a5fb30238de3d945206083979bf3e5f46b706b7f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
// threadsuspend.h

#ifndef _THREAD_SUSPEND_H_
#define _THREAD_SUSPEND_H_

#if defined(TIME_SUSPEND) || defined(GC_STATS)

// Unit selector accepted by MinMaxTot::DisplayAndUpdate, where it defaults to usec.
// NOTE(review): presumably microseconds / milliseconds / seconds -- confirm against the
// DisplayAndUpdate implementation.
enum timeUnit { usec, msec, sec };

// running aggregations
struct MinMaxTot
{
    DWORD minVal, maxVal, totVal;

    void Accumulate(DWORD time)
    {
        LIMITED_METHOD_CONTRACT;
        if (time < minVal || minVal == 0)
            minVal = time;

        if (time > maxVal)
            maxVal = time;

        // We are supposed to anticipate overflow and clear our totals
        // However we still see this assert and for now, let's ignore it...
        // _ASSERTE(((DWORD) (totVal + time)) > ((DWORD) totVal));
        if (((DWORD) (totVal + time)) > ((DWORD) totVal))
            totVal += time;
    }

    void Reset()
    {
        LIMITED_METHOD_CONTRACT;
        minVal = maxVal = 0;
    }

    void DisplayAndUpdate(FILE* logFile, __in_z const char *pName, MinMaxTot *pLastOne, int fullCount, int priorCount, timeUnit=usec);
};

// A note about timings.  We use QueryPerformanceCounter to measure all timings in raw counter
// units.  During initialization, we compute a divisor to convert those timings into microseconds.
// This means that we can accumulate about 4,000 seconds (2^32 microseconds, a little over one
// hour) of GC time into 32-bit quantities before we must reinitialize.

// A note about performance: derived classes have taken a dependency on cntDisplay being the first
// field of this class, following the vtable*. When this is violated a compile time assert will fire.
//
// Abstract base for the statistics collectors declared in this header (SuspendStatistics derives
// from it).  It carries the display-interval bookkeeping and the time-conversion state; each
// concrete collector supplies Initialize and DisplayAndUpdate.
struct StatisticsBase
{
    // display the statistics every so many seconds.
    static DWORD secondsToDisplay;

    // we must re-initialize after an hour of GC time, to avoid overflow.  It's more convenient to
    // re-initialize after an hour of wall-clock time, instead
    // (do not move this field: see the layout note above the struct)
    int cntDisplay;

    // convert all timings into microseconds
    DWORD divisor;
    // NOTE(review): both are only declared here.  Presumably GetTime returns the current
    // timestamp and GetElapsed the interval between two such timestamps -- confirm in the
    // implementation file.
    DWORD GetTime();
    static DWORD GetElapsed(DWORD start, DWORD stop);

    // we want to print statistics every 10 seconds - this is to remember the start of the 10 sec interval.
    DWORD startTick;

    // derived classes must call this regularly (from a logical "end of a cycle")
    void RollOverIfNeeded();

    // Supplied by each concrete statistics type.
    virtual void Initialize() = 0;
    virtual void DisplayAndUpdate() = 0;
};

#endif // defined(TIME_SUSPEND) || defined(GC_STATS)

#ifdef TIME_SUSPEND

// Counters and timings collected around SuspendEE / RestartEE.  Only compiled when TIME_SUSPEND
// is defined.  Timings are kept as MinMaxTot aggregates; events are kept as plain counts.
struct SuspendStatistics
    : public StatisticsBase
{
    // NOTE(review): presumably the path of the file the statistics are written to -- confirm.
    static WCHAR* logFileName;

    // number of times we call SuspendEE, RestartEE
    int cntSuspends, cntRestarts;

    // NOTE(review): going by the names, these split the suspension count by whether a background
    // GC was in progress and by whether the suspension was for a GC at all -- confirm against
    // EndSuspend(bForGC).
    int cntSuspendsInBGC, cntNonGCSuspends, cntNonGCSuspendsInBGC;

    // Times for current suspension & restart
    DWORD startSuspend, startRestart;

    // min, max and total time spent performing a Suspend, a Restart, or Paused from the start of
    // a Suspend to the end of a Restart.  We can compute 'avg' using 'cnt' and 'tot' values.
    MinMaxTot suspend, restart, paused;

    // We know there can be contention on acquiring the ThreadStoreLock.
    MinMaxTot acquireTSL, releaseTSL;

    // And if we OS suspend a thread that is blocking or perhaps throwing an exception and is therefore
    // stuck in the kernel, it could take approximately a second.  So track the time taken for OS
    // suspends
    MinMaxTot osSuspend;

    // And if we place a hijack, we need to crawl a stack to do so.
    MinMaxTot crawl;

    // And waiting can be a significant part of the total suspension time.
    MinMaxTot wait;

    ///////////////////////////////////////////////////////////////////////////////////////////////
    // There are some interesting events that are worth counting, because they show where the time is going:

    // number of times we waited on g_pGCSuspendEvent while trying to suspend the EE
    int cntWaits;

    // and the number of times those Waits timed out rather than being signalled by a cooperating thread
    int cntWaitTimeouts;

    // number of times we did an OS (or hosted) suspend or resume on a thread
    int cntOSSuspendResume;

    // number of times we crawled a stack for a hijack
    int cntHijackCrawl;

    // and the number of times the hijack actually trapped a thread for us
    int cntHijackTrap;

    // the number of times we redirected a thread in fully interruptible code, by rewriting its EIP
    // so it will throw to a blocking point
    int cntRedirections;

    ///////////////////////////////////////////////////////////////////////////////////////////////
    // And there are some "failure" cases that should never or almost never occur.

    // number of times we have a collision between e.g. Debugger suspension & GC suspension.
    // In these cases, everyone yields to the GC but at some cost.
    int cntCollideRetry;

    // number of times the OS or Host was unable to ::SuspendThread a thread for us.  This count should be
    // approximately 0.
    int cntFailedSuspends;

    // number of times we were unable to redirect a thread by rewriting its register state in a
    // suspended context.  This count should be approximately 0.
    int cntFailedRedirections;

    ///////////////////////////////////////////////////////////////////////////////////////////////
    // Internal mechanism:

    // Overrides of the StatisticsBase pure virtuals.
    virtual void Initialize();
    virtual void DisplayAndUpdate();

    // Public API
    //
    // NOTE(review): only declared here.  Presumably Start*/End* bracket a suspension or a restart
    // and Current* reports the time elapsed since the matching Start* -- confirm in the
    // implementation file.

    void StartSuspend();
    void EndSuspend(BOOL bForGC);
    DWORD CurrentSuspend();

    void StartRestart();
    void EndRestart();
    DWORD CurrentRestart();
};

// Global suspension statistics instances (defined elsewhere).
// NOTE(review): g_LastSuspendStatistics is presumably the snapshot taken at the previous display,
// kept so that per-interval deltas can be reported -- confirm in the implementation file.
extern SuspendStatistics g_SuspendStatistics;
extern SuspendStatistics g_LastSuspendStatistics;

#endif // TIME_SUSPEND

// Thread-context helpers; only the prototypes are visible in this header.
// NOTE(review): going by the names, the first fills *pContext with pThread's register context and
// the second makes sure the OS thread behind hThread is really suspended; the BOOL result is
// presumably TRUE on success -- confirm against the definitions.
BOOL EEGetThreadContext(Thread *pThread, CONTEXT *pContext);
BOOL EnsureThreadIsSuspended(HANDLE hThread, Thread* pThread);

// Static-only interface for suspending and resuming the runtime (the "EE") and for taking the
// thread store lock.  All members are static; Thread and ThreadStore are friends and can reach
// the private state directly.
class ThreadSuspend
{
    friend class Thread;
    friend class ThreadStore;

public:
    // Why the runtime is being suspended.  Passed to SuspendRuntime, SuspendEE, LockThreadStore
    // and UnlockThreadStore.
    typedef enum
    {
        SUSPEND_OTHER                   = 0,
        SUSPEND_FOR_GC                  = 1,
        SUSPEND_FOR_APPDOMAIN_SHUTDOWN  = 2,
        SUSPEND_FOR_REJIT               = 3,
        SUSPEND_FOR_SHUTDOWN            = 4,
        SUSPEND_FOR_DEBUGGER            = 5,
        SUSPEND_FOR_GC_PREP             = 6,
        SUSPEND_FOR_DEBUGGER_SWEEP      = 7,     // This must only be used in Thread::SysSweepThreadsForDebug
        SUSPEND_FOR_PROFILER            = 8
    } SUSPEND_REASON;

private:
    static SUSPEND_REASON    m_suspendReason;    // This contains the reason
                                                 // that the runtime was suspended
    // NOTE(review): presumably the thread currently trying to suspend the runtime for a GC --
    // confirm in the implementation file.
    static Thread* m_pThreadAttemptingSuspendForGC;

public:
    // Suspend / resume all runtime threads.  Only declared here; see the implementation file for
    // the meaning of the HRESULT and of the two BOOL flags.
    static HRESULT SuspendRuntime(ThreadSuspend::SUSPEND_REASON reason);
    static void    ResumeRuntime(BOOL bFinishedGC, BOOL SuspendSucceded);

    // Initialize thread suspension support
    static void    Initialize();

private:
    // Event that is waited on while trying to suspend the EE (the cntWaits / cntWaitTimeouts
    // statistics in SuspendStatistics count those waits).
    static CLREvent * g_pGCSuspendEvent;

    // This is true iff we're currently in the process of suspending threads.  Once the
    // threads have been suspended, this is false.  This is set via an instance of
    // SuspendRuntimeInProgressHolder placed in SuspendRuntime, SysStartSuspendForDebug,
    // and SysSweepThreadsForDebug.  Code outside this class reads this via
    // ThreadSuspend::SysIsSuspendInProgress.
    //
    // *** THERE IS NO SYNCHRONIZATION AROUND SETTING OR READING THIS ***
    // This value is only useful for code that can be more efficient if it has a good guess
    // as to whether we're suspending the runtime.  This is NOT to be used by code that
    // *requires* this knowledge with 100% accuracy in order to behave correctly, unless
    // you add synchronization yourself.  An example of where ThreadSuspend::SysIsSuspendInProgress
    // is used is by the profiler API, in ProfToEEInterfaceImpl::DoStackSnapshot.  The profiler
    // API needs to suspend the target thread whose stack it is about to walk.  But the profiler
    // API should avoid this if the runtime is being suspended.  Otherwise, the thread trying to
    // suspend the runtime (thread A) might get stuck when it tries to suspend the thread
    // executing ProfToEEInterfaceImpl::DoStackSnapshot (thread B), since thread B will be
    // busy trying to suspend the target of the stack walk (thread C).  Bad luck with timing
    // could cause A to try to suspend B over and over again while B is busy suspending C, and
    // then suspending D, etc., assuming the profiler does a lot of stack walks.  This, in turn,
    // could cause the deadlock detection assert in Thread::SuspendThread to fire.  So the
    // moral here is that, if B realizes the runtime is being suspended, it can just fail the stackwalk
    // immediately without trying to do the suspend.  But if B occasionally gets false positives or
    // false negatives from calling ThreadSuspend::SysIsSuspendInProgress, the worst is we might
    // delay the EE suspension a little bit, or we might too eagerly fail from ProfToEEInterfaceImpl::DoStackSnapshot.
    // But there won't be any corruption or AV.  More details on the profiler API scenario in VsWhidbey bug 454936.
    static bool     s_fSuspendRuntimeInProgress;

    static void SetSuspendRuntimeInProgress();
    static void ResetSuspendRuntimeInProgress();

    // Scoped holder built from the Set/Reset pair above.
    typedef StateHolder<ThreadSuspend::SetSuspendRuntimeInProgress, ThreadSuspend::ResetSuspendRuntimeInProgress> SuspendRuntimeInProgressHolder;

public:
    // Unsynchronized read of s_fSuspendRuntimeInProgress; treat the result as a hint only (see
    // the comment on that field).
    static bool SysIsSuspendInProgress() { return s_fSuspendRuntimeInProgress; }

public:
    //suspend all threads
    static void SuspendEE(SUSPEND_REASON reason);
    static void RestartEE(BOOL bFinishedGC, BOOL SuspendSucceded); //resume threads.

    // Take / release the thread store lock.  UnlockThreadStore defaults to bThreadDestroyed =
    // FALSE and reason = SUSPEND_OTHER when called without arguments.
    static void LockThreadStore(ThreadSuspend::SUSPEND_REASON reason);
    static void UnlockThreadStore(BOOL bThreadDestroyed = FALSE,
                                  ThreadSuspend::SUSPEND_REASON reason = ThreadSuspend::SUSPEND_OTHER);

    // Returns the global g_pSuspensionThread.
    static Thread * GetSuspensionThread()
    {
        LIMITED_METHOD_CONTRACT;
        return g_pSuspensionThread;
    }

private:
    // This is used to avoid thread starvation if non-GC threads are competing for
    // the thread store lock when there is a real GC-thread waiting to get in.
    // This is initialized lazily when the first non-GC thread backs out because of
    // a waiting GC thread.  The s_hAbortEvtCache is used to store the handle when
    // it is not being used.
    static CLREventBase *s_hAbortEvt;
    static CLREventBase *s_hAbortEvtCache;

    // NOTE(review): purpose not visible from this header; the name suggests a count related to
    // debugger synchronization -- confirm in the implementation file.
    static LONG m_DebugWillSyncCount;
};

// Scoped holder for the thread store lock.  The constructor acquires the lock through
// ThreadSuspend::LockThreadStore(reason); the destructor releases it through
// ThreadSuspend::UnlockThreadStore().
class ThreadStoreLockHolderWithSuspendReason
{
public:
    // Acquires the thread store lock for the given suspend reason.
    // explicit: a SUSPEND_REASON value must never silently convert into a lock acquisition.
    explicit ThreadStoreLockHolderWithSuspendReason(ThreadSuspend::SUSPEND_REASON reason)
        : m_reason(reason)   // previously left uninitialized
    {
        ThreadSuspend::LockThreadStore(m_reason);
    }

    // Releases the thread store lock.
    ~ThreadStoreLockHolderWithSuspendReason()
    {
        // NOTE(review): the unlock has always been issued with the default arguments
        // (bThreadDestroyed = FALSE, reason = SUSPEND_OTHER) rather than with m_reason.  That is
        // kept as is; confirm whether UnlockThreadStore should receive m_reason instead.
        ThreadSuspend::UnlockThreadStore();
    }

private:
    // Not copyable: a copy's destructor would release the lock a second time.
    // Declared but intentionally never defined.
    ThreadStoreLockHolderWithSuspendReason(const ThreadStoreLockHolderWithSuspendReason&);
    ThreadStoreLockHolderWithSuspendReason& operator=(const ThreadStoreLockHolderWithSuspendReason&);

    // Reason the lock was taken with.
    ThreadSuspend::SUSPEND_REASON m_reason;
};

#endif // _THREAD_SUSPEND_H_