summaryrefslogtreecommitdiff
path: root/src/utilcode/cycletimer.cpp
blob: 8a5b4732190a93d850307190c377480cf116e47d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

#include "stdafx.h"

#include "cycletimer.h"
#include "winbase.h"
#include "winwrap.h"
#include "assert.h"
#include "utilcode.h"

bool CycleTimer::GetThreadCyclesS(unsigned __int64* cycles)
{
    BOOL res = FALSE;
    res = QueryThreadCycleTime(GetCurrentThread(), cycles);
    return res != FALSE;
}

static const int SampleLoopSize = 1000000;

// static
double CycleTimer::CyclesPerSecond()
{
    // Windows does not provide a way of converting cycles to time -- reasonably enough,
    // since the frequency of a machine may vary, due, e.g., to power management.
    // Windows *does* allow you to translate QueryPerformanceCounter counts into time,
    // however.  So we'll assume that the clock speed stayed constant, and measure both the
    // QPC counts and cycles of a short loop, to get a conversion factor.
    LARGE_INTEGER lpFrequency;
    if (!QueryPerformanceFrequency(&lpFrequency)) return 0.0;
    // Otherwise...
    LARGE_INTEGER qpcStart;
    unsigned __int64 cycleStart;
    if (!QueryPerformanceCounter(&qpcStart)) return 0.0;
    if (!GetThreadCyclesS(&cycleStart)) return 0.0;
    volatile int sum = 0;
    for (int k = 0; k < SampleLoopSize; k++)
    {
        sum += k;
    }
    LARGE_INTEGER qpcEnd;
    if (!QueryPerformanceCounter(&qpcEnd)) return 0.0;
    unsigned __int64 cycleEnd;
    if (!GetThreadCyclesS(&cycleEnd)) return 0.0;

    double qpcTicks = ((double)qpcEnd.QuadPart) - ((double)qpcStart.QuadPart);
    double secs = (qpcTicks / ((double)lpFrequency.QuadPart));
    double cycles = ((double)cycleEnd) - ((double)cycleStart);
    return cycles / secs;
}

// static
unsigned __int64 CycleTimer::QueryOverhead()
{
    unsigned __int64 tot = 0;
    unsigned __int64 startCycles;
    unsigned __int64 endCycles;
    const int N = 1000;
    bool b = GetThreadCyclesS(&startCycles); assert(b);
    for (int i = 0; i < N; i++)
    {
        b = GetThreadCyclesS(&endCycles); assert(b);
        tot += (endCycles-startCycles);
        startCycles = endCycles;
    }
    return tot/N;
}

// static
void CycleTimer::InterlockedAddU64(unsigned __int64* loc, unsigned __int64 amount)
{
    volatile __int64* vloc = (volatile __int64*)loc;
    unsigned __int64 prev = *vloc;
    for (;;)
    {
        unsigned __int64 next = prev + amount;
        __int64 snext = (__int64)next;
        __int64 sprev = (__int64)prev;
        __int64 res = InterlockedCompareExchange64(vloc, snext, sprev);
        if (res == sprev) return;
        else prev = (unsigned __int64)res;
    }
}