diff options
Diffstat (limited to 'runtimes/nn/depend/external/gemmlowp/profiling/profiler.h')
-rw-r--r-- | runtimes/nn/depend/external/gemmlowp/profiling/profiler.h | 373 |
1 files changed, 0 insertions, 373 deletions
diff --git a/runtimes/nn/depend/external/gemmlowp/profiling/profiler.h b/runtimes/nn/depend/external/gemmlowp/profiling/profiler.h deleted file mode 100644 index a18c036c8..000000000 --- a/runtimes/nn/depend/external/gemmlowp/profiling/profiler.h +++ /dev/null @@ -1,373 +0,0 @@ -// Copyright 2015 Google Inc. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// profiler.h: a simple sampling profiler that's always just one #include away! -// -// Overview -// ======== -// -// This profiler only samples a pseudo-stack, not the actual call stack. -// The code to be profiled needs to be instrumented with -// pseudo-stack "labels", see ScopedProfilingLabel. -// Using pseudo-stacks allows this profiler to be very simple, low-overhead, -// portable, and independent of compilation details such as function inlining -// and frame pointers. The granularity of instrumentation can be freely chosen, -// and it is possible to get some annotate-like detail, i.e. detail within one -// function without splitting it into multiple functions. -// -// This profiler should remain small and simple; its key feature is to fit in -// a single header file so that there should never be a reason to refrain -// from profiling. More complex and feature-rich alternatives are -// readily available. This one offers a strict superset of its -// functionality: https://github.com/bgirard/GeckoProfiler, including -// intertwining pseudostacks with real call stacks, more annotation options, -// and advanced visualization. -// -// Usage -// ===== -// -// 0. Enable profiling by defining GEMMLOWP_PROFILING. When profiling is -// not enabled, profiling instrumentation from instrumentation.h -// (ScopedProfilingLabel, RegisterCurrentThreadForProfiling) -// is still defined but does nothing. On the other hand, -// when profiling is not enabled, it is an error to #include the -// present file. -// -// 1. Each thread can opt in to profiling by calling -// RegisterCurrentThreadForProfiling() defined in instrumentation.h. -// This can be done at any time, before or during profiling. -// No sample will be collected from a thread until -// it has called RegisterCurrentThreadForProfiling(). -// -// 2. Instrument your code to be profiled with ScopedProfilingLabel, -// which is a RAII helper defined in instrumentation.h. The identifier -// names (some_label, etc) do not matter; what will show up -// in the profile is the string passed to the constructor, which -// must be a literal string. See the full example below. -// -// Note: the overhead of ScopedProfilingLabel is zero when not -// enabling profiling (when not defining GEMMLOWP_PROFILING). -// -// 3. Use the profiler.h interface to control profiling. There are two -// functions: StartProfiling() and FinishProfiling(). They must be -// called on the same thread. FinishProfiling() prints the profile -// on stdout. -// -// Full example -// ============ -/* - #define GEMMLOWP_PROFILING - #include "profiling/instrumentation.h" - using namespace gemmlowp; - - const int iters = 100000000; - volatile int i; - - void Bar() { - ScopedProfilingLabel label("Bar"); - for (i = 0; i < iters; i++) {} - } - - void Foo() { - ScopedProfilingLabel label("Foo"); - for (i = 0; i < iters; i++) {} - Bar(); - } - - void Init() { - RegisterCurrentThreadForProfiling(); - } - - #include "profiling/profiler.h" - - int main() { - Init(); - StartProfiling(); - Foo(); - FinishProfiling(); - } -* -* Output: -* - gemmlowp profile (1 threads, 304 samples) - 100.00% Foo - 51.32% other - 48.68% Bar - 0.00% other (outside of any label) -*/ -// -// Interpreting results -// ==================== -// -// Each node shows the absolute percentage, among all the samples, -// of the number of samples that recorded the given pseudo-stack. -// The percentages are *NOT* relative to the parent node. In addition -// to your own labels, you will also see 'other' nodes that collect -// the remainder of samples under the parent node that didn't fall into -// any of the labelled child nodes. Example: -// -// 20% Foo -// 12% Bar -// 6% Xyz -// 2% other -// -// This means that 20% of all labels were under Foo, of which 12%/20%==60% -// were under Bar, 6%/20%==30% were under Xyz, and 2%/20%==10% were not -// under either Bar or Xyz. -// -// Typically, one wants to keep adding ScopedProfilingLabel's until -// the 'other' nodes show low percentages. -// -// Interpreting results with multiple threads -// ========================================== -// -// At each sample, each thread registered for profiling gets sampled once. -// So if there is one "main thread" spending its time in MainFunc() and -// 4 "worker threads" spending time in WorkerFunc(), then 80% (=4/5) of the -// samples will be in WorkerFunc, so the profile will look like this: -// -// 80% WorkerFunc -// 20% MainFunc - -#ifndef GEMMLOWP_PROFILING_PROFILER_H_ -#define GEMMLOWP_PROFILING_PROFILER_H_ - -#ifndef GEMMLOWP_PROFILING -#error Profiling is not enabled! -#endif - -#include <vector> - -#include "instrumentation.h" - -namespace gemmlowp { - -// A tree view of a profile. -class ProfileTreeView { - struct Node { - std::vector<Node*> children; - const char* label; - std::size_t weight; - Node() : label(nullptr), weight(0) {} - ~Node() { - for (auto child : children) { - delete child; - } - } - }; - - static bool CompareNodes(Node* n1, Node* n2) { - return n1->weight > n2->weight; - } - - Node root_; - - void PrintNode(const Node* node, int level) const { - if (level) { - for (int i = 1; i < level; i++) { - printf(" "); - } - printf("%.2f%% %s\n", 100.0f * node->weight / root_.weight, node->label); - } - for (auto child : node->children) { - PrintNode(child, level + 1); - } - } - - static void AddStackToNode(const ProfilingStack& stack, Node* node, - std::size_t level) { - node->weight++; - if (stack.size == level) { - return; - } - Node* child_to_add_to = nullptr; - for (auto child : node->children) { - if (child->label == stack.labels[level]) { - child_to_add_to = child; - break; - } - } - if (!child_to_add_to) { - child_to_add_to = new Node; - child_to_add_to->label = stack.labels[level]; - node->children.push_back(child_to_add_to); - } - AddStackToNode(stack, child_to_add_to, level + 1); - return; - } - - void AddStack(const ProfilingStack& stack) { - AddStackToNode(stack, &root_, 0); - } - - void AddOtherChildrenToNode(Node* node) { - std::size_t top_level_children_weight = 0; - for (auto c : node->children) { - AddOtherChildrenToNode(c); - top_level_children_weight += c->weight; - } - if (top_level_children_weight) { - Node* other_child = new Node; - other_child->label = - node == &root_ ? "other (outside of any label)" : "other"; - other_child->weight = node->weight - top_level_children_weight; - node->children.push_back(other_child); - } - } - - void AddOtherNodes() { AddOtherChildrenToNode(&root_); } - - void SortNode(Node* node) { - std::sort(node->children.begin(), node->children.end(), CompareNodes); - for (auto child : node->children) { - SortNode(child); - } - } - - void Sort() { SortNode(&root_); } - - public: - explicit ProfileTreeView(const std::vector<ProfilingStack>& stacks) { - for (auto stack : stacks) { - AddStack(stack); - } - AddOtherNodes(); - Sort(); - } - - void Print() const { - printf("\n"); - printf("gemmlowp profile (%d threads, %d samples)\n", - static_cast<int>(ThreadsUnderProfiling().size()), - static_cast<int>(root_.weight)); - PrintNode(&root_, 0); - printf("\n"); - } -}; - -// This function is the only place that determines our sampling frequency. -inline void WaitOneProfilerTick() { - static const int millisecond = 1000000; - -#if defined __arm__ || defined __aarch64__ - // Reduced sampling frequency on mobile devices helps limit time and memory - // overhead there. - static const int interval = 10 * millisecond; -#else - static const int interval = 1 * millisecond; -#endif - - timespec ts; - ts.tv_sec = 0; - ts.tv_nsec = interval; - nanosleep(&ts, nullptr); -} - -// This is how we track whether we've already started profiling, -// to guard against misuse of the API. -inline bool& IsProfiling() { - static bool b; - return b; -} - -// This is how we tell the profiler thread to finish. -inline bool& ProfilerThreadShouldFinish() { - static bool b; - return b; -} - -// The profiler thread. See ProfilerThreadFunc. -inline pthread_t& ProfilerThread() { - static pthread_t t; - return t; -} - -// Records a stack from a running thread. -// The tricky part is that we're not interrupting the thread. -// This is OK because we're looking at a pseudo-stack of labels, -// not at the real thread stack, and if the pseudo-stack changes -// while we're recording it, we are OK with getting either the -// old or the new stack. Note that ProfilingStack::Pop -// only decrements the size, and doesn't null the popped label, -// so if we're concurrently recording it, it shouldn't change -// under our feet until another label is pushed, at which point -// we are OK with getting either this new label or the old one. -// In the end, the key atomicity property that we are relying on -// here is that pointers are changed atomically, and the labels -// are pointers (to literal strings). -inline void RecordStack(const ThreadInfo* thread, ProfilingStack* dst) { - assert(!dst->size); - while (dst->size < thread->stack.size) { - dst->labels[dst->size] = thread->stack.labels[dst->size]; - dst->size++; - MemoryBarrier(); // thread->stack can change at any time - } -} - -// The profiler thread's entry point. -// Note that a separate thread is to be started each time we call -// StartProfiling(), and finishes when we call FinishProfiling(). -// So here we only need to handle the recording and reporting of -// a single profile. -inline void* ProfilerThreadFunc(void*) { - assert(ProfilerThread() == pthread_self()); - - // Since we only handle one profile per profiler thread, the - // profile data (the array of recorded stacks) can be a local variable here. - std::vector<ProfilingStack> stacks; - - while (!ProfilerThreadShouldFinish()) { - WaitOneProfilerTick(); - { - AutoGlobalLock<ProfilerLockId> lock; - for (auto t : ThreadsUnderProfiling()) { - ProfilingStack s; - RecordStack(t, &s); - stacks.push_back(s); - } - } - } - - // Profiling is finished and we now report the results. - ProfileTreeView(stacks).Print(); - - return nullptr; -} - -// Starts recording samples. -inline void StartProfiling() { - AutoGlobalLock<ProfilerLockId> lock; - ReleaseBuildAssertion(!IsProfiling(), "We're already profiling!"); - IsProfiling() = true; - ProfilerThreadShouldFinish() = false; - pthread_create(&ProfilerThread(), nullptr, ProfilerThreadFunc, nullptr); -} - -// Stops recording samples, and prints a profile tree-view on stdout. -inline void FinishProfiling() { - { - AutoGlobalLock<ProfilerLockId> lock; - ReleaseBuildAssertion(IsProfiling(), "We weren't profiling!"); - // The ProfilerThreadShouldFinish() mechanism here is really naive and bad, - // as the scary comments below should make clear. - // Should we use a condition variable? - ProfilerThreadShouldFinish() = true; - } // must release the lock here to avoid deadlock with profiler thread. - pthread_join(ProfilerThread(), nullptr); - IsProfiling() = false; // yikes, this should be guarded by the lock! -} - -} // namespace gemmlowp - -#endif // GEMMLOWP_PROFILING_PROFILER_H_ |