diff options
author | Kai Li <kaili_kloud@163.com> | 2014-02-20 18:29:08 +0800 |
---|---|---|
committer | Kai Li <kaili_kloud@163.com> | 2014-03-04 10:01:42 +0800 |
commit | 8e580fd1e9a09d87754aa15dfd1968fe8df3d7fd (patch) | |
tree | 50e9f3e8afcefc9e7b2cf953f84f08c0594ce3ca /tools/net_speed_benchmark.cpp | |
parent | 32fb3337a9bfc3f847d133e0ee0edf5f2e64711b (diff) | |
download | caffeonacl-8e580fd1e9a09d87754aa15dfd1968fe8df3d7fd.tar.gz caffeonacl-8e580fd1e9a09d87754aa15dfd1968fe8df3d7fd.tar.bz2 caffeonacl-8e580fd1e9a09d87754aa15dfd1968fe8df3d7fd.zip |
Synchronize GPU before CPU timers start and stop in net_speed_benchmark
Diffstat (limited to 'tools/net_speed_benchmark.cpp')
-rw-r--r-- | tools/net_speed_benchmark.cpp | 15 |
1 file changed, 15 insertions, 0 deletions
```diff
diff --git a/tools/net_speed_benchmark.cpp b/tools/net_speed_benchmark.cpp
index 9976744d..83fba147 100644
--- a/tools/net_speed_benchmark.cpp
+++ b/tools/net_speed_benchmark.cpp
@@ -67,13 +67,22 @@ int main(int argc, char** argv) {
   vector<vector<Blob<float>*> >& bottom_vecs = caffe_net.bottom_vecs();
   vector<vector<Blob<float>*> >& top_vecs = caffe_net.top_vecs();
   LOG(ERROR) << "*** Benchmark begins ***";
+  if (Caffe::mode() == Caffe::GPU) {
+    cudaDeviceSynchronize();
+  }
   clock_t forward_start = clock();
   for (int i = 0; i < layers.size(); ++i) {
     const string& layername = layers[i]->layer_param().name();
+    if (Caffe::mode() == Caffe::GPU) {
+      cudaDeviceSynchronize();
+    }
     clock_t start = clock();
     for (int j = 0; j < total_iter; ++j) {
       layers[i]->Forward(bottom_vecs[i], &top_vecs[i]);
     }
+    if (Caffe::mode() == Caffe::GPU) {
+      cudaDeviceSynchronize();
+    }
     LOG(ERROR) << layername << "\tforward: "
         << static_cast<float>(clock() - start) / CLOCKS_PER_SEC
         << " seconds.";
@@ -84,10 +93,16 @@ int main(int argc, char** argv) {
   clock_t backward_start = clock();
   for (int i = layers.size() - 1; i >= 0; --i) {
     const string& layername = layers[i]->layer_param().name();
+    if (Caffe::mode() == Caffe::GPU) {
+      cudaDeviceSynchronize();
+    }
     clock_t start = clock();
     for (int j = 0; j < total_iter; ++j) {
       layers[i]->Backward(top_vecs[i], true, &bottom_vecs[i]);
     }
+    if (Caffe::mode() == Caffe::GPU) {
+      cudaDeviceSynchronize();
+    }
     LOG(ERROR) << layername << "\tbackward: "
         << static_cast<float>(clock() - start) / CLOCKS_PER_SEC
         << " seconds.";
```