Multi-GPU

- Parallelize batches among GPUs and tree-reduce the gradients
- The effective batch size scales with the number of devices
- Batch size is multiplied by the number of devices
- Split batches between GPUs, and tree-reduce the gradients
- Detect machine topology (twin-GPU boards, P2P connectivity)
- Track device in syncedmem (thanks @thatguymike)
- Insert a callback in the solver for minimal code change
- Accept list for gpu flag of caffe tool, e.g. '-gpu 0,1' or '-gpu all'.
  Run on default GPU if no ID given.
- Add multi-GPU solver test
- Deterministic architecture for reproducible runs
author: Cyprien Noel <cyprien.noel@gmail.com> 2015-05-19 11:11:05 -0700
committer: Evan Shelhamer <shelhamer@imaginarynumber.net> 2015-08-09 15:16:00 -0700
commit: e5575cf17a43a56e4ba9bc5465548ac0512197d8 (patch)
tree: c88686bf3df4b4b9678ac82e2939e798f4d44812 /src/caffe/util
parent: d2f045768cba7d494abb4d168fc366d6fce80b85 (diff)
download: caffeonacl-e5575cf17a43a56e4ba9bc5465548ac0512197d8.tar.gz
caffeonacl-e5575cf17a43a56e4ba9bc5465548ac0512197d8.tar.bz2
caffeonacl-e5575cf17a43a56e4ba9bc5465548ac0512197d8.zip
1 file changed, 3 insertions, 0 deletions
diff --git a/src/caffe/util/blocking_queue.cpp b/src/caffe/util/blocking_queue.cpp
index f7c53f22..d1d1fa86 100644
--- a/src/caffe/util/blocking_queue.cpp
+++ b/src/caffe/util/blocking_queue.cpp
@@ -3,6 +3,7 @@
 
 #include "caffe/data_layers.hpp"
 #include "caffe/data_reader.hpp"
+#include "caffe/parallel.hpp"
 #include "caffe/util/blocking_queue.hpp"
 
 namespace caffe {
@@ -89,5 +90,7 @@ template class BlockingQueue<Batch<float>*>;
 template class BlockingQueue<Batch<double>*>;
 template class BlockingQueue<Datum*>;
 template class BlockingQueue<shared_ptr<DataReader::QueuePair> >;
+template class BlockingQueue<P2PSync<float>*>;
+template class BlockingQueue<P2PSync<double>*>;
 
 }  // namespace caffe
author	Cyprien Noel <cyprien.noel@gmail.com>	2015-05-19 11:11:05 -0700
committer	Evan Shelhamer <shelhamer@imaginarynumber.net>	2015-08-09 15:16:00 -0700
commit	e5575cf17a43a56e4ba9bc5465548ac0512197d8 (patch)
tree	c88686bf3df4b4b9678ac82e2939e798f4d44812 /src/caffe/util
parent	d2f045768cba7d494abb4d168fc366d6fce80b85 (diff)
download	caffeonacl-e5575cf17a43a56e4ba9bc5465548ac0512197d8.tar.gz caffeonacl-e5575cf17a43a56e4ba9bc5465548ac0512197d8.tar.bz2 caffeonacl-e5575cf17a43a56e4ba9bc5465548ac0512197d8.zip