summaryrefslogtreecommitdiff
path: root/src/caffe/util
diff options
context:
space:
mode:
author: Cyprien Noel <cyprien.noel@gmail.com> 2015-05-19 11:11:05 -0700
committer: Evan Shelhamer <shelhamer@imaginarynumber.net> 2015-08-09 15:16:00 -0700
commit: e5575cf17a43a56e4ba9bc5465548ac0512197d8 (patch)
tree: c88686bf3df4b4b9678ac82e2939e798f4d44812 /src/caffe/util
parent: d2f045768cba7d494abb4d168fc366d6fce80b85 (diff)
downloadcaffeonacl-e5575cf17a43a56e4ba9bc5465548ac0512197d8.tar.gz
caffeonacl-e5575cf17a43a56e4ba9bc5465548ac0512197d8.tar.bz2
caffeonacl-e5575cf17a43a56e4ba9bc5465548ac0512197d8.zip
Multi-GPU
- Parallelize batches among GPUs and tree-reduce the gradients
- The effective batch size scales with the number of devices
- Batch size is multiplied by the number of devices
- Split batches between GPUs, and tree-reduce the gradients
- Detect machine topology (twin-GPU boards, P2P connectivity)
- Track device in syncedmem (thanks @thatguymike)
- Insert a callback in the solver for minimal code change
- Accept list for gpu flag of caffe tool, e.g. '-gpu 0,1' or '-gpu all'.
  Run on default GPU if no ID given.
- Add multi-GPU solver test
- Deterministic architecture for reproducible runs
Diffstat (limited to 'src/caffe/util')
-rw-r--r-- src/caffe/util/blocking_queue.cpp 3
1 file changed, 3 insertions, 0 deletions
diff --git a/src/caffe/util/blocking_queue.cpp b/src/caffe/util/blocking_queue.cpp
index f7c53f22..d1d1fa86 100644
--- a/src/caffe/util/blocking_queue.cpp
+++ b/src/caffe/util/blocking_queue.cpp
@@ -3,6 +3,7 @@
#include "caffe/data_layers.hpp"
#include "caffe/data_reader.hpp"
+#include "caffe/parallel.hpp"
#include "caffe/util/blocking_queue.hpp"
namespace caffe {
@@ -89,5 +90,7 @@ template class BlockingQueue<Batch<float>*>;
template class BlockingQueue<Batch<double>*>;
template class BlockingQueue<Datum*>;
template class BlockingQueue<shared_ptr<DataReader::QueuePair> >;
+template class BlockingQueue<P2PSync<float>*>;
+template class BlockingQueue<P2PSync<double>*>;
} // namespace caffe