#ifndef CAFFE_SYNCEDMEM_HPP_ #define CAFFE_SYNCEDMEM_HPP_ #include #include "caffe/common.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { // If CUDA is available and in GPU mode, host memory will be allocated pinned, // using cudaMallocHost. It avoids dynamic pinning for transfers (DMA). // The improvement in performance seems negligible in the single GPU case, // but might be more significant for parallel training. Most importantly, // it improved stability for large models on many GPUs. inline void CaffeMallocHost(void** ptr, size_t size) { #ifndef CPU_ONLY if (Caffe::mode() == Caffe::GPU) { CUDA_CHECK(cudaMallocHost(ptr, size)); return; } #endif *ptr = malloc(size); CHECK(*ptr) << "host allocation of size " << size << " failed"; } inline void CaffeFreeHost(void* ptr) { #ifndef CPU_ONLY if (Caffe::mode() == Caffe::GPU) { CUDA_CHECK(cudaFreeHost(ptr)); return; } #endif free(ptr); } /** * @brief Manages memory allocation and synchronization between the host (CPU) * and device (GPU). * * TODO(dox): more thorough description. */ class SyncedMemory { public: SyncedMemory() : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(0), head_(UNINITIALIZED), own_cpu_data_(false), own_gpu_data_(false), gpu_device_(-1) {} explicit SyncedMemory(size_t size) : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(size), head_(UNINITIALIZED), own_cpu_data_(false), own_gpu_data_(false), gpu_device_(-1) {} ~SyncedMemory(); const void* cpu_data(); void set_cpu_data(void* data); const void* gpu_data(); void set_gpu_data(void* data); void* mutable_cpu_data(); void* mutable_gpu_data(); enum SyncedHead { UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED }; SyncedHead head() { return head_; } size_t size() { return size_; } #ifndef CPU_ONLY void async_gpu_push(const cudaStream_t& stream); #endif private: void to_cpu(); void to_gpu(); void* cpu_ptr_; void* gpu_ptr_; size_t size_; SyncedHead head_; bool own_cpu_data_; bool own_gpu_data_; int gpu_device_; DISABLE_COPY_AND_ASSIGN(SyncedMemory); }; // class SyncedMemory } // namespace caffe #endif // CAFFE_SYNCEDMEM_HPP_