diff options
author | Evan Shelhamer <shelhamer@imaginarynumber.net> | 2014-06-29 17:27:50 -0700 |
---|---|---|
committer | Evan Shelhamer <shelhamer@imaginarynumber.net> | 2014-06-29 17:27:50 -0700 |
commit | 55d8ac36772e8634f78455ff8f965a58152d572e (patch) | |
tree | 67d43cd67a48559aab549ec21da0f90ca1de8bfa /src/caffe | |
parent | e6a46c2be6f5917c84198de49848887daa28098a (diff) | |
parent | a63feadb088f1fdcfc0cdc5d1ec73978f574375e (diff) | |
download | caffeonacl-55d8ac36772e8634f78455ff8f965a58152d572e.tar.gz caffeonacl-55d8ac36772e8634f78455ff8f965a58152d572e.tar.bz2 caffeonacl-55d8ac36772e8634f78455ff8f965a58152d572e.zip |
Merge pull request #545 from jamt9000/im2col-kernel-test
Test for im2col kernel
Diffstat (limited to 'src/caffe')
-rw-r--r-- | src/caffe/test/test_im2col_kernel.cu | 125 |
1 files changed, 125 insertions, 0 deletions
diff --git a/src/caffe/test/test_im2col_kernel.cu b/src/caffe/test/test_im2col_kernel.cu new file mode 100644 index 00000000..f49a5b7e --- /dev/null +++ b/src/caffe/test/test_im2col_kernel.cu @@ -0,0 +1,125 @@ +// Copyright 2014 BVLC and contributors. + +#include <cstring> +#include <vector> + +#include "cuda_runtime.h" +#include "gtest/gtest.h" +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/vision_layers.hpp" +#include "caffe/util/im2col.hpp" + +#include "caffe/test/test_caffe_main.hpp" + +namespace caffe { + +// Forward declare kernel functions +template <typename Dtype> +__global__ void im2col_gpu_kernel(const int n, const Dtype* data_im, + const int height, const int width, const int ksize, const int pad, + const int stride, const int height_col, const int width_col, + Dtype* data_col); + +extern cudaDeviceProp CAFFE_TEST_CUDA_PROP; + +template <typename Dtype> +class Im2colKernelTest : public ::testing::Test { + protected: + Im2colKernelTest() + // big so launches > 1024 threads + : blob_bottom_(new Blob<Dtype>(5, 500, 10, 10)), + blob_top_(new Blob<Dtype>()), + blob_top_cpu_(new Blob<Dtype>()) { + FillerParameter filler_param; + GaussianFiller<Dtype> filler(filler_param); + filler.Fill(this->blob_bottom_); + + height_ = blob_bottom_->height(); + width_ = blob_bottom_->width(); + channels_ = blob_bottom_->channels(); + pad_ = 0; + stride_ = 2; + kernel_size_ = 3; + height_col_ = (height_ + 2 * pad_ - kernel_size_) / stride_ + 1; + width_col_ = (width_ + 2 * pad_ - kernel_size_) / stride_ + 1; + } + + virtual ~Im2colKernelTest() { + delete blob_bottom_; + delete blob_top_; + delete blob_top_cpu_; + } + + Blob<Dtype>* const blob_bottom_; + Blob<Dtype>* const blob_top_; + Blob<Dtype>* const blob_top_cpu_; + int height_; + int width_; + int channels_; + int pad_; + int stride_; + int kernel_size_; + int height_col_; + int width_col_; +}; + +typedef ::testing::Types<float, double> Dtypes; +TYPED_TEST_CASE(Im2colKernelTest, Dtypes); + +TYPED_TEST(Im2colKernelTest, TestGPU) { + Caffe::set_mode(Caffe::GPU); + + // Reshape the blobs to correct size for im2col output + this->blob_top_->Reshape(this->blob_bottom_->num(), + this->channels_ * this->kernel_size_ * this->kernel_size_, + this->height_col_, + this->width_col_); + + this->blob_top_cpu_->Reshape(this->blob_bottom_->num(), + this->channels_ * this->kernel_size_ * this->kernel_size_, + this->height_col_, + this->width_col_); + + const TypeParam* bottom_data = this->blob_bottom_->gpu_data(); + TypeParam* top_data = this->blob_top_->mutable_gpu_data(); + TypeParam* cpu_data = this->blob_top_cpu_->mutable_cpu_data(); + + // CPU Version + for (int n = 0; n < this->blob_bottom_->num(); ++n) { + im2col_cpu(this->blob_bottom_->cpu_data() + this->blob_bottom_->offset(n), + this->channels_, this->height_, this->width_, this->kernel_size_, + this->pad_, this->stride_, cpu_data + this->blob_top_cpu_->offset(n)); + } + + // GPU version + int num_kernels = this->channels_ * this->height_col_ * this->width_col_; + int default_grid_dim = CAFFE_GET_BLOCKS(num_kernels); + + // Launch with different grid sizes + for (int grid_div = 2; grid_div <= 8; grid_div++) { + for (int n = 0; n < this->blob_bottom_->num(); ++n) { + int grid_dim = default_grid_dim/grid_div; + // NOLINT_NEXT_LINE(whitespace/operators) + im2col_gpu_kernel<TypeParam><<<grid_dim, CAFFE_CUDA_NUM_THREADS>>>( + num_kernels, bottom_data + this->blob_bottom_->offset(n), + this->height_, this->width_, this->kernel_size_, this->pad_, + this->stride_, this->height_col_, this->width_col_, + top_data + this->blob_top_->offset(n)); + CUDA_POST_KERNEL_CHECK; + } + + // Compare results against CPU version + for (int i = 0; i < this->blob_top_->count(); ++i) { + TypeParam cpuval = cpu_data[i]; + TypeParam gpuval = this->blob_top_->cpu_data()[i]; + EXPECT_EQ(cpuval, gpuval); + if (cpuval != gpuval) { + break; + } + } + } +} + +} // namespace caffe |