diff options
author | Yanghan Wang <yanghan@instagram.com> | 2018-01-24 12:47:39 -0800 |
---|---|---|
committer | Facebook Github Bot <facebook-github-bot@users.noreply.github.com> | 2018-01-24 13:05:27 -0800 |
commit | 2828c7a391b209302a318b3817cfd6cd85b05cf4 (patch) | |
tree | 1bdd43b92e64f0c5fcb0ebc997137c8db8e70648 /modules/detectron | |
parent | 09a1ef54ab4e08ffcdd852bfe63a09daf013e624 (diff) | |
download | pytorch-2828c7a391b209302a318b3817cfd6cd85b05cf4.tar.gz pytorch-2828c7a391b209302a318b3817cfd6cd85b05cf4.tar.bz2 pytorch-2828c7a391b209302a318b3817cfd6cd85b05cf4.zip |
Moved RoIAlign to OSS.
Reviewed By: newstzpz
Differential Revision: D6775228
fbshipit-source-id: a9a6689fb5f6004f13ec03db8410fd81e2e6468e
Diffstat (limited to 'modules/detectron')
-rw-r--r-- | modules/detectron/roi_align_op.cc | 98 | ||||
-rw-r--r-- | modules/detectron/roi_align_op.cu | 363 | ||||
-rw-r--r-- | modules/detectron/roi_align_op.h | 89 |
3 files changed, 0 insertions, 550 deletions
diff --git a/modules/detectron/roi_align_op.cc b/modules/detectron/roi_align_op.cc deleted file mode 100644 index 38094ff210..0000000000 --- a/modules/detectron/roi_align_op.cc +++ /dev/null @@ -1,98 +0,0 @@ -/** - * Copyright (c) 2016-present, Facebook, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "roi_align_op.h" - -namespace caffe2 { - -REGISTER_CPU_OPERATOR(RoIAlign, RoIAlignOp<float, CPUContext>); -REGISTER_CPU_OPERATOR(RoIAlignGradient, RoIAlignGradientOp<float, CPUContext>); - -OPERATOR_SCHEMA(RoIAlign) - .NumInputs(2) - .NumOutputs(1) - .SetDoc(R"DOC( -Region of Interest (RoI) align operation as used in Mask R-CNN. -)DOC") - .Arg( - "spatial_scale", - "(float) default 1.0; Spatial scale of the input feature map X " - "relative to the input image. E.g., 0.0625 if X has a stride of 16 " - "w.r.t. the input image.") - .Arg( - "pooled_h", - "(int) default 1; Pooled output Y's height.") - .Arg( - "pooled_w", - "(int) default 1; Pooled output Y's width.") - .Arg( - "sampling_ratio", - "(int) default -1; number of sampling points in the interpolation grid " - "used to compute the output value of each pooled output bin. If > 0, " - "then exactly sampling_ratio x sampling_ratio grid points are used. If " - "<= 0, then an adaptive number of grid points are used (computed as " - "ceil(roi_width / pooled_w), and likewise for height)." - ) - .Input( - 0, - "X", - "4D feature map input of shape (N, C, H, W).") - .Input( - 1, - "RoIs", - "2D input of shape (R, 5) specifying R RoIs with five columns " - "representing: batch index in [0, N - 1], x1, y1, x2, y2. The RoI " - "coordinates are in the coordinate system of the input image.") - .Output( - 0, - "Y", - "4D output of shape (R, C, pooled_h, pooled_w). The r-th batch element " - "is a pooled feature map cooresponding to the r-th RoI."); - -OPERATOR_SCHEMA(RoIAlignGradient) - .NumInputs(3) - .NumOutputs(1) - .Input( - 0, - "X", - "See RoIPoolF.") - .Input( - 1, - "RoIs", - "See RoIPoolF.") - .Input( - 2, - "dY", - "Gradient of forward output 0 (Y)") - .Output( - 0, - "dX", - "Gradient of forward input 0 (X)"); - -class GetRoIAlignGradient : public GradientMakerBase { - using GradientMakerBase::GradientMakerBase; - vector<OperatorDef> GetGradientDefs() override { - return SingleGradientDef( - "RoIAlignGradient", - "", - vector<string>{I(0), I(1), GO(0)}, - vector<string>{GI(0)}); - } -}; - -REGISTER_GRADIENT(RoIAlign, GetRoIAlignGradient); - -} // namespace caffe2 diff --git a/modules/detectron/roi_align_op.cu b/modules/detectron/roi_align_op.cu deleted file mode 100644 index 01d67f63b0..0000000000 --- a/modules/detectron/roi_align_op.cu +++ /dev/null @@ -1,363 +0,0 @@ -/** - * Copyright (c) 2016-present, Facebook, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// RoIAlign for Mask R-CNN -// This is the per-cell centered algined versio of RoIAlign. -// This is the official version. - -#include <cfloat> - -#include "caffe2/core/context_gpu.h" -#include "roi_align_op.h" - -#include <stdio.h> - -namespace caffe2 { - -namespace { - -template <typename T> -inline __device__ T gpu_atomic_add(const T val, T* address); - -template <> -inline __device__ -float gpu_atomic_add(const float val, float* address) { - return atomicAdd(address, val); -} - -template <typename T> -__device__ T bilinear_interpolate(const T* bottom_data, - const int height, const int width, - T y, T x, - const int index /* index for debug only*/) { - - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - //empty - return 0; - } - - if (y <= 0) y = 0; - if (x <= 0) x = 0; - - int y_low = (int) y; - int x_low = (int) x; - int y_high; - int x_high; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T) y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T) x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. - lx; - // do bilinear interpolation - T v1 = bottom_data[y_low * width + x_low]; - T v2 = bottom_data[y_low * width + x_high]; - T v3 = bottom_data[y_high * width + x_low]; - T v4 = bottom_data[y_high * width + x_high]; - T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - return val; -} - -template <typename T> -__global__ void RoIAlignForward(const int nthreads, const T* bottom_data, - const T spatial_scale, const int channels, - const int height, const int width, - const int pooled_height, const int pooled_width, - const int sampling_ratio, - const T* bottom_rois, T* top_data) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T* offset_bottom_rois = bottom_rois + n * 5; - int roi_batch_ind = offset_bottom_rois[0]; - - // Do not using rounding; this implementation detail is critical - T roi_start_w = offset_bottom_rois[1] * spatial_scale; - T roi_start_h = offset_bottom_rois[2] * spatial_scale; - T roi_end_w = offset_bottom_rois[3] * spatial_scale; - T roi_end_h = offset_bottom_rois[4] * spatial_scale; - // T roi_start_w = round(offset_bottom_rois[1] * spatial_scale); - // T roi_start_h = round(offset_bottom_rois[2] * spatial_scale); - // T roi_end_w = round(offset_bottom_rois[3] * spatial_scale); - // T roi_end_h = round(offset_bottom_rois[4] * spatial_scale); - - // Force malformed ROIs to be 1x1 - T roi_width = max(roi_end_w - roi_start_w, (T)1.); - T roi_height = max(roi_end_h - roi_start_h, (T)1.); - T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height); - T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width); - - const T* offset_bottom_data = bottom_data + (roi_batch_ind * channels + c) * height * width; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // We do average (integral) pooling inside a bin - const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4 - - T output_val = 0.; - for (int iy = 0; iy < roi_bin_grid_h; iy ++) // e.g., iy = 0, 1 - { - const T y = roi_start_h + ph * bin_size_h + static_cast<T>(iy + .5f) * bin_size_h / static_cast<T>(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix ++) - { - const T x = roi_start_w + pw * bin_size_w + static_cast<T>(ix + .5f) * bin_size_w / static_cast<T>(roi_bin_grid_w); - - T val = bilinear_interpolate(offset_bottom_data, height, width, y, x, index); - output_val += val; - } - } - output_val /= count; - - top_data[index] = output_val; - } -} - -template <typename T> -__device__ void bilinear_interpolate_gradient( - const int height, const int width, - T y, T x, - T & w1, T & w2, T & w3, T & w4, - int & x_low, int & x_high, int & y_low, int & y_high, - const int index /* index for debug only*/) { - - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - //empty - w1 = w2 = w3 = w4 = 0.; - x_low = x_high = y_low = y_high = -1; - return; - } - - if (y <= 0) y = 0; - if (x <= 0) x = 0; - - y_low = (int) y; - x_low = (int) x; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T) y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T) x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. - lx; - - // reference in forward - // T v1 = bottom_data[y_low * width + x_low]; - // T v2 = bottom_data[y_low * width + x_high]; - // T v3 = bottom_data[y_high * width + x_low]; - // T v4 = bottom_data[y_high * width + x_high]; - // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - return; -} - -template <typename T> -__global__ void RoIAlignBackwardFeature(const int nthreads, const T* top_diff, - const int num_rois, const T spatial_scale, - const int channels, const int height, const int width, - const int pooled_height, const int pooled_width, - const int sampling_ratio, - T* bottom_diff, - const T* bottom_rois) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T* offset_bottom_rois = bottom_rois + n * 5; - int roi_batch_ind = offset_bottom_rois[0]; - - // Do not using rounding; this implementation detail is critical - T roi_start_w = offset_bottom_rois[1] * spatial_scale; - T roi_start_h = offset_bottom_rois[2] * spatial_scale; - T roi_end_w = offset_bottom_rois[3] * spatial_scale; - T roi_end_h = offset_bottom_rois[4] * spatial_scale; - // T roi_start_w = round(offset_bottom_rois[1] * spatial_scale); - // T roi_start_h = round(offset_bottom_rois[2] * spatial_scale); - // T roi_end_w = round(offset_bottom_rois[3] * spatial_scale); - // T roi_end_h = round(offset_bottom_rois[4] * spatial_scale); - - // Force malformed ROIs to be 1x1 - T roi_width = max(roi_end_w - roi_start_w, (T)1.); - T roi_height = max(roi_end_h - roi_start_h, (T)1.); - T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height); - T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width); - - T* offset_bottom_diff = bottom_diff + (roi_batch_ind * channels + c) * height * width; - - int top_offset = (n * channels + c) * pooled_height * pooled_width; - const T* offset_top_diff = top_diff + top_offset; - const T top_diff_this_bin = offset_top_diff[ph * pooled_width + pw]; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // We do average (integral) pooling inside a bin - const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4 - - for (int iy = 0; iy < roi_bin_grid_h; iy ++) // e.g., iy = 0, 1 - { - const T y = roi_start_h + ph * bin_size_h + static_cast<T>(iy + .5f) * bin_size_h / static_cast<T>(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix ++) - { - const T x = roi_start_w + pw * bin_size_w + static_cast<T>(ix + .5f) * bin_size_w / static_cast<T>(roi_bin_grid_w); - - T w1, w2, w3, w4; - int x_low, x_high, y_low, y_high; - - bilinear_interpolate_gradient(height, width, y, x, - w1, w2, w3, w4, - x_low, x_high, y_low, y_high, - index); - - T g1 = top_diff_this_bin * w1 / count; - T g2 = top_diff_this_bin * w2 / count; - T g3 = top_diff_this_bin * w3 / count; - T g4 = top_diff_this_bin * w4 / count; - - if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) - { - gpu_atomic_add(static_cast<T>(g1), offset_bottom_diff + y_low * width + x_low); - gpu_atomic_add(static_cast<T>(g2), offset_bottom_diff + y_low * width + x_high); - gpu_atomic_add(static_cast<T>(g3), offset_bottom_diff + y_high * width + x_low); - gpu_atomic_add(static_cast<T>(g4), offset_bottom_diff + y_high * width + x_high); - } // if - } // ix - } // iy - } // CUDA_1D_KERNEL_LOOP -} // RoIAlignBackward - - -} // namespace - -template<> -bool RoIAlignOp<float, CUDAContext>::RunOnDevice() { - auto& X = Input(0); // Input data to pool - auto& R = Input(1); // RoIs - auto* Y = Output(0); // RoI pooled data - - if (R.size() == 0) { - // Handle empty rois - Y->Resize(0, X.dim32(1), pooled_height_, pooled_width_); - // The following mutable_data calls are needed to allocate the tensors - Y->mutable_data<float>(); - return true; - } - - assert(sampling_ratio_ >= 0); - - Y->Resize(R.dim32(0), X.dim32(1), pooled_height_, pooled_width_); - int output_size = Y->size(); - RoIAlignForward<float> - <<<CAFFE_GET_BLOCKS(output_size), - CAFFE_CUDA_NUM_THREADS, - 0, - context_.cuda_stream()>>>( - output_size, - X.data<float>(), - spatial_scale_, - X.dim32(1), - X.dim32(2), - X.dim32(3), - pooled_height_, - pooled_width_, - sampling_ratio_, - R.data<float>(), - Y->mutable_data<float>()); - return true; -} - -template<> -bool RoIAlignGradientOp<float, CUDAContext>::RunOnDevice() { - auto& X = Input(0); // Input data to pool - auto& R = Input(1); // RoIs - auto& dY = Input(2); // Gradient of net w.r.t. output of "forward" op - // (aka "gradOutput") - auto* dX = Output(0); // Gradient of net w.r.t. input to "forward" op - // (aka "gradInput") - - dX->ResizeLike(X); - - // Must zero-out dX before accumulating gradients - math::Set<float, CUDAContext>( - dX->size(), 0.f, dX->mutable_data<float>(), &context_); - - if (dY.size() > 0) { // Handle possibly empty gradient if there were no rois - RoIAlignBackwardFeature<float> - <<<CAFFE_GET_BLOCKS(dY.size()), - CAFFE_CUDA_NUM_THREADS, - 0, - context_.cuda_stream()>>>( - dY.size(), - dY.data<float>(), - R.dim32(0), - spatial_scale_, - X.dim32(1), - X.dim32(2), - X.dim32(3), - pooled_height_, - pooled_width_, - sampling_ratio_, - dX->mutable_data<float>(), - R.data<float>()); - } - return true; -} - - -REGISTER_CUDA_OPERATOR(RoIAlign, - RoIAlignOp<float, CUDAContext>); -REGISTER_CUDA_OPERATOR(RoIAlignGradient, - RoIAlignGradientOp<float, CUDAContext>); -} // namespace caffe2 diff --git a/modules/detectron/roi_align_op.h b/modules/detectron/roi_align_op.h deleted file mode 100644 index 8283d4b242..0000000000 --- a/modules/detectron/roi_align_op.h +++ /dev/null @@ -1,89 +0,0 @@ -/** - * Copyright (c) 2016-present, Facebook, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ROI_ALIGN_OP_H_ -#define ROI_ALIGN_OP_H_ - -#include "caffe2/core/context.h" -#include "caffe2/core/logging.h" -#include "caffe2/core/operator.h" -#include "caffe2/utils/math.h" - -namespace caffe2 { - -template <typename T, class Context> -class RoIAlignOp final : public Operator<Context> { - public: - RoIAlignOp(const OperatorDef& operator_def, Workspace* ws) - : Operator<Context>(operator_def, ws), - spatial_scale_( - OperatorBase::GetSingleArgument<float>("spatial_scale", 1.)), - pooled_height_(OperatorBase::GetSingleArgument<int>("pooled_h", 1)), - pooled_width_(OperatorBase::GetSingleArgument<int>("pooled_w", 1)), - sampling_ratio_( - OperatorBase::GetSingleArgument<int>("sampling_ratio", -1)) { - DCHECK_GT(spatial_scale_, 0); - DCHECK_GT(pooled_height_, 0); - DCHECK_GT(pooled_width_, 0); - DCHECK_GE(sampling_ratio_, 0); - } - USE_OPERATOR_CONTEXT_FUNCTIONS; - - bool RunOnDevice() override { - // No CPU implementation for now - CAFFE_NOT_IMPLEMENTED; - } - - protected: - float spatial_scale_; - int pooled_height_; - int pooled_width_; - int sampling_ratio_; -}; - -template <typename T, class Context> -class RoIAlignGradientOp final : public Operator<Context> { - public: - RoIAlignGradientOp(const OperatorDef& def, Workspace* ws) - : Operator<Context>(def, ws), - spatial_scale_( - OperatorBase::GetSingleArgument<float>("spatial_scale", 1.)), - pooled_height_(OperatorBase::GetSingleArgument<int>("pooled_h", 1)), - pooled_width_(OperatorBase::GetSingleArgument<int>("pooled_w", 1)), - sampling_ratio_( - OperatorBase::GetSingleArgument<int>("sampling_ratio", -1)) { - DCHECK_GT(spatial_scale_, 0); - DCHECK_GT(pooled_height_, 0); - DCHECK_GT(pooled_width_, 0); - DCHECK_GE(sampling_ratio_, 0); - } - USE_OPERATOR_CONTEXT_FUNCTIONS; - - bool RunOnDevice() override { - // No CPU implementation for now - CAFFE_NOT_IMPLEMENTED; - } - - protected: - float spatial_scale_; - int pooled_height_; - int pooled_width_; - int sampling_ratio_; -}; - -} // namespace caffe2 - -#endif // ROI_ALIGN_OP_H_ |