summary refs log tree commit diff
path: root/modules/detectron
diff options
context:
space:
mode:
author Yanghan Wang <yanghan@instagram.com> 2018-01-24 12:47:39 -0800
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com> 2018-01-24 13:05:27 -0800
commit 2828c7a391b209302a318b3817cfd6cd85b05cf4 (patch)
tree 1bdd43b92e64f0c5fcb0ebc997137c8db8e70648 /modules/detectron
parent 09a1ef54ab4e08ffcdd852bfe63a09daf013e624 (diff)
download pytorch-2828c7a391b209302a318b3817cfd6cd85b05cf4.tar.gz
pytorch-2828c7a391b209302a318b3817cfd6cd85b05cf4.tar.bz2
pytorch-2828c7a391b209302a318b3817cfd6cd85b05cf4.zip
Moved RoIAlign to OSS.
Reviewed By: newstzpz Differential Revision: D6775228 fbshipit-source-id: a9a6689fb5f6004f13ec03db8410fd81e2e6468e
Diffstat (limited to 'modules/detectron')
-rw-r--r-- modules/detectron/roi_align_op.cc 98
-rw-r--r-- modules/detectron/roi_align_op.cu 363
-rw-r--r-- modules/detectron/roi_align_op.h 89
3 files changed, 0 insertions, 550 deletions
diff --git a/modules/detectron/roi_align_op.cc b/modules/detectron/roi_align_op.cc
deleted file mode 100644
index 38094ff210..0000000000
--- a/modules/detectron/roi_align_op.cc
+++ /dev/null
@@ -1,98 +0,0 @@
-/**
- * Copyright (c) 2016-present, Facebook, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "roi_align_op.h"
-
-namespace caffe2 {
-
-REGISTER_CPU_OPERATOR(RoIAlign, RoIAlignOp<float, CPUContext>);
-REGISTER_CPU_OPERATOR(RoIAlignGradient, RoIAlignGradientOp<float, CPUContext>);
-
-OPERATOR_SCHEMA(RoIAlign)
- .NumInputs(2)
- .NumOutputs(1)
- .SetDoc(R"DOC(
-Region of Interest (RoI) align operation as used in Mask R-CNN.
-)DOC")
- .Arg(
- "spatial_scale",
- "(float) default 1.0; Spatial scale of the input feature map X "
- "relative to the input image. E.g., 0.0625 if X has a stride of 16 "
- "w.r.t. the input image.")
- .Arg(
- "pooled_h",
- "(int) default 1; Pooled output Y's height.")
- .Arg(
- "pooled_w",
- "(int) default 1; Pooled output Y's width.")
- .Arg(
- "sampling_ratio",
- "(int) default -1; number of sampling points in the interpolation grid "
- "used to compute the output value of each pooled output bin. If > 0, "
- "then exactly sampling_ratio x sampling_ratio grid points are used. If "
- "<= 0, then an adaptive number of grid points are used (computed as "
- "ceil(roi_width / pooled_w), and likewise for height)."
- )
- .Input(
- 0,
- "X",
- "4D feature map input of shape (N, C, H, W).")
- .Input(
- 1,
- "RoIs",
- "2D input of shape (R, 5) specifying R RoIs with five columns "
- "representing: batch index in [0, N - 1], x1, y1, x2, y2. The RoI "
- "coordinates are in the coordinate system of the input image.")
- .Output(
- 0,
- "Y",
- "4D output of shape (R, C, pooled_h, pooled_w). The r-th batch element "
- "is a pooled feature map cooresponding to the r-th RoI.");
-
-OPERATOR_SCHEMA(RoIAlignGradient)
- .NumInputs(3)
- .NumOutputs(1)
- .Input(
- 0,
- "X",
- "See RoIPoolF.")
- .Input(
- 1,
- "RoIs",
- "See RoIPoolF.")
- .Input(
- 2,
- "dY",
- "Gradient of forward output 0 (Y)")
- .Output(
- 0,
- "dX",
- "Gradient of forward input 0 (X)");
-
-class GetRoIAlignGradient : public GradientMakerBase {
- using GradientMakerBase::GradientMakerBase;
- vector<OperatorDef> GetGradientDefs() override {
- return SingleGradientDef(
- "RoIAlignGradient",
- "",
- vector<string>{I(0), I(1), GO(0)},
- vector<string>{GI(0)});
- }
-};
-
-REGISTER_GRADIENT(RoIAlign, GetRoIAlignGradient);
-
-} // namespace caffe2
diff --git a/modules/detectron/roi_align_op.cu b/modules/detectron/roi_align_op.cu
deleted file mode 100644
index 01d67f63b0..0000000000
--- a/modules/detectron/roi_align_op.cu
+++ /dev/null
@@ -1,363 +0,0 @@
-/**
- * Copyright (c) 2016-present, Facebook, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// RoIAlign for Mask R-CNN
-// This is the per-cell centered aligned version of RoIAlign.
-// This is the official version.
-
-#include <cfloat>
-
-#include "caffe2/core/context_gpu.h"
-#include "roi_align_op.h"
-
-#include <stdio.h>
-
-namespace caffe2 {
-
-namespace {
-
-template <typename T>
-inline __device__ T gpu_atomic_add(const T val, T* address);
-
-template <>
-inline __device__
-float gpu_atomic_add(const float val, float* address) {
- return atomicAdd(address, val);
-}
-
-template <typename T>
-__device__ T bilinear_interpolate(const T* bottom_data,
- const int height, const int width,
- T y, T x,
- const int index /* index for debug only*/) {
-
- // Sample points that fall outside the feature map boundary contribute nothing
- if (y < -1.0 || y > height || x < -1.0 || x > width) {
- //empty
- return 0;
- }
-
- if (y <= 0) y = 0;
- if (x <= 0) x = 0;
-
- int y_low = (int) y;
- int x_low = (int) x;
- int y_high;
- int x_high;
-
- if (y_low >= height - 1) {
- y_high = y_low = height - 1;
- y = (T) y_low;
- } else {
- y_high = y_low + 1;
- }
-
- if (x_low >= width - 1) {
- x_high = x_low = width - 1;
- x = (T) x_low;
- } else {
- x_high = x_low + 1;
- }
-
- T ly = y - y_low;
- T lx = x - x_low;
- T hy = 1. - ly, hx = 1. - lx;
- // do bilinear interpolation
- T v1 = bottom_data[y_low * width + x_low];
- T v2 = bottom_data[y_low * width + x_high];
- T v3 = bottom_data[y_high * width + x_low];
- T v4 = bottom_data[y_high * width + x_high];
- T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
-
- T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
-
- return val;
-}
-
-template <typename T>
-__global__ void RoIAlignForward(const int nthreads, const T* bottom_data,
- const T spatial_scale, const int channels,
- const int height, const int width,
- const int pooled_height, const int pooled_width,
- const int sampling_ratio,
- const T* bottom_rois, T* top_data) {
- CUDA_1D_KERNEL_LOOP(index, nthreads) {
- // (n, c, ph, pw) is an element in the pooled output
- int pw = index % pooled_width;
- int ph = (index / pooled_width) % pooled_height;
- int c = (index / pooled_width / pooled_height) % channels;
- int n = index / pooled_width / pooled_height / channels;
-
- const T* offset_bottom_rois = bottom_rois + n * 5;
- int roi_batch_ind = offset_bottom_rois[0];
-
- // Do not use rounding; this implementation detail is critical
- T roi_start_w = offset_bottom_rois[1] * spatial_scale;
- T roi_start_h = offset_bottom_rois[2] * spatial_scale;
- T roi_end_w = offset_bottom_rois[3] * spatial_scale;
- T roi_end_h = offset_bottom_rois[4] * spatial_scale;
- // T roi_start_w = round(offset_bottom_rois[1] * spatial_scale);
- // T roi_start_h = round(offset_bottom_rois[2] * spatial_scale);
- // T roi_end_w = round(offset_bottom_rois[3] * spatial_scale);
- // T roi_end_h = round(offset_bottom_rois[4] * spatial_scale);
-
- // Force malformed ROIs to be 1x1
- T roi_width = max(roi_end_w - roi_start_w, (T)1.);
- T roi_height = max(roi_end_h - roi_start_h, (T)1.);
- T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
- T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
-
- const T* offset_bottom_data = bottom_data + (roi_batch_ind * channels + c) * height * width;
-
- // We use roi_bin_grid to sample the grid and mimic integral
- int roi_bin_grid_h = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_height / pooled_height); // e.g., = 2
- int roi_bin_grid_w = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width);
-
- // We do average (integral) pooling inside a bin
- const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4
-
- T output_val = 0.;
- for (int iy = 0; iy < roi_bin_grid_h; iy ++) // e.g., iy = 0, 1
- {
- const T y = roi_start_h + ph * bin_size_h + static_cast<T>(iy + .5f) * bin_size_h / static_cast<T>(roi_bin_grid_h); // e.g., 0.5, 1.5
- for (int ix = 0; ix < roi_bin_grid_w; ix ++)
- {
- const T x = roi_start_w + pw * bin_size_w + static_cast<T>(ix + .5f) * bin_size_w / static_cast<T>(roi_bin_grid_w);
-
- T val = bilinear_interpolate(offset_bottom_data, height, width, y, x, index);
- output_val += val;
- }
- }
- output_val /= count;
-
- top_data[index] = output_val;
- }
-}
-
-template <typename T>
-__device__ void bilinear_interpolate_gradient(
- const int height, const int width,
- T y, T x,
- T & w1, T & w2, T & w3, T & w4,
- int & x_low, int & x_high, int & y_low, int & y_high,
- const int index /* index for debug only*/) {
-
- // Sample points that fall outside the feature map boundary get zero weights
- if (y < -1.0 || y > height || x < -1.0 || x > width) {
- //empty
- w1 = w2 = w3 = w4 = 0.;
- x_low = x_high = y_low = y_high = -1;
- return;
- }
-
- if (y <= 0) y = 0;
- if (x <= 0) x = 0;
-
- y_low = (int) y;
- x_low = (int) x;
-
- if (y_low >= height - 1) {
- y_high = y_low = height - 1;
- y = (T) y_low;
- } else {
- y_high = y_low + 1;
- }
-
- if (x_low >= width - 1) {
- x_high = x_low = width - 1;
- x = (T) x_low;
- } else {
- x_high = x_low + 1;
- }
-
- T ly = y - y_low;
- T lx = x - x_low;
- T hy = 1. - ly, hx = 1. - lx;
-
- // reference in forward
- // T v1 = bottom_data[y_low * width + x_low];
- // T v2 = bottom_data[y_low * width + x_high];
- // T v3 = bottom_data[y_high * width + x_low];
- // T v4 = bottom_data[y_high * width + x_high];
- // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
-
- w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
-
- return;
-}
-
-template <typename T>
-__global__ void RoIAlignBackwardFeature(const int nthreads, const T* top_diff,
- const int num_rois, const T spatial_scale,
- const int channels, const int height, const int width,
- const int pooled_height, const int pooled_width,
- const int sampling_ratio,
- T* bottom_diff,
- const T* bottom_rois) {
- CUDA_1D_KERNEL_LOOP(index, nthreads) {
- // (n, c, ph, pw) is an element in the pooled output
- int pw = index % pooled_width;
- int ph = (index / pooled_width) % pooled_height;
- int c = (index / pooled_width / pooled_height) % channels;
- int n = index / pooled_width / pooled_height / channels;
-
- const T* offset_bottom_rois = bottom_rois + n * 5;
- int roi_batch_ind = offset_bottom_rois[0];
-
- // Do not use rounding; this implementation detail is critical
- T roi_start_w = offset_bottom_rois[1] * spatial_scale;
- T roi_start_h = offset_bottom_rois[2] * spatial_scale;
- T roi_end_w = offset_bottom_rois[3] * spatial_scale;
- T roi_end_h = offset_bottom_rois[4] * spatial_scale;
- // T roi_start_w = round(offset_bottom_rois[1] * spatial_scale);
- // T roi_start_h = round(offset_bottom_rois[2] * spatial_scale);
- // T roi_end_w = round(offset_bottom_rois[3] * spatial_scale);
- // T roi_end_h = round(offset_bottom_rois[4] * spatial_scale);
-
- // Force malformed ROIs to be 1x1
- T roi_width = max(roi_end_w - roi_start_w, (T)1.);
- T roi_height = max(roi_end_h - roi_start_h, (T)1.);
- T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
- T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
-
- T* offset_bottom_diff = bottom_diff + (roi_batch_ind * channels + c) * height * width;
-
- int top_offset = (n * channels + c) * pooled_height * pooled_width;
- const T* offset_top_diff = top_diff + top_offset;
- const T top_diff_this_bin = offset_top_diff[ph * pooled_width + pw];
-
- // We use roi_bin_grid to sample the grid and mimic integral
- int roi_bin_grid_h = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_height / pooled_height); // e.g., = 2
- int roi_bin_grid_w = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width);
-
- // We do average (integral) pooling inside a bin
- const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4
-
- for (int iy = 0; iy < roi_bin_grid_h; iy ++) // e.g., iy = 0, 1
- {
- const T y = roi_start_h + ph * bin_size_h + static_cast<T>(iy + .5f) * bin_size_h / static_cast<T>(roi_bin_grid_h); // e.g., 0.5, 1.5
- for (int ix = 0; ix < roi_bin_grid_w; ix ++)
- {
- const T x = roi_start_w + pw * bin_size_w + static_cast<T>(ix + .5f) * bin_size_w / static_cast<T>(roi_bin_grid_w);
-
- T w1, w2, w3, w4;
- int x_low, x_high, y_low, y_high;
-
- bilinear_interpolate_gradient(height, width, y, x,
- w1, w2, w3, w4,
- x_low, x_high, y_low, y_high,
- index);
-
- T g1 = top_diff_this_bin * w1 / count;
- T g2 = top_diff_this_bin * w2 / count;
- T g3 = top_diff_this_bin * w3 / count;
- T g4 = top_diff_this_bin * w4 / count;
-
- if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0)
- {
- gpu_atomic_add(static_cast<T>(g1), offset_bottom_diff + y_low * width + x_low);
- gpu_atomic_add(static_cast<T>(g2), offset_bottom_diff + y_low * width + x_high);
- gpu_atomic_add(static_cast<T>(g3), offset_bottom_diff + y_high * width + x_low);
- gpu_atomic_add(static_cast<T>(g4), offset_bottom_diff + y_high * width + x_high);
- } // if
- } // ix
- } // iy
- } // CUDA_1D_KERNEL_LOOP
-} // RoIAlignBackwardFeature
-
-
-} // namespace
-
-template<>
-bool RoIAlignOp<float, CUDAContext>::RunOnDevice() {
- auto& X = Input(0); // Input data to pool
- auto& R = Input(1); // RoIs
- auto* Y = Output(0); // RoI pooled data
-
- if (R.size() == 0) {
- // Handle empty rois
- Y->Resize(0, X.dim32(1), pooled_height_, pooled_width_);
- // The following mutable_data call is needed to allocate the tensor
- Y->mutable_data<float>();
- return true;
- }
-
- assert(sampling_ratio_ >= 0);
-
- Y->Resize(R.dim32(0), X.dim32(1), pooled_height_, pooled_width_);
- int output_size = Y->size();
- RoIAlignForward<float>
- <<<CAFFE_GET_BLOCKS(output_size),
- CAFFE_CUDA_NUM_THREADS,
- 0,
- context_.cuda_stream()>>>(
- output_size,
- X.data<float>(),
- spatial_scale_,
- X.dim32(1),
- X.dim32(2),
- X.dim32(3),
- pooled_height_,
- pooled_width_,
- sampling_ratio_,
- R.data<float>(),
- Y->mutable_data<float>());
- return true;
-}
-
-template<>
-bool RoIAlignGradientOp<float, CUDAContext>::RunOnDevice() {
- auto& X = Input(0); // Input data to pool
- auto& R = Input(1); // RoIs
- auto& dY = Input(2); // Gradient of net w.r.t. output of "forward" op
- // (aka "gradOutput")
- auto* dX = Output(0); // Gradient of net w.r.t. input to "forward" op
- // (aka "gradInput")
-
- dX->ResizeLike(X);
-
- // Must zero-out dX before accumulating gradients
- math::Set<float, CUDAContext>(
- dX->size(), 0.f, dX->mutable_data<float>(), &context_);
-
- if (dY.size() > 0) { // Handle possibly empty gradient if there were no rois
- RoIAlignBackwardFeature<float>
- <<<CAFFE_GET_BLOCKS(dY.size()),
- CAFFE_CUDA_NUM_THREADS,
- 0,
- context_.cuda_stream()>>>(
- dY.size(),
- dY.data<float>(),
- R.dim32(0),
- spatial_scale_,
- X.dim32(1),
- X.dim32(2),
- X.dim32(3),
- pooled_height_,
- pooled_width_,
- sampling_ratio_,
- dX->mutable_data<float>(),
- R.data<float>());
- }
- return true;
-}
-
-
-REGISTER_CUDA_OPERATOR(RoIAlign,
- RoIAlignOp<float, CUDAContext>);
-REGISTER_CUDA_OPERATOR(RoIAlignGradient,
- RoIAlignGradientOp<float, CUDAContext>);
-} // namespace caffe2
diff --git a/modules/detectron/roi_align_op.h b/modules/detectron/roi_align_op.h
deleted file mode 100644
index 8283d4b242..0000000000
--- a/modules/detectron/roi_align_op.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/**
- * Copyright (c) 2016-present, Facebook, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ROI_ALIGN_OP_H_
-#define ROI_ALIGN_OP_H_
-
-#include "caffe2/core/context.h"
-#include "caffe2/core/logging.h"
-#include "caffe2/core/operator.h"
-#include "caffe2/utils/math.h"
-
-namespace caffe2 {
-
-template <typename T, class Context>
-class RoIAlignOp final : public Operator<Context> {
- public:
- RoIAlignOp(const OperatorDef& operator_def, Workspace* ws)
- : Operator<Context>(operator_def, ws),
- spatial_scale_(
- OperatorBase::GetSingleArgument<float>("spatial_scale", 1.)),
- pooled_height_(OperatorBase::GetSingleArgument<int>("pooled_h", 1)),
- pooled_width_(OperatorBase::GetSingleArgument<int>("pooled_w", 1)),
- sampling_ratio_(
- OperatorBase::GetSingleArgument<int>("sampling_ratio", -1)) {
- DCHECK_GT(spatial_scale_, 0);
- DCHECK_GT(pooled_height_, 0);
- DCHECK_GT(pooled_width_, 0);
- DCHECK_GE(sampling_ratio_, 0);
- }
- USE_OPERATOR_CONTEXT_FUNCTIONS;
-
- bool RunOnDevice() override {
- // No CPU implementation for now
- CAFFE_NOT_IMPLEMENTED;
- }
-
- protected:
- float spatial_scale_;
- int pooled_height_;
- int pooled_width_;
- int sampling_ratio_;
-};
-
-template <typename T, class Context>
-class RoIAlignGradientOp final : public Operator<Context> {
- public:
- RoIAlignGradientOp(const OperatorDef& def, Workspace* ws)
- : Operator<Context>(def, ws),
- spatial_scale_(
- OperatorBase::GetSingleArgument<float>("spatial_scale", 1.)),
- pooled_height_(OperatorBase::GetSingleArgument<int>("pooled_h", 1)),
- pooled_width_(OperatorBase::GetSingleArgument<int>("pooled_w", 1)),
- sampling_ratio_(
- OperatorBase::GetSingleArgument<int>("sampling_ratio", -1)) {
- DCHECK_GT(spatial_scale_, 0);
- DCHECK_GT(pooled_height_, 0);
- DCHECK_GT(pooled_width_, 0);
- DCHECK_GE(sampling_ratio_, 0);
- }
- USE_OPERATOR_CONTEXT_FUNCTIONS;
-
- bool RunOnDevice() override {
- // No CPU implementation for now
- CAFFE_NOT_IMPLEMENTED;
- }
-
- protected:
- float spatial_scale_;
- int pooled_height_;
- int pooled_width_;
- int sampling_ratio_;
-};
-
-} // namespace caffe2
-
-#endif // ROI_ALIGN_OP_H_