Moved RoIAlign to OSS.

Reviewed By: newstzpz
Differential Revision: D6775228
fbshipit-source-id: a9a6689fb5f6004f13ec03db8410fd81e2e6468e
author: Yanghan Wang <yanghan@instagram.com> 2018-01-24 12:47:39 -0800
committer: Facebook Github Bot <facebook-github-bot@users.noreply.github.com> 2018-01-24 13:05:27 -0800
commit: 2828c7a391b209302a318b3817cfd6cd85b05cf4 (patch)
tree: 1bdd43b92e64f0c5fcb0ebc997137c8db8e70648 /modules/detectron
parent: 09a1ef54ab4e08ffcdd852bfe63a09daf013e624 (diff)
download: pytorch-2828c7a391b209302a318b3817cfd6cd85b05cf4.tar.gz
pytorch-2828c7a391b209302a318b3817cfd6cd85b05cf4.tar.bz2
pytorch-2828c7a391b209302a318b3817cfd6cd85b05cf4.zip
3 files changed, 0 insertions, 550 deletions
diff --git a/modules/detectron/roi_align_op.cc b/modules/detectron/roi_align_op.cc
deleted file mode 100644
index 38094ff210..0000000000
--- a/modules/detectron/roi_align_op.cc
+++ /dev/null
@@ -1,98 +0,0 @@
-/**
- * Copyright (c) 2016-present, Facebook, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "roi_align_op.h"
-
-namespace caffe2 {
-
-REGISTER_CPU_OPERATOR(RoIAlign, RoIAlignOp<float, CPUContext>);
-REGISTER_CPU_OPERATOR(RoIAlignGradient, RoIAlignGradientOp<float, CPUContext>);
-
-OPERATOR_SCHEMA(RoIAlign)
-    .NumInputs(2)
-    .NumOutputs(1)
-    .SetDoc(R"DOC(
-Region of Interest (RoI) align operation as used in Mask R-CNN.
-)DOC")
-    .Arg(
-        "spatial_scale",
-        "(float) default 1.0; Spatial scale of the input feature map X "
-        "relative to the input image. E.g., 0.0625 if X has a stride of 16 "
-        "w.r.t. the input image.")
-    .Arg(
-        "pooled_h",
-        "(int) default 1; Pooled output Y's height.")
-    .Arg(
-        "pooled_w",
-        "(int) default 1; Pooled output Y's width.")
-    .Arg(
-        "sampling_ratio",
-        "(int) default -1; number of sampling points in the interpolation grid "
-        "used to compute the output value of each pooled output bin. If > 0, "
-        "then exactly sampling_ratio x sampling_ratio grid points are used. If "
-        "<= 0, then an adaptive number of grid points are used (computed as "
-        "ceil(roi_width / pooled_w), and likewise for height)."
-    )
-    .Input(
-        0,
-        "X",
-        "4D feature map input of shape (N, C, H, W).")
-    .Input(
-        1,
-        "RoIs",
-        "2D input of shape (R, 5) specifying R RoIs with five columns "
-        "representing: batch index in [0, N - 1], x1, y1, x2, y2. The RoI "
-        "coordinates are in the coordinate system of the input image.")
-    .Output(
-        0,
-        "Y",
-        "4D output of shape (R, C, pooled_h, pooled_w). The r-th batch element "
-        "is a pooled feature map cooresponding to the r-th RoI.");
-
-OPERATOR_SCHEMA(RoIAlignGradient)
-    .NumInputs(3)
-    .NumOutputs(1)
-    .Input(
-        0,
-        "X",
-        "See RoIPoolF.")
-    .Input(
-        1,
-        "RoIs",
-        "See RoIPoolF.")
-    .Input(
-        2,
-        "dY",
-        "Gradient of forward output 0 (Y)")
-    .Output(
-        0,
-        "dX",
-        "Gradient of forward input 0 (X)");
-
-class GetRoIAlignGradient : public GradientMakerBase {
-  using GradientMakerBase::GradientMakerBase;
-  vector<OperatorDef> GetGradientDefs() override {
-    return SingleGradientDef(
-        "RoIAlignGradient",
-        "",
-        vector<string>{I(0), I(1), GO(0)},
-        vector<string>{GI(0)});
-  }
-};
-
-REGISTER_GRADIENT(RoIAlign, GetRoIAlignGradient);
-
-} // namespace caffe2
diff --git a/modules/detectron/roi_align_op.cu b/modules/detectron/roi_align_op.cu
deleted file mode 100644
index 01d67f63b0..0000000000
--- a/modules/detectron/roi_align_op.cu
+++ /dev/null
@@ -1,363 +0,0 @@
-/**
- * Copyright (c) 2016-present, Facebook, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// RoIAlign for Mask R-CNN
-// This is the per-cell centered aligned version of RoIAlign.
-// This is the official version.
-
-#include <cfloat>
-
-#include "caffe2/core/context_gpu.h"
-#include "roi_align_op.h"
-
-#include <stdio.h>
-
-namespace caffe2 {
-
-namespace {
-
-template <typename T>
-inline __device__ T gpu_atomic_add(const T val, T* address);
-
-template <>
-inline __device__
-float gpu_atomic_add(const float val, float* address) {
-  return atomicAdd(address, val);
-}
-
-template <typename T>
-__device__ T bilinear_interpolate(const T* bottom_data,
-    const int height, const int width,
-    T y, T x,
-    const int index /* index for debug only*/) {
-
-  // deal with cases where the inverse-mapped elements fall outside the feature map boundary
-  if (y < -1.0 || y > height || x < -1.0 || x > width) {
-    //empty
-    return 0;
-  }
-
-  if (y <= 0) y = 0;
-  if (x <= 0) x = 0;
-
-  int y_low = (int) y;
-  int x_low = (int) x;
-  int y_high;
-  int x_high;
-
-  if (y_low >= height - 1) {
-    y_high = y_low = height - 1;
-    y = (T) y_low;
-  } else {
-    y_high = y_low + 1;
-  }
-
-  if (x_low >= width - 1) {
-    x_high = x_low = width - 1;
-    x = (T) x_low;
-  } else {
-    x_high = x_low + 1;
-  }
-
-  T ly = y - y_low;
-  T lx = x - x_low;
-  T hy = 1. - ly, hx = 1. - lx;
-  // do bilinear interpolation
-  T v1 = bottom_data[y_low * width + x_low];
-  T v2 = bottom_data[y_low * width + x_high];
-  T v3 = bottom_data[y_high * width + x_low];
-  T v4 = bottom_data[y_high * width + x_high];
-  T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
-
-  T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
-
-  return val;
-}
-
-template <typename T>
-__global__ void RoIAlignForward(const int nthreads, const T* bottom_data,
-    const T spatial_scale, const int channels,
-    const int height, const int width,
-    const int pooled_height, const int pooled_width,
-    const int sampling_ratio,
-    const T* bottom_rois, T* top_data) {
-  CUDA_1D_KERNEL_LOOP(index, nthreads) {
-    // (n, c, ph, pw) is an element in the pooled output
-    int pw = index % pooled_width;
-    int ph = (index / pooled_width) % pooled_height;
-    int c = (index / pooled_width / pooled_height) % channels;
-    int n = index / pooled_width / pooled_height / channels;
-
-    const T* offset_bottom_rois = bottom_rois + n * 5;
-    int roi_batch_ind = offset_bottom_rois[0];
-
-    // Do not use rounding; this implementation detail is critical
-    T roi_start_w = offset_bottom_rois[1] * spatial_scale;
-    T roi_start_h = offset_bottom_rois[2] * spatial_scale;
-    T roi_end_w = offset_bottom_rois[3] * spatial_scale;
-    T roi_end_h = offset_bottom_rois[4] * spatial_scale;
-    // T roi_start_w = round(offset_bottom_rois[1] * spatial_scale);
-    // T roi_start_h = round(offset_bottom_rois[2] * spatial_scale);
-    // T roi_end_w = round(offset_bottom_rois[3] * spatial_scale);
-    // T roi_end_h = round(offset_bottom_rois[4] * spatial_scale);
-
-    // Force malformed ROIs to be 1x1
-    T roi_width = max(roi_end_w - roi_start_w, (T)1.);
-    T roi_height = max(roi_end_h - roi_start_h, (T)1.);
-    T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
-    T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
-
-    const T* offset_bottom_data = bottom_data + (roi_batch_ind * channels + c) * height * width;
-
-    // We use roi_bin_grid to sample the grid and mimic integral
-    int roi_bin_grid_h = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_height / pooled_height); // e.g., = 2
-    int roi_bin_grid_w = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width);
-
-    // We do average (integral) pooling inside a bin
-    const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4
-
-    T output_val = 0.;
-    for (int iy = 0; iy < roi_bin_grid_h; iy ++) // e.g., iy = 0, 1
-    {
-      const T y = roi_start_h + ph * bin_size_h + static_cast<T>(iy + .5f) * bin_size_h / static_cast<T>(roi_bin_grid_h); // e.g., 0.5, 1.5
-      for (int ix = 0; ix < roi_bin_grid_w; ix ++)
-      {
-        const T x = roi_start_w + pw * bin_size_w + static_cast<T>(ix + .5f) * bin_size_w / static_cast<T>(roi_bin_grid_w);
-
-        T val = bilinear_interpolate(offset_bottom_data, height, width, y, x, index);
-        output_val += val;
-      }
-    }
-    output_val /= count;
-
-    top_data[index] = output_val;
-  }
-}
-
-template <typename T>
-__device__ void bilinear_interpolate_gradient(
-    const int height, const int width,
-    T y, T x,
-    T & w1, T & w2, T & w3, T & w4,
-    int & x_low, int & x_high, int & y_low, int & y_high,
-    const int index /* index for debug only*/) {
-
-  // deal with cases where the inverse-mapped elements fall outside the feature map boundary
-  if (y < -1.0 || y > height || x < -1.0 || x > width) {
-    //empty
-    w1 = w2 = w3 = w4 = 0.;
-    x_low = x_high = y_low = y_high = -1;
-    return;
-  }
-
-  if (y <= 0) y = 0;
-  if (x <= 0) x = 0;
-
-  y_low = (int) y;
-  x_low = (int) x;
-
-  if (y_low >= height - 1) {
-    y_high = y_low = height - 1;
-    y = (T) y_low;
-  } else {
-    y_high = y_low + 1;
-  }
-
-  if (x_low >= width - 1) {
-    x_high = x_low = width - 1;
-    x = (T) x_low;
-  } else {
-    x_high = x_low + 1;
-  }
-
-  T ly = y - y_low;
-  T lx = x - x_low;
-  T hy = 1. - ly, hx = 1. - lx;
-
-  // reference in forward
-  // T v1 = bottom_data[y_low * width + x_low];
-  // T v2 = bottom_data[y_low * width + x_high];
-  // T v3 = bottom_data[y_high * width + x_low];
-  // T v4 = bottom_data[y_high * width + x_high];
-  // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
-
-  w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
-
-  return;
-}
-
-template <typename T>
-__global__ void RoIAlignBackwardFeature(const int nthreads, const T* top_diff,
-    const int num_rois, const T spatial_scale,
-    const int channels, const int height, const int width,
-    const int pooled_height, const int pooled_width,
-    const int sampling_ratio,
-    T* bottom_diff,
-    const T* bottom_rois) {
-  CUDA_1D_KERNEL_LOOP(index, nthreads) {
-    // (n, c, ph, pw) is an element in the pooled output
-    int pw = index % pooled_width;
-    int ph = (index / pooled_width) % pooled_height;
-    int c = (index / pooled_width / pooled_height) % channels;
-    int n = index / pooled_width / pooled_height / channels;
-
-    const T* offset_bottom_rois = bottom_rois + n * 5;
-    int roi_batch_ind = offset_bottom_rois[0];
-
-    // Do not use rounding; this implementation detail is critical
-    T roi_start_w = offset_bottom_rois[1] * spatial_scale;
-    T roi_start_h = offset_bottom_rois[2] * spatial_scale;
-    T roi_end_w = offset_bottom_rois[3] * spatial_scale;
-    T roi_end_h = offset_bottom_rois[4] * spatial_scale;
-    // T roi_start_w = round(offset_bottom_rois[1] * spatial_scale);
-    // T roi_start_h = round(offset_bottom_rois[2] * spatial_scale);
-    // T roi_end_w = round(offset_bottom_rois[3] * spatial_scale);
-    // T roi_end_h = round(offset_bottom_rois[4] * spatial_scale);
-
-    // Force malformed ROIs to be 1x1
-    T roi_width = max(roi_end_w - roi_start_w, (T)1.);
-    T roi_height = max(roi_end_h - roi_start_h, (T)1.);
-    T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
-    T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
-
-    T* offset_bottom_diff = bottom_diff + (roi_batch_ind * channels + c) * height * width;
-
-    int top_offset    = (n * channels + c) * pooled_height * pooled_width;
-    const T* offset_top_diff = top_diff + top_offset;
-    const T top_diff_this_bin = offset_top_diff[ph * pooled_width + pw];
-
-    // We use roi_bin_grid to sample the grid and mimic integral
-    int roi_bin_grid_h = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_height / pooled_height); // e.g., = 2
-    int roi_bin_grid_w = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width);
-
-    // We do average (integral) pooling inside a bin
-    const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4
-
-    for (int iy = 0; iy < roi_bin_grid_h; iy ++) // e.g., iy = 0, 1
-    {
-      const T y = roi_start_h + ph * bin_size_h + static_cast<T>(iy + .5f) * bin_size_h / static_cast<T>(roi_bin_grid_h); // e.g., 0.5, 1.5
-      for (int ix = 0; ix < roi_bin_grid_w; ix ++)
-      {
-        const T x = roi_start_w + pw * bin_size_w + static_cast<T>(ix + .5f) * bin_size_w / static_cast<T>(roi_bin_grid_w);
-
-        T w1, w2, w3, w4;
-        int x_low, x_high, y_low, y_high;
-
-        bilinear_interpolate_gradient(height, width, y, x,
-            w1, w2, w3, w4,
-            x_low, x_high, y_low, y_high,
-            index);
-
-        T g1 = top_diff_this_bin * w1 / count;
-        T g2 = top_diff_this_bin * w2 / count;
-        T g3 = top_diff_this_bin * w3 / count;
-        T g4 = top_diff_this_bin * w4 / count;
-
-        if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0)
-        {
-          gpu_atomic_add(static_cast<T>(g1), offset_bottom_diff + y_low * width + x_low);
-          gpu_atomic_add(static_cast<T>(g2), offset_bottom_diff + y_low * width + x_high);
-          gpu_atomic_add(static_cast<T>(g3), offset_bottom_diff + y_high * width + x_low);
-          gpu_atomic_add(static_cast<T>(g4), offset_bottom_diff + y_high * width + x_high);
-        } // if
-      } // ix
-    } // iy
-  } // CUDA_1D_KERNEL_LOOP
-} // RoIAlignBackwardFeature
-
-
-} // namespace
-
-template<>
-bool RoIAlignOp<float, CUDAContext>::RunOnDevice() {
-  auto& X = Input(0);  // Input data to pool
-  auto& R = Input(1);  // RoIs
-  auto* Y = Output(0); // RoI pooled data
-
-  if (R.size() == 0) {
-    // Handle empty rois
-    Y->Resize(0, X.dim32(1), pooled_height_, pooled_width_);
-    // The following mutable_data calls are needed to allocate the tensors
-    Y->mutable_data<float>();
-    return true;
-  }
-
-  assert(sampling_ratio_ >= 0);
-
-  Y->Resize(R.dim32(0), X.dim32(1), pooled_height_, pooled_width_);
-  int output_size = Y->size();
-  RoIAlignForward<float>
-      <<<CAFFE_GET_BLOCKS(output_size),
-         CAFFE_CUDA_NUM_THREADS,
-         0,
-         context_.cuda_stream()>>>(
-          output_size,
-          X.data<float>(),
-          spatial_scale_,
-          X.dim32(1),
-          X.dim32(2),
-          X.dim32(3),
-          pooled_height_,
-          pooled_width_,
-          sampling_ratio_,
-          R.data<float>(),
-          Y->mutable_data<float>());
-  return true;
-}
-
-template<>
-bool RoIAlignGradientOp<float, CUDAContext>::RunOnDevice() {
-  auto& X  = Input(0);  // Input data to pool
-  auto& R  = Input(1);  // RoIs
-  auto& dY = Input(2);  // Gradient of net w.r.t. output of "forward" op
-                        // (aka "gradOutput")
-  auto* dX = Output(0); // Gradient of net w.r.t. input to "forward" op
-                        // (aka "gradInput")
-
-  dX->ResizeLike(X);
-
-  // Must zero-out dX before accumulating gradients
-  math::Set<float, CUDAContext>(
-      dX->size(), 0.f, dX->mutable_data<float>(), &context_);
-
-  if (dY.size() > 0) {  // Handle possibly empty gradient if there were no rois
-    RoIAlignBackwardFeature<float>
-        <<<CAFFE_GET_BLOCKS(dY.size()),
-           CAFFE_CUDA_NUM_THREADS,
-           0,
-           context_.cuda_stream()>>>(
-            dY.size(),
-            dY.data<float>(),
-            R.dim32(0),
-            spatial_scale_,
-            X.dim32(1),
-            X.dim32(2),
-            X.dim32(3),
-            pooled_height_,
-            pooled_width_,
-            sampling_ratio_,
-            dX->mutable_data<float>(),
-            R.data<float>());
-  }
-  return true;
-}
-
-
-REGISTER_CUDA_OPERATOR(RoIAlign,
-                       RoIAlignOp<float, CUDAContext>);
-REGISTER_CUDA_OPERATOR(RoIAlignGradient,
-                       RoIAlignGradientOp<float, CUDAContext>);
-} // namespace caffe2
diff --git a/modules/detectron/roi_align_op.h b/modules/detectron/roi_align_op.h
deleted file mode 100644
index 8283d4b242..0000000000
--- a/modules/detectron/roi_align_op.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/**
- * Copyright (c) 2016-present, Facebook, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ROI_ALIGN_OP_H_
-#define ROI_ALIGN_OP_H_
-
-#include "caffe2/core/context.h"
-#include "caffe2/core/logging.h"
-#include "caffe2/core/operator.h"
-#include "caffe2/utils/math.h"
-
-namespace caffe2 {
-
-template <typename T, class Context>
-class RoIAlignOp final : public Operator<Context> {
- public:
-  RoIAlignOp(const OperatorDef& operator_def, Workspace* ws)
-      : Operator<Context>(operator_def, ws),
-        spatial_scale_(
-            OperatorBase::GetSingleArgument<float>("spatial_scale", 1.)),
-        pooled_height_(OperatorBase::GetSingleArgument<int>("pooled_h", 1)),
-        pooled_width_(OperatorBase::GetSingleArgument<int>("pooled_w", 1)),
-        sampling_ratio_(
-            OperatorBase::GetSingleArgument<int>("sampling_ratio", -1)) {
-    DCHECK_GT(spatial_scale_, 0);
-    DCHECK_GT(pooled_height_, 0);
-    DCHECK_GT(pooled_width_, 0);
-    DCHECK_GE(sampling_ratio_, 0);
-  }
-  USE_OPERATOR_CONTEXT_FUNCTIONS;
-
-  bool RunOnDevice() override {
-    // No CPU implementation for now
-    CAFFE_NOT_IMPLEMENTED;
-  }
-
- protected:
-  float spatial_scale_;
-  int pooled_height_;
-  int pooled_width_;
-  int sampling_ratio_;
-};
-
-template <typename T, class Context>
-class RoIAlignGradientOp final : public Operator<Context> {
- public:
-  RoIAlignGradientOp(const OperatorDef& def, Workspace* ws)
-      : Operator<Context>(def, ws),
-        spatial_scale_(
-            OperatorBase::GetSingleArgument<float>("spatial_scale", 1.)),
-        pooled_height_(OperatorBase::GetSingleArgument<int>("pooled_h", 1)),
-        pooled_width_(OperatorBase::GetSingleArgument<int>("pooled_w", 1)),
-        sampling_ratio_(
-            OperatorBase::GetSingleArgument<int>("sampling_ratio", -1)) {
-    DCHECK_GT(spatial_scale_, 0);
-    DCHECK_GT(pooled_height_, 0);
-    DCHECK_GT(pooled_width_, 0);
-    DCHECK_GE(sampling_ratio_, 0);
-  }
-  USE_OPERATOR_CONTEXT_FUNCTIONS;
-
-  bool RunOnDevice() override {
-    // No CPU implementation for now
-    CAFFE_NOT_IMPLEMENTED;
-  }
-
- protected:
-  float spatial_scale_;
-  int pooled_height_;
-  int pooled_width_;
-  int sampling_ratio_;
-};
-
-} // namespace caffe2
-
-#endif // ROI_ALIGN_OP_H_
author	Yanghan Wang <yanghan@instagram.com>	2018-01-24 12:47:39 -0800
committer	Facebook Github Bot <facebook-github-bot@users.noreply.github.com>	2018-01-24 13:05:27 -0800
commit	2828c7a391b209302a318b3817cfd6cd85b05cf4 (patch)
tree	1bdd43b92e64f0c5fcb0ebc997137c8db8e70648 /modules/detectron
parent	09a1ef54ab4e08ffcdd852bfe63a09daf013e624 (diff)
download	pytorch-2828c7a391b209302a318b3817cfd6cd85b05cf4.tar.gz pytorch-2828c7a391b209302a318b3817cfd6cd85b05cf4.tar.bz2 pytorch-2828c7a391b209302a318b3817cfd6cd85b05cf4.zip