diff options
author | Yinghai Lu <yinghai@fb.com> | 2018-11-16 11:27:45 -0800 |
---|---|---|
committer | Facebook Github Bot <facebook-github-bot@users.noreply.github.com> | 2018-11-16 11:31:05 -0800 |
commit | 7c053b7e643ce1936271a6fed299c31982bf1272 (patch) | |
tree | 0cf5b7c38170c5136d18cff5174f25bb8ddbde18 /caffe2/utils | |
parent | 3c7b575a141d4eb3a4a83d3bdcfd8f2739c0efcc (diff) | |
download | pytorch-7c053b7e643ce1936271a6fed299c31982bf1272.tar.gz pytorch-7c053b7e643ce1936271a6fed299c31982bf1272.tar.bz2 pytorch-7c053b7e643ce1936271a6fed299c31982bf1272.zip |
Add filler for SparseLengthsWeightedSum (#13949)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/13949
This diff adds filler support for `SparseLengthsWeight*` ops. It does 3 things:
1. Add the fillers for `SparseLengthsWeight*` ops
2. Add filling heuristics to consider the path of `LengthsRangeFill` -> `Gather` -> `SparseLengthsWeightedSum`, where the length input is shared by `LengthsRangeFill` and `SparseLengthsWeightedSum`. Therefore, we need to carefully bound the value of that length input so that at `Gather`, it does not index out of bounds into the weight input of `Gather`.
3. Fix and simplify the logic of `math::RandFixedSum`, where we just keep rejecting the generated value if it violates the invariants.
Reviewed By: highker
Differential Revision: D13048216
fbshipit-source-id: bfe402e07e6421b28548047d18b298c148e0ec87
Diffstat (limited to 'caffe2/utils')
-rw-r--r-- | caffe2/utils/filler.h | 7 | ||||
-rw-r--r-- | caffe2/utils/math_cpu.cc | 67 | ||||
-rw-r--r-- | caffe2/utils/math_test.cc | 15 |
3 files changed, 56 insertions, 33 deletions
diff --git a/caffe2/utils/filler.h b/caffe2/utils/filler.h index c915bd37e0..926a72f235 100644 --- a/caffe2/utils/filler.h +++ b/caffe2/utils/filler.h @@ -77,10 +77,13 @@ class TensorFiller { return *this; } - // a helper function to construct the lengths vector for sparse features + // A helper function to construct the lengths vector for sparse features + // We try to pad least one index per batch unless the total_length is 0 template <class Type> TensorFiller& SparseLengths(Type total_length) { - return FixedSum(total_length).Min(0).Max(total_length); + return FixedSum(total_length) + .Min(std::min(static_cast<Type>(1), total_length)) + .Max(total_length); } // a helper function to construct the segments vector for sparse features diff --git a/caffe2/utils/math_cpu.cc b/caffe2/utils/math_cpu.cc index 397fb43241..b6dda5933b 100644 --- a/caffe2/utils/math_cpu.cc +++ b/caffe2/utils/math_cpu.cc @@ -2321,37 +2321,42 @@ CAFFE2_RAND_UNIFORM_INT(uint64_t); // to between a and b. // The algorithm is non-trivial given the adjustment would be different towards // each value. 
-#define CAFFE2_RAND_FIXED_SUM(T) \ - template <> \ - C10_EXPORT void RandFixedSum<T, CPUContext>( \ - const size_t n, \ - const T a, \ - const T b, \ - const T sum, \ - T* r, \ - CPUContext* context) { \ - CAFFE_ENFORCE_GE(a, 0); \ - CAFFE_ENFORCE_GE(sum / (double)n, a); \ - CAFFE_ENFORCE_LE(sum / (double)n, b); \ - T current_sum = 0; \ - for (size_t i = 0; i < n - 1; ++i) { \ - auto remaining_numbers = n - 1 - i; \ - double mean = (sum - current_sum) / remaining_numbers; \ - double stdev = std::min(mean - a, b - mean); \ - std::normal_distribution<double> distribution{mean, stdev / 4.0}; \ - T value = distribution(context->RandGenerator()); \ - auto remaining_sum = sum - current_sum - value; \ - if (value < a || remaining_sum > b * remaining_numbers) { \ - value = a; \ - } else if (value > b || remaining_sum < a * remaining_numbers) { \ - value = b; \ - } \ - r[i] = value; \ - CAFFE_ENFORCE(a <= value && value <= b); \ - current_sum += value; \ - } \ - r[n - 1] = sum - current_sum; \ - CAFFE_ENFORCE(a <= r[n - 1] && r[n - 1] <= b); \ +#define CAFFE2_RAND_FIXED_SUM(T) \ + template <> \ + C10_EXPORT void RandFixedSum<T, CPUContext>( \ + const size_t n, \ + const T a, \ + const T b, \ + const T sum, \ + T* r, \ + CPUContext* context) { \ + CAFFE_ENFORCE_GE(a, 0); \ + CAFFE_ENFORCE_GE(sum / (double)n, a); \ + CAFFE_ENFORCE_LE(sum / (double)n, b); \ + T current_sum = 0; \ + T remaining_sum = sum; \ + for (size_t i = 0; i < n; ++i) { \ + auto remaining_numbers = n - 1 - i; \ + double mean = (sum - current_sum) / (remaining_numbers + 1); \ + double stdev = std::min(mean - a, b - mean); \ + std::normal_distribution<double> distribution{mean, stdev / 4.0}; \ + T value, remaining_sum_test; \ + do { \ + value = distribution(context->RandGenerator()); \ + remaining_sum_test = remaining_sum - value; \ + } while (value < a || remaining_sum_test < a * remaining_numbers || \ + value > b || remaining_sum_test > b * remaining_numbers); \ + r[i] = value; \ + CAFFE_ENFORCE(a <= 
value && value <= b); \ + current_sum += value; \ + remaining_sum -= value; \ + CAFFE_ENFORCE_GE(remaining_sum, a* remaining_numbers); \ + CAFFE_ENFORCE_LE(remaining_sum, b* remaining_numbers); \ + } \ + r[n - 1] += remaining_sum; \ + current_sum += remaining_sum; \ + CAFFE_ENFORCE(a <= r[n - 1] && r[n - 1] <= b); \ + CAFFE_ENFORCE_EQ(current_sum, sum); \ } CAFFE2_RAND_FIXED_SUM(float); CAFFE2_RAND_FIXED_SUM(double); diff --git a/caffe2/utils/math_test.cc b/caffe2/utils/math_test.cc index 85efaf6aa5..c527b924c7 100644 --- a/caffe2/utils/math_test.cc +++ b/caffe2/utils/math_test.cc @@ -716,6 +716,21 @@ TEST_F(BroadcastTest, BroadcastFloatTest) { {1.0f, 1.0f, 2.0f, 2.0f, 1.0f, 1.0f, 2.0f, 2.0f}); } +class RandFixedSumTest : public testing::Test { + protected: + void SetUp() override { + cpu_context_ = make_unique<CPUContext>(option_); + } + DeviceOption option_; + std::unique_ptr<CPUContext> cpu_context_; +}; + +TEST_F(RandFixedSumTest, UpperBound) { + std::vector<int> l(20); + math::RandFixedSum<int, CPUContext>( + 20, 1, 1000, 1000, l.data(), cpu_context_.get()); +} + class MomentsTest : public testing::Test { protected: void SetUp() override { |