author     Yinghai Lu <yinghai@fb.com>  2018-11-16 11:27:45 -0800
committer  Facebook Github Bot <facebook-github-bot@users.noreply.github.com>  2018-11-16 11:31:05 -0800
commit     7c053b7e643ce1936271a6fed299c31982bf1272 (patch)
tree       0cf5b7c38170c5136d18cff5174f25bb8ddbde18 /caffe2/utils
parent     3c7b575a141d4eb3a4a83d3bdcfd8f2739c0efcc (diff)
Add filler for SparseLengthsWeightedSum (#13949)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/13949

This diff adds filler support for the `SparseLengthsWeight*` ops. It does 3 things:

1. Add fillers for the `SparseLengthsWeight*` ops.
2. Add a filling heuristic for the path `LengthsRangeFill` -> `Gather` -> `SparseLengthsWeightedSum`, where the lengths input is shared by `LengthsRangeFill` and `SparseLengthsWeightedSum`. The value of that lengths input therefore has to be bounded carefully so that `Gather` does not index out of bounds into its data (weight) input.
3. Fix and simplify the logic of `math::RandFixedSum`: a generated value is now simply rejected and redrawn whenever it would violate the invariants.

Reviewed By: highker

Differential Revision: D13048216

fbshipit-source-id: bfe402e07e6421b28548047d18b298c148e0ec87
Diffstat (limited to 'caffe2/utils')
-rw-r--r--  caffe2/utils/filler.h     |  7
-rw-r--r--  caffe2/utils/math_cpu.cc  | 67
-rw-r--r--  caffe2/utils/math_test.cc | 15
3 files changed, 56 insertions, 33 deletions
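
To make point 2 of the summary concrete, here is a small illustrative sketch (plain C++, names hypothetical and not part of Caffe2) of why the shared lengths input has to be bounded: `LengthsRangeFill` expands each length L into the indices 0..L-1, and `Gather` then uses those indices to pick rows out of its data input, so every generated length must stay within the number of rows that `Gather` reads from.

#include <algorithm>
#include <cstdint>
#include <vector>

// Illustrative only: mimic LengthsRangeFill, which expands each length L
// into the index sequence 0, 1, ..., L-1 for that segment.
std::vector<int64_t> LengthsRangeFillSketch(const std::vector<int64_t>& lengths) {
  std::vector<int64_t> indices;
  for (const int64_t len : lengths) {
    for (int64_t i = 0; i < len; ++i) {
      indices.push_back(i);
    }
  }
  return indices;
}

// The bound the filling heuristic has to respect: Gather indexes rows of a
// tensor with data_rows rows, so every per-segment length must be <= data_rows
// or the gather would read out of bounds.
bool GatherStaysInBounds(const std::vector<int64_t>& lengths, int64_t data_rows) {
  return std::all_of(lengths.begin(), lengths.end(),
                     [data_rows](int64_t len) { return len <= data_rows; });
}
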
diff --git a/caffe2/utils/filler.h b/caffe2/utils/filler.h
index c915bd37e0..926a72f235 100644
--- a/caffe2/utils/filler.h
+++ b/caffe2/utils/filler.h
@@ -77,10 +77,13 @@ class TensorFiller {
return *this;
}
- // a helper function to construct the lengths vector for sparse features
+ // A helper function to construct the lengths vector for sparse features
+ // We try to pad at least one index per batch unless the total_length is 0
template <class Type>
TensorFiller& SparseLengths(Type total_length) {
- return FixedSum(total_length).Min(0).Max(total_length);
+ return FixedSum(total_length)
+ .Min(std::min(static_cast<Type>(1), total_length))
+ .Max(total_length);
}
// a helper function to construct the segments vector for sparse features
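
For intuition, a minimal sketch (not the Caffe2 implementation; the helper name is hypothetical) of the invariant the updated `SparseLengths` helper enforces: the lengths sum to total_length and, unless total_length is 0, every entry is at least 1.

#include <algorithm>
#include <cassert>
#include <random>
#include <vector>

// Illustrative sketch of the invariant behind SparseLengths: produce n
// lengths that sum to total_length, each at least min(1, total_length).
std::vector<int> MakeSparseLengthsSketch(int n, int total_length, std::mt19937& rng) {
  // Mirrors RandFixedSum's precondition that the mean lies in [min, max]:
  // total_length must be 0 or at least n.
  assert(total_length == 0 || total_length >= n);
  const int lo = std::min(1, total_length);  // at least one index per batch item
  std::vector<int> lengths(n, lo);
  int remaining = total_length - lo * n;     // distribute the rest at random
  std::uniform_int_distribution<int> pick(0, n - 1);
  while (remaining > 0) {
    ++lengths[pick(rng)];
    --remaining;
  }
  return lengths;  // sums to total_length, each entry in [lo, total_length]
}
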
diff --git a/caffe2/utils/math_cpu.cc b/caffe2/utils/math_cpu.cc
index 397fb43241..b6dda5933b 100644
--- a/caffe2/utils/math_cpu.cc
+++ b/caffe2/utils/math_cpu.cc
@@ -2321,37 +2321,42 @@ CAFFE2_RAND_UNIFORM_INT(uint64_t);
// to between a and b.
// The algorithm is non-trivial given the adjustment would be different towards
// each value.
-#define CAFFE2_RAND_FIXED_SUM(T) \
- template <> \
- C10_EXPORT void RandFixedSum<T, CPUContext>( \
- const size_t n, \
- const T a, \
- const T b, \
- const T sum, \
- T* r, \
- CPUContext* context) { \
- CAFFE_ENFORCE_GE(a, 0); \
- CAFFE_ENFORCE_GE(sum / (double)n, a); \
- CAFFE_ENFORCE_LE(sum / (double)n, b); \
- T current_sum = 0; \
- for (size_t i = 0; i < n - 1; ++i) { \
- auto remaining_numbers = n - 1 - i; \
- double mean = (sum - current_sum) / remaining_numbers; \
- double stdev = std::min(mean - a, b - mean); \
- std::normal_distribution<double> distribution{mean, stdev / 4.0}; \
- T value = distribution(context->RandGenerator()); \
- auto remaining_sum = sum - current_sum - value; \
- if (value < a || remaining_sum > b * remaining_numbers) { \
- value = a; \
- } else if (value > b || remaining_sum < a * remaining_numbers) { \
- value = b; \
- } \
- r[i] = value; \
- CAFFE_ENFORCE(a <= value && value <= b); \
- current_sum += value; \
- } \
- r[n - 1] = sum - current_sum; \
- CAFFE_ENFORCE(a <= r[n - 1] && r[n - 1] <= b); \
+#define CAFFE2_RAND_FIXED_SUM(T) \
+ template <> \
+ C10_EXPORT void RandFixedSum<T, CPUContext>( \
+ const size_t n, \
+ const T a, \
+ const T b, \
+ const T sum, \
+ T* r, \
+ CPUContext* context) { \
+ CAFFE_ENFORCE_GE(a, 0); \
+ CAFFE_ENFORCE_GE(sum / (double)n, a); \
+ CAFFE_ENFORCE_LE(sum / (double)n, b); \
+ T current_sum = 0; \
+ T remaining_sum = sum; \
+ for (size_t i = 0; i < n; ++i) { \
+ auto remaining_numbers = n - 1 - i; \
+ double mean = (sum - current_sum) / (remaining_numbers + 1); \
+ double stdev = std::min(mean - a, b - mean); \
+ std::normal_distribution<double> distribution{mean, stdev / 4.0}; \
+ T value, remaining_sum_test; \
+ do { \
+ value = distribution(context->RandGenerator()); \
+ remaining_sum_test = remaining_sum - value; \
+ } while (value < a || remaining_sum_test < a * remaining_numbers || \
+ value > b || remaining_sum_test > b * remaining_numbers); \
+ r[i] = value; \
+ CAFFE_ENFORCE(a <= value && value <= b); \
+ current_sum += value; \
+ remaining_sum -= value; \
+ CAFFE_ENFORCE_GE(remaining_sum, a* remaining_numbers); \
+ CAFFE_ENFORCE_LE(remaining_sum, b* remaining_numbers); \
+ } \
+ r[n - 1] += remaining_sum; \
+ current_sum += remaining_sum; \
+ CAFFE_ENFORCE(a <= r[n - 1] && r[n - 1] <= b); \
+ CAFFE_ENFORCE_EQ(current_sum, sum); \
}
CAFFE2_RAND_FIXED_SUM(float);
CAFFE2_RAND_FIXED_SUM(double);
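
The reworked macro above is easier to read as a plain function. Below is a slightly simplified, hedged sketch of the same rejection idea (not the exported Caffe2 symbol): draw each entry from a normal distribution centred on the remaining mean and redraw until both the value and the leftover budget can still stay inside [a, b]; the last entry then takes whatever budget remains.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <random>

// Illustrative sketch of the rejection loop used by RandFixedSum above.
template <typename T>
void RandFixedSumSketch(size_t n, T a, T b, T sum, T* r, std::mt19937& rng) {
  assert(n > 0 && a >= 0);
  assert(sum >= a * static_cast<T>(n) && sum <= b * static_cast<T>(n));
  T remaining_sum = sum;
  for (size_t i = 0; i + 1 < n; ++i) {
    const T remaining_numbers = static_cast<T>(n - 1 - i);
    const double mean =
        static_cast<double>(remaining_sum) / (static_cast<double>(remaining_numbers) + 1.0);
    const double stdev =
        std::min(mean - static_cast<double>(a), static_cast<double>(b) - mean);
    std::normal_distribution<double> dist{mean, std::max(stdev, 1e-3) / 4.0};
    T value;
    do {
      // Reject and redraw whenever the value itself, or the budget left for
      // the remaining entries, would fall outside [a, b].
      value = static_cast<T>(dist(rng));
    } while (value < a || value > b ||
             remaining_sum - value < a * remaining_numbers ||
             remaining_sum - value > b * remaining_numbers);
    r[i] = value;
    remaining_sum -= value;
  }
  r[n - 1] = remaining_sum;  // the last slot absorbs the remaining budget
  assert(r[n - 1] >= a && r[n - 1] <= b);
}
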
diff --git a/caffe2/utils/math_test.cc b/caffe2/utils/math_test.cc
index 85efaf6aa5..c527b924c7 100644
--- a/caffe2/utils/math_test.cc
+++ b/caffe2/utils/math_test.cc
@@ -716,6 +716,21 @@ TEST_F(BroadcastTest, BroadcastFloatTest) {
{1.0f, 1.0f, 2.0f, 2.0f, 1.0f, 1.0f, 2.0f, 2.0f});
}
+class RandFixedSumTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ cpu_context_ = make_unique<CPUContext>(option_);
+ }
+ DeviceOption option_;
+ std::unique_ptr<CPUContext> cpu_context_;
+};
+
+TEST_F(RandFixedSumTest, UpperBound) {
+ std::vector<int> l(20);
+ math::RandFixedSum<int, CPUContext>(
+ 20, 1, 1000, 1000, l.data(), cpu_context_.get());
+}
+
class MomentsTest : public testing::Test {
protected:
void SetUp() override {
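
For completeness, a hedged sketch of an extra check one could add next to the UpperBound test above (the test name is hypothetical; it assumes the same includes, namespace, and RandFixedSumTest fixture as math_test.cc): after the call, every entry should lie in [1, 1000] and the entries should sum to exactly 1000.

// Hypothetical additional test, assuming the RandFixedSumTest fixture above.
TEST_F(RandFixedSumTest, EntriesRespectBoundsAndSum) {
  std::vector<int> l(20);
  math::RandFixedSum<int, CPUContext>(
      20, 1, 1000, 1000, l.data(), cpu_context_.get());
  int total = 0;
  for (const int v : l) {
    EXPECT_GE(v, 1);     // lower bound a
    EXPECT_LE(v, 1000);  // upper bound b
    total += v;
  }
  EXPECT_EQ(total, 1000);  // the entries add up to the requested fixed sum
}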