diff options
author | Yinghai Lu <yinghai@fb.com> | 2018-11-16 11:27:45 -0800 |
---|---|---|
committer | Facebook Github Bot <facebook-github-bot@users.noreply.github.com> | 2018-11-16 11:31:05 -0800 |
commit | 7c053b7e643ce1936271a6fed299c31982bf1272 (patch) | |
tree | 0cf5b7c38170c5136d18cff5174f25bb8ddbde18 /caffe2/utils | |
parent | 3c7b575a141d4eb3a4a83d3bdcfd8f2739c0efcc (diff) | |
download | pytorch-7c053b7e643ce1936271a6fed299c31982bf1272.tar.gz pytorch-7c053b7e643ce1936271a6fed299c31982bf1272.tar.bz2 pytorch-7c053b7e643ce1936271a6fed299c31982bf1272.zip |
Add filler for SparseLengthsWeightedSum (#13949)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/13949
This diff adds filler support for `SparseLengthsWeight*` ops. It does 3 things:
1. Add the fillers for `SparseLengthsWeight*` ops
2. Add filling heuristics to consider the path of `LengthsRangeFill` -> `Gather` -> `SparseLengthsWeightedSum`, where the length input is shared by `LengthsRangeFill` and `SparseLengthsWeightedSum`. Therefore, we need to carefully bound the value of that length input so that at `Gather`, it does not index out of bounds into the weight input of `Gather`.
3. Fix and simplify the logic of `math::RandFixedSum`, where we just keep rejecting the generated value if it violates the invariants.
Reviewed By: highker
Differential Revision: D13048216
fbshipit-source-id: bfe402e07e6421b28548047d18b298c148e0ec87
Diffstat (limited to 'caffe2/utils')
-rw-r--r-- | caffe2/utils/filler.h | 7 | ||||
-rw-r--r-- | caffe2/utils/math_cpu.cc | 67 | ||||
-rw-r--r-- | caffe2/utils/math_test.cc | 15 |
3 files changed, 56 insertions, 33 deletions
diff --git a/caffe2/utils/filler.h b/caffe2/utils/filler.h index c915bd37e0..926a72f235 100644 --- a/caffe2/utils/filler.h +++ b/caffe2/utils/filler.h @@ -77,10 +77,13 @@ class TensorFiller { return *this; } - // a helper function to construct the lengths vector for sparse features + // A helper function to construct the lengths vector for sparse features + // We try to pad least one index per batch unless the total_length is 0 template <class Type> TensorFiller& SparseLengths(Type total_length) { - return FixedSum(total_length).Min(0).Max(total_length); + return FixedSum(total_length) + .Min(std::min(static_cast<Type>(1), total_length)) + .Max(total_length); } // a helper function to construct the segments vector for sparse features diff --git a/caffe2/utils/math_cpu.cc b/caffe2/utils/math_cpu.cc index 397fb43241..b6dda5933b 100644 --- a/caffe2/utils/math_cpu.cc +++ b/caffe2/utils/math_cpu.cc @@ -2321,37 +2321,42 @@ CAFFE2_RAND_UNIFORM_INT(uint64_t); // to between a and b. // The algorithm is non-trivial given the adjustment would be different towards // each value. 
-#define CAFFE2_RAND_FIXED_SUM(T) \ - template <> \ - C10_EXPORT void RandFixedSum<T, CPUContext>( \ - const size_t n, \ - const T a, \ - const T b, \ - const T sum, \ - T* r, \ - CPUContext* context) { \ - CAFFE_ENFORCE_GE(a, 0); \ - CAFFE_ENFORCE_GE(sum / (double)n, a); \ - CAFFE_ENFORCE_LE(sum / (double)n, b); \ - T current_sum = 0; \ - for (size_t i = 0; i < n - 1; ++i) { \ - auto remaining_numbers = n - 1 - i; \ - double mean = (sum - current_sum) / remaining_numbers; \ - double stdev = std::min(mean - a, b - mean); \ - std::normal_distribution<double> distribution{mean, stdev / 4.0}; \ - T value = distribution(context->RandGenerator()); \ - auto remaining_sum = sum - current_sum - value; \ - if (value < a || remaining_sum > b * remaining_numbers) { \ - value = a; \ - } else if (value > b || remaining_sum < a * remaining_numbers) { \ - value = b; \ - } \ - r[i] = value; \ - CAFFE_ENFORCE(a <= value && value <= b); \ - current_sum += value; \ - } \ - r[n - 1] = sum - current_sum; \ - CAFFE_ENFORCE(a <= r[n - 1] && r[n - 1] <= b); \ +#define CAFFE2_RAND_FIXED_SUM(T) \ + template <> \ + C10_EXPORT void RandFixedSum<T, CPUContext>( \ + const size_t n, \ + const T a, \ + const T b, \ + const T sum, \ + T* r, \ + CPUContext* context) { \ + CAFFE_ENFORCE_GE(a, 0); \ + CAFFE_ENFORCE_GE(sum / (double)n, a); \ + CAFFE_ENFORCE_LE(sum / (double)n, b); \ + T current_sum = 0; \ + T remaining_sum = sum; \ + for (size_t i = 0; i < n; ++i) { \ + auto remaining_numbers = n - 1 - i; \ + double mean = (sum - current_sum) / (remaining_numbers + 1); \ + double stdev = std::min(mean - a, b - mean); \ + std::normal_distribution<double> distribution{mean, stdev / 4.0}; \ + T value, remaining_sum_test; \ + do { \ + value = distribution(context->RandGenerator()); \ + remaining_sum_test = remaining_sum - value; \ + } while (value < a || remaining_sum_test < a * remaining_numbers || \ + value > b || remaining_sum_test > b * remaining_numbers); \ + r[i] = value; \ + CAFFE_ENFORCE(a <= 
value && value <= b); \ + current_sum += value; \ + remaining_sum -= value; \ + CAFFE_ENFORCE_GE(remaining_sum, a* remaining_numbers); \ + CAFFE_ENFORCE_LE(remaining_sum, b* remaining_numbers); \ + } \ + r[n - 1] += remaining_sum; \ + current_sum += remaining_sum; \ + CAFFE_ENFORCE(a <= r[n - 1] && r[n - 1] <= b); \ + CAFFE_ENFORCE_EQ(current_sum, sum); \ } CAFFE2_RAND_FIXED_SUM(float); CAFFE2_RAND_FIXED_SUM(double); diff --git a/caffe2/utils/math_test.cc b/caffe2/utils/math_test.cc index 85efaf6aa5..c527b924c7 100644 --- a/caffe2/utils/math_test.cc +++ b/caffe2/utils/math_test.cc @@ -716,6 +716,21 @@ TEST_F(BroadcastTest, BroadcastFloatTest) { {1.0f, 1.0f, 2.0f, 2.0f, 1.0f, 1.0f, 2.0f, 2.0f}); } +class RandFixedSumTest : public testing::Test { + protected: + void SetUp() override { + cpu_context_ = make_unique<CPUContext>(option_); + } + DeviceOption option_; + std::unique_ptr<CPUContext> cpu_context_; +}; + +TEST_F(RandFixedSumTest, UpperBound) { + std::vector<int> l(20); + math::RandFixedSum<int, CPUContext>( + 20, 1, 1000, 1000, l.data(), cpu_context_.get()); +} + class MomentsTest : public testing::Test { protected: void SetUp() override { |