author     Adam Paszke <adam.paszke@gmail.com>  2018-12-04 00:13:24 -0800
committer  Facebook Github Bot <facebook-github-bot@users.noreply.github.com>  2018-12-04 00:16:21 -0800
commit     8812a5d42e7be858334a4f5bd6acc7e6e942815f (patch)
tree       841dbbe407584a0c6d18571ea19edee45e7adfda /test/cpp
parent     862b8cae51321a16936d87567e8a455186ca2d85 (diff)
Reduce broadcasted inputs in derivative code (#14485)
Summary:
Previously, symbolic AD formulas assumed that no broadcasting happened and would return gradients of incorrect shapes (possibly leading to silent errors later).

Fixes a few bugs (known and unknown):
- #11736
- ArgumentSpec didn't compute the input types correctly [(it didn't advance the offset for non-tensor args)](https://github.com/pytorch/pytorch/pull/14485/files#diff-4fd3157a056596aefb8cdf41022a208bR153)
- Symbolic AD could suffer from use-after-free (dangling pointers in the grad map), because [`EliminateDeadCode` could have removed nodes](https://github.com/pytorch/pytorch/pull/14485/files#diff-25d33ad1ed6855684dec79d927ca6142L781) that referenced gradients of certain values.
- Undefined behavior in `aten::size`

During my tests I also found a few new problems and have opened issues for them:
- FusionGroup seems to think that cat nodes broadcast their inputs (#14483)
- `prim::ConstantChunk` derivative formula doesn't handle undefined inputs (#14484)

This patch unfortunately deoptimizes some of our code (fusion doesn't happen past chunk nodes, and we output more tensors only because we have to get their sizes). I know how to fix those issues, but wanted to fix this terrible bug quickly.

cc zou3519 zdevito ngimel

Pull Request resolved: https://github.com/pytorch/pytorch/pull/14485

Reviewed By: eellison

Differential Revision: D13312888

Pulled By: suo

fbshipit-source-id: ad46bfb4d0a306ad9451002f8270f7a790f72d58
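The core of the fix is that when an input was broadcast in the forward pass, the incoming gradient has the broadcast shape and must be summed back down to the input's original shape before it is returned. Below is a minimal standalone sketch of that reduction written against the libtorch C++ API; it is not the actual JIT autodiff code, and the helper name `reduce_grad_to` is made up purely for illustration.

```cpp
// Sketch: reduce the gradient of a broadcast input back to the input's shape.
// Not the real PyTorch JIT code; `reduce_grad_to` is a hypothetical helper.
#include <torch/torch.h>
#include <iostream>

torch::Tensor reduce_grad_to(torch::Tensor grad, at::IntArrayRef input_sizes) {
  // Broadcasting may have prepended extra leading dimensions; sum them away.
  while (grad.dim() > static_cast<int64_t>(input_sizes.size())) {
    grad = grad.sum(/*dim=*/0, /*keepdim=*/false);
  }
  // Where the input had size 1 but the gradient does not, sum and keep the dim.
  for (int64_t i = 0; i < grad.dim(); ++i) {
    if (input_sizes[i] == 1 && grad.size(i) != 1) {
      grad = grad.sum(/*dim=*/i, /*keepdim=*/true);
    }
  }
  return grad;
}

int main() {
  auto a = torch::randn({4, 3});
  auto b = torch::randn({3});           // broadcast against `a` in a + b
  auto grad_out = torch::ones({4, 3});  // gradient flowing back from (a + b)
  auto grad_b = reduce_grad_to(grad_out, b.sizes());
  std::cout << grad_b.sizes() << "\n";  // [3], matching b's shape
  return 0;
}
```

Doing this reduction requires knowing the original sizes of the broadcast inputs at backward time, which appears to be why the patch captures more outputs (see the changed test expectations below) and why the commit message notes the temporary deoptimization.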
Diffstat (limited to 'test/cpp')
-rw-r--r--  test/cpp/jit/tests.h  45
1 file changed, 22 insertions(+), 23 deletions(-)
diff --git a/test/cpp/jit/tests.h b/test/cpp/jit/tests.h
index 2a4edc1794..d728a65e9b 100644
--- a/test/cpp/jit/tests.h
+++ b/test/cpp/jit/tests.h
@@ -439,38 +439,38 @@ std::shared_ptr<Graph> build_lstm() {
return r;
}
-void run(InterpreterState & interp, const std::vector<at::Tensor> & inputs, std::vector<at::Tensor> & outputs) {
+std::vector<at::Tensor> run(InterpreterState & interp, const std::vector<at::Tensor> & inputs) {
std::vector<IValue> stack(inputs.begin(), inputs.end());
interp.run(stack);
- outputs.clear();
- for (auto& ivalue : stack) {
- outputs.push_back(std::move(ivalue).toTensor());
- }
+ return fmap(stack, [](const IValue& i) { return i.toTensor(); });
}
std::pair<tensor_list, tensor_list> runGradient(
Gradient& grad_spec,
tensor_list& tensors_in,
tensor_list& tensor_grads_in) {
- tensor_list tensors_out, tensor_grads_out;
+ static const auto as_tensorlist = [](const Stack& stack) {
+ return fmap(stack, [](const IValue& i) { return i.toTensor(); });
+ };
Code f_code{grad_spec.f}, df_code{grad_spec.df};
InterpreterState f_interpreter{f_code}, df_interpreter{df_code};
- run(f_interpreter, tensors_in, tensors_out);
+ auto f_stack = fmap<IValue>(tensors_in);
+ f_interpreter.run(f_stack);
- tensor_list df_inputs;
- df_inputs.insert(
- df_inputs.end(), tensor_grads_in.begin(), tensor_grads_in.end());
+ Stack df_stack;
+ df_stack.insert(
+ df_stack.end(), tensor_grads_in.begin(), tensor_grads_in.end());
for (auto offset : grad_spec.df_input_captured_inputs)
- df_inputs.push_back(tensors_in[offset]);
+ df_stack.push_back(tensors_in[offset]);
for (auto offset : grad_spec.df_input_captured_outputs)
- df_inputs.push_back(tensors_out[offset]);
- run(df_interpreter, df_inputs, tensor_grads_out);
+ df_stack.push_back(f_stack[offset]);
+ df_interpreter.run(df_stack);
// Outputs of f needs to be sliced
- tensors_out.erase(
- tensors_out.begin() + grad_spec.f_real_outputs, tensors_out.end());
- return std::make_pair(tensors_out, tensor_grads_out);
+ f_stack.erase(
+ f_stack.begin() + grad_spec.f_real_outputs, f_stack.end());
+ return std::make_pair(as_tensorlist(f_stack), as_tensorlist(df_stack));
}
void assertAllClose(const tensor_list& a, const tensor_list& b) {
@@ -496,9 +496,8 @@ void testInterp() {
auto lstm_g = build_lstm();
Code lstm_function(lstm_g);
- std::vector<at::Tensor> outputs;
InterpreterState lstm_interp(lstm_function);
- run(lstm_interp, {input[0], hx, cx, w_ih, w_hh}, outputs);
+ auto outputs = run(lstm_interp, {input[0], hx, cx, w_ih, w_hh});
std::tie(hx, cx) = lstm(input[0], hx, cx, w_ih, w_hh);
// std::cout << almostEqual(outputs[0],hx) << "\n";
@@ -836,8 +835,8 @@ void testDifferentiate(std::ostream& out = std::cout) {
auto grad_spec = differentiate(graph);
std::vector<size_t> expected_captured_inputs = {0, 1};
- std::vector<size_t> expected_captured_outputs = {1};
- std::vector<size_t> expected_input_vjps = {0, 1};
+ std::vector<size_t> expected_captured_outputs = {1, 2, 3, 4, 5, 6, 7};
+ std::vector<size_t> expected_input_vjps = {0, 3};
std::vector<size_t> expected_output_vjps = {0, 1};
ASSERT_EQ(grad_spec.f_real_outputs, 1);
ASSERT_EQ(grad_spec.df_input_captured_inputs, expected_captured_inputs);
@@ -867,11 +866,11 @@ void testDifferentiateWithRequiresGrad(std::ostream& out = std::cout) {
PropagateRequiresGrad(graph);
auto grad_spec = differentiate(graph);
- std::vector<size_t> expected_input_vjps = {1, 2}; // for e and %4 = (d + a)
+ std::vector<size_t> expected_input_vjps = {1, 4}; // for e and %4 = (d + a)
std::vector<size_t> expected_output_vjps = {0}; // only a requires grad
- ASSERT_EQ(grad_spec.f_real_outputs, 2); // we need one temporary %4 = (d + a)
+ ASSERT_EQ(grad_spec.f_real_outputs, 2);
ASSERT_EQ(grad_spec.df_input_captured_inputs, std::vector<size_t>({0}));
- ASSERT_EQ(grad_spec.df_input_captured_outputs, std::vector<size_t>({2}));
+ ASSERT_EQ(grad_spec.df_input_captured_outputs, std::vector<size_t>({2, 3, 4, 5, 6, 7, 8}));
ASSERT_EQ(grad_spec.df_input_vjps, expected_input_vjps);
ASSERT_EQ(grad_spec.df_output_vjps, expected_output_vjps);
out << "testDifferentiateWithRequiresGrad\n";