author     Will Feng <willfeng@fb.com>    2019-01-24 14:29:06 -0800
committer  Facebook Github Bot <facebook-github-bot@users.noreply.github.com>    2019-01-24 14:33:03 -0800
commit     2a70f24cce3a5c834082bd8b6efe64a17dc7df12 (patch)
tree       fe09672645b09975374d7d2b00b43de908c736fa /aten
parent     f0dd85d1410b2407bf5198e8cc2f628691c7c2e6 (diff)
Add thread-local guard: at::AutoNonVariableTypeMode (#15939)
Summary: This PR adds a thread-local guard (`at::AutoNonVariableTypeMode`) to make sure that in VariableType.cpp the operations on baseType still dispatch to the non-Variable type, even though their parameters will become Variables after the Tensor/Variable merge. We achieve this by making `legacyTensorType()` and `getType()` check whether `at::NonVariableTypeMode` is enabled (which the guard sets) to decide whether to return the non-Variable type for a variable. This is part of the VariableImpl/TensorImpl merge work: https://github.com/pytorch/pytorch/issues/13638.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/15939
Reviewed By: ezyang
Differential Revision: D13640980
Pulled By: yf225
fbshipit-source-id: d12c2543822958558d7d70d36c50999a5eb8783f
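For illustration, a minimal sketch of how the new guard is meant to be used follows. The surrounding function is hypothetical; only `at::AutoNonVariableTypeMode` and `at::NonVariableTypeMode::is_enabled()` come from this patch.

#include <ATen/core/LegacyTypeDispatch.h>
#include <cassert>

// Hypothetical caller, shown only to illustrate the RAII behavior of the guard.
void with_non_variable_type_dispatch() {
  const bool before = at::NonVariableTypeMode::is_enabled();  // normally false
  {
    at::AutoNonVariableTypeMode guard(true);
    // Inside this scope, legacyTensorType() and getType() return the
    // non-Variable type even for tensors that are Variables.
    // ... call into non-Variable (base type) implementations here ...
  }
  // The guard's destructor restores the previous thread-local value.
  assert(at::NonVariableTypeMode::is_enabled() == before);
}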
Diffstat (limited to 'aten')
-rw-r--r--  aten/src/ATen/Context.cpp                       5
-rw-r--r--  aten/src/ATen/SparseTensorImpl.cpp              2
-rw-r--r--  aten/src/ATen/SparseTensorUtils.h               2
-rw-r--r--  aten/src/ATen/Utils.h                           4
-rw-r--r--  aten/src/ATen/core/LegacyTypeDispatch.cpp      48
-rw-r--r--  aten/src/ATen/core/LegacyTypeDispatch.h        29
-rw-r--r--  aten/src/ATen/native/TensorFactories.cpp        2
-rw-r--r--  aten/src/ATen/native/cuda/TensorFactories.cu    2
-rw-r--r--  aten/src/ATen/native/sparse/SparseTensor.cpp    4
9 files changed, 88 insertions, 10 deletions
diff --git a/aten/src/ATen/Context.cpp b/aten/src/ATen/Context.cpp
index b05475699c..45ebade245 100644
--- a/aten/src/ATen/Context.cpp
+++ b/aten/src/ATen/Context.cpp
@@ -101,10 +101,13 @@ TypeExtendedInterface& getType(TensorOptions options) {
options.backend(), typeMetaToScalarType(options.dtype()), options.is_variable());
}
+// NOTE: We also check `at::NonVariableTypeMode`, and if it's enabled we always
+// return non-Variable type in this function.
+// See NOTE [ Treating Variables as non-Variables in type dispatch ]
TypeExtendedInterface& getType(const TensorImpl* impl) {
Backend backend = tensorTypeIdToBackend(impl->type_id());
return globalContext().getType(
- backend, typeMetaToScalarType(impl->dtype()), impl->is_variable());
+ backend, typeMetaToScalarType(impl->dtype()), impl->is_variable() && !at::NonVariableTypeMode::is_enabled());
}
TypeExtendedInterface& getType(const Tensor& t) {
diff --git a/aten/src/ATen/SparseTensorImpl.cpp b/aten/src/ATen/SparseTensorImpl.cpp
index 779ac5d28f..2d4cb24c8d 100644
--- a/aten/src/ATen/SparseTensorImpl.cpp
+++ b/aten/src/ATen/SparseTensorImpl.cpp
@@ -80,7 +80,7 @@ int64_t SparseTensorImpl::storage_offset() const {
}
void SparseTensorImpl::set_indices_and_values_unsafe(const Tensor& indices, const Tensor& values) {
AT_CHECK(allow_tensor_metadata_change(), "set_indices_and_values_unsafe is not allowed on Tensor created from .data or .detach()");
- AT_ASSERT(!indices.is_variable() && !values.is_variable()); // They should be plain tensors!
+ AT_ASSERT(!indices.is_variable() && !values.is_variable()); // They should be plain tensors! // TODO: change this to check `.requires_grad()` and `GradMode::is_enabled()` when Variable and Tensor are merged
AT_CHECK(!indices.is_sparse(), "expected indices to be a dense tensor, but got indices of layout ", indices.layout());
AT_CHECK(!values.is_sparse(), "expected values to be a dense tensor, but got values of layout ", values.layout());
diff --git a/aten/src/ATen/SparseTensorUtils.h b/aten/src/ATen/SparseTensorUtils.h
index c8fb9e0bac..5aeebd3cd6 100644
--- a/aten/src/ATen/SparseTensorUtils.h
+++ b/aten/src/ATen/SparseTensorUtils.h
@@ -17,7 +17,7 @@ using SparseType = Type;
//
// This may be called repeatedly, so make sure it's pretty cheap.
inline SparseTensorImpl* get_sparse_impl(const SparseTensor& self) {
- AT_ASSERTM(!self.is_variable(), "_internal_get_SparseTensorImpl: should not be a variable");
+ AT_ASSERTM(!self.is_variable(), "_internal_get_SparseTensorImpl: should not be a variable"); // TODO: remove this when Variable and Tensor are merged
AT_ASSERTM(self.is_sparse(), "_internal_get_SparseTensorImpl: not a sparse tensor");
return static_cast<SparseTensorImpl*>(self.unsafeGetTensorImpl());
}
diff --git a/aten/src/ATen/Utils.h b/aten/src/ATen/Utils.h
index 67201e1de3..ec7e009072 100644
--- a/aten/src/ATen/Utils.h
+++ b/aten/src/ATen/Utils.h
@@ -73,7 +73,7 @@ static inline TensorImpl* checked_tensor_unwrap(const Tensor& expr, const char *
AT_ERROR("Expected object of scalar type ", scalar_type, " but got scalar type ", expr.scalar_type(),
" for argument #", pos, " '", name, "'");
}
- if (expr.is_variable()) {
+ if (expr.is_variable()) { // TODO: change this to check `.requires_grad()` and `GradMode::is_enabled()` when Variable and Tensor are merged
AT_ERROR("Expected Tensor (not Variable) for argument #", pos, " '", name, "'");
}
return expr.unsafeGetTensorImpl();
@@ -93,7 +93,7 @@ static inline std::vector<TensorImpl*> checked_tensor_list_unwrap(ArrayRef<Tenso
AT_ERROR("Expected object of scalar type ", scalar_type, " but got scalar type ", expr.scalar_type(),
" for sequence element ", i , " in sequence argument at position #", pos, " '", name, "'");
}
- if (expr.is_variable()) {
+ if (expr.is_variable()) { // TODO: change this to check `.requires_grad()` and `GradMode::is_enabled()` when Variable and Tensor are merged
AT_ERROR("Expected Tensor (not Variable) for sequence element ",
i , " in sequence argument at position #", pos, " '", name, "'");
}
diff --git a/aten/src/ATen/core/LegacyTypeDispatch.cpp b/aten/src/ATen/core/LegacyTypeDispatch.cpp
index f20062dbd3..c1bf2afdca 100644
--- a/aten/src/ATen/core/LegacyTypeDispatch.cpp
+++ b/aten/src/ATen/core/LegacyTypeDispatch.cpp
@@ -2,6 +2,54 @@
namespace at {
+/// NOTE [ Treating Variables as non-Variables in type dispatch ]
+///
+/// Previously, in VariableType_*.cpp (generated by gen_variable_type.py), when
+/// a function is using the 'use_derived' strategy, we call its implementation
+/// on the base non-Variable type (`baseType`), passing unwrapped tensors to the
+/// call so that any `.type()` calls in the implementation can treat the passed
+/// tensors as non-Variables and won't dispatch back to functions in VariableType.
+///
+/// However, after the Variable/Tensor merge, there is no concept of unwrapping
+/// a tensor anymore, and directly passing variables to the base type calls will
+/// cause the `.type()` dispatch in the implementation to treat the tensor as a
+/// variable, and any function dispatch based on `.type()` will dispatch back to
+/// VariableType, which is not what we want.
+///
+/// The solution to the above problem is to add `at::NonVariableTypeMode`, which
+/// when enabled will cause `legacyTensorType()` and `getType()` to always return
+/// non-Variable type, even if the tensor being called on is a variable.
+///
+/// TODO: Since `torch::NoGradGuard` serves the same purpose in libtorch, we should
+/// merge these two thread-local guards.
+
+/// In the CAFFE2_FB_LIMITED_MOBILE_CAPABILITY build setting,
+/// thread_local is not supported. In that case, we don't provide
+/// `at::NonVariableTypeMode`.
+#if !C10_MOBILE && !defined(CAFFE2_FB_LIMITED_MOBILE_CAPABILITY)
+
+thread_local bool NonVariableTypeMode_enabled = false;
+
+bool NonVariableTypeMode::is_enabled() {
+ return NonVariableTypeMode_enabled;
+}
+
+void NonVariableTypeMode::set_enabled(bool enabled) {
+ NonVariableTypeMode_enabled = enabled;
+}
+
+#else // C10_MOBILE || defined(CAFFE2_FB_LIMITED_MOBILE_CAPABILITY)
+
+bool NonVariableTypeMode::is_enabled() {
+ throw std::runtime_error("NonVariableTypeMode is not supported on mobile");
+}
+
+void NonVariableTypeMode::set_enabled(bool enabled) {
+ throw std::runtime_error("NonVariableTypeMode is not supported on mobile");
+}
+
+#endif
+
// TODO: This could be bad juju if someone calls globalContext() in the
// destructor of an object with static lifetime.
LegacyTypeDispatch & globalLegacyTypeDispatch() {
diff --git a/aten/src/ATen/core/LegacyTypeDispatch.h b/aten/src/ATen/core/LegacyTypeDispatch.h
index 0234b965f8..9c9d44e84f 100644
--- a/aten/src/ATen/core/LegacyTypeDispatch.h
+++ b/aten/src/ATen/core/LegacyTypeDispatch.h
@@ -140,18 +140,45 @@ private:
CAFFE2_API LegacyTypeDispatch& globalLegacyTypeDispatch();
+struct CAFFE2_API NonVariableTypeMode {
+ static bool is_enabled();
+ static void set_enabled(bool enabled);
+};
+
+// A RAII, thread local (!) guard that has the following effect:
+//
+// Upon construction: sets NonVariableTypeMode_enabled for the current thread to
+// control whether we are in non-Variable-type mode.
+//
+// Upon destruction: sets NonVariableTypeMode_enabled back to the original value.
+//
+// See NOTE [ Treating Variables as non-Variables in type dispatch ] for details.
+struct CAFFE2_API AutoNonVariableTypeMode {
+ AutoNonVariableTypeMode(bool enabled) : prev_mode(NonVariableTypeMode::is_enabled()) {
+ NonVariableTypeMode::set_enabled(enabled);
+ }
+ ~AutoNonVariableTypeMode() {
+ NonVariableTypeMode::set_enabled(prev_mode);
+ }
+ bool prev_mode;
+};
+
/**
* Return the Type object corresponding to this Tensor, which we can
* use to do dynamic dispatch to operators from. This method is NOT
* intended to be used by end-users; it is purely an implementation
* detail.
+ *
+ * NOTE: We also check `at::NonVariableTypeMode`, and if it's enabled
+ * we always return non-Variable type in this function.
+ * See NOTE [ Treating Variables as non-Variables in type dispatch ]
*/
inline Type& legacyTensorType(const TensorImpl& tensor) {
// NB: It's valid to use getTypeRaw here, because the TensorImpl
// could not have been created without initializing the Type first.
// TODO: This is not actually true via the Caffe2 codepath! Make
// it so.
- return *globalLegacyTypeDispatch().getTypeRaw(tensorTypeIdToBackend(tensor.type_id()), typeMetaToScalarType(tensor.dtype()), tensor.is_variable());
+ return *globalLegacyTypeDispatch().getTypeRaw(tensorTypeIdToBackend(tensor.type_id()), typeMetaToScalarType(tensor.dtype()), tensor.is_variable() && !at::NonVariableTypeMode::is_enabled());
}
} // namespace at
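As context for the NOTE and the guard defined above: after the merge, generated VariableType kernels can no longer unwrap their arguments before calling into the base implementation, so the guard is expected to be enabled around that call instead (the VariableType.cpp side is not part of this diff). Below is a hedged sketch of that calling pattern, not actual gen_variable_type.py output; the function name and placeholder body are made up.

#include <ATen/ATen.h>
#include <ATen/core/LegacyTypeDispatch.h>

// Sketch of a 'use_derived'-style kernel guarding its base-type call.
at::Tensor variable_type_op_sketch(const at::Tensor& self) {
  at::AutoNonVariableTypeMode non_var_type_mode(true);
  // While the guard is alive, legacyTensorType(*self.unsafeGetTensorImpl())
  // and getType(self.unsafeGetTensorImpl()) return the non-Variable type
  // even though `self` is a Variable, so `.type()`-based dispatch inside
  // the base implementation does not bounce back into VariableType.
  // ... invoke the base (non-Variable) implementation of the op here ...
  return self;  // placeholder result for the sketch
}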
diff --git a/aten/src/ATen/native/TensorFactories.cpp b/aten/src/ATen/native/TensorFactories.cpp
index a2ca7bd21c..9a4684f4bc 100644
--- a/aten/src/ATen/native/TensorFactories.cpp
+++ b/aten/src/ATen/native/TensorFactories.cpp
@@ -90,7 +90,7 @@ Tensor _dim_arange(const Tensor& like, int64_t dim) {
Tensor empty_cpu(IntList size, const TensorOptions& options) {
AT_ASSERT(options.backend() == Backend::CPU);
- AT_ASSERT(!options.is_variable()); // is_variable should have been 'unpacked'
+ AT_ASSERT(!options.is_variable()); // is_variable should have been 'unpacked' // TODO: remove this when Variable and Tensor are merged
auto* allocator = at::getCPUAllocator();
int64_t nelements = prod_intlist(size);
diff --git a/aten/src/ATen/native/cuda/TensorFactories.cu b/aten/src/ATen/native/cuda/TensorFactories.cu
index 6413277dda..b809948554 100644
--- a/aten/src/ATen/native/cuda/TensorFactories.cu
+++ b/aten/src/ATen/native/cuda/TensorFactories.cu
@@ -45,7 +45,7 @@ Tensor& eye_out_cuda(Tensor& result, int64_t n, int64_t m) {
Tensor empty_cuda(IntList size, const TensorOptions& options) {
AT_ASSERT(options.backend() == at::Backend::CUDA);
- AT_ASSERT(!options.is_variable()); // is_variable should have been 'unpacked'
+ AT_ASSERT(!options.is_variable()); // is_variable should have been 'unpacked' // TODO: remove this when Variable and Tensor are merged
auto* allocator = at::cuda::getCUDADeviceAllocator();
int64_t nelements = prod_intlist(size);
diff --git a/aten/src/ATen/native/sparse/SparseTensor.cpp b/aten/src/ATen/native/sparse/SparseTensor.cpp
index ce69e5c219..216c581cb8 100644
--- a/aten/src/ATen/native/sparse/SparseTensor.cpp
+++ b/aten/src/ATen/native/sparse/SparseTensor.cpp
@@ -70,7 +70,7 @@ Tensor values_sparse(const Tensor& self) {
/*** Helper methods ***/
SparseTensor new_sparse(const TensorOptions& options) {
- AT_ASSERT(!options.is_variable());
+ AT_ASSERT(!options.is_variable()); // TODO: remove this when Variable and Tensor are merged
AT_ASSERT(options.layout() == kSparse);
TensorTypeId type_id;
if (options.device().is_cuda()) {
@@ -329,7 +329,7 @@ SparseTensor& copy_sparse_(SparseTensor& self, const SparseTensor& src, bool non
SparseTensor coalesce_sparse_cpu(const SparseTensor& self) {
AT_ASSERT(self.defined());
- AT_ASSERT(!self.is_variable());
+ AT_ASSERT(!self.is_variable()); // TODO: change this to check `.requires_grad()` and `GradMode::is_enabled()` when Variable and Tensor are merged
AT_ASSERT(self.is_sparse());
if (self.is_coalesced()) {