author    | Will Feng <willfeng@fb.com>                                         | 2019-01-24 14:29:06 -0800
committer | Facebook Github Bot <facebook-github-bot@users.noreply.github.com>  | 2019-01-24 14:33:03 -0800
commit    | 2a70f24cce3a5c834082bd8b6efe64a17dc7df12 (patch)
tree      | fe09672645b09975374d7d2b00b43de908c736fa /aten
parent    | f0dd85d1410b2407bf5198e8cc2f628691c7c2e6 (diff)
Add thread-local guard: at::AutoNonVariableTypeMode (#15939)
Summary:
This PR adds a thread-local guard (`at::AutoNonVariableTypeMode`) to make sure that, in VariableType.cpp, operations on `baseType` still dispatch to the non-Variable type, even though the parameters will become Variables after the Tensor/Variable merge. We achieve this by making `legacyTensorType()` and `getType()` check the `at::AutoNonVariableTypeMode` guard to decide whether to return the non-Variable type for a variable.
This is part of the VariableImpl/TensorImpl merge work: https://github.com/pytorch/pytorch/issues/13638.
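For context, here is a minimal usage sketch of the new guard. This is illustrative only: the real call sites are in the generated VariableType_*.cpp, and `baseType`, `self_`, `other_`, and the `add` signature below are stand-ins, not code from this PR.

```cpp
{
  // While the guard is alive on this thread, `.type()` on a Variable
  // resolves to the non-Variable type, so the baseType implementation
  // cannot dispatch back into VariableType.
  at::AutoNonVariableTypeMode non_var_type_mode(true);
  auto result = baseType->add(self_, other_, alpha);
}  // Guard destroyed here: the previous mode is restored for this thread.
```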
Pull Request resolved: https://github.com/pytorch/pytorch/pull/15939
Reviewed By: ezyang
Differential Revision: D13640980
Pulled By: yf225
fbshipit-source-id: d12c2543822958558d7d70d36c50999a5eb8783f
Diffstat (limited to 'aten')
-rw-r--r-- | aten/src/ATen/Context.cpp                    |  5
-rw-r--r-- | aten/src/ATen/SparseTensorImpl.cpp           |  2
-rw-r--r-- | aten/src/ATen/SparseTensorUtils.h            |  2
-rw-r--r-- | aten/src/ATen/Utils.h                        |  4
-rw-r--r-- | aten/src/ATen/core/LegacyTypeDispatch.cpp    | 48
-rw-r--r-- | aten/src/ATen/core/LegacyTypeDispatch.h      | 29
-rw-r--r-- | aten/src/ATen/native/TensorFactories.cpp     |  2
-rw-r--r-- | aten/src/ATen/native/cuda/TensorFactories.cu |  2
-rw-r--r-- | aten/src/ATen/native/sparse/SparseTensor.cpp |  4
9 files changed, 88 insertions, 10 deletions
```diff
diff --git a/aten/src/ATen/Context.cpp b/aten/src/ATen/Context.cpp
index b05475699c..45ebade245 100644
--- a/aten/src/ATen/Context.cpp
+++ b/aten/src/ATen/Context.cpp
@@ -101,10 +101,13 @@ TypeExtendedInterface& getType(TensorOptions options) {
       options.backend(), typeMetaToScalarType(options.dtype()), options.is_variable());
 }
 
+// NOTE: We also check `at::NonVariableTypeMode`, and if it's enabled we always
+// return non-Variable type in this function.
+// See NOTE [ Treating Variables as non-Variables in type dispatch ]
 TypeExtendedInterface& getType(const TensorImpl* impl) {
   Backend backend = tensorTypeIdToBackend(impl->type_id());
   return globalContext().getType(
-      backend, typeMetaToScalarType(impl->dtype()), impl->is_variable());
+      backend, typeMetaToScalarType(impl->dtype()), impl->is_variable() && !at::NonVariableTypeMode::is_enabled());
 }
 
 TypeExtendedInterface& getType(const Tensor& t) {
diff --git a/aten/src/ATen/SparseTensorImpl.cpp b/aten/src/ATen/SparseTensorImpl.cpp
index 779ac5d28f..2d4cb24c8d 100644
--- a/aten/src/ATen/SparseTensorImpl.cpp
+++ b/aten/src/ATen/SparseTensorImpl.cpp
@@ -80,7 +80,7 @@ int64_t SparseTensorImpl::storage_offset() const {
 }
 void SparseTensorImpl::set_indices_and_values_unsafe(const Tensor& indices, const Tensor& values) {
   AT_CHECK(allow_tensor_metadata_change(), "set_indices_and_values_unsafe is not allowed on Tensor created from .data or .detach()");
-  AT_ASSERT(!indices.is_variable() && !values.is_variable());  // They should be plain tensors!
+  AT_ASSERT(!indices.is_variable() && !values.is_variable());  // They should be plain tensors!  // TODO: change this to check `.requires_grad()` and `GradMode::is_enabled()` when Variable and Tensor are merged
   AT_CHECK(!indices.is_sparse(), "expected indices to be a dense tensor, but got indices of layout ", indices.layout());
   AT_CHECK(!values.is_sparse(), "expected values to be a dense tensor, but got values of layout ", values.layout());
diff --git a/aten/src/ATen/SparseTensorUtils.h b/aten/src/ATen/SparseTensorUtils.h
index c8fb9e0bac..5aeebd3cd6 100644
--- a/aten/src/ATen/SparseTensorUtils.h
+++ b/aten/src/ATen/SparseTensorUtils.h
@@ -17,7 +17,7 @@ using SparseType = Type;
 //
 // This may be called repeatedly, so make sure it's pretty cheap.
 inline SparseTensorImpl* get_sparse_impl(const SparseTensor& self) {
-  AT_ASSERTM(!self.is_variable(), "_internal_get_SparseTensorImpl: should not be a variable");
+  AT_ASSERTM(!self.is_variable(), "_internal_get_SparseTensorImpl: should not be a variable");  // TODO: remove this when Variable and Tensor are merged
   AT_ASSERTM(self.is_sparse(), "_internal_get_SparseTensorImpl: not a sparse tensor");
   return static_cast<SparseTensorImpl*>(self.unsafeGetTensorImpl());
 }
diff --git a/aten/src/ATen/Utils.h b/aten/src/ATen/Utils.h
index 67201e1de3..ec7e009072 100644
--- a/aten/src/ATen/Utils.h
+++ b/aten/src/ATen/Utils.h
@@ -73,7 +73,7 @@ static inline TensorImpl* checked_tensor_unwrap(const Tensor& expr, const char *
     AT_ERROR("Expected object of scalar type ", scalar_type, " but got scalar type ", expr.scalar_type(), " for argument #", pos, " '", name, "'");
   }
-  if (expr.is_variable()) {
+  if (expr.is_variable()) {  // TODO: change this to check `.requires_grad()` and `GradMode::is_enabled()` when Variable and Tensor are merged
     AT_ERROR("Expected Tensor (not Variable) for argument #", pos, " '", name, "'");
   }
   return expr.unsafeGetTensorImpl();
@@ -93,7 +93,7 @@ static inline std::vector<TensorImpl*> checked_tensor_list_unwrap(ArrayRef<Tenso
     AT_ERROR("Expected object of scalar type ", scalar_type, " but got scalar type ", expr.scalar_type(), " for sequence element ", i , " in sequence argument at position #", pos, " '", name, "'");
   }
-  if (expr.is_variable()) {
+  if (expr.is_variable()) {  // TODO: change this to check `.requires_grad()` and `GradMode::is_enabled()` when Variable and Tensor are merged
     AT_ERROR("Expected Tensor (not Variable) for sequence element ", i , " in sequence argument at position #", pos, " '", name, "'");
   }
diff --git a/aten/src/ATen/core/LegacyTypeDispatch.cpp b/aten/src/ATen/core/LegacyTypeDispatch.cpp
index f20062dbd3..c1bf2afdca 100644
--- a/aten/src/ATen/core/LegacyTypeDispatch.cpp
+++ b/aten/src/ATen/core/LegacyTypeDispatch.cpp
@@ -2,6 +2,54 @@
 
 namespace at {
 
+/// NOTE [ Treating Variables as non-Variables in type dispatch ]
+///
+/// Previously, in VariableType_*.cpp (generated by gen_variable_type.py), when
+/// a function is using the 'use_derived' strategy, we call its implementation
+/// on the base non-Variable type (`baseType`), passing unwrapped tensors to the
+/// call so that any `.type()` calls in the implementation can treat the passed
+/// tensors as non-Variables and won't dispatch back to functions in VariableType.
+///
+/// However, after the Variable/Tensor merge, there is no concept of unwrapping
+/// a tensor anymore, and directly passing variables to the base type calls will
+/// cause the `.type()` dispatch in the implementation to treat the tensor as a
+/// variable, and any function dispatch based on `.type()` will dispatch back to
+/// VariableType, which is not what we want.
+///
+/// The solution to the above problem is to add `at::NonVariableTypeMode`, which
+/// when enabled will cause `legacyTensorType()` and `getType()` to always return
+/// non-Variable type, even if the tensor being called on is a variable.
+///
+/// TODO: Since `torch::NoGradGuard` serves the same purpose in libtorch, we should
+/// merge these two thread-local guards.
+
+/// In the CAFFE2_FB_LIMITED_MOBILE_CAPABILITY build setting,
+/// thread_local is not supported. In that case, we don't provide
+/// `at::NonVariableTypeMode`.
+#if !C10_MOBILE && !defined(CAFFE2_FB_LIMITED_MOBILE_CAPABILITY)
+
+thread_local bool NonVariableTypeMode_enabled = false;
+
+bool NonVariableTypeMode::is_enabled() {
+  return NonVariableTypeMode_enabled;
+}
+
+void NonVariableTypeMode::set_enabled(bool enabled) {
+  NonVariableTypeMode_enabled = enabled;
+}
+
+#else // C10_MOBILE || defined(CAFFE2_FB_LIMITED_MOBILE_CAPABILITY)
+
+bool NonVariableTypeMode::is_enabled() {
+  throw std::runtime_error("NonVariableTypeMode is not supported on mobile");
+}
+
+void NonVariableTypeMode::set_enabled(bool enabled) {
+  throw std::runtime_error("NonVariableTypeMode is not supported on mobile");
+}
+
+#endif
+
 // TODO: This could be bad juju if someone calls globalContext() in the
 // destructor of an object with static lifetime.
 LegacyTypeDispatch & globalLegacyTypeDispatch() {
diff --git a/aten/src/ATen/core/LegacyTypeDispatch.h b/aten/src/ATen/core/LegacyTypeDispatch.h
index 0234b965f8..9c9d44e84f 100644
--- a/aten/src/ATen/core/LegacyTypeDispatch.h
+++ b/aten/src/ATen/core/LegacyTypeDispatch.h
@@ -140,18 +140,45 @@ private:
 
 CAFFE2_API LegacyTypeDispatch& globalLegacyTypeDispatch();
 
+struct CAFFE2_API NonVariableTypeMode {
+  static bool is_enabled();
+  static void set_enabled(bool enabled);
+};
+
+// A RAII, thread local (!) guard that has the following effect:
+//
+// Upon construction: sets NonVariableTypeMode_enabled for the current thread to
+// control whether we are in non-Variable-type mode.
+//
+// Upon destruction: sets NonVariableTypeMode_enabled back to the original value.
+//
+// See NOTE [ Treating Variables as non-Variables in type dispatch ] for details.
+struct CAFFE2_API AutoNonVariableTypeMode {
+  AutoNonVariableTypeMode(bool enabled) : prev_mode(NonVariableTypeMode::is_enabled()) {
+    NonVariableTypeMode::set_enabled(enabled);
+  }
+  ~AutoNonVariableTypeMode() {
+    NonVariableTypeMode::set_enabled(prev_mode);
+  }
+  bool prev_mode;
+};
+
 /**
  * Return the Type object corresponding to this Tensor, which we can
  * use to do dynamic dispatch to operators from. This method is NOT
  * intended to be used by end-users; it is purely an implementation
  * detail.
+ *
+ * NOTE: We also check `at::NonVariableTypeMode`, and if it's enabled
+ * we always return non-Variable type in this function.
+ * See NOTE [ Treating Variables as non-Variables in type dispatch ]
  */
 inline Type& legacyTensorType(const TensorImpl& tensor) {
   // NB: It's valid to use getTypeRaw here, because the TensorImpl
   // could not have been created without initializing the Type first.
   // TODO: This is not actually true via the Caffe2 codepath!  Make
   // it so.
-  return *globalLegacyTypeDispatch().getTypeRaw(tensorTypeIdToBackend(tensor.type_id()), typeMetaToScalarType(tensor.dtype()), tensor.is_variable());
+  return *globalLegacyTypeDispatch().getTypeRaw(tensorTypeIdToBackend(tensor.type_id()), typeMetaToScalarType(tensor.dtype()), tensor.is_variable() && !at::NonVariableTypeMode::is_enabled());
 }
 
 } // namespace at
diff --git a/aten/src/ATen/native/TensorFactories.cpp b/aten/src/ATen/native/TensorFactories.cpp
index a2ca7bd21c..9a4684f4bc 100644
--- a/aten/src/ATen/native/TensorFactories.cpp
+++ b/aten/src/ATen/native/TensorFactories.cpp
@@ -90,7 +90,7 @@ Tensor _dim_arange(const Tensor& like, int64_t dim) {
 
 Tensor empty_cpu(IntList size, const TensorOptions& options) {
   AT_ASSERT(options.backend() == Backend::CPU);
-  AT_ASSERT(!options.is_variable());  // is_variable should have been 'unpacked'
+  AT_ASSERT(!options.is_variable());  // is_variable should have been 'unpacked'  // TODO: remove this when Variable and Tensor are merged
 
   auto* allocator = at::getCPUAllocator();
   int64_t nelements = prod_intlist(size);
diff --git a/aten/src/ATen/native/cuda/TensorFactories.cu b/aten/src/ATen/native/cuda/TensorFactories.cu
index 6413277dda..b809948554 100644
--- a/aten/src/ATen/native/cuda/TensorFactories.cu
+++ b/aten/src/ATen/native/cuda/TensorFactories.cu
@@ -45,7 +45,7 @@ Tensor& eye_out_cuda(Tensor& result, int64_t n, int64_t m) {
 
 Tensor empty_cuda(IntList size, const TensorOptions& options) {
   AT_ASSERT(options.backend() == at::Backend::CUDA);
-  AT_ASSERT(!options.is_variable());  // is_variable should have been 'unpacked'
+  AT_ASSERT(!options.is_variable());  // is_variable should have been 'unpacked'  // TODO: remove this when Variable and Tensor are merged
 
   auto* allocator = at::cuda::getCUDADeviceAllocator();
   int64_t nelements = prod_intlist(size);
diff --git a/aten/src/ATen/native/sparse/SparseTensor.cpp b/aten/src/ATen/native/sparse/SparseTensor.cpp
index ce69e5c219..216c581cb8 100644
--- a/aten/src/ATen/native/sparse/SparseTensor.cpp
+++ b/aten/src/ATen/native/sparse/SparseTensor.cpp
@@ -70,7 +70,7 @@ Tensor values_sparse(const Tensor& self) {
 /*** Helper methods ***/
 
 SparseTensor new_sparse(const TensorOptions& options) {
-  AT_ASSERT(!options.is_variable());
+  AT_ASSERT(!options.is_variable());  // TODO: remove this when Variable and Tensor are merged
   AT_ASSERT(options.layout() == kSparse);
   TensorTypeId type_id;
   if (options.device().is_cuda()) {
@@ -329,7 +329,7 @@ SparseTensor& copy_sparse_(SparseTensor& self, const SparseTensor& src, bool non
 
 SparseTensor coalesce_sparse_cpu(const SparseTensor& self) {
   AT_ASSERT(self.defined());
-  AT_ASSERT(!self.is_variable());
+  AT_ASSERT(!self.is_variable());  // TODO: change this to check `.requires_grad()` and `GradMode::is_enabled()` when Variable and Tensor are merged
   AT_ASSERT(self.is_sparse());
   if (self.is_coalesced()) {
```
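For reference, the save/restore semantics of the RAII guard added above can be demonstrated in isolation. The following is a minimal, self-contained sketch; `AutoMode` and `mode_enabled` are invented names for this example and are not part of ATen.

```cpp
#include <cassert>

// Minimal re-creation of the thread-local RAII guard pattern used by
// at::AutoNonVariableTypeMode (invented names, sketch only).
thread_local bool mode_enabled = false;

struct AutoMode {
  explicit AutoMode(bool enabled) : prev_mode(mode_enabled) {
    mode_enabled = enabled;
  }
  // Restoring (rather than unconditionally clearing) the flag makes
  // nested guards compose correctly and keeps the guard exception-safe.
  ~AutoMode() { mode_enabled = prev_mode; }
  bool prev_mode;
};

int main() {
  assert(!mode_enabled);
  {
    AutoMode outer(true);
    assert(mode_enabled);
    {
      AutoMode inner(false);   // nested guard saves and overrides
      assert(!mode_enabled);
    }                          // inner destroyed: outer's value restored
    assert(mode_enabled);
  }                            // outer destroyed: original value restored
  assert(!mode_enabled);
  return 0;
}
```

Because the flag is `thread_local`, enabling the mode on one thread does not affect type dispatch on any other thread.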