Diffstat (limited to 'aten/src')
-rw-r--r--  aten/src/ATen/Context.cpp                      5
-rw-r--r--  aten/src/ATen/SparseTensorImpl.cpp             2
-rw-r--r--  aten/src/ATen/SparseTensorUtils.h              2
-rw-r--r--  aten/src/ATen/Utils.h                          4
-rw-r--r--  aten/src/ATen/core/LegacyTypeDispatch.cpp     48
-rw-r--r--  aten/src/ATen/core/LegacyTypeDispatch.h       29
-rw-r--r--  aten/src/ATen/native/TensorFactories.cpp       2
-rw-r--r--  aten/src/ATen/native/cuda/TensorFactories.cu   2
-rw-r--r--  aten/src/ATen/native/sparse/SparseTensor.cpp   4
9 files changed, 88 insertions(+), 10 deletions(-)
diff --git a/aten/src/ATen/Context.cpp b/aten/src/ATen/Context.cpp
index b05475699c..45ebade245 100644
--- a/aten/src/ATen/Context.cpp
+++ b/aten/src/ATen/Context.cpp
@@ -101,10 +101,13 @@ TypeExtendedInterface& getType(TensorOptions options) {
options.backend(), typeMetaToScalarType(options.dtype()), options.is_variable());
}
+// NOTE: We also check `at::NonVariableTypeMode`; if it is enabled, this
+// function always returns the non-Variable type.
+// See NOTE [ Treating Variables as non-Variables in type dispatch ]
TypeExtendedInterface& getType(const TensorImpl* impl) {
Backend backend = tensorTypeIdToBackend(impl->type_id());
return globalContext().getType(
- backend, typeMetaToScalarType(impl->dtype()), impl->is_variable());
+ backend, typeMetaToScalarType(impl->dtype()), impl->is_variable() && !at::NonVariableTypeMode::is_enabled());
}
TypeExtendedInterface& getType(const Tensor& t) {
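
(Not part of the commit: a minimal sketch of how the changed predicate behaves; `dispatch_example` is a hypothetical helper, while `getType` and `AutoNonVariableTypeMode` are the entities this diff touches.)

#include <ATen/ATen.h>
#include <ATen/core/LegacyTypeDispatch.h>

// Sketch: dispatch behavior for a tensor `t` with t.is_variable() == true.
void dispatch_example(const at::Tensor& t) {
  // Outside the guard, `is_variable() && !is_enabled()` is true, so
  // getType() returns the Variable type.
  at::TypeExtendedInterface& var_type = at::getType(t);
  (void)var_type;
  {
    at::AutoNonVariableTypeMode guard(/*enabled=*/true);
    // Inside the guard, the predicate is false, so getType() returns the
    // base (non-Variable) type even though `t` is a variable.
    at::TypeExtendedInterface& base_type = at::getType(t);
    (void)base_type;
  }
  // Guard destroyed here: getType() dispatches to the Variable type again.
}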
diff --git a/aten/src/ATen/SparseTensorImpl.cpp b/aten/src/ATen/SparseTensorImpl.cpp
index 779ac5d28f..2d4cb24c8d 100644
--- a/aten/src/ATen/SparseTensorImpl.cpp
+++ b/aten/src/ATen/SparseTensorImpl.cpp
@@ -80,7 +80,7 @@ int64_t SparseTensorImpl::storage_offset() const {
}
void SparseTensorImpl::set_indices_and_values_unsafe(const Tensor& indices, const Tensor& values) {
AT_CHECK(allow_tensor_metadata_change(), "set_indices_and_values_unsafe is not allowed on Tensor created from .data or .detach()");
- AT_ASSERT(!indices.is_variable() && !values.is_variable()); // They should be plain tensors!
+ AT_ASSERT(!indices.is_variable() && !values.is_variable()); // They should be plain tensors! // TODO: change this to check `.requires_grad()` and `GradMode::is_enabled()` when Variable and Tensor are merged
AT_CHECK(!indices.is_sparse(), "expected indices to be a dense tensor, but got indices of layout ", indices.layout());
AT_CHECK(!values.is_sparse(), "expected values to be a dense tensor, but got values of layout ", values.layout());
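
(Not part of the commit: a hedged sketch of the check the recurring TODO points at, assuming `at::GradMode` from ATen/core/grad_mode.h; the exact post-merge form is not specified by this diff.)

#include <ATen/core/grad_mode.h>

// Hypothetical post-merge replacement for the is_variable() asserts: reject
// only tensors that could actually record autograd history.
// AT_ASSERT(!(indices.requires_grad() && at::GradMode::is_enabled()));
// AT_ASSERT(!(values.requires_grad() && at::GradMode::is_enabled()));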
diff --git a/aten/src/ATen/SparseTensorUtils.h b/aten/src/ATen/SparseTensorUtils.h
index c8fb9e0bac..5aeebd3cd6 100644
--- a/aten/src/ATen/SparseTensorUtils.h
+++ b/aten/src/ATen/SparseTensorUtils.h
@@ -17,7 +17,7 @@ using SparseType = Type;
//
// This may be called repeatedly, so make sure it's pretty cheap.
inline SparseTensorImpl* get_sparse_impl(const SparseTensor& self) {
- AT_ASSERTM(!self.is_variable(), "_internal_get_SparseTensorImpl: should not be a variable");
+ AT_ASSERTM(!self.is_variable(), "_internal_get_SparseTensorImpl: should not be a variable"); // TODO: remove this when Variable and Tensor are merged
AT_ASSERTM(self.is_sparse(), "_internal_get_SparseTensorImpl: not a sparse tensor");
return static_cast<SparseTensorImpl*>(self.unsafeGetTensorImpl());
}
diff --git a/aten/src/ATen/Utils.h b/aten/src/ATen/Utils.h
index 67201e1de3..ec7e009072 100644
--- a/aten/src/ATen/Utils.h
+++ b/aten/src/ATen/Utils.h
@@ -73,7 +73,7 @@ static inline TensorImpl* checked_tensor_unwrap(const Tensor& expr, const char *
AT_ERROR("Expected object of scalar type ", scalar_type, " but got scalar type ", expr.scalar_type(),
" for argument #", pos, " '", name, "'");
}
- if (expr.is_variable()) {
+ if (expr.is_variable()) { // TODO: change this to check `.requires_grad()` and `GradMode::is_enabled()` when Variable and Tensor are merged
AT_ERROR("Expected Tensor (not Variable) for argument #", pos, " '", name, "'");
}
return expr.unsafeGetTensorImpl();
@@ -93,7 +93,7 @@ static inline std::vector<TensorImpl*> checked_tensor_list_unwrap(ArrayRef<Tenso
AT_ERROR("Expected object of scalar type ", scalar_type, " but got scalar type ", expr.scalar_type(),
" for sequence element ", i , " in sequence argument at position #", pos, " '", name, "'");
}
- if (expr.is_variable()) {
+ if (expr.is_variable()) { // TODO: change this to check `.requires_grad()` and `GradMode::is_enabled()` when Variable and Tensor are merged
AT_ERROR("Expected Tensor (not Variable) for sequence element ",
i , " in sequence argument at position #", pos, " '", name, "'");
}
diff --git a/aten/src/ATen/core/LegacyTypeDispatch.cpp b/aten/src/ATen/core/LegacyTypeDispatch.cpp
index f20062dbd3..c1bf2afdca 100644
--- a/aten/src/ATen/core/LegacyTypeDispatch.cpp
+++ b/aten/src/ATen/core/LegacyTypeDispatch.cpp
@@ -2,6 +2,54 @@
namespace at {
+/// NOTE [ Treating Variables as non-Variables in type dispatch ]
+///
+/// Previously, in VariableType_*.cpp (generated by gen_variable_type.py), when
+/// a function used the 'use_derived' strategy, we called its implementation on
+/// the base non-Variable type (`baseType`), passing unwrapped tensors to the
+/// call so that any `.type()` calls in the implementation could treat the
+/// passed tensors as non-Variables and would not dispatch back to VariableType.
+///
+/// However, after the Variable/Tensor merge, there is no longer a concept of
+/// unwrapping a tensor. Directly passing variables to the base type calls makes
+/// the `.type()` dispatch in the implementation treat the tensor as a variable,
+/// so any function dispatch based on `.type()` dispatches back to VariableType,
+/// which is not what we want.
+///
+/// The solution is to add `at::NonVariableTypeMode`: when it is enabled,
+/// `legacyTensorType()` and `getType()` always return the non-Variable type,
+/// even if the tensor being called on is a variable.
+///
+/// TODO: Since `torch::NoGradGuard` serves the same purpose in libtorch, we
+/// should merge these two thread-local guards.
+
+/// In mobile builds (C10_MOBILE) and in the CAFFE2_FB_LIMITED_MOBILE_CAPABILITY
+/// build setting, thread_local is not supported, so we don't provide a working
+/// `at::NonVariableTypeMode` there.
+#if !C10_MOBILE && !defined(CAFFE2_FB_LIMITED_MOBILE_CAPABILITY)
+
+thread_local bool NonVariableTypeMode_enabled = false;
+
+bool NonVariableTypeMode::is_enabled() {
+ return NonVariableTypeMode_enabled;
+}
+
+void NonVariableTypeMode::set_enabled(bool enabled) {
+ NonVariableTypeMode_enabled = enabled;
+}
+
+#else // C10_MOBILE || defined(CAFFE2_FB_LIMITED_MOBILE_CAPABILITY)
+
+bool NonVariableTypeMode::is_enabled() {
+ throw std::runtime_error("NonVariableTypeMode is not supported on mobile");
+}
+
+void NonVariableTypeMode::set_enabled(bool enabled) {
+ throw std::runtime_error("NonVariableTypeMode is not supported on mobile");
+}
+
+#endif
+
// TODO: This could be bad juju if someone calls globalContext() in the
// destructor of an object with static lifetime.
LegacyTypeDispatch & globalLegacyTypeDispatch() {
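
(Not part of the commit: a hedged sketch of the 'use_derived' pattern the NOTE describes. `my_op_wrapper` and `base_op` are hypothetical stand-ins for the generated VariableType wrapper and the base implementation it calls.)

#include <ATen/ATen.h>
#include <ATen/core/LegacyTypeDispatch.h>

at::Tensor my_op_wrapper(const at::Tensor& self,
                         at::Tensor (*base_op)(const at::Tensor&)) {
  // ... autograd bookkeeping elided ...
  at::AutoNonVariableTypeMode non_var_type_mode(true);
  // While the guard is alive, `.type()` calls inside base_op resolve to the
  // non-Variable type, so dispatch does not bounce back into VariableType.
  return base_op(self);
}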
diff --git a/aten/src/ATen/core/LegacyTypeDispatch.h b/aten/src/ATen/core/LegacyTypeDispatch.h
index 0234b965f8..9c9d44e84f 100644
--- a/aten/src/ATen/core/LegacyTypeDispatch.h
+++ b/aten/src/ATen/core/LegacyTypeDispatch.h
@@ -140,18 +140,45 @@ private:
CAFFE2_API LegacyTypeDispatch& globalLegacyTypeDispatch();
+struct CAFFE2_API NonVariableTypeMode {
+ static bool is_enabled();
+ static void set_enabled(bool enabled);
+};
+
+// A RAII, thread local (!) guard that has the following effect:
+//
+// Upon construction: saves the current NonVariableTypeMode for this thread and
+// sets it to the given value, to control whether we are in non-Variable-type mode.
+//
+// Upon destruction: restores NonVariableTypeMode to the saved value.
+//
+// See NOTE [ Treating Variables as non-Variables in type dispatch ] for details.
+struct CAFFE2_API AutoNonVariableTypeMode {
+ AutoNonVariableTypeMode(bool enabled) : prev_mode(NonVariableTypeMode::is_enabled()) {
+ NonVariableTypeMode::set_enabled(enabled);
+ }
+ ~AutoNonVariableTypeMode() {
+ NonVariableTypeMode::set_enabled(prev_mode);
+ }
+ bool prev_mode;
+};
+
/**
* Return the Type object corresponding to this Tensor, which we can
* use to do dynamic dispatch to operators from. This method is NOT
* intended to be used by end-users; it is purely an implementation
* detail.
+ *
+ * NOTE: We also check `at::NonVariableTypeMode`; if it is enabled,
+ * this function always returns the non-Variable type.
+ * See NOTE [ Treating Variables as non-Variables in type dispatch ]
*/
inline Type& legacyTensorType(const TensorImpl& tensor) {
// NB: It's valid to use getTypeRaw here, because the TensorImpl
// could not have been created without initializing the Type first.
// TODO: This is not actually true via the Caffe2 codepath! Make
// it so.
- return *globalLegacyTypeDispatch().getTypeRaw(tensorTypeIdToBackend(tensor.type_id()), typeMetaToScalarType(tensor.dtype()), tensor.is_variable());
+ return *globalLegacyTypeDispatch().getTypeRaw(tensorTypeIdToBackend(tensor.type_id()), typeMetaToScalarType(tensor.dtype()), tensor.is_variable() && !at::NonVariableTypeMode::is_enabled());
}
} // namespace at
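
(Not part of the commit: since `prev_mode` is saved on construction and restored on destruction, the guard is exception-safe and nests correctly. A minimal usage sketch; `guard_nesting_example` is a hypothetical name.)

#include <ATen/core/LegacyTypeDispatch.h>

void guard_nesting_example() {
  at::AutoNonVariableTypeMode outer(true);    // mode: true
  {
    at::AutoNonVariableTypeMode inner(false); // mode: false
  }                                           // inner destroyed -> mode: true
}                                             // outer destroyed -> mode restored
                                              // to its value before `outer`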
diff --git a/aten/src/ATen/native/TensorFactories.cpp b/aten/src/ATen/native/TensorFactories.cpp
index a2ca7bd21c..9a4684f4bc 100644
--- a/aten/src/ATen/native/TensorFactories.cpp
+++ b/aten/src/ATen/native/TensorFactories.cpp
@@ -90,7 +90,7 @@ Tensor _dim_arange(const Tensor& like, int64_t dim) {
Tensor empty_cpu(IntList size, const TensorOptions& options) {
AT_ASSERT(options.backend() == Backend::CPU);
- AT_ASSERT(!options.is_variable()); // is_variable should have been 'unpacked'
+ AT_ASSERT(!options.is_variable()); // is_variable should have been 'unpacked' // TODO: remove this when Variable and Tensor are merged
auto* allocator = at::getCPUAllocator();
int64_t nelements = prod_intlist(size);
diff --git a/aten/src/ATen/native/cuda/TensorFactories.cu b/aten/src/ATen/native/cuda/TensorFactories.cu
index 6413277dda..b809948554 100644
--- a/aten/src/ATen/native/cuda/TensorFactories.cu
+++ b/aten/src/ATen/native/cuda/TensorFactories.cu
@@ -45,7 +45,7 @@ Tensor& eye_out_cuda(Tensor& result, int64_t n, int64_t m) {
Tensor empty_cuda(IntList size, const TensorOptions& options) {
AT_ASSERT(options.backend() == at::Backend::CUDA);
- AT_ASSERT(!options.is_variable()); // is_variable should have been 'unpacked'
+ AT_ASSERT(!options.is_variable()); // is_variable should have been 'unpacked' // TODO: remove this when Variable and Tensor are merged
auto* allocator = at::cuda::getCUDADeviceAllocator();
int64_t nelements = prod_intlist(size);
diff --git a/aten/src/ATen/native/sparse/SparseTensor.cpp b/aten/src/ATen/native/sparse/SparseTensor.cpp
index ce69e5c219..216c581cb8 100644
--- a/aten/src/ATen/native/sparse/SparseTensor.cpp
+++ b/aten/src/ATen/native/sparse/SparseTensor.cpp
@@ -70,7 +70,7 @@ Tensor values_sparse(const Tensor& self) {
/*** Helper methods ***/
SparseTensor new_sparse(const TensorOptions& options) {
- AT_ASSERT(!options.is_variable());
+ AT_ASSERT(!options.is_variable()); // TODO: remove this when Variable and Tensor are merged
AT_ASSERT(options.layout() == kSparse);
TensorTypeId type_id;
if (options.device().is_cuda()) {
@@ -329,7 +329,7 @@ SparseTensor& copy_sparse_(SparseTensor& self, const SparseTensor& src, bool non
SparseTensor coalesce_sparse_cpu(const SparseTensor& self) {
AT_ASSERT(self.defined());
- AT_ASSERT(!self.is_variable());
+ AT_ASSERT(!self.is_variable()); // TODO: change this to check `.requires_grad()` and `GradMode::is_enabled()` when Variable and Tensor are merged
AT_ASSERT(self.is_sparse());
if (self.is_coalesced()) {