159 files changed, 375 insertions, 77 deletions
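Every file below follows the same two-part pattern: each block of registration calls is wrapped in an anonymous namespace, and the REGISTER_*_OPERATOR macros in caffe2/core/operator.h lose the glue that forced every registered operator to also have an OPERATOR_SCHEMA. The following is a minimal, self-contained C++ sketch of both mechanisms; the toy Registry, the OLD_*/NEW_* macro names, and the SchemaProbe helper are illustrative stand-ins, not the real CAFFE_REGISTER_* machinery.

    #include <iostream>
    #include <map>
    #include <string>

    // Toy registry keyed by operator name.
    std::map<std::string, int>& Registry() {
      static std::map<std::string, int> r;
      return r;
    }

    // Old scheme (removed by this diff): registering an operator also declared
    // an extern function that only OPERATOR_SCHEMA(name) defined, so a
    // registration with no matching schema was meant to fail at link time.
    #define OLD_REGISTER(name)                                       \
      extern void OperatorSchemaCheckForOperator##name();            \
      static void __attribute__((__unused__)) SchemaProbe##name() {  \
        OperatorSchemaCheckForOperator##name();                      \
      }                                                              \
      static int registered_##name = (Registry()[#name] = 1)
    #define OLD_SCHEMA(name) void OperatorSchemaCheckForOperator##name() {}

    // New scheme: registration is just registration.
    #define NEW_REGISTER(name) \
      static int registered_##name = (Registry()[#name] = 1)

    namespace {  // as the diff now does around each block of registrations:
    NEW_REGISTER(Relu);     // internal linkage, so identically named helper
    NEW_REGISTER(Sigmoid);  // symbols cannot collide across translation units
    }  // namespace

    int main() {
      std::cout << Registry().size() << " operators registered\n";  // prints 2
    }

With the link-time probe gone, an operator may be registered without a schema, which is why the commit can also delete the now-redundant empty OPERATOR_SCHEMA(...) declarations scattered through the files below.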
diff --git a/caffe2/contrib/prof/cuda_profile_ops.cc b/caffe2/contrib/prof/cuda_profile_ops.cc
index fa6a2feec6..cd9dd64f0e 100644
--- a/caffe2/contrib/prof/cuda_profile_ops.cc
+++ b/caffe2/contrib/prof/cuda_profile_ops.cc
@@ -7,6 +7,7 @@ #include <cuda_profiler_api.h>
 
 namespace caffe2 {
+namespace {
 
 static std::vector<std::string> kCudaProfileConfiguration = {
     "gpustarttimestamp",
@@ -89,10 +90,6 @@ class CudaProfileStopOp : public OperatorBase {
   }
 };
 
-OPERATOR_SCHEMA(CudaProfileInitialize);
-OPERATOR_SCHEMA(CudaProfileStart);
-OPERATOR_SCHEMA(CudaProfileStop);
-
 REGISTER_CPU_OPERATOR(CudaProfileInitialize, CudaProfileInitializeOp);
 REGISTER_CPU_OPERATOR(CudaProfileStart, CudaProfileStartOp);
 REGISTER_CPU_OPERATOR(CudaProfileStop, CudaProfileStopOp);
@@ -101,4 +98,5 @@ REGISTER_CUDA_OPERATOR(CudaProfileInitialize, CudaProfileInitializeOp);
 REGISTER_CUDA_OPERATOR(CudaProfileStart, CudaProfileStartOp);
 REGISTER_CUDA_OPERATOR(CudaProfileStop, CudaProfileStopOp);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/contrib/torch/torch_op.cpp b/caffe2/contrib/torch/torch_op.cpp
index 3e9f9b0bdc..03a721e838 100644
--- a/caffe2/contrib/torch/torch_op.cpp
+++ b/caffe2/contrib/torch/torch_op.cpp
@@ -12,6 +12,8 @@ const char* TyTraits<CPUContext>::prelude = R"(
 )";
 }
 
+namespace {
+
 struct GetTorchGradient : public GradientMakerBase {
   using GradientMakerBase::GradientMakerBase;
   std::vector<OperatorDef> GetGradientDefs() override {
@@ -41,6 +43,6 @@ REGISTER_CPU_OPERATOR(TorchInit, TorchInitOp<CPUContext>);
 REGISTER_CPU_OPERATOR(TorchGradient, TorchGradientOp<CPUContext>);
 REGISTER_GRADIENT(Torch, GetTorchGradient);
 OPERATOR_SCHEMA(Torch).AllowInplace([](int, int) { return true; });
-OPERATOR_SCHEMA(TorchInit);
 OPERATOR_SCHEMA(TorchGradient).AllowInplace([](int, int) { return true; });
 }
+}
diff --git a/caffe2/contrib/torch/torch_op_gpu.cpp b/caffe2/contrib/torch/torch_op_gpu.cpp
index 5cfea19cba..7b63e6e49a 100644
--- a/caffe2/contrib/torch/torch_op_gpu.cpp
+++ b/caffe2/contrib/torch/torch_op_gpu.cpp
@@ -105,6 +105,9 @@ typename Torch<CUDAContext>::Traits::Tensor* Torch<CUDAContext>::newTensorAs(
   }
 }
 
+namespace {
+
 REGISTER_CUDA_OPERATOR(Torch, TorchOp<CUDAContext>);
 REGISTER_CUDA_OPERATOR(TorchGradient, TorchGradientOp<CUDAContext>);
 }
+}
diff --git a/caffe2/contrib/warpctc/ctc_op.cpp b/caffe2/contrib/warpctc/ctc_op.cpp
index 1a7f1aee91..27df0e6428 100644
--- a/caffe2/contrib/warpctc/ctc_op.cpp
+++ b/caffe2/contrib/warpctc/ctc_op.cpp
@@ -14,6 +14,7 @@ ctcComputeInfo workspaceInfo<CPUContext>(const CPUContext& context) {
 }
 }
 
+namespace {
 REGISTER_CPU_OPERATOR(CTC, CTCOp<float, CPUContext>);
 OPERATOR_SCHEMA(CTC)
     .NumInputs(4)
@@ -22,3 +23,4 @@ OPERATOR_SCHEMA(CTC)
 
 NO_GRADIENT(CTC);
 }
+}
diff --git a/caffe2/contrib/warpctc/ctc_op_gpu.cpp b/caffe2/contrib/warpctc/ctc_op_gpu.cpp
index e6b399a10a..58dca07d1a 100644
--- a/caffe2/contrib/warpctc/ctc_op_gpu.cpp
+++ b/caffe2/contrib/warpctc/ctc_op_gpu.cpp
@@ -14,5 +14,7 @@ ctcComputeInfo workspaceInfo<CUDAContext>(const CUDAContext& context) {
 }
 }
 
+namespace {
 REGISTER_CUDA_OPERATOR(CTC, CTCOp<float, CUDAContext>);
 }
+}
diff --git a/caffe2/core/operator.h b/caffe2/core/operator.h
index 39a76afdef..ed49be8d20 100644
--- a/caffe2/core/operator.h
+++ b/caffe2/core/operator.h
@@ -529,12 +529,7 @@ CAFFE_DECLARE_REGISTRY(
     Workspace*);
 #define REGISTER_CPU_OPERATOR_CREATOR(key, ...) \
   CAFFE_REGISTER_CREATOR(CPUOperatorRegistry, key, __VA_ARGS__)
-#define REGISTER_CPU_OPERATOR(name, ...)              \
-  extern void OperatorSchemaCheckForOperator##name(); \
-  static void __attribute__((__unused__))             \
-      CAFFE_ANONYMOUS_VARIABLE_CPU##name() {          \
-    OperatorSchemaCheckForOperator##name();           \
-  }                                                   \
+#define REGISTER_CPU_OPERATOR(name, ...) \
   CAFFE_REGISTER_CLASS(CPUOperatorRegistry, name, __VA_ARGS__)
 #define REGISTER_CPU_OPERATOR_STR(str_name, ...) \
   CAFFE_REGISTER_TYPED_CLASS(CPUOperatorRegistry, str_name, __VA_ARGS__)
@@ -549,12 +544,7 @@ CAFFE_DECLARE_REGISTRY(
     Workspace*);
 #define REGISTER_CUDA_OPERATOR_CREATOR(key, ...) \
   CAFFE_REGISTER_CREATOR(CUDAOperatorRegistry, key, __VA_ARGS__)
-#define REGISTER_CUDA_OPERATOR(name, ...)             \
-  extern void OperatorSchemaCheckForOperator##name(); \
-  static void __attribute__((__unused__))             \
-      CAFFE_ANONYMOUS_VARIABLE_CUDA##name() {         \
-    OperatorSchemaCheckForOperator##name();           \
-  }                                                   \
+#define REGISTER_CUDA_OPERATOR(name, ...) \
   CAFFE_REGISTER_CLASS(CUDAOperatorRegistry, name, __VA_ARGS__)
 #define REGISTER_CUDA_OPERATOR_STR(str_name, ...) \
   CAFFE_REGISTER_TYPED_CLASS(CUDAOperatorRegistry, str_name, __VA_ARGS__)
diff --git a/caffe2/core/operator_schema.h b/caffe2/core/operator_schema.h
index 0c3c1cc5e5..bd4908f17a 100644
--- a/caffe2/core/operator_schema.h
+++ b/caffe2/core/operator_schema.h
@@ -379,10 +379,9 @@ InferOpInputOutputDevice(const OperatorDef& op) {
 
 } // namespace caffe2
 
-#define OPERATOR_SCHEMA(name)                        \
-  void OperatorSchemaCheckForOperator##name(){};     \
-  static OpSchema& CAFFE_ANONYMOUS_VARIABLE(name) =  \
-      OpSchemaRegistry::NewSchema(#name, __FILE__, __LINE__)
+#define OPERATOR_SCHEMA(name)                       \
+  static OpSchema& CAFFE_ANONYMOUS_VARIABLE(name) = \
+      OpSchemaRegistry::NewSchema(#name, __FILE__, __LINE__)
 #define OPERATOR_SCHEMA_STR(name) \
   static OpSchema& CAFFE_ANONYMOUS_VARIABLE(schema_registration) = \
       OpSchemaRegistry::NewSchema(name, __FILE__, __LINE__)
diff --git a/caffe2/core/operator_test.cc b/caffe2/core/operator_test.cc
index cc1f1a9bda..32c7fb2ec3 100644
--- a/caffe2/core/operator_test.cc
+++ b/caffe2/core/operator_test.cc
@@ -69,7 +69,6 @@ class ThrowException : public Operator<CPUContext> {
 OPERATOR_SCHEMA(JustTest).NumInputs(0, 1).NumOutputs(0, 1);
 OPERATOR_SCHEMA(JustTestCPUOnly).NumInputs(0, 1).NumOutputs(0, 1);
 OPERATOR_SCHEMA(ThrowException).NumInputs(0).NumOutputs(0);
-OPERATOR_SCHEMA(JustTestWithSomeOutput);
 
 REGISTER_CPU_OPERATOR(JustTest, JustTest);
 REGISTER_CPU_OPERATOR(JustTestCPUOnly, JustTest);
diff --git a/caffe2/core/parallel_net_test.cc b/caffe2/core/parallel_net_test.cc
index 446cfccb25..f7f78ddbfb 100644
--- a/caffe2/core/parallel_net_test.cc
+++ b/caffe2/core/parallel_net_test.cc
@@ -47,8 +47,10 @@ class SleepOp final : public Operator<CPUContext> {
 
 OPERATOR_SCHEMA(Sleep).NumInputs(0, INT_MAX).NumOutputs(0, 1);
 
+namespace {
 REGISTER_CPU_OPERATOR(Sleep, SleepOp);
 REGISTER_CUDA_OPERATOR(Sleep, SleepOp);
+} // namespace
 
 const char kSleepNetDefString[] =
     " name: \"sleepnet\""
diff --git a/caffe2/db/create_db_op.cc b/caffe2/db/create_db_op.cc
index d5b815f148..908079c5bd 100644
--- a/caffe2/db/create_db_op.cc
+++ b/caffe2/db/create_db_op.cc
@@ -1,9 +1,11 @@
 #include "caffe2/db/create_db_op.h"
 
 namespace caffe2 {
+namespace {
 
 REGISTER_CPU_OPERATOR(CreateDB, CreateDBOp<CPUContext>);
 OPERATOR_SCHEMA(CreateDB).NumInputs(0).NumOutputs(1);
 NO_GRADIENT(CreateDB);
 
+}
 } // namespace caffe2
diff --git a/caffe2/db/create_db_op_gpu.cc b/caffe2/db/create_db_op_gpu.cc
index 07552aa445..2b80568ff5 100644
--- a/caffe2/db/create_db_op_gpu.cc
+++ b/caffe2/db/create_db_op_gpu.cc
@@ -2,5 +2,7 @@
 #include "caffe2/db/create_db_op.h"
namespace caffe2 { +namespace { REGISTER_CUDA_OPERATOR(CreateDB, CreateDBOp<CUDAContext>); +} } // namespace caffe2 diff --git a/caffe2/experiments/operators/fully_connected_op_decomposition.cc b/caffe2/experiments/operators/fully_connected_op_decomposition.cc index 936e683b4b..74a1754a46 100644 --- a/caffe2/experiments/operators/fully_connected_op_decomposition.cc +++ b/caffe2/experiments/operators/fully_connected_op_decomposition.cc @@ -1,6 +1,7 @@ #include "caffe2/experiments/operators/fully_connected_op_decomposition.h" namespace caffe2 { +namespace { REGISTER_CPU_OPERATOR(FC_Decomp, FullyConnectedOpDecomp<float, CPUContext>); REGISTER_CPU_OPERATOR(FCGradient_Decomp, @@ -21,4 +22,5 @@ class GetFCDecompGradient : public GradientMakerBase { } }; REGISTER_GRADIENT(FC_Decomp, GetFCDecompGradient); +} // namespace } // namespace caffe2 diff --git a/caffe2/experiments/operators/fully_connected_op_decomposition_gpu.cc b/caffe2/experiments/operators/fully_connected_op_decomposition_gpu.cc index f5bf92a02a..9101a9ad1a 100644 --- a/caffe2/experiments/operators/fully_connected_op_decomposition_gpu.cc +++ b/caffe2/experiments/operators/fully_connected_op_decomposition_gpu.cc @@ -2,9 +2,9 @@ #include "caffe2/experiments/operators/fully_connected_op_decomposition.h" namespace caffe2 { - +namespace { REGISTER_CUDA_OPERATOR(FC_Dcomp, FullyConnectedOpDecomp<float, CUDAContext>); REGISTER_CUDA_OPERATOR(FCGradient_Decomp, FullyConnectedDecompGradientOp<float, CUDAContext>); - +} // namespace } // namespace caffe2 diff --git a/caffe2/experiments/operators/tt_contraction_op.cc b/caffe2/experiments/operators/tt_contraction_op.cc index 693311daa7..579abaa77c 100644 --- a/caffe2/experiments/operators/tt_contraction_op.cc +++ b/caffe2/experiments/operators/tt_contraction_op.cc @@ -1,6 +1,7 @@ #include "caffe2/experiments/operators/tt_contraction_op.h" namespace caffe2 { +namespace { REGISTER_CPU_OPERATOR(TTContraction, TTContractionOp<float, CPUContext>); @@ -37,4 +38,5 @@ class GetTTContractionGradient : public GradientMakerBase { REGISTER_GRADIENT(TTContraction, GetTTContractionGradient); +} // namespace } // namespace caffe2 diff --git a/caffe2/mpi/mpi_ops.cc b/caffe2/mpi/mpi_ops.cc index a766b98483..be5b05dec7 100644 --- a/caffe2/mpi/mpi_ops.cc +++ b/caffe2/mpi/mpi_ops.cc @@ -1,14 +1,7 @@ #include "caffe2/mpi/mpi_ops.h" namespace caffe2 { - -OPERATOR_SCHEMA(MPICreateCommonWorld); -OPERATOR_SCHEMA(MPIBroadcast); -OPERATOR_SCHEMA(MPIReduce); -OPERATOR_SCHEMA(MPIAllgather); -OPERATOR_SCHEMA(MPIAllreduce); -OPERATOR_SCHEMA(MPISendTensor); -OPERATOR_SCHEMA(MPIReceiveTensor); +namespace { REGISTER_CPU_OPERATOR(MPICreateCommonWorld, MPICreateCommonWorldOp<CPUContext>); REGISTER_CPU_OPERATOR(MPIBroadcast, MPIBroadcastOp<CPUContext>); @@ -18,4 +11,5 @@ REGISTER_CPU_OPERATOR(MPIAllreduce, MPIAllreduceOp<float, CPUContext>); REGISTER_CPU_OPERATOR(MPISendTensor, MPISendTensorOp<CPUContext>); REGISTER_CPU_OPERATOR(MPIReceiveTensor, MPIReceiveTensorOp<CPUContext>); +} // namespace } // namespace caffe2 diff --git a/caffe2/mpi/mpi_ops_gpu.cc b/caffe2/mpi/mpi_ops_gpu.cc index b28a31f00c..c9ed94f7a7 100644 --- a/caffe2/mpi/mpi_ops_gpu.cc +++ b/caffe2/mpi/mpi_ops_gpu.cc @@ -50,6 +50,8 @@ namespace caffe2 { #define CAFFE2_HAS_CUDA_MPI_ALLREDUCE 0 #endif // CAFFE2_FORCE_FALLBACK_CUDA_MPI +namespace { + REGISTER_CUDA_OPERATOR( MPICreateCommonWorld, MPICreateCommonWorldOp<CUDAContext>); @@ -82,5 +84,6 @@ REGISTER_CUDA_OPERATOR( MPIAllreduce, GPUFallbackOp<MPIAllreduceOp<float, CPUContext>>); #endif +} // namespace } // 
namespace caffe2 diff --git a/caffe2/operators/abs_op.cc b/caffe2/operators/abs_op.cc index f38d0827a1..adc3b33a51 100644 --- a/caffe2/operators/abs_op.cc +++ b/caffe2/operators/abs_op.cc @@ -22,6 +22,7 @@ struct AbsGradientCPUFunctor { } }; +namespace { REGISTER_CPU_OPERATOR( Abs, UnaryElementwiseOp<TensorTypes<float>, CPUContext, AbsCPUFunctor>); @@ -58,4 +59,5 @@ class GetAbsGradient : public GradientMakerBase { } }; REGISTER_GRADIENT(Abs, GetAbsGradient); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/abs_op.cu b/caffe2/operators/abs_op.cu index c4ccdd7ffd..4873fe93ec 100644 --- a/caffe2/operators/abs_op.cu +++ b/caffe2/operators/abs_op.cu @@ -49,6 +49,7 @@ struct AbsGradientCUDAFunctor { } }; +namespace { REGISTER_CUDA_OPERATOR( Abs, UnaryElementwiseOp<TensorTypes<float>, CUDAContext, AbsCUDAFunctor>); @@ -58,4 +59,5 @@ REGISTER_CUDA_OPERATOR( TensorTypes<float>, CUDAContext, WithoutBroadcast<AbsGradientCUDAFunctor>>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/accumulate_op.cc b/caffe2/operators/accumulate_op.cc index 0037bd7c95..958a258e56 100644 --- a/caffe2/operators/accumulate_op.cc +++ b/caffe2/operators/accumulate_op.cc @@ -1,6 +1,7 @@ #include "caffe2/operators/accumulate_op.h" namespace caffe2 { +namespace { REGISTER_CPU_OPERATOR(Accumulate, AccumulateOp<float, CPUContext>); OPERATOR_SCHEMA(Accumulate) @@ -26,4 +27,5 @@ argument. .Output(0, "output", "Accumulated output tensor"); SHOULD_NOT_DO_GRADIENT(Accumulate); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/accumulate_op.cu b/caffe2/operators/accumulate_op.cu index 96f0d0c5ba..d042c13ea2 100644 --- a/caffe2/operators/accumulate_op.cu +++ b/caffe2/operators/accumulate_op.cu @@ -2,5 +2,7 @@ #include "caffe2/operators/accumulate_op.h" namespace caffe2 { +namespace { REGISTER_CUDA_OPERATOR(Accumulate, AccumulateOp<float, CUDAContext>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/accuracy_op.cc b/caffe2/operators/accuracy_op.cc index 2b3aa546ce..8cbbc9f06d 100644 --- a/caffe2/operators/accuracy_op.cc +++ b/caffe2/operators/accuracy_op.cc @@ -43,6 +43,7 @@ bool AccuracyOp<float, CPUContext>::RunOnDevice() { return true; } +namespace { REGISTER_CPU_OPERATOR(Accuracy, AccuracyOp<float, CPUContext>); OPERATOR_SCHEMA(Accuracy) @@ -65,4 +66,5 @@ classes, it is considered a correct prediction. 
"accuracy"); SHOULD_NOT_DO_GRADIENT(Accuracy); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/accuracy_op.cu b/caffe2/operators/accuracy_op.cu index 0fa1b9097a..2299636546 100644 --- a/caffe2/operators/accuracy_op.cu +++ b/caffe2/operators/accuracy_op.cu @@ -69,5 +69,7 @@ bool AccuracyOp<float, CUDAContext>::RunOnDevice() { return true; } +namespace { REGISTER_CUDA_OPERATOR(Accuracy, AccuracyOp<float, CUDAContext>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/batch_matmul_op.cc b/caffe2/operators/batch_matmul_op.cc index b12974cb0b..722db5c149 100644 --- a/caffe2/operators/batch_matmul_op.cc +++ b/caffe2/operators/batch_matmul_op.cc @@ -1,6 +1,7 @@ #include "caffe2/operators/batch_matmul_op.h" namespace caffe2 { +namespace { REGISTER_CPU_OPERATOR(BatchMatMul, BatchMatMulOp<float, CPUContext>); @@ -119,4 +120,5 @@ class GetBatchMatMulGradient : public GradientMakerBase { REGISTER_GRADIENT(BatchMatMul, GetBatchMatMulGradient); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/clip_op.cc b/caffe2/operators/clip_op.cc index 5109f2c0be..ffe5454cbc 100644 --- a/caffe2/operators/clip_op.cc +++ b/caffe2/operators/clip_op.cc @@ -31,6 +31,7 @@ bool ClipGradientOp<float, CPUContext>::RunOnDevice() { return true; } +namespace { REGISTER_CPU_OPERATOR(Clip, ClipOp<float, CPUContext>); REGISTER_CPU_OPERATOR(ClipGradient, ClipGradientOp<float, CPUContext>); @@ -71,4 +72,5 @@ class GetClipGradient : public GradientMakerBase { } }; REGISTER_GRADIENT(Clip, GetClipGradient); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/clip_op.cu b/caffe2/operators/clip_op.cu index fe9e7f4f36..c4c7c91a81 100644 --- a/caffe2/operators/clip_op.cu +++ b/caffe2/operators/clip_op.cu @@ -67,6 +67,8 @@ bool ClipGradientOp<float, CUDAContext>::RunOnDevice() { return true; } +namespace { REGISTER_CUDA_OPERATOR(Clip, ClipOp<float, CUDAContext>); REGISTER_CUDA_OPERATOR(ClipGradient, ClipGradientOp<float, CUDAContext>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/concat_split_op.cc b/caffe2/operators/concat_split_op.cc index 6f301a1875..d7953c875b 100644 --- a/caffe2/operators/concat_split_op.cc +++ b/caffe2/operators/concat_split_op.cc @@ -1,6 +1,7 @@ #include "caffe2/operators/concat_split_op.h" namespace caffe2 { +namespace { REGISTER_CPU_OPERATOR(Split, SplitOp<CPUContext>); REGISTER_CPU_OPERATOR(Concat, ConcatOp<CPUContext>); OPERATOR_SCHEMA(Split) @@ -73,4 +74,5 @@ class GetConcatGradient : public GradientMakerBase { }; REGISTER_GRADIENT(Concat, GetConcatGradient); REGISTER_GRADIENT(DepthConcat, GetConcatGradient); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/concat_split_op_gpu.cc b/caffe2/operators/concat_split_op_gpu.cc index 5e9d8bc34a..8e9dd9cd91 100644 --- a/caffe2/operators/concat_split_op_gpu.cc +++ b/caffe2/operators/concat_split_op_gpu.cc @@ -2,10 +2,13 @@ #include "caffe2/operators/concat_split_op.h" namespace caffe2 { +namespace { REGISTER_CUDA_OPERATOR(Split, SplitOp<CUDAContext>); REGISTER_CUDA_OPERATOR(Concat, ConcatOp<CUDAContext>); // Backward compatibility settings REGISTER_CUDA_OPERATOR(DepthSplit, SplitOp<CUDAContext>); REGISTER_CUDA_OPERATOR(DepthConcat, ConcatOp<CUDAContext>); +} // namespace } // namespace caffe2 + diff --git a/caffe2/operators/conv_op.cc b/caffe2/operators/conv_op.cc index e6fd1d2f61..229aa491cc 100644 --- a/caffe2/operators/conv_op.cc +++ b/caffe2/operators/conv_op.cc @@ -3,6 +3,7 @@ #include "caffe2/operators/conv_pool_op_base.h" namespace caffe2 { +namespace 
{ REGISTER_CPU_OPERATOR(Conv, ConvOp<float, CPUContext>); @@ -36,4 +37,5 @@ why they are separate files. "stride size, and pad lengths." ""); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/conv_op_gpu.cc b/caffe2/operators/conv_op_gpu.cc index e3145bdde2..922f1d67b1 100644 --- a/caffe2/operators/conv_op_gpu.cc +++ b/caffe2/operators/conv_op_gpu.cc @@ -3,6 +3,8 @@ #include "caffe2/core/context_gpu.h" namespace caffe2 { +namespace { REGISTER_CUDA_OPERATOR(Conv, ConvOp<float, CUDAContext>); REGISTER_CUDA_OPERATOR(ConvGradient, ConvGradientOp<float, CUDAContext>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/conv_transpose_op.cc b/caffe2/operators/conv_transpose_op.cc index 8164ce5096..495f9205c4 100644 --- a/caffe2/operators/conv_transpose_op.cc +++ b/caffe2/operators/conv_transpose_op.cc @@ -2,6 +2,7 @@ #include "caffe2/operators/conv_transpose_op_impl.h" namespace caffe2 { +namespace { REGISTER_CPU_OPERATOR(ConvTranspose, ConvTransposeOp<float, CPUContext>); @@ -46,4 +47,5 @@ OPERATOR_SCHEMA(ConvTranspose) "transposed convolution. The output dimensions are functions of the kernel" " size, stride size, and pad lengths."); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/conv_transpose_op_gpu.cc b/caffe2/operators/conv_transpose_op_gpu.cc index d7993cf82d..4d9b419737 100644 --- a/caffe2/operators/conv_transpose_op_gpu.cc +++ b/caffe2/operators/conv_transpose_op_gpu.cc @@ -3,8 +3,10 @@ #include "caffe2/operators/conv_transpose_op_impl.h" namespace caffe2 { +namespace { REGISTER_CUDA_OPERATOR(ConvTranspose, ConvTransposeOp<float, CUDAContext>); REGISTER_CUDA_OPERATOR( ConvTransposeGradient, ConvTransposeGradientOp<float, CUDAContext>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/cos_op.cc b/caffe2/operators/cos_op.cc index 8328a3c470..ce4115218f 100644 --- a/caffe2/operators/cos_op.cc +++ b/caffe2/operators/cos_op.cc @@ -21,6 +21,7 @@ struct CosGradientCPUFunctor { } }; +namespace { REGISTER_CPU_OPERATOR( Cos, UnaryElementwiseOp<TensorTypes<float>, CPUContext, CosCPUFunctor>); @@ -57,4 +58,5 @@ class GetCosGradient : public GradientMakerBase { } }; REGISTER_GRADIENT(Cos, GetCosGradient); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/cos_op.cu b/caffe2/operators/cos_op.cu index 76aa1e443a..9bd1d53a17 100644 --- a/caffe2/operators/cos_op.cu +++ b/caffe2/operators/cos_op.cu @@ -49,6 +49,7 @@ struct CosGradientCUDAFunctor { } }; +namespace { REGISTER_CUDA_OPERATOR( Cos, UnaryElementwiseOp<TensorTypes<float>, CUDAContext, CosCUDAFunctor>); @@ -58,4 +59,5 @@ REGISTER_CUDA_OPERATOR( TensorTypes<float>, CUDAContext, WithoutBroadcast<CosGradientCUDAFunctor>>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/counter_ops.cc b/caffe2/operators/counter_ops.cc index e636b08380..2673af7cbc 100644 --- a/caffe2/operators/counter_ops.cc +++ b/caffe2/operators/counter_ops.cc @@ -4,6 +4,7 @@ namespace caffe2 { namespace { +namespace { /** * @brief CounterSerializer is the serializer for Counter type. 
* @@ -135,6 +136,8 @@ SHOULD_NOT_DO_GRADIENT(CountDown); SHOULD_NOT_DO_GRADIENT(CountUp); SHOULD_NOT_DO_GRADIENT(RetrieveCount); +} // namespace + CAFFE_KNOWN_TYPE(std::unique_ptr<Counter<int64_t>>); REGISTER_BLOB_SERIALIZER( (TypeMeta::Id<std::unique_ptr<Counter<int64_t>>>()), diff --git a/caffe2/operators/counter_ops_gpu.cc b/caffe2/operators/counter_ops_gpu.cc index 7880aeeb41..de07e02d85 100644 --- a/caffe2/operators/counter_ops_gpu.cc +++ b/caffe2/operators/counter_ops_gpu.cc @@ -2,6 +2,7 @@ #include "counter_ops.h" namespace caffe2 { +namespace { REGISTER_CUDA_OPERATOR(CreateCounter, CreateCounterOp<int64_t, CUDAContext>); REGISTER_CUDA_OPERATOR(ResetCounter, ResetCounterOp<int64_t, CUDAContext>); REGISTER_CUDA_OPERATOR(CountDown, CountDownOp<int64_t, CUDAContext>); @@ -10,4 +11,5 @@ REGISTER_CUDA_OPERATOR( CheckCounterDoneOp<int64_t, CUDAContext>); REGISTER_CUDA_OPERATOR(CountUp, CountUpOp<int64_t, CUDAContext>); REGISTER_CUDA_OPERATOR(RetrieveCount, RetrieveCountOp<int64_t, CUDAContext>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/cross_entropy_op.cc b/caffe2/operators/cross_entropy_op.cc index 46d71b6dfc..9faaaee7c9 100644 --- a/caffe2/operators/cross_entropy_op.cc +++ b/caffe2/operators/cross_entropy_op.cc @@ -250,6 +250,8 @@ bool CrossEntropyGradientOp<float, CPUContext>::RunOnDevice() { return true; } + +namespace { REGISTER_CPU_OPERATOR(LabelCrossEntropy, LabelCrossEntropyOp<float, CPUContext>); REGISTER_CPU_OPERATOR(LabelCrossEntropyGradient, @@ -424,4 +426,5 @@ class GetCrossEntropyGradient : public GradientMakerBase { }; REGISTER_GRADIENT(CrossEntropy, GetCrossEntropyGradient); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/cross_entropy_op.cu b/caffe2/operators/cross_entropy_op.cu index 61348c3a51..ea6813a3da 100644 --- a/caffe2/operators/cross_entropy_op.cu +++ b/caffe2/operators/cross_entropy_op.cu @@ -220,6 +220,7 @@ bool SigmoidCrossEntropyWithLogitsGradientOp<float, CUDAContext>:: return true; } +namespace { REGISTER_CUDA_OPERATOR(LabelCrossEntropy, LabelCrossEntropyOp<float, CUDAContext>); REGISTER_CUDA_OPERATOR(LabelCrossEntropyGradient, @@ -243,4 +244,5 @@ REGISTER_CUDA_OPERATOR(CrossEntropy, REGISTER_CUDA_OPERATOR(CrossEntropyGradient, GPUFallbackOp<CrossEntropyGradientOp<float, CPUContext>>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/distance_op.cc b/caffe2/operators/distance_op.cc index e8b6af213c..93d5ae1535 100644 --- a/caffe2/operators/distance_op.cc +++ b/caffe2/operators/distance_op.cc @@ -263,6 +263,7 @@ bool DotProductWithPaddingOp<float, CPUContext>::RunOnDevice() { return true; } +namespace { // L2 REGISTER_CPU_OPERATOR(SquaredL2Distance, SquaredL2DistanceOp<float, CPUContext>); @@ -442,4 +443,5 @@ class GetDotProductWithPaddingGradient : public GradientMakerBase { } }; REGISTER_GRADIENT(DotProductWithPadding, GetDotProductWithPaddingGradient); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/distance_op.cu b/caffe2/operators/distance_op.cu index 82fd8d833f..7854306633 100644 --- a/caffe2/operators/distance_op.cu +++ b/caffe2/operators/distance_op.cu @@ -328,6 +328,7 @@ bool DotProductGradientOp<float, CUDAContext>::RunOnDevice() { return true; } +namespace { REGISTER_CUDA_OPERATOR(SquaredL2Distance, SquaredL2DistanceOp<float, CUDAContext>); REGISTER_CUDA_OPERATOR(SquaredL2DistanceGradient, @@ -342,4 +343,5 @@ REGISTER_CUDA_OPERATOR(DotProduct, DotProductOp<float, CUDAContext>); REGISTER_CUDA_OPERATOR( DotProductGradient, DotProductGradientOp<float, CUDAContext>); 
+} // namespace } // namespace caffe2 diff --git a/caffe2/operators/dropout_op.cc b/caffe2/operators/dropout_op.cc index ac0618a72b..ac42a01a04 100644 --- a/caffe2/operators/dropout_op.cc +++ b/caffe2/operators/dropout_op.cc @@ -57,6 +57,8 @@ bool DropoutGradientOp<float, CPUContext>::RunOnDevice() { } } + +namespace { REGISTER_CPU_OPERATOR(Dropout, DropoutOp<float, CPUContext>); REGISTER_CPU_OPERATOR(DropoutGrad, DropoutGradientOp<float, CPUContext>); @@ -92,4 +94,5 @@ class GetDropoutGradient : public GradientMakerBase { } }; REGISTER_GRADIENT(Dropout, GetDropoutGradient); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/dropout_op.cu b/caffe2/operators/dropout_op.cu index 7c660ff7d0..a25612a59e 100644 --- a/caffe2/operators/dropout_op.cu +++ b/caffe2/operators/dropout_op.cu @@ -78,6 +78,8 @@ bool DropoutGradientOp<float, CUDAContext>::RunOnDevice() { } +namespace { REGISTER_CUDA_OPERATOR(Dropout, DropoutOp<float, CUDAContext>); REGISTER_CUDA_OPERATOR(DropoutGrad, DropoutGradientOp<float, CUDAContext>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/elementwise_linear_op.cc b/caffe2/operators/elementwise_linear_op.cc index b6482e09f3..59058c8445 100644 --- a/caffe2/operators/elementwise_linear_op.cc +++ b/caffe2/operators/elementwise_linear_op.cc @@ -77,6 +77,8 @@ bool ElementwiseLinearGradientOp<float, CPUContext>::RunOnDevice(){ return true; } +namespace { + REGISTER_CPU_OPERATOR( ElementwiseLinear, ElementwiseLinearOp<float, CPUContext>); @@ -121,4 +123,5 @@ REGISTER_GRADIENT( GetElementwiseLinearGradient ); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/elementwise_linear_op.cu b/caffe2/operators/elementwise_linear_op.cu index 503675a86d..0bea8ec6ff 100644 --- a/caffe2/operators/elementwise_linear_op.cu +++ b/caffe2/operators/elementwise_linear_op.cu @@ -114,9 +114,13 @@ bool ElementwiseLinearGradientOp<float, CUDAContext>::RunOnDevice(){ return true; } +namespace { + REGISTER_CUDA_OPERATOR(ElementwiseLinear, ElementwiseLinearOp<float, CUDAContext>); REGISTER_CUDA_OPERATOR(ElementwiseLinearGradient, ElementwiseLinearGradientOp<float, CUDAContext>); +} // namespace + } // namespace caffe2 diff --git a/caffe2/operators/elementwise_op.cu b/caffe2/operators/elementwise_op.cu index 7aa1e895bb..32a6d2e8c5 100644 --- a/caffe2/operators/elementwise_op.cu +++ b/caffe2/operators/elementwise_op.cu @@ -426,6 +426,8 @@ class CUDAAddOp final : public Operator<CUDAContext> { string order_; }; +namespace { REGISTER_CUDA_OPERATOR(Add, CUDAAddOp); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/elu_op.cc b/caffe2/operators/elu_op.cc index 4c9619635c..db36584b52 100644 --- a/caffe2/operators/elu_op.cc +++ b/caffe2/operators/elu_op.cc @@ -38,6 +38,7 @@ bool EluGradientOp<float, CPUContext>::RunOnDevice() { return true; } +namespace { REGISTER_CPU_OPERATOR(Elu, EluOp<float, CPUContext>); REGISTER_CPU_OPERATOR(EluGradient, EluGradientOp<float, CPUContext>); @@ -79,4 +80,5 @@ class GetEluGradient : public GradientMakerBase { }; REGISTER_GRADIENT(Elu, GetEluGradient); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/exp_op.cc b/caffe2/operators/exp_op.cc index cf8618d025..a381eca664 100644 --- a/caffe2/operators/exp_op.cc +++ b/caffe2/operators/exp_op.cc @@ -11,6 +11,7 @@ struct ExpCPUFunctor { } }; +namespace { REGISTER_CPU_OPERATOR( Exp, UnaryElementwiseOp<TensorTypes<float>, CPUContext, ExpCPUFunctor>); @@ -43,4 +44,5 @@ class GetExpGradient : public GradientMakerBase { } }; REGISTER_GRADIENT(Exp, 
GetExpGradient); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/exp_op.cu b/caffe2/operators/exp_op.cu index c31b5441d7..a95397696e 100644 --- a/caffe2/operators/exp_op.cu +++ b/caffe2/operators/exp_op.cu @@ -25,6 +25,8 @@ struct ExpCUDAFunctor { } }; +namespace { REGISTER_CUDA_OPERATOR( Exp, UnaryElementwiseOp<TensorTypes<float>, CUDAContext, ExpCUDAFunctor>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/filler_op.cc b/caffe2/operators/filler_op.cc index bf9020aecd..637dd5334d 100644 --- a/caffe2/operators/filler_op.cc +++ b/caffe2/operators/filler_op.cc @@ -12,6 +12,8 @@ bool RangeFillOp<float, CPUContext>::Fill( return true; } +namespace { + REGISTER_CPU_OPERATOR(UniformFill, UniformFillOp<float, CPUContext>); REGISTER_CPU_OPERATOR(UniformIntFill, UniformFillOp<int, CPUContext>); REGISTER_CPU_OPERATOR(UniqueUniformFill, UniqueUniformFillOp<CPUContext>); @@ -190,4 +192,5 @@ output would be [0,1,2,3,0,1,2,0]. "1D tensor whose size is the sum of `lengths`"); NO_GRADIENT(LengthsRangeFill); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/filler_op.cu b/caffe2/operators/filler_op.cu index 71680908b6..f3a9b392b4 100644 --- a/caffe2/operators/filler_op.cu +++ b/caffe2/operators/filler_op.cu @@ -21,6 +21,8 @@ bool RangeFillOp<float, CUDAContext>::Fill( return true; } +namespace { + REGISTER_CUDA_OPERATOR(UniformFill, UniformFillOp<float, CUDAContext>); REGISTER_CUDA_OPERATOR(UniformIntFill, UniformFillOp<int, CUDAContext>); REGISTER_CUDA_OPERATOR(ConstantFill, ConstantFillOp<CUDAContext>); @@ -29,4 +31,5 @@ REGISTER_CUDA_OPERATOR(XavierFill, XavierFillOp<float, CUDAContext>); REGISTER_CUDA_OPERATOR(MSRAFill, MSRAFillOp<float, CUDAContext>); REGISTER_CUDA_OPERATOR(RangeFill, RangeFillOp<float, CUDAContext>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/find_op.cc b/caffe2/operators/find_op.cc index 6cd425c57d..cee7e8c37d 100644 --- a/caffe2/operators/find_op.cc +++ b/caffe2/operators/find_op.cc @@ -20,6 +20,8 @@ OPERATOR_SCHEMA(Find) values are not found. 
)DOC"); +namespace { REGISTER_CPU_OPERATOR(Find, FindOp<CPUContext>) +} } // namespace caffe2 diff --git a/caffe2/operators/find_op.cu b/caffe2/operators/find_op.cu index 32bceda79a..73b0e5822a 100644 --- a/caffe2/operators/find_op.cu +++ b/caffe2/operators/find_op.cu @@ -51,6 +51,8 @@ bool FindOp<CUDAContext>::DoRunWithType() { return true; } +namespace { REGISTER_CUDA_OPERATOR(Find, FindOp<CUDAContext>) +} } // namespace caffe2 diff --git a/caffe2/operators/fully_connected_op.cc b/caffe2/operators/fully_connected_op.cc index df903ab319..c00f199d5b 100644 --- a/caffe2/operators/fully_connected_op.cc +++ b/caffe2/operators/fully_connected_op.cc @@ -1,6 +1,7 @@ #include "caffe2/operators/fully_connected_op.h" namespace caffe2 { +namespace { REGISTER_CPU_OPERATOR(FC, FullyConnectedOp<CPUContext>); REGISTER_CPU_OPERATOR(FCGradient, FullyConnectedGradientOp<CPUContext>); @@ -59,4 +60,5 @@ class GetFCGradient : public GradientMakerBase { } }; REGISTER_GRADIENT(FC, GetFCGradient); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/fully_connected_op_gpu.cc b/caffe2/operators/fully_connected_op_gpu.cc index c3c0d25cc4..07431862f8 100644 --- a/caffe2/operators/fully_connected_op_gpu.cc +++ b/caffe2/operators/fully_connected_op_gpu.cc @@ -53,6 +53,9 @@ bool FullyConnectedGradientOp<CUDAContext>::RunOnDevice() { return false; } +namespace { + REGISTER_CUDA_OPERATOR(FC, FullyConnectedOp<CUDAContext>); REGISTER_CUDA_OPERATOR(FCGradient, FullyConnectedGradientOp<CUDAContext>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/given_tensor_fill_op.cc b/caffe2/operators/given_tensor_fill_op.cc index 6df0490a8e..7c2f92cbe7 100644 --- a/caffe2/operators/given_tensor_fill_op.cc +++ b/caffe2/operators/given_tensor_fill_op.cc @@ -1,6 +1,7 @@ #include "caffe2/operators/given_tensor_fill_op.h" namespace caffe2 { +namespace { REGISTER_CPU_OPERATOR(GivenTensorFill, GivenTensorFillOp<float, CPUContext>); REGISTER_CPU_OPERATOR(GivenTensorBoolFill, GivenTensorFillOp<bool, CPUContext>); @@ -43,4 +44,5 @@ OPERATOR_SCHEMA(GivenTensorStringFill) .AllowInplace({{0, 0}}) .TensorInferenceFunction(FillerTensorInference); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/given_tensor_fill_op.cu b/caffe2/operators/given_tensor_fill_op.cu index ef3f8f9997..b877852751 100644 --- a/caffe2/operators/given_tensor_fill_op.cu +++ b/caffe2/operators/given_tensor_fill_op.cu @@ -2,6 +2,7 @@ #include "caffe2/operators/given_tensor_fill_op.h" namespace caffe2 { +namespace { REGISTER_CUDA_OPERATOR(GivenTensorFill, GivenTensorFillOp<float, CUDAContext>); REGISTER_CUDA_OPERATOR(GivenTensorIntFill, GivenTensorFillOp<int, CUDAContext>); @@ -9,3 +10,4 @@ REGISTER_CUDA_OPERATOR( GivenTensorBoolFill, GivenTensorFillOp<bool, CUDAContext>); } +} diff --git a/caffe2/operators/half_float_ops.cc b/caffe2/operators/half_float_ops.cc index 295a0fd002..4601f258e5 100644 --- a/caffe2/operators/half_float_ops.cc +++ b/caffe2/operators/half_float_ops.cc @@ -1,6 +1,7 @@ #include "caffe2/operators/half_float_ops.h" namespace caffe2 { +namespace { OPERATOR_SCHEMA(FloatToHalf).NumInputs(1).NumOutputs(1); OPERATOR_SCHEMA(HalfToFloat).NumInputs(1).NumOutputs(1); @@ -21,4 +22,5 @@ class GetHalfToFloatGradient : public GradientMakerBase { } }; REGISTER_GRADIENT(HalfToFloat, GetHalfToFloatGradient); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/half_float_ops.cu b/caffe2/operators/half_float_ops.cu index fb1cd16db4..ca0be70cb0 100644 --- a/caffe2/operators/half_float_ops.cu +++ 
b/caffe2/operators/half_float_ops.cu @@ -51,8 +51,10 @@ bool HalfToFloatOp<CUDAContext>::RunOnDevice() { return true; } +namespace { REGISTER_CUDA_OPERATOR(FloatToHalf, FloatToHalfOp<CUDAContext>); REGISTER_CUDA_OPERATOR(HalfToFloat, HalfToFloatOp<CUDAContext>); +} // namespace } // namespace caffe2 #endif // CAFFE_HAS_CUDA_FP16 diff --git a/caffe2/operators/im2col_op.cc b/caffe2/operators/im2col_op.cc index ecc62b334b..d27992dbc7 100644 --- a/caffe2/operators/im2col_op.cc +++ b/caffe2/operators/im2col_op.cc @@ -1,6 +1,7 @@ #include "caffe2/operators/im2col_op.h" namespace caffe2 { +namespace { REGISTER_CPU_OPERATOR(Im2Col, Im2ColOp<float, CPUContext>); REGISTER_CPU_OPERATOR(Col2Im, Col2ImOp<float, CPUContext>); @@ -80,4 +81,5 @@ OPERATOR_SCHEMA(Im2Col) OPERATOR_SCHEMA(Col2Im).NumInputs(2).NumOutputs(1); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/im2col_op_gpu.cc b/caffe2/operators/im2col_op_gpu.cc index ebfec0f198..6cd59a485f 100644 --- a/caffe2/operators/im2col_op_gpu.cc +++ b/caffe2/operators/im2col_op_gpu.cc @@ -2,8 +2,8 @@ #include "caffe2/operators/im2col_op.h" namespace caffe2 { - +namespace { REGISTER_CUDA_OPERATOR(Im2Col, Im2ColOp<float, CUDAContext>); REGISTER_CUDA_OPERATOR(Col2Im, Col2ImOp<float, CUDAContext>); - +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/instance_norm_gradient_op.cc b/caffe2/operators/instance_norm_gradient_op.cc index 0799921400..e785f49138 100644 --- a/caffe2/operators/instance_norm_gradient_op.cc +++ b/caffe2/operators/instance_norm_gradient_op.cc @@ -232,6 +232,8 @@ bool InstanceNormGradientOp<T, Context>::RunOnDeviceWithOrderNCHW() { return true; } +namespace { + class GetInstanceNormGradient : public GradientMakerBase { using GradientMakerBase::GradientMakerBase; vector<OperatorDef> GetGradientDefs() override { @@ -258,3 +260,4 @@ OPERATOR_SCHEMA(InstanceNormGradient).NumInputs(4, 6).NumOutputs(3); REGISTER_GRADIENT(InstanceNorm, GetInstanceNormGradient); } +} diff --git a/caffe2/operators/instance_norm_op.cc b/caffe2/operators/instance_norm_op.cc index 6ef62803c5..43098f013a 100644 --- a/caffe2/operators/instance_norm_op.cc +++ b/caffe2/operators/instance_norm_op.cc @@ -97,6 +97,8 @@ bool InstanceNormOp<T, Context>::RunOnDeviceWithOrderNCHW() { return true; } +namespace { + REGISTER_CPU_OPERATOR(InstanceNorm, InstanceNormOp<float, CPUContext>); OPERATOR_SCHEMA(InstanceNorm) @@ -142,4 +144,7 @@ computation. "Optional saved inverse stdev used during training to speed up " "gradient computation. 
Should not be used for testing."); + + +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/leaky_relu_op.cc b/caffe2/operators/leaky_relu_op.cc index 8274927929..a039766d83 100644 --- a/caffe2/operators/leaky_relu_op.cc +++ b/caffe2/operators/leaky_relu_op.cc @@ -31,6 +31,8 @@ bool LeakyReluGradientOp<float, CPUContext>::RunOnDevice() { return true; } +namespace { + REGISTER_CPU_OPERATOR(LeakyRelu, LeakyReluOp<float, CPUContext>); REGISTER_CPU_OPERATOR( LeakyReluGradient, @@ -68,4 +70,5 @@ class GetLeakyReluGradient : public GradientMakerBase { REGISTER_GRADIENT(LeakyRelu, GetLeakyReluGradient); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/leaky_relu_op.cu b/caffe2/operators/leaky_relu_op.cu index ece07b786a..bf90846522 100644 --- a/caffe2/operators/leaky_relu_op.cu +++ b/caffe2/operators/leaky_relu_op.cu @@ -60,8 +60,10 @@ bool LeakyReluGradientOp<float, CUDAContext>::RunOnDevice() { return true; } +namespace { REGISTER_CUDA_OPERATOR(LeakyRelu, LeakyReluOp<float, CUDAContext>); REGISTER_CUDA_OPERATOR( LeakyReluGradient, LeakyReluGradientOp<float, CUDAContext>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/load_save_op.cc b/caffe2/operators/load_save_op.cc index 43cf403cc9..93243b1758 100644 --- a/caffe2/operators/load_save_op.cc +++ b/caffe2/operators/load_save_op.cc @@ -9,6 +9,7 @@ void LoadOp<CPUContext>::SetCurrentDevice(BlobProto* proto) { } } +namespace { REGISTER_CPU_OPERATOR(DBExists, DBExistsOp<CPUContext>); REGISTER_CPU_OPERATOR(Load, LoadOp<CPUContext>); REGISTER_CPU_OPERATOR(Save, SaveOp<CPUContext>); @@ -128,11 +129,10 @@ counter). This is determined whether we need to do checkpointing. "(int, default 1) the checkpointing is carried out when " "(iter mod every) is zero."); -OPERATOR_SCHEMA(Snapshot); - NO_GRADIENT(Load); SHOULD_NOT_DO_GRADIENT(DBExists); SHOULD_NOT_DO_GRADIENT(Save); SHOULD_NOT_DO_GRADIENT(Checkpoint); SHOULD_NOT_DO_GRADIENT(Snapshot); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/load_save_op_gpu.cc b/caffe2/operators/load_save_op_gpu.cc index 9b7b64d9d4..542569b984 100644 --- a/caffe2/operators/load_save_op_gpu.cc +++ b/caffe2/operators/load_save_op_gpu.cc @@ -12,7 +12,9 @@ void LoadOp<CUDAContext>::SetCurrentDevice(BlobProto* proto) { } } +namespace { REGISTER_CUDA_OPERATOR(Load, LoadOp<CUDAContext>); REGISTER_CUDA_OPERATOR(Save, SaveOp<CUDAContext>); REGISTER_CUDA_OPERATOR(Checkpoint, CheckpointOp<CUDAContext>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/local_response_normalization_op.cc b/caffe2/operators/local_response_normalization_op.cc index 9080b7b456..56d7bc9c96 100644 --- a/caffe2/operators/local_response_normalization_op.cc +++ b/caffe2/operators/local_response_normalization_op.cc @@ -227,6 +227,7 @@ bool LRNGradientOp<float, CPUContext>::RunOnDeviceWithOrderNHWC() { return true; } +namespace { REGISTER_CPU_OPERATOR(LRN, LRNOp<float, CPUContext>); REGISTER_CPU_OPERATOR(LRNGradient, LRNGradientOp<float, CPUContext>); @@ -250,4 +251,5 @@ class GetLRNGradient : public GradientMakerBase { } }; REGISTER_GRADIENT(LRN, GetLRNGradient); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/local_response_normalization_op.cu b/caffe2/operators/local_response_normalization_op.cu index 160153df9d..e511abb238 100644 --- a/caffe2/operators/local_response_normalization_op.cu +++ b/caffe2/operators/local_response_normalization_op.cu @@ -288,7 +288,9 @@ bool LRNGradientOp<float, CUDAContext>::RunOnDeviceWithOrderNHWC() { } +namespace { 
REGISTER_CUDA_OPERATOR(LRN, LRNOp<float, CUDAContext>); REGISTER_CUDA_OPERATOR(LRNGradient, LRNGradientOp<float, CUDAContext>); +} } // namespace caffe2 diff --git a/caffe2/operators/log_op.cc b/caffe2/operators/log_op.cc index 73245424a7..d29316167b 100644 --- a/caffe2/operators/log_op.cc +++ b/caffe2/operators/log_op.cc @@ -12,6 +12,7 @@ struct LogCPUFunctor { } }; +namespace { REGISTER_CPU_OPERATOR( Log, UnaryElementwiseOp<TensorTypes<float>, CPUContext, LogCPUFunctor>); @@ -45,4 +46,5 @@ class GetLogGradient : public GradientMakerBase { }; REGISTER_GRADIENT(Log, GetLogGradient); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/log_op.cu b/caffe2/operators/log_op.cu index 3fb422cd7a..57fa89a44b 100644 --- a/caffe2/operators/log_op.cu +++ b/caffe2/operators/log_op.cu @@ -11,7 +11,10 @@ struct LogCUDAFunctor { } }; +namespace { + REGISTER_CUDA_OPERATOR( Log, UnaryElementwiseOp<TensorTypes<float>, CUDAContext, LogCUDAFunctor>); } +} diff --git a/caffe2/operators/loss_op.cc b/caffe2/operators/loss_op.cc index 67e0f5d1db..96c8aa19e9 100644 --- a/caffe2/operators/loss_op.cc +++ b/caffe2/operators/loss_op.cc @@ -1,6 +1,7 @@ #include "caffe2/operators/loss_op.h" namespace caffe2 { +namespace { REGISTER_CPU_OPERATOR(AveragedLoss, AveragedLoss<float, CPUContext>); REGISTER_CPU_OPERATOR(AveragedLossGradient, @@ -31,4 +32,5 @@ class GetAveragedLossGradient : public GradientMakerBase { }; REGISTER_GRADIENT(AveragedLoss, GetAveragedLossGradient); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/loss_op.cu b/caffe2/operators/loss_op.cu index 6ad56d21a2..9fb1cb62a6 100644 --- a/caffe2/operators/loss_op.cu +++ b/caffe2/operators/loss_op.cu @@ -2,8 +2,10 @@ #include "caffe2/operators/loss_op.h" namespace caffe2 { +namespace { REGISTER_CUDA_OPERATOR(AveragedLoss, AveragedLoss<float, CUDAContext>); REGISTER_CUDA_OPERATOR( AveragedLossGradient, AveragedLossGradient<float, CUDAContext>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/lp_pool_op.cc b/caffe2/operators/lp_pool_op.cc index 82351481a5..a50ef521f1 100644 --- a/caffe2/operators/lp_pool_op.cc +++ b/caffe2/operators/lp_pool_op.cc @@ -221,6 +221,8 @@ bool PoolGradientOp<float, CPUContext, LpPool>::RunOnDeviceWithOrderNHWC() { return true; } +namespace { + REGISTER_CPU_OPERATOR(LpPool, PoolOp<float, CPUContext, LpPool>); REGISTER_CPU_OPERATOR( LpPoolGradient, @@ -268,3 +270,4 @@ class GetPoolGradient : public GradientMakerBase { }; REGISTER_GRADIENT(LpPool, GetPoolGradient); } +} diff --git a/caffe2/operators/lp_pool_op.cu b/caffe2/operators/lp_pool_op.cu index 53f6110294..06a34130ff 100644 --- a/caffe2/operators/lp_pool_op.cu +++ b/caffe2/operators/lp_pool_op.cu @@ -368,8 +368,10 @@ bool PoolGradientOp<float, CUDAContext, LpPool>:: return true; } +namespace { REGISTER_CUDA_OPERATOR(LpPool, PoolOp<float, CUDAContext, LpPool>); REGISTER_CUDA_OPERATOR( LpPoolGradient, PoolGradientOp<float, CUDAContext, LpPool>); } +} diff --git a/caffe2/operators/lstm_unit_op.cc b/caffe2/operators/lstm_unit_op.cc index 31db8d4b3c..9e74dd7dfb 100644 --- a/caffe2/operators/lstm_unit_op.cc +++ b/caffe2/operators/lstm_unit_op.cc @@ -1,6 +1,7 @@ #include "lstm_unit_op.h" namespace caffe2 { +namespace { REGISTER_CPU_OPERATOR(LSTMUnit, LSTMUnitOp<float, CPUContext>); OPERATOR_SCHEMA(LSTMUnit) .NumInputs(5) @@ -31,3 +32,4 @@ class GetLSTMUnitGradient : public GradientMakerBase { }; REGISTER_GRADIENT(LSTMUnit, GetLSTMUnitGradient); } +} diff --git a/caffe2/operators/lstm_unit_op_gpu.cu 
b/caffe2/operators/lstm_unit_op_gpu.cu index 73e8f371e2..e772046e6a 100644 --- a/caffe2/operators/lstm_unit_op_gpu.cu +++ b/caffe2/operators/lstm_unit_op_gpu.cu @@ -177,8 +177,10 @@ void LSTMUnitGradient<float, CUDAContext>( } } +namespace { REGISTER_CUDA_OPERATOR(LSTMUnit, LSTMUnitOp<float, CUDAContext>); REGISTER_CUDA_OPERATOR( LSTMUnitGradient, LSTMUnitGradientOp<float, CUDAContext>); } +} diff --git a/caffe2/operators/math_ops.cc b/caffe2/operators/math_ops.cc index 0e0a07a483..e64b0ba453 100644 --- a/caffe2/operators/math_ops.cc +++ b/caffe2/operators/math_ops.cc @@ -11,6 +11,7 @@ struct SqrCPUFunctor { } }; +namespace { REGISTER_CPU_OPERATOR( Sqr, UnaryElementwiseOp<TensorTypes<float>, CPUContext, SqrCPUFunctor>); @@ -99,4 +100,5 @@ class GetPowGradient : public GradientMakerBase { REGISTER_GRADIENT(Pow, GetPowGradient); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/math_ops.cu b/caffe2/operators/math_ops.cu index 28eddb0b50..547e98721c 100644 --- a/caffe2/operators/math_ops.cu +++ b/caffe2/operators/math_ops.cu @@ -11,9 +11,12 @@ struct SqrCUDAFunctor { } }; +namespace { + REGISTER_CUDA_OPERATOR( Sqr, UnaryElementwiseOp<TensorTypes<float>, CUDAContext, SqrCUDAFunctor>); +} REGISTER_CUDA_OPERATOR( Pow, UnaryElementwiseWithArgsOp<TensorTypes<float>, CUDAContext, PowFunctor>); diff --git a/caffe2/operators/matmul_op.cc b/caffe2/operators/matmul_op.cc index f4485ea3f7..eb4f03390e 100644 --- a/caffe2/operators/matmul_op.cc +++ b/caffe2/operators/matmul_op.cc @@ -1,6 +1,7 @@ #include "caffe2/operators/matmul_op.h" namespace caffe2 { +namespace { REGISTER_CPU_OPERATOR(MatMul, MatMulOp<float, CPUContext>); @@ -132,4 +133,5 @@ class GetMatMulGradient : public GradientMakerBase { REGISTER_GRADIENT(MatMul, GetMatMulGradient); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/matmul_op_gpu.cc b/caffe2/operators/matmul_op_gpu.cc index 973bb26282..ac7f44dee2 100644 --- a/caffe2/operators/matmul_op_gpu.cc +++ b/caffe2/operators/matmul_op_gpu.cc @@ -3,7 +3,9 @@ #include "caffe2/core/context_gpu.h" namespace caffe2 { +namespace { REGISTER_CUDA_OPERATOR(MatMul, MatMulOp<float, CUDAContext>); } +} diff --git a/caffe2/operators/max_pool_with_index.cu b/caffe2/operators/max_pool_with_index.cu index b8e6d2b469..321b26a5b5 100644 --- a/caffe2/operators/max_pool_with_index.cu +++ b/caffe2/operators/max_pool_with_index.cu @@ -219,8 +219,6 @@ class GetMaxPoolWithIndexGradient : public GradientMakerBase { REGISTER_GRADIENT(MaxPoolWithIndex, GetMaxPoolWithIndexGradient); -OPERATOR_SCHEMA(MaxPoolWithIndexGradient); - OPERATOR_SCHEMA(MaxPoolWithIndex) .NumInputs(1) .NumOutputs(2) diff --git a/caffe2/operators/multi_class_accuracy_op.cc b/caffe2/operators/multi_class_accuracy_op.cc index 9eda6fbe2c..04630cd31c 100644 --- a/caffe2/operators/multi_class_accuracy_op.cc +++ b/caffe2/operators/multi_class_accuracy_op.cc @@ -52,6 +52,7 @@ bool MultiClassAccuracyOp<float, CPUContext>::RunOnDevice() { return true; } +namespace { REGISTER_CPU_OPERATOR( MultiClassAccuracy, MultiClassAccuracyOp<float, CPUContext>); @@ -83,4 +84,5 @@ and predicted scores of each class for each instance. 
"1-D int tensor (D,) of number of instances for each class in the batch."); SHOULD_NOT_DO_GRADIENT(MultiClassAccuracy); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/multi_class_accuracy_op.cu b/caffe2/operators/multi_class_accuracy_op.cu index f5672146fd..7f3caec530 100644 --- a/caffe2/operators/multi_class_accuracy_op.cu +++ b/caffe2/operators/multi_class_accuracy_op.cu @@ -65,6 +65,8 @@ bool MultiClassAccuracyOp<float, CUDAContext>::RunOnDevice() { return true; } +namespace { REGISTER_CUDA_OPERATOR( MultiClassAccuracy, MultiClassAccuracyOp<float, CUDAContext>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/negative_op.cc b/caffe2/operators/negative_op.cc index fcfe452a9b..ab52344e9b 100644 --- a/caffe2/operators/negative_op.cc +++ b/caffe2/operators/negative_op.cc @@ -13,6 +13,7 @@ struct NegativeCPUFunctor { } }; +namespace { REGISTER_CPU_OPERATOR( Negative, UnaryElementwiseOp< TensorTypes<float, double, int, long>, CPUContext, NegativeCPUFunctor>); @@ -39,4 +40,5 @@ class GetNegativeGradient : public GradientMakerBase { } }; REGISTER_GRADIENT(Negative, GetNegativeGradient); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/negative_op.cu b/caffe2/operators/negative_op.cu index 1bd5cea474..cec9499e3b 100644 --- a/caffe2/operators/negative_op.cu +++ b/caffe2/operators/negative_op.cu @@ -20,8 +20,10 @@ struct NegativeCUDAFunctor { } }; +namespace { REGISTER_CUDA_OPERATOR( Negative, UnaryElementwiseOp< TensorTypes<float, double, int, long>, CUDAContext, NegativeCUDAFunctor>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/order_switch_ops.cc b/caffe2/operators/order_switch_ops.cc index cd046d1dfc..74a205e9dd 100644 --- a/caffe2/operators/order_switch_ops.cc +++ b/caffe2/operators/order_switch_ops.cc @@ -45,6 +45,7 @@ bool NCHW2NHWCOp<float, CPUContext>::RunOnDevice() { } +namespace { REGISTER_CPU_OPERATOR(NHWC2NCHW, NHWC2NCHWOp<float, CPUContext>); REGISTER_CPU_OPERATOR(NCHW2NHWC, NCHW2NHWCOp<float, CPUContext>); @@ -100,4 +101,5 @@ class GetNCHW2NHWCGradient : public GradientMakerBase { } }; REGISTER_GRADIENT(NCHW2NHWC, GetNCHW2NHWCGradient); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/order_switch_ops.cu b/caffe2/operators/order_switch_ops.cu index 2d77b5da85..31e7854ce7 100644 --- a/caffe2/operators/order_switch_ops.cu +++ b/caffe2/operators/order_switch_ops.cu @@ -50,6 +50,8 @@ bool NCHW2NHWCOp<float, CUDAContext>::RunOnDevice() { } +namespace { REGISTER_CUDA_OPERATOR(NHWC2NCHW, NHWC2NCHWOp<float, CUDAContext>); REGISTER_CUDA_OPERATOR(NCHW2NHWC, NCHW2NHWCOp<float, CUDAContext>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/pack_segments.cc b/caffe2/operators/pack_segments.cc index 36df8fd1ee..9c651169a9 100644 --- a/caffe2/operators/pack_segments.cc +++ b/caffe2/operators/pack_segments.cc @@ -2,6 +2,8 @@ namespace caffe2 { +namespace { + REGISTER_CPU_OPERATOR(PackSegments, PackSegmentsOp<CPUContext>); REGISTER_CPU_OPERATOR(UnpackSegments, UnpackSegmentsOp<CPUContext>); @@ -56,4 +58,5 @@ class GetUnpackSegmentsGradient : public GradientMakerBase { } }; REGISTER_GRADIENT(UnpackSegments, GetUnpackSegmentsGradient); +} } // namespace diff --git a/caffe2/operators/pack_segments_op_gpu.cc b/caffe2/operators/pack_segments_op_gpu.cc index 8a30582a59..86a19a94f5 100644 --- a/caffe2/operators/pack_segments_op_gpu.cc +++ b/caffe2/operators/pack_segments_op_gpu.cc @@ -3,8 +3,10 @@ #include "caffe2/operators/pack_segments.h" namespace caffe2 { +namespace { 
REGISTER_CUDA_OPERATOR(PackSegments, GPUFallbackOp<PackSegmentsOp<CPUContext>>); REGISTER_CUDA_OPERATOR( UnpackSegments, GPUFallbackOp<UnpackSegmentsOp<CPUContext>>); } +} diff --git a/caffe2/operators/perplexity_op.cc b/caffe2/operators/perplexity_op.cc index a7c4d52285..2d461158aa 100644 --- a/caffe2/operators/perplexity_op.cc +++ b/caffe2/operators/perplexity_op.cc @@ -21,6 +21,7 @@ bool PerplexityOp<float, CPUContext>::RunOnDevice() { return true; } +namespace { REGISTER_CPU_OPERATOR(Perplexity, PerplexityOp<float, CPUContext>); OPERATOR_SCHEMA(Perplexity).NumInputs(1).NumOutputs(1) @@ -37,4 +38,5 @@ single (float) perplexity value for the batch. "batch"); SHOULD_NOT_DO_GRADIENT(Perplexity); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/perplexity_op.cu b/caffe2/operators/perplexity_op.cu index afb4d3dc27..199b6124db 100644 --- a/caffe2/operators/perplexity_op.cu +++ b/caffe2/operators/perplexity_op.cu @@ -42,5 +42,7 @@ bool PerplexityOp<float, CUDAContext>::RunOnDevice() { return true; } +namespace { REGISTER_CUDA_OPERATOR(Perplexity, PerplexityOp<float, CUDAContext>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/piecewise_linear_transform_op.cc b/caffe2/operators/piecewise_linear_transform_op.cc index 528b33619b..5fa493804c 100644 --- a/caffe2/operators/piecewise_linear_transform_op.cc +++ b/caffe2/operators/piecewise_linear_transform_op.cc @@ -2,6 +2,7 @@ namespace caffe2 { +namespace { REGISTER_CPU_OPERATOR( PiecewiseLinearTransform, PiecewiseLinearTransformOp<float, CPUContext>); @@ -81,4 +82,5 @@ bound. "containing transformed predictions"); SHOULD_NOT_DO_GRADIENT(PiecewiseLinearTransform); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/piecewise_linear_transform_op.cu b/caffe2/operators/piecewise_linear_transform_op.cu index 877b795c19..fe6269f0ac 100644 --- a/caffe2/operators/piecewise_linear_transform_op.cu +++ b/caffe2/operators/piecewise_linear_transform_op.cu @@ -276,8 +276,10 @@ bool PiecewiseLinearTransformOp<float, CUDAContext>::TransformBinary() { return true; } +namespace { REGISTER_CUDA_OPERATOR( PiecewiseLinearTransform, PiecewiseLinearTransformOp<float, CUDAContext>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/pool_gradient_op.cc b/caffe2/operators/pool_gradient_op.cc index 53489e1eed..fb177d0807 100644 --- a/caffe2/operators/pool_gradient_op.cc +++ b/caffe2/operators/pool_gradient_op.cc @@ -315,6 +315,8 @@ bool PoolGradientOp<T, Context, PoolType>::RunOnDeviceWithOrderNHWC() { return true; } +namespace { + REGISTER_CPU_OPERATOR( AveragePoolGradient, PoolGradientOp<float, CPUContext, AveragePool<float>>); @@ -338,3 +340,4 @@ class GetPoolGradient : public GradientMakerBase { REGISTER_GRADIENT(AveragePool, GetPoolGradient); REGISTER_GRADIENT(MaxPool, GetPoolGradient); } +} diff --git a/caffe2/operators/pool_op.cc b/caffe2/operators/pool_op.cc index fa2754a97b..b12c15f146 100644 --- a/caffe2/operators/pool_op.cc +++ b/caffe2/operators/pool_op.cc @@ -459,6 +459,7 @@ bool PoolOp<T, Context, PoolType>::RunOnDeviceWithOrderNHWC() { return true; } +namespace { REGISTER_CPU_OPERATOR( AveragePool, PoolOp<float, CPUContext, AveragePool<float>>); @@ -507,4 +508,5 @@ data into the output blob Y for further processing. "tensor. 
Dimensions will vary based on various kernel, stride, and pad " "sizes."); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/pool_op.cu b/caffe2/operators/pool_op.cu index 9a64524264..03bfa9da0b 100644 --- a/caffe2/operators/pool_op.cu +++ b/caffe2/operators/pool_op.cu @@ -1726,10 +1726,12 @@ bool PoolGradientOp<float, CUDAContext, MaxPool>::RunOnDeviceWithOrderNHWC() { return true; } +namespace { REGISTER_CUDA_OPERATOR(AveragePool, PoolOp<float, CUDAContext, AveragePool>); REGISTER_CUDA_OPERATOR(AveragePoolGradient, PoolGradientOp<float, CUDAContext, AveragePool>); REGISTER_CUDA_OPERATOR(MaxPool, PoolOp<float, CUDAContext, MaxPool>); REGISTER_CUDA_OPERATOR(MaxPoolGradient, PoolGradientOp<float, CUDAContext, MaxPool>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/prelu_op.cc b/caffe2/operators/prelu_op.cc index 2d060eef59..641e880f78 100644 --- a/caffe2/operators/prelu_op.cc +++ b/caffe2/operators/prelu_op.cc @@ -251,6 +251,7 @@ bool PReluGradientOp<float, CPUContext>::RunOnDevice() { return true; } +namespace { REGISTER_CPU_OPERATOR(PRelu, PReluOp<float, CPUContext>); REGISTER_CPU_OPERATOR(PReluGradient, PReluGradientOp<float, CPUContext>); @@ -295,4 +296,5 @@ class GetPReluGradient : public GradientMakerBase { }; REGISTER_GRADIENT(PRelu, GetPReluGradient); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/prelu_op.cu b/caffe2/operators/prelu_op.cu index b14393d81b..4d1f177f3a 100644 --- a/caffe2/operators/prelu_op.cu +++ b/caffe2/operators/prelu_op.cu @@ -276,6 +276,8 @@ bool PReluGradientOp<float, CUDAContext>::RunOnDevice() { return true; } +namespace { REGISTER_CUDA_OPERATOR(PRelu, PReluOp<float, CUDAContext>); REGISTER_CUDA_OPERATOR(PReluGradient, PReluGradientOp<float, CUDAContext>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/recurrent_network_op.cc b/caffe2/operators/recurrent_network_op.cc index dd4fded621..85520eaa7d 100644 --- a/caffe2/operators/recurrent_network_op.cc +++ b/caffe2/operators/recurrent_network_op.cc @@ -4,6 +4,7 @@ namespace caffe2 { CAFFE_KNOWN_TYPE(detail::ScratchWorkspaces); +namespace { REGISTER_CPU_OPERATOR(RecurrentNetwork, RecurrentNetworkOp<float, CPUContext>); OPERATOR_SCHEMA(RecurrentNetwork) .NumInputs(1, INT_MAX) @@ -87,6 +88,7 @@ struct GetRecurrentNetworkGradient : public GradientMakerBase { }; REGISTER_GRADIENT(RecurrentNetwork, GetRecurrentNetworkGradient); +} namespace detail { void extractLinks( diff --git a/caffe2/operators/recurrent_network_op_gpu.cc b/caffe2/operators/recurrent_network_op_gpu.cc index 5093cccd57..a658e6c2af 100644 --- a/caffe2/operators/recurrent_network_op_gpu.cc +++ b/caffe2/operators/recurrent_network_op_gpu.cc @@ -2,6 +2,7 @@ #include "caffe2/operators/recurrent_network_op.h" namespace caffe2 { +namespace { REGISTER_CUDA_OPERATOR( RecurrentNetwork, RecurrentNetworkOp<float, CUDAContext>); @@ -13,3 +14,4 @@ REGISTER_CUDA_OPERATOR( rnn_internal_accumulate_gradient_input, AccumulateInputGradientOp<float, CUDAContext>); } +} diff --git a/caffe2/operators/reduction_ops.cc b/caffe2/operators/reduction_ops.cc index 0228c62bdd..5828d4af91 100644 --- a/caffe2/operators/reduction_ops.cc +++ b/caffe2/operators/reduction_ops.cc @@ -1,6 +1,7 @@ #include "caffe2/operators/reduction_ops.h" namespace caffe2 { +namespace { REGISTER_CPU_OPERATOR(SumElements, SumElementsOp<float, CPUContext>); REGISTER_CPU_OPERATOR(SumSqrElements, SumSqrElementsOp<float, CPUContext>); @@ -73,8 +74,6 @@ class GetRowwiseMaxGradient : public GradientMakerBase { }; 
REGISTER_GRADIENT(RowwiseMax, GetRowwiseMaxGradient); -OPERATOR_SCHEMA(ColwiseMaxGradient); - OPERATOR_SCHEMA(ColwiseMax) .NumInputs(1) .NumOutputs(1) @@ -97,6 +96,7 @@ class GetColwiseMaxGradient : public GradientMakerBase { } }; REGISTER_GRADIENT(ColwiseMax, GetColwiseMaxGradient); +} // namespace template <typename T, class Context> bool SumElementsGradientOp<T, Context>::RunOnDevice() { diff --git a/caffe2/operators/reduction_ops.cu b/caffe2/operators/reduction_ops.cu index 83b87f4246..902a9d5987 100644 --- a/caffe2/operators/reduction_ops.cu +++ b/caffe2/operators/reduction_ops.cu @@ -5,6 +5,7 @@ #include <cub/cub.cuh> namespace caffe2 { +namespace { REGISTER_CUDA_OPERATOR(SumElements, SumElementsOp<float, CUDAContext>); REGISTER_CUDA_OPERATOR(SumSqrElements, SumSqrElementsOp<float, CUDAContext>); @@ -71,6 +72,7 @@ __global__ void colwise_max_gradient_kernel( } } } +} // namespace template <> bool SumElementsGradientOp<float, CUDAContext>::RunOnDevice() { diff --git a/caffe2/operators/relu_op.cc b/caffe2/operators/relu_op.cc index da71680933..5551262229 100644 --- a/caffe2/operators/relu_op.cc +++ b/caffe2/operators/relu_op.cc @@ -51,6 +51,7 @@ bool ReluGradientOp<float, CPUContext>::RunOnDevice() { return true; } +namespace { REGISTER_CPU_OPERATOR(Relu, ReluOp<float, CPUContext>); REGISTER_CPU_OPERATOR(ReluGradient, ReluGradientOp<float, CPUContext>); @@ -91,4 +92,5 @@ class GetReluGradient : public GradientMakerBase { REGISTER_GRADIENT(Relu, GetReluGradient); REGISTER_GRADIENT(ReluFp16, GetReluGradient); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/relu_op.cu b/caffe2/operators/relu_op.cu index 382fae525c..5e3d405e4f 100644 --- a/caffe2/operators/relu_op.cu +++ b/caffe2/operators/relu_op.cu @@ -45,6 +45,8 @@ bool ReluGradientOp<float, CUDAContext>::RunOnDevice() { return true; } +namespace { REGISTER_CUDA_OPERATOR(Relu, ReluOp<float, CUDAContext>); REGISTER_CUDA_OPERATOR(ReluGradient, ReluGradientOp<float, CUDAContext>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/relu_op_fp16.cu b/caffe2/operators/relu_op_fp16.cu index 9f3354f801..4a08dd7d89 100644 --- a/caffe2/operators/relu_op_fp16.cu +++ b/caffe2/operators/relu_op_fp16.cu @@ -80,11 +80,10 @@ bool ReluGradientOp<float16, CUDAContext>::RunOnDevice() { return true; } -OPERATOR_SCHEMA(ReluFp16); -OPERATOR_SCHEMA(ReluFp16Gradient); - +namespace { REGISTER_CUDA_OPERATOR(ReluFp16, ReluOp<float16, CUDAContext>); REGISTER_CUDA_OPERATOR(ReluFp16Gradient, ReluGradientOp<float16, CUDAContext>); +} // namespace } // namespace caffe2 #endif // CAFFE_HAS_CUDA_FP16 diff --git a/caffe2/operators/reshape_op.cc b/caffe2/operators/reshape_op.cc index 67288bac04..0837495d9c 100644 --- a/caffe2/operators/reshape_op.cc +++ b/caffe2/operators/reshape_op.cc @@ -1,6 +1,7 @@ #include "caffe2/operators/reshape_op.h" namespace caffe2 { +namespace { REGISTER_CPU_OPERATOR(Reshape, ReshapeOp<float, CPUContext>); @@ -139,4 +140,6 @@ class GetReshapeGradient : public GradientMakerBase { REGISTER_GRADIENT(Reshape, GetReshapeGradient); +} // namespace + } // namespace caffe2 diff --git a/caffe2/operators/reshape_op_gpu.cc b/caffe2/operators/reshape_op_gpu.cc index 8704655579..1253e53ce3 100644 --- a/caffe2/operators/reshape_op_gpu.cc +++ b/caffe2/operators/reshape_op_gpu.cc @@ -3,6 +3,9 @@ namespace caffe2 { +namespace { + REGISTER_CUDA_OPERATOR(Reshape, ReshapeOp<float, CUDAContext>); +} // namespace } // namespace caffe2 diff --git a/caffe2/operators/resize_op.cc b/caffe2/operators/resize_op.cc index 
index 77b1c4d4a9..6892976bf4 100644
--- a/caffe2/operators/resize_op.cc
+++ b/caffe2/operators/resize_op.cc
@@ -135,6 +135,8 @@ bool ResizeNearestGradientOp<float, CPUContext>::RunOnDevice() {
   return true;
 }
+namespace {
+
 REGISTER_CPU_OPERATOR(ResizeNearest, ResizeNearestOp<float, CPUContext>);
 REGISTER_CPU_OPERATOR(ResizeNearestGradient, ResizeNearestGradientOp<float, CPUContext>);
@@ -173,4 +175,5 @@ class GetResizeNearestGradient : public GradientMakerBase {
 };
 REGISTER_GRADIENT(ResizeNearest, GetResizeNearestGradient);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/resize_op.cu b/caffe2/operators/resize_op.cu
index 1b48ade776..6dddc9247d 100644
--- a/caffe2/operators/resize_op.cu
+++ b/caffe2/operators/resize_op.cu
@@ -135,8 +135,10 @@ bool ResizeNearestGradientOp<float, CUDAContext>::RunOnDevice() {
   return true;
 }
+namespace {
 REGISTER_CUDA_OPERATOR(ResizeNearest, ResizeNearestOp<float, CUDAContext>);
 REGISTER_CUDA_OPERATOR(
     ResizeNearestGradient,
     ResizeNearestGradientOp<float, CUDAContext>);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/reverse_packed_segs_op.cc b/caffe2/operators/reverse_packed_segs_op.cc
index 0191c61f8a..7a1648f334 100644
--- a/caffe2/operators/reverse_packed_segs_op.cc
+++ b/caffe2/operators/reverse_packed_segs_op.cc
@@ -1,6 +1,7 @@
 #include "caffe2/operators/reverse_packed_segs_op.h"
 namespace caffe2 {
+namespace {
 REGISTER_CPU_OPERATOR(ReversePackedSegs, ReversePackedSegsOp<CPUContext>);
 OPERATOR_SCHEMA(ReversePackedSegs)
@@ -30,4 +31,5 @@ class GetReversePackedSegsGradient : public GradientMakerBase {
   }
 };
 REGISTER_GRADIENT(ReversePackedSegs, GetReversePackedSegsGradient);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/reverse_packed_segs_op.cu b/caffe2/operators/reverse_packed_segs_op.cu
index fdcffc66c2..733eba8aef 100644
--- a/caffe2/operators/reverse_packed_segs_op.cu
+++ b/caffe2/operators/reverse_packed_segs_op.cu
@@ -87,5 +87,8 @@ void ReversePackedSegsOp<CUDAContext>::DoRunWithLengthType() {
       rev_data_ptr);
 }
+namespace {
 REGISTER_CUDA_OPERATOR(ReversePackedSegs, ReversePackedSegsOp<CUDAContext>);
+
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/roi_pool_op.cc b/caffe2/operators/roi_pool_op.cc
index bb4c67e82c..de24637b9d 100644
--- a/caffe2/operators/roi_pool_op.cc
+++ b/caffe2/operators/roi_pool_op.cc
@@ -113,6 +113,8 @@ bool RoIPoolOp<float, CPUContext>::RunOnDevice() {
   return true;
 }
+namespace {
+
 REGISTER_CPU_OPERATOR(RoIPool, RoIPoolOp<float, CPUContext>);
 REGISTER_CPU_OPERATOR(RoIPoolGradient, RoIPoolGradientOp<float, CPUContext>);
@@ -203,4 +205,5 @@ class GetRoIPoolGradient : public GradientMakerBase {
 REGISTER_GRADIENT(RoIPool, GetRoIPoolGradient);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/roi_pool_op.cu b/caffe2/operators/roi_pool_op.cu
index c591109070..2a9d750454 100644
--- a/caffe2/operators/roi_pool_op.cu
+++ b/caffe2/operators/roi_pool_op.cu
@@ -202,7 +202,10 @@ bool RoIPoolGradientOp<float, CUDAContext>::RunOnDevice() {
   return true;
 }
+namespace {
+
 REGISTER_CUDA_OPERATOR(RoIPool, RoIPoolOp<float, CUDAContext>);
 REGISTER_CUDA_OPERATOR(RoIPoolGradient, RoIPoolGradientOp<float, CUDAContext>);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/segment_reduction_op_gpu.cu b/caffe2/operators/segment_reduction_op_gpu.cu
index f4be21ae38..b491e8505e 100644
--- a/caffe2/operators/segment_reduction_op_gpu.cu
+++ b/caffe2/operators/segment_reduction_op_gpu.cu
@@ -185,29 +185,27 @@ class ReduceDimsGradientOp : public Operator<CUDAContext> {
   Tensor<CPUContext> shape_;
 };
-REGISTER_CUDA_OPERATOR_STR("ReduceFrontSum", ReduceDimsOp<float, CUDAContext>);
-REGISTER_CUDA_OPERATOR_STR(
-    "ReduceFrontSumGradient",
+REGISTER_CUDA_OPERATOR(ReduceFrontSum, ReduceDimsOp<float, CUDAContext>);
+REGISTER_CUDA_OPERATOR(
+    ReduceFrontSumGradient,
     ReduceDimsGradientOp<float, CUDAContext>);
-REGISTER_CUDA_OPERATOR_STR(
-    "ReduceFrontMean",
+REGISTER_CUDA_OPERATOR(
+    ReduceFrontMean,
     ReduceDimsOp<float, CUDAContext, true, true>);
-REGISTER_CUDA_OPERATOR_STR(
-    "ReduceFrontMeanGradient",
+REGISTER_CUDA_OPERATOR(
+    ReduceFrontMeanGradient,
     ReduceDimsGradientOp<float, CUDAContext, true, true>);
-REGISTER_CUDA_OPERATOR_STR(
-    "ReduceBackSum",
-    ReduceDimsOp<float, CUDAContext, false>);
-REGISTER_CUDA_OPERATOR_STR(
-    "ReduceBackSumGradient",
+REGISTER_CUDA_OPERATOR(ReduceBackSum, ReduceDimsOp<float, CUDAContext, false>);
+REGISTER_CUDA_OPERATOR(
+    ReduceBackSumGradient,
     ReduceDimsGradientOp<float, CUDAContext, false, false>);
-REGISTER_CUDA_OPERATOR_STR(
-    "ReduceBackMean",
+REGISTER_CUDA_OPERATOR(
+    ReduceBackMean,
     ReduceDimsOp<float, CUDAContext, false, true>);
-REGISTER_CUDA_OPERATOR_STR(
-    "ReduceBackMeanGradient",
+REGISTER_CUDA_OPERATOR(
+    ReduceBackMeanGradient,
     ReduceDimsGradientOp<float, CUDAContext, false, true>);
 namespace {
@@ -670,23 +668,23 @@ class CUDAUnsortedSegmentSumOp : public Operator<CUDAContext> {
   Tensor<CUDAContext> scaling_factors_; // for mean
 };
-REGISTER_CUDA_OPERATOR_STR(
-    "LengthsSum",
+REGISTER_CUDA_OPERATOR(
+    LengthsSum,
     CUDASparseLengthsSumOp<float, CUDAContext, false>);
-REGISTER_CUDA_OPERATOR_STR(
-    "SparseLengthsSum",
+REGISTER_CUDA_OPERATOR(
+    SparseLengthsSum,
     CUDASparseLengthsSumOp<float, CUDAContext, true>);
-REGISTER_CUDA_OPERATOR_STR(
-    "LengthsSumGradient",
+REGISTER_CUDA_OPERATOR(
+    LengthsSumGradient,
     CUDASparseLengthsSumGradientOp<float, CUDAContext>);
-REGISTER_CUDA_OPERATOR_STR(
-    "SparseLengthsSumGradient",
+REGISTER_CUDA_OPERATOR(
+    SparseLengthsSumGradient,
     CUDASparseLengthsSumGradientOp<float, CUDAContext>);
-REGISTER_CUDA_OPERATOR_STR(
-    "UnsortedSegmentSum",
+REGISTER_CUDA_OPERATOR(
+    UnsortedSegmentSum,
     CUDAUnsortedSegmentSumOp<float, int, false>);
-REGISTER_CUDA_OPERATOR_STR(
-    "UnsortedSegmentMean",
+REGISTER_CUDA_OPERATOR(
+    UnsortedSegmentMean,
     CUDAUnsortedSegmentSumOp<float, int, true>);
 } // namespace caffe2
diff --git a/caffe2/operators/sigmoid_op.cc b/caffe2/operators/sigmoid_op.cc
index cd8b2681a9..2734f003f9 100644
--- a/caffe2/operators/sigmoid_op.cc
+++ b/caffe2/operators/sigmoid_op.cc
@@ -21,6 +21,7 @@ struct SigmoidGradientCPUFunctor {
   }
 };
+namespace {
 REGISTER_CPU_OPERATOR(
     Sigmoid, UnaryElementwiseOp<
         TensorTypes<float>, CPUContext, SigmoidCPUFunctor>);
@@ -64,4 +65,5 @@ class GetSigmoidGradient : public GradientMakerBase {
   }
 };
 REGISTER_GRADIENT(Sigmoid, GetSigmoidGradient);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/sigmoid_op.cu b/caffe2/operators/sigmoid_op.cu
index cacf6eb05c..06d7a7ffc9 100644
--- a/caffe2/operators/sigmoid_op.cu
+++ b/caffe2/operators/sigmoid_op.cu
@@ -40,6 +40,7 @@ struct SigmoidGradientCUDAFunctor {
   }
 };
+namespace {
 REGISTER_CUDA_OPERATOR(
     Sigmoid,
     UnaryElementwiseOp<TensorTypes<float>, CUDAContext, SigmoidCUDAFunctor>);
@@ -47,4 +48,5 @@ REGISTER_CUDA_OPERATOR(
     SigmoidGradient,
     BinaryElementwiseOp<
         TensorTypes<float>, CUDAContext,
         WithoutBroadcast<SigmoidGradientCUDAFunctor>>);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/sin_op.cc b/caffe2/operators/sin_op.cc
index 8aa9cf77ca..331ab3300a 100644
--- a/caffe2/operators/sin_op.cc
+++ b/caffe2/operators/sin_op.cc
@@ -21,6 +21,7 @@ struct SinGradientCPUFunctor {
   }
 };
+namespace {
 REGISTER_CPU_OPERATOR(
     Sin, UnaryElementwiseOp<TensorTypes<float>, CPUContext, SinCPUFunctor>);
@@ -54,4 +55,5 @@ class GetSinGradient : public GradientMakerBase {
   }
 };
 REGISTER_GRADIENT(Sin, GetSinGradient);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/sin_op.cu b/caffe2/operators/sin_op.cu
index 59849dcd29..f393cb6725 100644
--- a/caffe2/operators/sin_op.cu
+++ b/caffe2/operators/sin_op.cu
@@ -49,6 +49,7 @@ struct SinGradientCUDAFunctor {
   }
 };
+namespace {
 REGISTER_CUDA_OPERATOR(
     Sin, UnaryElementwiseOp<TensorTypes<float>, CUDAContext, SinCUDAFunctor>);
@@ -58,4 +59,5 @@ REGISTER_CUDA_OPERATOR(
     TensorTypes<float>, CUDAContext,
     WithoutBroadcast<SinGradientCUDAFunctor>>);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/softmax_op.cc b/caffe2/operators/softmax_op.cc
index a8e9550648..fedad4e36a 100644
--- a/caffe2/operators/softmax_op.cc
+++ b/caffe2/operators/softmax_op.cc
@@ -75,6 +75,7 @@ bool SoftmaxGradientOp<float, CPUContext>::RunOnDevice() {
   return true;
 }
+namespace {
 REGISTER_CPU_OPERATOR(Softmax, SoftmaxOp<float, CPUContext>);
 REGISTER_CPU_OPERATOR(SoftmaxGradient, SoftmaxGradientOp<float, CPUContext>);
@@ -107,4 +108,5 @@ class GetSoftmaxGradient : public GradientMakerBase {
 REGISTER_GRADIENT(Softmax, GetSoftmaxGradient);
 REGISTER_GRADIENT(SoftmaxFp16, GetSoftmaxGradient);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/softmax_ops.cu b/caffe2/operators/softmax_ops.cu
index e7e0a55f17..5391df7423 100644
--- a/caffe2/operators/softmax_ops.cu
+++ b/caffe2/operators/softmax_ops.cu
@@ -763,6 +763,7 @@ bool SoftmaxGradientOp<float, CUDAContext>::RunOnDevice() {
   return true;
 }
+namespace {
 REGISTER_CUDA_OPERATOR(SoftmaxWithLoss, SoftmaxWithLossOp<float, CUDAContext>);
 REGISTER_CUDA_OPERATOR(SoftmaxWithLossGradient,
@@ -776,4 +777,5 @@ REGISTER_CUDA_OPERATOR(
 REGISTER_CUDA_OPERATOR(Softmax, SoftmaxOp<float, CUDAContext>);
 REGISTER_CUDA_OPERATOR(SoftmaxGradient, SoftmaxGradientOp<float, CUDAContext>);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/softplus_op.cc b/caffe2/operators/softplus_op.cc
index 20c2115c3d..47e576216e 100644
--- a/caffe2/operators/softplus_op.cc
+++ b/caffe2/operators/softplus_op.cc
@@ -35,6 +35,7 @@ bool SoftplusGradientOp<float, CPUContext>::RunOnDevice() {
   return true;
 }
+namespace {
 REGISTER_CPU_OPERATOR(Softplus, SoftplusOp<float, CPUContext>);
 REGISTER_CPU_OPERATOR(SoftplusGradient, SoftplusGradientOp<float, CPUContext>);
@@ -70,4 +71,5 @@ class GetSoftplusGradient : public GradientMakerBase {
 };
 REGISTER_GRADIENT(Softplus, GetSoftplusGradient);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/softplus_op.cu b/caffe2/operators/softplus_op.cu
index e733c47a6b..84d0d846dd 100644
--- a/caffe2/operators/softplus_op.cu
+++ b/caffe2/operators/softplus_op.cu
@@ -52,8 +52,10 @@ bool SoftplusGradientOp<float, CUDAContext>::RunOnDevice() {
   return true;
 }
+namespace {
 REGISTER_CUDA_OPERATOR(Softplus, SoftplusOp<float, CUDAContext>);
 REGISTER_CUDA_OPERATOR(
     SoftplusGradient, SoftplusGradientOp<float, CUDAContext>);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/softsign_op.cc b/caffe2/operators/softsign_op.cc
index 2cb31a9d24..1f0adb2c7d 100644
--- a/caffe2/operators/softsign_op.cc
+++ b/caffe2/operators/softsign_op.cc
@@ -22,6 +22,7 @@ struct SoftsignGradientCPUFunctor {
   }
 };
+namespace {
 REGISTER_CPU_OPERATOR(
     Softsign,
     UnaryElementwiseOp<TensorTypes<float>, CPUContext, SoftsignCPUFunctor>);
@@ -83,4 +84,5 @@ class GetSoftsignGradient : public GradientMakerBase {
 REGISTER_GRADIENT(Softsign, GetSoftsignGradient);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/softsign_op.cu b/caffe2/operators/softsign_op.cu
index 8fec50c62a..e7cd2e1d48 100644
--- a/caffe2/operators/softsign_op.cu
+++ b/caffe2/operators/softsign_op.cu
@@ -46,10 +46,12 @@ struct SoftsignGradientCUDAFunctor {
   }
 };
+namespace {
 REGISTER_CUDA_OPERATOR(
     Softsign,
     UnaryElementwiseOp<TensorTypes<float>, CUDAContext, SoftsignCUDAFunctor>);
 REGISTER_CUDA_OPERATOR(
     SoftsignGradient,
     BinaryElementwiseOp<TensorTypes<float>, CUDAContext,
                         WithoutBroadcast<SoftsignGradientCUDAFunctor>>);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/space_batch_op.cc b/caffe2/operators/space_batch_op.cc
index 56d2aebd24..29e81b14e5 100644
--- a/caffe2/operators/space_batch_op.cc
+++ b/caffe2/operators/space_batch_op.cc
@@ -2,6 +2,8 @@
 namespace caffe2 {
+namespace {
+
 REGISTER_CPU_OPERATOR(SpaceToBatch, SpaceToBatchOp<CPUContext>);
 OPERATOR_SCHEMA(SpaceToBatch).NumInputs(1).NumOutputs(1).SetDoc(R"DOC(
@@ -47,3 +49,4 @@ class GetBatchToSpaceGradient : public GradientMakerBase {
 REGISTER_GRADIENT(SpaceToBatch, GetSpaceToBatchGradient);
 REGISTER_GRADIENT(BatchToSpace, GetBatchToSpaceGradient);
 }
+}
diff --git a/caffe2/operators/space_batch_op_gpu.cu b/caffe2/operators/space_batch_op_gpu.cu
index 862440907f..45df73c094 100644
--- a/caffe2/operators/space_batch_op_gpu.cu
+++ b/caffe2/operators/space_batch_op_gpu.cu
@@ -173,7 +173,9 @@ void batchToSpace(
       output->mutable_data<float>());
 }
+namespace {
 REGISTER_CUDA_OPERATOR(SpaceToBatch, SpaceToBatchOp<CUDAContext>);
 REGISTER_CUDA_OPERATOR(BatchToSpace, BatchToSpaceOp<CUDAContext>);
+}
 }
diff --git a/caffe2/operators/spatial_batch_norm_op_cudnn.cc b/caffe2/operators/spatial_batch_norm_op_cudnn.cc
index 16f9c96068..3791eef2fd 100644
--- a/caffe2/operators/spatial_batch_norm_op_cudnn.cc
+++ b/caffe2/operators/spatial_batch_norm_op_cudnn.cc
@@ -333,6 +333,7 @@ bool CudnnSpatialBNGradientOp::RunOnDevice() {
   return true;
 }
+namespace {
 // Since there is no default implementation for spatial batch normalization,
 // we will register the cudnn version as the default as well.
 REGISTER_CUDA_OPERATOR(SpatialBN, CudnnSpatialBNOp);
@@ -340,4 +341,5 @@ REGISTER_CUDA_OPERATOR(SpatialBNGradient, CudnnSpatialBNGradientOp);
 REGISTER_CUDNN_OPERATOR(SpatialBN, CudnnSpatialBNOp);
 REGISTER_CUDNN_OPERATOR(SpatialBNGradient, CudnnSpatialBNGradientOp);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/stats_ops.cc b/caffe2/operators/stats_ops.cc
index 74debef0e4..0a2732a773 100644
--- a/caffe2/operators/stats_ops.cc
+++ b/caffe2/operators/stats_ops.cc
@@ -5,6 +5,7 @@
 #include "caffe2/core/tensor.h"
 namespace caffe2 {
+namespace {
 class StatRegistryCreateOp : public Operator<CPUContext> {
  public:
@@ -205,6 +206,7 @@ OPERATOR_SCHEMA(StatRegistryExport)
     .Arg(
         "reset",
         "(default true) Whether to atomically reset the counters afterwards.");
+}
 OPERATOR_SCHEMA(TimerBegin)
     .NumInputs(0)
diff --git a/caffe2/operators/summarize_op.cc b/caffe2/operators/summarize_op.cc
index 51d80ae59b..cc9acde0dc 100644
--- a/caffe2/operators/summarize_op.cc
+++ b/caffe2/operators/summarize_op.cc
@@ -42,6 +42,7 @@ bool SummarizeOp<float, CPUContext>::RunOnDevice() {
   return true;
 }
+namespace {
 REGISTER_CPU_OPERATOR(Summarize, SummarizeOp<float, CPUContext>);
 // Input: X; output: if set, a summarized Tensor of shape 4, with the values
@@ -62,4 +63,5 @@ greater than 0, the values are written to a log file in the root folder.
         "max, mean and standard deviation");
 SHOULD_NOT_DO_GRADIENT(Summarize);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/summarize_op.cu b/caffe2/operators/summarize_op.cu
index 89dd4c0100..0cad7e61ed 100644
--- a/caffe2/operators/summarize_op.cu
+++ b/caffe2/operators/summarize_op.cu
@@ -106,5 +106,7 @@ bool SummarizeOp<float, CUDAContext>::RunOnDevice() {
   return true;
 }
+namespace {
 REGISTER_CUDA_OPERATOR(Summarize, SummarizeOp<float, CUDAContext>);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/tanh_op.cc b/caffe2/operators/tanh_op.cc
index 421c605c39..17fa249692 100644
--- a/caffe2/operators/tanh_op.cc
+++ b/caffe2/operators/tanh_op.cc
@@ -28,6 +28,7 @@ struct TanhGradientCPUFunctor {
   }
 };
+namespace {
 REGISTER_CPU_OPERATOR(
     Tanh, UnaryElementwiseOp<TensorTypes<float>, CPUContext, TanhCPUFunctor>);
 REGISTER_CPU_OPERATOR(
@@ -63,4 +64,5 @@ class GetTanhGradient : public GradientMakerBase {
   }
 };
 REGISTER_GRADIENT(Tanh, GetTanhGradient);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/tanh_op.cu b/caffe2/operators/tanh_op.cu
index 0feeb6c4b1..578c2f17de 100644
--- a/caffe2/operators/tanh_op.cu
+++ b/caffe2/operators/tanh_op.cu
@@ -43,10 +43,12 @@ struct TanhGradientCUDAFunctor {
   }
 };
+namespace {
 REGISTER_CUDA_OPERATOR(
     Tanh, UnaryElementwiseOp<TensorTypes<float>, CUDAContext, TanhCUDAFunctor>);
 REGISTER_CUDA_OPERATOR(
     TanhGradient, BinaryElementwiseOp<
         TensorTypes<float>, CUDAContext,
         WithoutBroadcast<TanhGradientCUDAFunctor>>);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/tensor_protos_db_input.cc b/caffe2/operators/tensor_protos_db_input.cc
index c102fad535..2d18053062 100644
--- a/caffe2/operators/tensor_protos_db_input.cc
+++ b/caffe2/operators/tensor_protos_db_input.cc
@@ -1,6 +1,7 @@
 #include "caffe2/operators/tensor_protos_db_input.h"
 namespace caffe2 {
+namespace {
 REGISTER_CPU_OPERATOR(TensorProtosDBInput, TensorProtosDBInput<CPUContext>);
 OPERATOR_SCHEMA(TensorProtosDBInput)
@@ -29,4 +30,5 @@ corresponding index in the TensorProtos objects in the DB.
         "'batch_size' argument of the operator");
 NO_GRADIENT(TensorProtosDBInput);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/tensor_protos_db_input_gpu.cc b/caffe2/operators/tensor_protos_db_input_gpu.cc
index d932b50e0b..816eabe64a 100644
--- a/caffe2/operators/tensor_protos_db_input_gpu.cc
+++ b/caffe2/operators/tensor_protos_db_input_gpu.cc
@@ -3,5 +3,7 @@
 #include "caffe2/operators/tensor_protos_db_input.h"
 namespace caffe2 {
+namespace {
 REGISTER_CUDA_OPERATOR(TensorProtosDBInput, TensorProtosDBInput<CUDAContext>);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/tile_op.cc b/caffe2/operators/tile_op.cc
index 55480fa4f3..6d83d0f1e2 100644
--- a/caffe2/operators/tile_op.cc
+++ b/caffe2/operators/tile_op.cc
@@ -1,6 +1,7 @@
 #include "caffe2/operators/tile_op.h"
 namespace caffe2 {
+namespace {
 REGISTER_CPU_OPERATOR(Tile, TileOp<CPUContext>);
 REGISTER_CPU_OPERATOR(TileGradient, TileGradientOp<float, CPUContext>);
@@ -69,4 +70,6 @@ class GetTileGradient : public GradientMakerBase {
 REGISTER_GRADIENT(Tile, GetTileGradient);
+} // namespace
+
 } // namespace caffe2
diff --git a/caffe2/operators/tile_op.cu b/caffe2/operators/tile_op.cu
index cfd4e52c97..70de612a1d 100644
--- a/caffe2/operators/tile_op.cu
+++ b/caffe2/operators/tile_op.cu
@@ -86,6 +86,8 @@ void TileGradientOp<float, CUDAContext>::DoTileGradient(
       reinterpret_cast<float*>(output_data));
 }
+namespace {
 REGISTER_CUDA_OPERATOR(Tile, TileOp<CUDAContext>);
 REGISTER_CUDA_OPERATOR(TileGradient, TileGradientOp<float, CUDAContext>);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/top_k.cc b/caffe2/operators/top_k.cc
index ecbd64beda..c8a1c44e16 100644
--- a/caffe2/operators/top_k.cc
+++ b/caffe2/operators/top_k.cc
@@ -154,6 +154,8 @@ bool TopKGradientOp<T, Context>::RunOnDevice() {
   return true;
 }
+namespace {
+
 REGISTER_CPU_OPERATOR(TopK, TopKOp<float, CPUContext>);
 REGISTER_CPU_OPERATOR(TopKGradient, TopKGradientOp<float, CPUContext>);
@@ -228,4 +230,5 @@ class GetTopKGradient : public GradientMakerBase {
 REGISTER_GRADIENT(TopK, GetTopKGradient);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/top_k.cu b/caffe2/operators/top_k.cu
index e87714ddcd..217c9df14e 100644
--- a/caffe2/operators/top_k.cu
+++ b/caffe2/operators/top_k.cu
@@ -150,6 +150,8 @@ bool TopKOp<float, CUDAContext>::RunOnDevice() {
   return true;
 }
+namespace {
 REGISTER_CUDA_OPERATOR(TopK, TopKOp<float, CUDAContext>);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/transpose_op.cc b/caffe2/operators/transpose_op.cc
index 1a853ebf36..2a14977cd9 100644
--- a/caffe2/operators/transpose_op.cc
+++ b/caffe2/operators/transpose_op.cc
@@ -93,6 +93,7 @@ bool TransposeOp<CPUContext>::DoRunWithType() {
   return true;
 }
+namespace {
 REGISTER_CPU_OPERATOR(Transpose, TransposeOp<CPUContext>);
 OPERATOR_SCHEMA(Transpose)
@@ -165,4 +166,5 @@ class GetTransposeGradient : public GradientMakerBase {
   }
 };
 REGISTER_GRADIENT(Transpose, GetTransposeGradient);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/transpose_op_cudnn.cc b/caffe2/operators/transpose_op_cudnn.cc
index e750ba5cdf..e5cd284a7b 100644
--- a/caffe2/operators/transpose_op_cudnn.cc
+++ b/caffe2/operators/transpose_op_cudnn.cc
@@ -136,6 +136,8 @@ class CuDNNTransposeOp final : public Operator<CUDAContext> {
   std::vector<TIndex> new_dims_;
 };
+namespace {
 REGISTER_CUDNN_OPERATOR(Transpose, CuDNNTransposeOp);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/tt_linear_op.cc b/caffe2/operators/tt_linear_op.cc
index c49762ee21..ba1322fe60 100644
--- a/caffe2/operators/tt_linear_op.cc
+++ b/caffe2/operators/tt_linear_op.cc
@@ -59,8 +59,6 @@ low-rank decomposition and the speed of the computation.
         "Output tensor from previous layer with size (M x N), "
         "where M is the batch size and N is the output size.");
-OPERATOR_SCHEMA(TTLinearGradient);
-
 GRADIENT_NOT_IMPLEMENTED_YET(TT);
 } // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/utility_ops.cc b/caffe2/operators/utility_ops.cc
index d8164b5e3b..abdc22fe27 100644
--- a/caffe2/operators/utility_ops.cc
+++ b/caffe2/operators/utility_ops.cc
@@ -51,6 +51,8 @@ void UniqueOp<CPUContext>::DoRun() {
   }
 }
+namespace {
+
 REGISTER_CPU_OPERATOR(WallClockTime, WallClockTimeOp<CPUContext>);
 REGISTER_CPU_OPERATOR(Print, PrintOp<CPUContext>);
 REGISTER_CPU_OPERATOR(Flatten, FlattenOp<CPUContext>);
@@ -680,8 +682,6 @@ Example:
     .Arg("ends", "List of ending indices")
     .Output(0, "output", "Sliced data tensor.");
-OPERATOR_SCHEMA(SliceGradient);
-
 OPERATOR_SCHEMA(Squeeze)
     .NumInputs(1)
     .NumOutputs(1)
@@ -1001,6 +1001,8 @@ SHOULD_NOT_DO_GRADIENT(GatherRangesOp);
 SHOULD_NOT_DO_GRADIENT(LengthsGather);
 SHOULD_NOT_DO_GRADIENT(AccumulateHistogram);
+} // namespace
+
 template <typename T, class Context>
 bool MaxOp<T, Context>::Compute() {
   auto& input0 = Input(0);
diff --git a/caffe2/operators/utility_ops.cu b/caffe2/operators/utility_ops.cu
index 8897c80329..817304d3a0 100644
--- a/caffe2/operators/utility_ops.cu
+++ b/caffe2/operators/utility_ops.cu
@@ -726,7 +726,9 @@ void UniqueOp<CUDAContext>::DoRun() {
       order2.data(), order1.data(), remapping, N, K);
   }
 }
+namespace {
 REGISTER_CUDA_OPERATOR(Unique, UniqueOp<CUDAContext>);
+} // namespace
 #endif // THRUST_VERSION >= 100800
 REGISTER_CUDA_OPERATOR(Size, SizeOp<CUDAContext>);
diff --git a/caffe2/operators/utility_ops_gpu.cc b/caffe2/operators/utility_ops_gpu.cc
index aa28140bdf..7d41fa25a8 100644
--- a/caffe2/operators/utility_ops_gpu.cc
+++ b/caffe2/operators/utility_ops_gpu.cc
@@ -50,6 +50,8 @@ class CopyOnDeviceLikeOp<CUDAContext, CUDAContext, CUDAContext>
   }
 };
+namespace {
+
 REGISTER_CUDA_OPERATOR(Print, PrintOp<CUDAContext>);
 REGISTER_CUDA_OPERATOR(Flatten, FlattenOp<CUDAContext>);
 REGISTER_CUDA_OPERATOR(FlattenToVec, FlattenToVecOp<CUDAContext>);
@@ -87,4 +89,5 @@ REGISTER_CUDA_OPERATOR(
 REGISTER_CUDA_OPERATOR(UnsafeCoalesce, UnsafeCoalesceOp<CUDAContext>);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/zero_gradient_op.cc b/caffe2/operators/zero_gradient_op.cc
index 47a5f76795..f760ced9f1 100644
--- a/caffe2/operators/zero_gradient_op.cc
+++ b/caffe2/operators/zero_gradient_op.cc
@@ -2,6 +2,7 @@
 namespace caffe2 {
+namespace {
 REGISTER_CPU_OPERATOR(ZeroGradient, ZeroGradientOp<CPUContext>);
 OPERATOR_SCHEMA(ZeroGradient).NumInputs(1).NumOutputs(0).SetDoc(R"DOC(
 The ZeroGradient operator doesn't produce any output blobs. One can use
@@ -22,4 +23,5 @@ struct GetZeroGradientOpGradient : public GradientMakerBase {
 REGISTER_GRADIENT(ZeroGradient, GetZeroGradientOpGradient);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/operators/zero_gradient_op_gpu.cc b/caffe2/operators/zero_gradient_op_gpu.cc
index 82cfc23252..0232ae75b5 100644
--- a/caffe2/operators/zero_gradient_op_gpu.cc
+++ b/caffe2/operators/zero_gradient_op_gpu.cc
@@ -2,5 +2,7 @@
 #include "caffe2/operators/zero_gradient_op.h"
 namespace caffe2 {
+namespace {
 REGISTER_CUDA_OPERATOR(ZeroGradient, ZeroGradientOp<CUDAContext>);
 }
+}
diff --git a/caffe2/queue/queue_ops.cc b/caffe2/queue/queue_ops.cc
index 5b8b8d5e48..1671cee237 100644
--- a/caffe2/queue/queue_ops.cc
+++ b/caffe2/queue/queue_ops.cc
@@ -5,6 +5,8 @@ namespace caffe2 {
 CAFFE_KNOWN_TYPE(std::shared_ptr<BlobsQueue>);
+namespace {
+
 REGISTER_CPU_OPERATOR(CreateBlobsQueue, CreateBlobsQueueOp<CPUContext>);
 REGISTER_CPU_OPERATOR(EnqueueBlobs, EnqueueBlobsOp<CPUContext>);
 REGISTER_CPU_OPERATOR(DequeueBlobs, DequeueBlobsOp<CPUContext>);
@@ -84,5 +86,6 @@ NO_GRADIENT(CloseBlobsQueue);
 NO_GRADIENT(SafeEnqueueBlobs);
 NO_GRADIENT(SafeDequeueBlobs);
 NO_GRADIENT(WeightedSampleDequeueBlobs);
+}
 }
diff --git a/caffe2/queue/queue_ops_gpu.cc b/caffe2/queue/queue_ops_gpu.cc
index dbabcceb1d..88ea6c0e4e 100644
--- a/caffe2/queue/queue_ops_gpu.cc
+++ b/caffe2/queue/queue_ops_gpu.cc
@@ -4,6 +4,8 @@
 namespace caffe2 {
+namespace {
+
 REGISTER_CUDA_OPERATOR(CreateBlobsQueue, CreateBlobsQueueOp<CUDAContext>);
 REGISTER_CUDA_OPERATOR(EnqueueBlobs, EnqueueBlobsOp<CUDAContext>);
 REGISTER_CUDA_OPERATOR(DequeueBlobs, DequeueBlobsOp<CUDAContext>);
@@ -11,5 +13,6 @@ REGISTER_CUDA_OPERATOR(CloseBlobsQueue, CloseBlobsQueueOp<CUDAContext>);
 REGISTER_CUDA_OPERATOR(SafeEnqueueBlobs, SafeEnqueueBlobsOp<CUDAContext>);
 REGISTER_CUDA_OPERATOR(SafeDequeueBlobs, SafeDequeueBlobsOp<CUDAContext>);
+}
 }
diff --git a/caffe2/sgd/adagrad_op.cc b/caffe2/sgd/adagrad_op.cc
index 388b10c693..83bb9a9b01 100644
--- a/caffe2/sgd/adagrad_op.cc
+++ b/caffe2/sgd/adagrad_op.cc
@@ -2,6 +2,7 @@
 namespace caffe2 {
+namespace {
 REGISTER_CPU_OPERATOR(Adagrad, AdagradOp<float, CPUContext>);
 OPERATOR_SCHEMA(Adagrad)
     .NumInputs(4)
@@ -51,3 +52,4 @@ new_history) as in the dense case.
 SHOULD_NOT_DO_GRADIENT(Adagrad);
 SHOULD_NOT_DO_GRADIENT(SparseAdagrad);
 }
+}
diff --git a/caffe2/sgd/adagrad_op_gpu.cu b/caffe2/sgd/adagrad_op_gpu.cu
index db538ba659..201ac9f23b 100644
--- a/caffe2/sgd/adagrad_op_gpu.cu
+++ b/caffe2/sgd/adagrad_op_gpu.cu
@@ -81,6 +81,8 @@ bool SparseAdagradOp<float, CUDAContext>::DoRunWithType()
   return true;
 }
+namespace {
 REGISTER_CUDA_OPERATOR(Adagrad, AdagradOp<float, CUDAContext>);
 REGISTER_CUDA_OPERATOR(SparseAdagrad, SparseAdagradOp<float, CUDAContext>);
 }
+}
diff --git a/caffe2/sgd/adam_op.cc b/caffe2/sgd/adam_op.cc
index d65ea04623..d05f37eef3 100644
--- a/caffe2/sgd/adam_op.cc
+++ b/caffe2/sgd/adam_op.cc
@@ -2,6 +2,7 @@
 namespace caffe2 {
+namespace {
 REGISTER_CPU_OPERATOR(Adam, AdamOp<float, CPUContext>);
 OPERATOR_SCHEMA(Adam)
     .NumInputs(6)
@@ -67,5 +68,6 @@ Adam on (param, moment1[indices], moment2[indices], lr, iter) and returns
 SHOULD_NOT_DO_GRADIENT(Adam);
 SHOULD_NOT_DO_GRADIENT(SparseAdam);
+}
 }
diff --git a/caffe2/sgd/adam_op_gpu.cu b/caffe2/sgd/adam_op_gpu.cu
index 5dd153c801..667faf8e38 100644
--- a/caffe2/sgd/adam_op_gpu.cu
+++ b/caffe2/sgd/adam_op_gpu.cu
@@ -144,7 +144,9 @@ bool SparseAdamOp<float, CUDAContext>::DoRunWithType()
   return true;
 }
+namespace {
 REGISTER_CUDA_OPERATOR(Adam, AdamOp<float, CUDAContext>);
 REGISTER_CUDA_OPERATOR(SparseAdam, SparseAdamOp<float, CUDAContext>);
+}
 }
diff --git a/caffe2/sgd/iter_op.cc b/caffe2/sgd/iter_op.cc
index df9e261f2e..0544476aa3 100644
--- a/caffe2/sgd/iter_op.cc
+++ b/caffe2/sgd/iter_op.cc
@@ -19,6 +19,7 @@ void MutexDeserializer::Deserialize(const BlobProto& /* unused */, Blob* blob) {
       caffe2::make_unique<std::mutex>();
 }
+namespace {
 REGISTER_CPU_OPERATOR(Iter, IterOp<CPUContext>);
 REGISTER_CPU_OPERATOR(AtomicIter, AtomicIterOp<CPUContext>);
@@ -50,4 +51,5 @@ algorithms.
 NO_GRADIENT(Iter);
 NO_GRADIENT(AtomicIter);
+}
 } // namespace caffe2
diff --git a/caffe2/sgd/iter_op_gpu.cc b/caffe2/sgd/iter_op_gpu.cc
index bdc93b9948..ebba6e23fc 100644
--- a/caffe2/sgd/iter_op_gpu.cc
+++ b/caffe2/sgd/iter_op_gpu.cc
@@ -2,8 +2,8 @@
 #include "caffe2/sgd/iter_op.h"
 namespace caffe2 {
-
+namespace {
 REGISTER_CUDA_OPERATOR(Iter, IterOp<CUDAContext>);
 REGISTER_CUDA_OPERATOR(AtomicIter, AtomicIterOp<CUDAContext>);
-
+}
 } // namespace caffe2
diff --git a/caffe2/sgd/learning_rate_op.cc b/caffe2/sgd/learning_rate_op.cc
index be8cdd25f7..cd1b3d9db3 100644
--- a/caffe2/sgd/learning_rate_op.cc
+++ b/caffe2/sgd/learning_rate_op.cc
@@ -1,6 +1,7 @@
 #include "caffe2/sgd/learning_rate_op.h"
 namespace caffe2 {
+namespace {
 REGISTER_CPU_OPERATOR(LearningRate, LearningRateOp<float, CPUContext>);
 OPERATOR_SCHEMA(LearningRate)
@@ -43,4 +44,5 @@ train_net.LearningRate(200, "LR", base_lr=-0.1,
     .Output(0, "output", "description needed");
 NO_GRADIENT(LearningRate);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/sgd/learning_rate_op_gpu.cc b/caffe2/sgd/learning_rate_op_gpu.cc
index f2d4cdf000..49461a7674 100644
--- a/caffe2/sgd/learning_rate_op_gpu.cc
+++ b/caffe2/sgd/learning_rate_op_gpu.cc
@@ -2,5 +2,7 @@
 #include "caffe2/sgd/learning_rate_op.h"
 namespace caffe2 {
+namespace {
 REGISTER_CUDA_OPERATOR(LearningRate, LearningRateOp<float, CUDAContext>);
+} // namespace
 } // namespace caffe2
diff --git a/caffe2/sgd/momentum_sgd_op.cc b/caffe2/sgd/momentum_sgd_op.cc
index c1c54097e9..0ffb3d6e9f 100644
--- a/caffe2/sgd/momentum_sgd_op.cc
+++ b/caffe2/sgd/momentum_sgd_op.cc
@@ -2,6 +2,7 @@
 namespace caffe2 {
+namespace {
 REGISTER_CPU_OPERATOR(MomentumSGD, MomentumSGDOp<float, CPUContext>);
 OPERATOR_SCHEMA(MomentumSGD)
     .NumInputs(3)
@@ -113,3 +114,4 @@ same blobs).
     .Arg("nesterov", "(boolean) Whether to use Nesterov Accelerated Gradient.");
 SHOULD_NOT_DO_GRADIENT(SparseMomentumSGDUpdate);
 }
+}
diff --git a/caffe2/sgd/momentum_sgd_op_gpu.cu b/caffe2/sgd/momentum_sgd_op_gpu.cu
index 30d1ec4564..08f48e68cd 100644
--- a/caffe2/sgd/momentum_sgd_op_gpu.cu
+++ b/caffe2/sgd/momentum_sgd_op_gpu.cu
@@ -119,8 +119,11 @@ bool SparseMomentumSGDUpdateOp<float, CUDAContext>::DoRunWithType() {
   return true;
 }
+namespace {
 REGISTER_CUDA_OPERATOR(MomentumSGD, MomentumSGDOp<float, CUDAContext>);
 REGISTER_CUDA_OPERATOR(MomentumSGDUpdate, MomentumSGDUpdateOp<float, CUDAContext>);
 REGISTER_CUDA_OPERATOR(SparseMomentumSGDUpdate, SparseMomentumSGDUpdateOp<float, CUDAContext>);
 }
+
+}
diff --git a/caffe2/sgd/rmsprop_op.cc b/caffe2/sgd/rmsprop_op.cc
index 7fc6a3fdd2..eb8f0531dc 100644
--- a/caffe2/sgd/rmsprop_op.cc
+++ b/caffe2/sgd/rmsprop_op.cc
@@ -29,6 +29,7 @@ void rmsprop_update<CPUContext>(
   EigenVectorArrayMap<float>(ng, N) = nmomVec;
 }
+namespace {
 REGISTER_CPU_OPERATOR(RmsProp, RmsPropOp<float, CPUContext>);
 OPERATOR_SCHEMA(RmsProp)
     .NumInputs(4)
@@ -48,5 +49,6 @@ returns (grad_o, mean_squares_o, mom_o).
 )DOC");
 SHOULD_NOT_DO_GRADIENT(RmsProp);
+}
 }
diff --git a/caffe2/sgd/rmsprop_op_gpu.cu b/caffe2/sgd/rmsprop_op_gpu.cu
index dd34e10f97..88312b3069 100644
--- a/caffe2/sgd/rmsprop_op_gpu.cu
+++ b/caffe2/sgd/rmsprop_op_gpu.cu
@@ -46,6 +46,8 @@ void rmsprop_update<CUDAContext>(
 }
+namespace {
 REGISTER_CUDA_OPERATOR(RmsProp, RmsPropOp<float, CUDAContext>);
+}
 }
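
Note on the pattern applied throughout these hunks: schema and registration statements are moved inside an anonymous namespace, presumably so the registrar objects they define get internal linkage and stay local to their translation unit. A minimal sketch of what a registration file looks like after this change — MyOp is a hypothetical operator invented purely for illustration; the macros and the Operator<CPUContext> base are the real Caffe2 ones used above:

#include "caffe2/core/operator.h"

namespace caffe2 {

// Hypothetical no-op operator; the point here is the registration wiring,
// not the kernel itself.
class MyOp final : public Operator<CPUContext> {
 public:
  using Operator<CPUContext>::Operator;
  bool RunOnDevice() override {
    return true;  // does nothing, for illustration only
  }
};

namespace {
// Schema and registration are file-local: the anonymous namespace keeps the
// static objects these macros expand to internal to this .cc file, matching
// the convention this patch applies to every operator above.
OPERATOR_SCHEMA(MyOp).NumInputs(0, 1).NumOutputs(0, 1);
REGISTER_CPU_OPERATOR(MyOp, MyOp);
} // namespace

} // namespace caffe2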