author    soumith <soumith@fb.com>  2017-10-13 08:50:00 -0700
committer soumith <soumith@fb.com>  2017-10-13 08:50:00 -0700
commit    5a96037810eb043235d53d9f9200867500259f65 (patch)
tree      95a80984895cd1d4061719dd8331917720446333 /torch/csrc/cuda
parent    8f26d6aabcad991da88b663467ee2080a38631f7 (diff)
skip ncclCommDestroy if CUDA driver is already unloaded
Diffstat (limited to 'torch/csrc/cuda')
-rw-r--r--  torch/csrc/cuda/nccl.cpp | 11
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/torch/csrc/cuda/nccl.cpp b/torch/csrc/cuda/nccl.cpp
index a035495156..0ae936dcdd 100644
--- a/torch/csrc/cuda/nccl.cpp
+++ b/torch/csrc/cuda/nccl.cpp
@@ -28,6 +28,13 @@ struct NcclCommList {
   ~NcclCommList() {
     if (comms) {
       for (int i = 0; i < ndevices; i++) {
+        int dummy_var;
+        if (cudaGetDevice(&dummy_var) != cudaSuccess) {
+          /* there are cases when this destructor is called after the
+             CUDA driver is already unloaded from the process.
+             In these cases, skip ncclCommDestroy */
+          return;
+        }
         ncclCommDestroy(comms[i]);
       }
     }
@@ -107,7 +114,7 @@ static void _check_inputs(std::vector<at::Tensor> &inputs, std::vector<at::Tenso
     if (input.numel() != numel) {
       throw std::runtime_error("all inputs must have the same number of elements");
     }
-
+
     if (output.numel() * output_multiplier != numel * input_multiplier) {
       throw std::runtime_error("output must be of size input_size * size_multiplier");
     }
@@ -144,7 +151,7 @@ PyObject * THCPModule_nccl_reduce(PyObject *self, PyObject *args) {
   std::vector<THCStream*> streams = THPUtils_PySequence_to_THCStreamList(_streams);
   THPUtils_assert(inputs.size() == streams.size(), "number of streams is not equal to number of inputs");
-
+
   // we can safely release GIL after this line, no python API used
   AutoNoGIL no_gil;
   _check_inputs(inputs, outputs, 1, 1);
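
For context, below is a minimal standalone sketch of the guard this commit introduces, assuming nccl.h and cuda_runtime.h are available; the CommHolder type is a hypothetical stand-in for NcclCommList. cudaGetDevice() acts as a cheap liveness probe: once the CUDA driver has been unloaded during process teardown, any runtime call fails (typically with cudaErrorCudartUnloading), so the destructor bails out instead of calling ncclCommDestroy against a dead driver.

#include <cuda_runtime.h>
#include <nccl.h>
#include <vector>

// Hypothetical RAII holder mirroring NcclCommList's destructor logic.
struct CommHolder {
  std::vector<ncclComm_t> comms;

  explicit CommHolder(const std::vector<int>& devices)
      : comms(devices.size()) {
    // One communicator per listed device within this process.
    ncclCommInitAll(comms.data(), static_cast<int>(devices.size()),
                    devices.data());
  }

  ~CommHolder() {
    int dummy_var;
    // Probe the driver first: if it is already unloaded (e.g. due to static
    // destruction order at process exit), skip ncclCommDestroy entirely.
    if (cudaGetDevice(&dummy_var) != cudaSuccess) {
      return;
    }
    for (ncclComm_t comm : comms) {
      ncclCommDestroy(comm);
    }
  }
};

The trade-off is leaking the communicators in the already-unloaded case, which is harmless at that point since the process is exiting anyway.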