-rw-r--r--  setup.py                                           9
-rw-r--r--  torch/csrc/Module.cpp                              7
-rw-r--r--  torch/csrc/distributed/Module.cpp                196
-rw-r--r--  torch/csrc/distributed/THDP.h                     10
-rw-r--r--  torch/csrc/distributed/utils.cpp                   9
-rw-r--r--  torch/csrc/distributed/utils.h                     6
-rw-r--r--  torch/distributed/__init__.py                     49
-rw-r--r--  torch/lib/THD/THD.h                                3
-rw-r--r--  torch/lib/THD/base/Tensor.hpp                      2
-rw-r--r--  torch/lib/THD/base/TensorDescriptor.cpp           25
-rw-r--r--  torch/lib/THD/base/TensorDescriptor.h             18
-rw-r--r--  torch/lib/THD/base/TensorDescriptor.hpp            2
-rw-r--r--  torch/lib/THD/base/tensors/THTensor.hpp            3
-rw-r--r--  torch/lib/THD/base/tensors/generic/THTensor.cpp   13
-rw-r--r--  torch/lib/THD/master_worker/worker/Worker.h        2
-rw-r--r--  torch/lib/THD/process_group/Collectives.cpp       14
-rw-r--r--  torch/lib/THD/process_group/Collectives.h         12
-rwxr-xr-x  torch/lib/build_all.sh                             1
18 files changed, 348 insertions, 33 deletions
diff --git a/setup.py b/setup.py
index 24f45b1fbc..769d6b36db 100644
--- a/setup.py
+++ b/setup.py
@@ -14,6 +14,7 @@ from tools.setup_helpers.env import check_env_flag
from tools.setup_helpers.cuda import WITH_CUDA, CUDA_HOME
from tools.setup_helpers.cudnn import WITH_CUDNN, CUDNN_LIB_DIR, CUDNN_INCLUDE_DIR
DEBUG = check_env_flag('DEBUG')
+WITH_DISTRIBUTED = check_env_flag('WITH_DISTRIBUTED')
################################################################################
# Monkey-patch setuptools to compile in parallel
@@ -230,6 +231,14 @@ try:
except ImportError:
WITH_NUMPY = False
+if WITH_DISTRIBUTED:
+ extra_compile_args += ['-DWITH_DISTRIBUTED']
+ main_sources += [
+ "torch/csrc/distributed/Module.cpp",
+ "torch/csrc/distributed/utils.cpp"
+ ]
+ main_libraries += ['THD']
+
if WITH_CUDA:
cuda_lib_dirs = ['lib64', 'lib']
cuda_include_path = os.path.join(CUDA_HOME, 'include')
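(Note: the new sources are gated on check_env_flag('WITH_DISTRIBUTED'), so — by analogy with how DEBUG is read by the same helper above — the distributed bindings are presumably only compiled when WITH_DISTRIBUTED is set to a truthy value, e.g. WITH_DISTRIBUTED=1, in the environment when running setup.py.)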
diff --git a/torch/csrc/Module.cpp b/torch/csrc/Module.cpp
index 3538a0574c..912d27fb36 100644
--- a/torch/csrc/Module.cpp
+++ b/torch/csrc/Module.cpp
@@ -699,6 +699,10 @@ bool THCSPByteTensor_init(PyObject *module);
static std::vector<PyMethodDef> methods;
+#ifdef WITH_DISTRIBUTED
+PyMethodDef* THDPModule_methods();
+#endif
+
#if PY_MAJOR_VERSION == 2
PyMODINIT_FUNC init_C()
#else
@@ -716,6 +720,9 @@ PyMODINIT_FUNC PyInit__C()
#ifdef WITH_CUDNN
THPUtils_addPyMethodDefs(methods, THCUDNN_methods());
#endif
+#ifdef WITH_DISTRIBUTED
+ THPUtils_addPyMethodDefs(methods, THDPModule_methods());
+#endif
#if PY_MAJOR_VERSION == 2
ASSERT_TRUE(module = Py_InitModule("torch._C", methods.data()));
diff --git a/torch/csrc/distributed/Module.cpp b/torch/csrc/distributed/Module.cpp
new file mode 100644
index 0000000000..af87152c88
--- /dev/null
+++ b/torch/csrc/distributed/Module.cpp
@@ -0,0 +1,196 @@
+#include <Python.h>
+
+#include <unordered_map>
+
+#include "THDP.h"
+
+static std::unordered_map<std::string, THDChannelType> name2channel_type = {
+ {"mpi", THDChannelMPI},
+ {"tcp", THDChannelTCP},
+};
+
+static std::unordered_map<PyObject*, THDReduceOp> obj2reduceop;
+
+static THPObjectPtr _ensureBytes(PyObject *obj)
+{
+#if PY_MAJOR_VERSION == 2
+ if (PyString_Check(obj)) {
+#elif PY_MAJOR_VERSION == 3
+ if (PyBytes_Check(obj)) {
+#endif
+ Py_INCREF(obj);
+ return obj;
+ }
+ if (PyUnicode_Check(obj)) {
+ return PyUnicode_AsASCIIString(obj);
+ }
+ return NULL;
+}
+
+PyObject* THDPModule_initProcessGroup(PyObject *_unused, PyObject *_backend)
+{
+ HANDLE_TH_ERRORS
+ THPObjectPtr backend_bytes = _ensureBytes(_backend);
+ THPUtils_assert(backend_bytes, "backend argument has to be a string/bytes "
+ "object, but got %s", THPUtils_typename(_backend));
+ char *backend_name = THPUtils_bytesAsString(backend_bytes.get());
+ THDChannelType channel_type = name2channel_type.at(backend_name);
+ THPUtils_assert(THDProcessGroupInit(channel_type), "failed to initialize "
+ "distributed library (THD)");
+ Py_RETURN_NONE;
+ END_HANDLE_TH_ERRORS
+}
+
+PyObject* THDPModule_getRank(PyObject *_unused)
+{
+ HANDLE_TH_ERRORS
+ return PyInt_FromLong(THDGetRank());
+ END_HANDLE_TH_ERRORS
+}
+
+PyObject* THDPModule_getNumProcesses(PyObject *_unused)
+{
+ HANDLE_TH_ERRORS
+ return PyInt_FromLong(THDGetNumProcesses());
+ END_HANDLE_TH_ERRORS
+}
+
+static THDTensorDescriptor* _makeDescriptor(PyObject *obj)
+{
+ PyObject *type = (PyObject*)Py_TYPE(obj);
+#define REGISTER_TH_DESCRIPTOR(TYPE) \
+ if (type == THP##TYPE##Class) \
+ return THDTensorDescriptor_newFromTH##TYPE(((THP##TYPE*)obj)->cdata);
+ REGISTER_TH_DESCRIPTOR(DoubleTensor);
+ REGISTER_TH_DESCRIPTOR(FloatTensor);
+ REGISTER_TH_DESCRIPTOR(LongTensor);
+ REGISTER_TH_DESCRIPTOR(IntTensor);
+ REGISTER_TH_DESCRIPTOR(ShortTensor);
+ REGISTER_TH_DESCRIPTOR(CharTensor);
+ REGISTER_TH_DESCRIPTOR(ByteTensor);
+#undef REGISTER_TH_DESCRIPTOR
+ throw std::runtime_error(std::string("don't know how to create a THDTensorDescriptor for "
+ "type ") + std::string(THPUtils_typename(obj)));
+}
+
+static THDReduceOp _getReduceOp(PyObject *obj)
+{
+ auto it = obj2reduceop.find(obj);
+ if (it == obj2reduceop.end()) {
+ throw std::runtime_error("op should be a constant from "
+ "torch.distributed.reduce_op");
+ }
+ return it->second;
+}
+
+PyObject* THDPModule_send(PyObject *_unused, PyObject *args)
+{
+ HANDLE_TH_ERRORS
+ if (PyTuple_GET_SIZE(args) != 2 || !THPModule_isTensor(PyTuple_GET_ITEM(args, 0)) ||
+ !THPUtils_checkLong(PyTuple_GET_ITEM(args, 1))) {
+ THPUtils_invalidArguments(args, "send", 1, "(tensor input, int dst_rank)");
+ return NULL;
+ }
+
+ THDPTensorDesc desc = _makeDescriptor(PyTuple_GET_ITEM(args, 0));
+ int dst_rank = THPUtils_unpackLong(PyTuple_GET_ITEM(args, 1));
+ THDSend(desc, dst_rank);
+ Py_RETURN_NONE;
+ END_HANDLE_TH_ERRORS
+}
+
+PyObject* THDPModule_recv(PyObject *_unused, PyObject *args)
+{
+ HANDLE_TH_ERRORS
+ if (PyTuple_GET_SIZE(args) != 2 || !THPModule_isTensor(PyTuple_GET_ITEM(args, 0)) ||
+ !THPUtils_checkLong(PyTuple_GET_ITEM(args, 1))) {
+ THPUtils_invalidArguments(args, "recv", 1, "(tensor output, int src_rank)");
+ return NULL;
+ }
+
+ THDPTensorDesc desc = _makeDescriptor(PyTuple_GET_ITEM(args, 0));
+ int src_rank = THPUtils_unpackLong(PyTuple_GET_ITEM(args, 1));
+ THDReceive(desc, src_rank);
+ Py_RETURN_NONE;
+ END_HANDLE_TH_ERRORS
+}
+
+PyObject* THDPModule_allReduce(PyObject *_unused, PyObject *args)
+{
+ HANDLE_TH_ERRORS
+ if (PyTuple_GET_SIZE(args) != 2 || !THPModule_isTensor(PyTuple_GET_ITEM(args, 0))) {
+ THPUtils_invalidArguments(args, "all_reduce", 1, "(tensor in_out, reduce_op op)");
+ return NULL;
+ }
+
+ THDReduceOp op = _getReduceOp(PyTuple_GET_ITEM(args, 1));
+ THDPTensorDesc desc = _makeDescriptor(PyTuple_GET_ITEM(args, 0));
+ THDAllReduce(desc, op);
+ Py_RETURN_NONE;
+ END_HANDLE_TH_ERRORS
+}
+
+PyObject* THDPModule_reduce(PyObject *_unused, PyObject *args)
+{
+ HANDLE_TH_ERRORS
+ if (PyTuple_GET_SIZE(args) != 3 || !THPModule_isTensor(PyTuple_GET_ITEM(args, 0)) ||
+ !THPUtils_checkLong(PyTuple_GET_ITEM(args, 1))) {
+ THPUtils_invalidArguments(args, "reduce", 1,
+ "(tensor reduced, int dst_rank, reduce_op op)");
+ return NULL;
+ }
+
+ THDReduceOp op = _getReduceOp(PyTuple_GET_ITEM(args, 2));
+ THDPTensorDesc desc = _makeDescriptor(PyTuple_GET_ITEM(args, 0));
+ int dst_rank = THPUtils_unpackLong(PyTuple_GET_ITEM(args, 1));
+ THDReduce(desc, op, dst_rank);
+ Py_RETURN_NONE;
+ END_HANDLE_TH_ERRORS
+}
+
+PyObject* THDPModule_broadcast(PyObject *_unused, PyObject *args)
+{
+ HANDLE_TH_ERRORS
+ if (PyTuple_GET_SIZE(args) != 2 || !THPModule_isTensor(PyTuple_GET_ITEM(args, 0)) ||
+ !THPUtils_checkLong(PyTuple_GET_ITEM(args, 1))) {
+ THPUtils_invalidArguments(args, "broadcast", 1, "(tensor src_dst, int src_rank)");
+ return NULL;
+ }
+
+ THDPTensorDesc desc = _makeDescriptor(PyTuple_GET_ITEM(args, 0));
+ int src_rank = THPUtils_unpackLong(PyTuple_GET_ITEM(args, 1));
+ THDBroadcast(desc, src_rank);
+ Py_RETURN_NONE;
+ END_HANDLE_TH_ERRORS
+}
+
+PyObject* THDPModule_initExtension(PyObject *_unused, PyObject *reduce_op_obj) {
+ THPObjectPtr reduce_op;
+#define REGISTER_REDUCE_OP(NAME) \
+ reduce_op = PyObject_GetAttrString(reduce_op_obj, #NAME); \
+ THPUtils_assert(reduce_op, "Missing object for reduce op " #NAME); \
+ obj2reduceop.emplace(reduce_op.get(), THDReduce##NAME);
+ REGISTER_REDUCE_OP(SUM);
+ REGISTER_REDUCE_OP(PRODUCT);
+ REGISTER_REDUCE_OP(MIN);
+ REGISTER_REDUCE_OP(MAX);
+#undef REGISTER_REDUCE_OP
+ Py_RETURN_TRUE;
+}
+
+static struct PyMethodDef _THDPModule_methods[] = {
+ {"_dist_init_extension", (PyCFunction)THDPModule_initExtension, METH_O, NULL},
+ {"_dist_init_process_group", (PyCFunction)THDPModule_initProcessGroup, METH_O, NULL},
+ {"_dist_get_rank", (PyCFunction)THDPModule_getRank, METH_NOARGS, NULL},
+ {"_dist_get_num_processes", (PyCFunction)THDPModule_getNumProcesses, METH_NOARGS, NULL},
+ {"_dist_send", (PyCFunction)THDPModule_send, METH_VARARGS, NULL},
+ {"_dist_recv", (PyCFunction)THDPModule_recv, METH_VARARGS, NULL},
+ {"_dist_all_reduce", (PyCFunction)THDPModule_allReduce, METH_VARARGS, NULL},
+ {"_dist_reduce", (PyCFunction)THDPModule_reduce, METH_VARARGS, NULL},
+ {"_dist_broadcast", (PyCFunction)THDPModule_broadcast, METH_VARARGS, NULL},
+ {NULL}
+};
+
+PyMethodDef* THDPModule_methods() {
+ return _THDPModule_methods;
+}
diff --git a/torch/csrc/distributed/THDP.h b/torch/csrc/distributed/THDP.h
new file mode 100644
index 0000000000..7acc1f10d7
--- /dev/null
+++ b/torch/csrc/distributed/THDP.h
@@ -0,0 +1,10 @@
+#ifndef THDP_H
+#define THDP_H
+
+#include <THD/THD.h>
+
+#include "torch/csrc/THP.h"
+#include "Module.h"
+#include "utils.h"
+
+#endif
diff --git a/torch/csrc/distributed/utils.cpp b/torch/csrc/distributed/utils.cpp
new file mode 100644
index 0000000000..ff06c02f67
--- /dev/null
+++ b/torch/csrc/distributed/utils.cpp
@@ -0,0 +1,9 @@
+#include "utils.h"
+
+template<>
+void THPPointer<THDTensorDescriptor>::free() {
+ if (ptr)
+ THDTensorDescriptor_free(ptr);
+}
+
+template class THPPointer<THDTensorDescriptor>;
diff --git a/torch/csrc/distributed/utils.h b/torch/csrc/distributed/utils.h
new file mode 100644
index 0000000000..5a73fa93dd
--- /dev/null
+++ b/torch/csrc/distributed/utils.h
@@ -0,0 +1,6 @@
+#include <Python.h>
+
+#include "THDP.h"
+
+typedef THPPointer<THDTensorDescriptor> THDPTensorDesc;
+
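(The THDPTensorDesc alias above, together with the THPPointer<THDTensorDescriptor>::free() specialization in utils.cpp, gives the bindings RAII ownership of descriptors: each descriptor created by _makeDescriptor in Module.cpp is released via THDTensorDescriptor_free automatically when it goes out of scope, which is why none of the binding functions free it explicitly.)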
diff --git a/torch/distributed/__init__.py b/torch/distributed/__init__.py
new file mode 100644
index 0000000000..3d566e25d9
--- /dev/null
+++ b/torch/distributed/__init__.py
@@ -0,0 +1,49 @@
+import torch
+
+
+_initialized = False
+
+
+def init_process_group(backend):
+ global _initialized
+ if _initialized:
+ raise RuntimeError("trying to initialize torch.distributed twice!")
+ torch._C._dist_init_process_group(backend)
+ _initialized = True
+
+
+class reduce_op(object):
+ SUM = object()
+ PRODUCT = object()
+ MAX = object()
+ MIN = object()
+
+def get_rank():
+ return torch._C._dist_get_rank()
+
+
+def get_num_processes():
+ return torch._C._dist_get_num_processes()
+
+
+def send(tensor, dst_rank):
+ return torch._C._dist_send(tensor, dst_rank)
+
+
+def recv(tensor, src_rank):
+ return torch._C._dist_recv(tensor, src_rank)
+
+
+def broadcast(tensor, src_rank):
+ return torch._C._dist_broadcast(tensor, src_rank)
+
+
+def all_reduce(tensor, op=reduce_op.SUM):
+ return torch._C._dist_all_reduce(tensor, op)
+
+
+def reduce(tensor, dst_rank, op=reduce_op.SUM):
+ return torch._C._dist_reduce(tensor, dst_rank, op)
+
+
+assert torch._C._dist_init_extension(reduce_op)
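(For orientation, a minimal usage sketch of the Python API added in this file — assuming a two-process job where each process runs this script with the TCP backend; how ranks and addresses get configured is outside this patch:)

    import torch
    import torch.distributed as dist

    dist.init_process_group('tcp')      # 'tcp' or 'mpi', per name2channel_type
    rank = dist.get_rank()              # this process's rank
    world = dist.get_num_processes()    # total number of processes

    t = torch.zeros(5)
    if rank == 0:
        t.fill_(1)
        dist.send(t, 1)                 # point-to-point send to rank 1
    elif rank == 1:
        dist.recv(t, 0)                 # receive into t from rank 0

    dist.broadcast(t, 0)                # everyone ends up with rank 0's t
    dist.all_reduce(t, op=dist.reduce_op.SUM)  # in-place sum across ranks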
diff --git a/torch/lib/THD/THD.h b/torch/lib/THD/THD.h
index f546ba0557..1653ffb863 100644
--- a/torch/lib/THD/THD.h
+++ b/torch/lib/THD/THD.h
@@ -1,6 +1,6 @@
#pragma once
-#ifdef __cplusplus__
+#ifdef __cplusplus
#define THD_API extern "C"
#else
#define THD_API
@@ -14,6 +14,7 @@
#include "base/ChannelType.h"
#include "process_group/General.h"
+#include "process_group/Collectives.h"
#include "master_worker/master/Master.h"
#include "master_worker/master/State.h"
diff --git a/torch/lib/THD/base/Tensor.hpp b/torch/lib/THD/base/Tensor.hpp
index 8abcdc2f17..ebd1cca054 100644
--- a/torch/lib/THD/base/Tensor.hpp
+++ b/torch/lib/THD/base/Tensor.hpp
@@ -140,6 +140,8 @@ struct Tensor {
virtual bool isContiguous() const = 0;
virtual void* data() = 0;
virtual const void* data() const = 0;
+ virtual Tensor& retain() = 0;
+ virtual Tensor& free() = 0;
virtual Tensor& resize(const std::initializer_list<long>& new_size) = 0;
virtual Tensor& resize(const std::vector<long>& new_size) = 0;
diff --git a/torch/lib/THD/base/TensorDescriptor.cpp b/torch/lib/THD/base/TensorDescriptor.cpp
index 0575f3f62f..11f54c68fd 100644
--- a/torch/lib/THD/base/TensorDescriptor.cpp
+++ b/torch/lib/THD/base/TensorDescriptor.cpp
@@ -1,30 +1,41 @@
#include "TensorDescriptor.hpp"
-THDTensorDescriptor THDTensorDescriptor_newFromTHDoubleTensor(THDoubleTensor *tensor) {
+THDTensorDescriptor* THDTensorDescriptor_newFromTHDoubleTensor(THDoubleTensor *tensor) {
+ THDoubleTensor_retain(tensor);
return new THTensor<double>(tensor);
}
-THDTensorDescriptor THDTensorDescriptor_newFromTHFloatTensor(THFloatTensor *tensor) {
+THDTensorDescriptor* THDTensorDescriptor_newFromTHFloatTensor(THFloatTensor *tensor) {
+ THFloatTensor_retain(tensor);
return new THTensor<float>(tensor);
}
-THDTensorDescriptor THDTensorDescriptor_newFromTHLongTensor(THLongTensor *tensor) {
+THDTensorDescriptor* THDTensorDescriptor_newFromTHLongTensor(THLongTensor *tensor) {
+ THLongTensor_retain(tensor);
return new THTensor<long>(tensor);
}
-THDTensorDescriptor THDTensorDescriptor_newFromTHIntTensor(THIntTensor *tensor) {
+THDTensorDescriptor* THDTensorDescriptor_newFromTHIntTensor(THIntTensor *tensor) {
+ THIntTensor_retain(tensor);
return new THTensor<int>(tensor);
}
-THDTensorDescriptor THDTensorDescriptor_newFromTHShortTensor(THShortTensor *tensor) {
+THDTensorDescriptor* THDTensorDescriptor_newFromTHShortTensor(THShortTensor *tensor) {
+ THShortTensor_retain(tensor);
return new THTensor<short>(tensor);
}
-THDTensorDescriptor THDTensorDescriptor_newFromTHCharTensor(THCharTensor *tensor) {
+THDTensorDescriptor* THDTensorDescriptor_newFromTHCharTensor(THCharTensor *tensor) {
+ THCharTensor_retain(tensor);
return new THTensor<char>(tensor);
}
-THDTensorDescriptor THDTensorDescriptor_newFromTHByteTensor(THByteTensor *tensor) {
+THDTensorDescriptor* THDTensorDescriptor_newFromTHByteTensor(THByteTensor *tensor) {
+ THByteTensor_retain(tensor);
return new THTensor<unsigned char>(tensor);
}
+
+THD_API void THDTensorDescriptor_free(THDTensorDescriptor* desc) {
+ delete desc;
+}
diff --git a/torch/lib/THD/base/TensorDescriptor.h b/torch/lib/THD/base/TensorDescriptor.h
index f801cd0693..e129bebc58 100644
--- a/torch/lib/THD/base/TensorDescriptor.h
+++ b/torch/lib/THD/base/TensorDescriptor.h
@@ -1,16 +1,18 @@
#pragma once
+#include "../THD.h"
#include <TH/TH.h>
#ifndef _THD_CORE
struct _THDTensorDescriptor;
-typedef struct _THDTensorDescriptor* THDTensorDescriptor;
+typedef struct _THDTensorDescriptor THDTensorDescriptor;
#endif
-THDTensorDescriptor THDTensorDescriptor_newFromTHDoubleTensor(THDoubleTensor *tensor);
-THDTensorDescriptor THDTensorDescriptor_newFromTHFloatTensor(THFloatTensor *tensor);
-THDTensorDescriptor THDTensorDescriptor_newFromTHLongTensor(THFloatTensor *tensor);
-THDTensorDescriptor THDTensorDescriptor_newFromTHIntTensor(THFloatTensor *tensor);
-THDTensorDescriptor THDTensorDescriptor_newFromTHShortTensor(THFloatTensor *tensor);
-THDTensorDescriptor THDTensorDescriptor_newFromTHCharTensor(THFloatTensor *tensor);
-THDTensorDescriptor THDTensorDescriptor_newFromTHByteTensor(THFloatTensor *tensor);
+THD_API THDTensorDescriptor* THDTensorDescriptor_newFromTHDoubleTensor(THDoubleTensor *tensor);
+THD_API THDTensorDescriptor* THDTensorDescriptor_newFromTHFloatTensor(THFloatTensor *tensor);
+THD_API THDTensorDescriptor* THDTensorDescriptor_newFromTHLongTensor(THLongTensor *tensor);
+THD_API THDTensorDescriptor* THDTensorDescriptor_newFromTHIntTensor(THIntTensor *tensor);
+THD_API THDTensorDescriptor* THDTensorDescriptor_newFromTHShortTensor(THShortTensor *tensor);
+THD_API THDTensorDescriptor* THDTensorDescriptor_newFromTHCharTensor(THCharTensor *tensor);
+THD_API THDTensorDescriptor* THDTensorDescriptor_newFromTHByteTensor(THByteTensor *tensor);
+THD_API void THDTensorDescriptor_free(THDTensorDescriptor* desc);
diff --git a/torch/lib/THD/base/TensorDescriptor.hpp b/torch/lib/THD/base/TensorDescriptor.hpp
index 48df37a82d..9badaa67cb 100644
--- a/torch/lib/THD/base/TensorDescriptor.hpp
+++ b/torch/lib/THD/base/TensorDescriptor.hpp
@@ -1,7 +1,7 @@
#pragma once
#include "tensors/THTensor.hpp"
-using THDTensorDescriptor = Tensor*;
+using THDTensorDescriptor = Tensor;
#include "TensorDescriptor.h"
diff --git a/torch/lib/THD/base/tensors/THTensor.hpp b/torch/lib/THD/base/tensors/THTensor.hpp
index 70ddce0c62..bbe13bbb88 100644
--- a/torch/lib/THD/base/tensors/THTensor.hpp
+++ b/torch/lib/THD/base/tensors/THTensor.hpp
@@ -40,6 +40,8 @@ public:
virtual bool isContiguous() const override;
virtual void* data() override;
virtual const void* data() const override;
+ virtual THTensor& retain() override;
+ virtual THTensor& free() override;
virtual THTensor& resize(const std::initializer_list<long>& new_size) override;
virtual THTensor& resize(const std::vector<long>& new_size) override;
@@ -48,6 +50,7 @@ public:
virtual THTensor& add(const Tensor& source, scalar_type scalar) override;
virtual thd::TensorType type() const override;
+
private:
template<typename iterator>
THTensor& resize(const iterator& begin, const iterator& end);
diff --git a/torch/lib/THD/base/tensors/generic/THTensor.cpp b/torch/lib/THD/base/tensors/generic/THTensor.cpp
index 556cd0ca80..b6c1691213 100644
--- a/torch/lib/THD/base/tensors/generic/THTensor.cpp
+++ b/torch/lib/THD/base/tensors/generic/THTensor.cpp
@@ -105,13 +105,24 @@ auto THTensor<real>::fill(scalar_type value) -> THTensor& {
return *this;
}
+template<>
+auto THTensor<real>::retain() -> THTensor& {
+ THTensor_(retain)(tensor);
+ return *this;
+}
+
+template<>
+auto THTensor<real>::free() -> THTensor& {
+ THTensor_(free)(tensor);
+ return *this;
+}
+
#define non_const_cast(tensor) const_cast<THTensor&>(dynamic_cast<const THTensor&>(tensor))
template<>
auto THTensor<real>::add(const Tensor &source, scalar_type value) -> THTensor& {
THTensor &source_t = non_const_cast(source);
THTensor_(add)(tensor, source_t.tensor, value);
- return *this;
}
template<>
diff --git a/torch/lib/THD/master_worker/worker/Worker.h b/torch/lib/THD/master_worker/worker/Worker.h
index c469266118..1a2f708ed4 100644
--- a/torch/lib/THD/master_worker/worker/Worker.h
+++ b/torch/lib/THD/master_worker/worker/Worker.h
@@ -1,5 +1,5 @@
#pragma once
-#include "THD.h"
+#include "../../THD.h"
THD_API void THDWorkerMain();
diff --git a/torch/lib/THD/process_group/Collectives.cpp b/torch/lib/THD/process_group/Collectives.cpp
index 67f11db429..cb668a9469 100644
--- a/torch/lib/THD/process_group/Collectives.cpp
+++ b/torch/lib/THD/process_group/Collectives.cpp
@@ -1,7 +1,7 @@
#include "Collectives.hpp"
#include "General.hpp"
-namespace thd {
+using namespace thd;
int THDGetRank() {
return dataChannel->getRank();
@@ -11,24 +11,22 @@ int THDGetNumProcesses() {
return dataChannel->getNumProcesses();
}
-void THDAllReduce(THDTensorDescriptor desc, THDReduceOp operation) {
+void THDAllReduce(THDTensorDescriptor* desc, THDReduceOp operation) {
dataChannel->allReduce(*desc, operation);
}
-void THDReduce(THDTensorDescriptor desc, THDReduceOp operation, int dst_rank) {
+void THDReduce(THDTensorDescriptor* desc, THDReduceOp operation, int dst_rank) {
dataChannel->reduce(*desc, operation, dst_rank);
}
-void THDBroadcast(THDTensorDescriptor desc, int src_rank) {
+void THDBroadcast(THDTensorDescriptor* desc, int src_rank) {
dataChannel->broadcast(*desc, src_rank);
}
-void THDSend(THDTensorDescriptor desc, int dst_rank) {
+void THDSend(THDTensorDescriptor* desc, int dst_rank) {
dataChannel->send(*desc, dst_rank);
}
-void THDReceive(THDTensorDescriptor desc, int src_rank) {
+void THDReceive(THDTensorDescriptor* desc, int src_rank) {
dataChannel->receive(*desc, src_rank);
}
-
-} // namespace thd
diff --git a/torch/lib/THD/process_group/Collectives.h b/torch/lib/THD/process_group/Collectives.h
index 10a4636eb6..b8168028ca 100644
--- a/torch/lib/THD/process_group/Collectives.h
+++ b/torch/lib/THD/process_group/Collectives.h
@@ -1,12 +1,12 @@
#pragma once
#include "../THD.h"
-#include "base/DataChannel.h"
+#include "../base/DataChannel.h"
THD_API int THDGetRank();
THD_API int THDGetNumProcesses();
-THD_API void THDAllReduce(THDTensorDescriptor desc, THDReduceOp operation);
-THD_API void THDReduce(THDTensorDescriptor desc, THDReduceOp operation, int dst_rank);
-THD_API void THDBroadcast(THDTensorDescriptor desc, int src_rank);
-THD_API void THDSend(THDTensorDescriptor desc, int dst_rank);
-THD_API void THDReceive(THDTensorDescriptor desc, int src_rank);
+THD_API void THDAllReduce(THDTensorDescriptor* desc, THDReduceOp operation);
+THD_API void THDReduce(THDTensorDescriptor* desc, THDReduceOp operation, int dst_rank);
+THD_API void THDBroadcast(THDTensorDescriptor* desc, int src_rank);
+THD_API void THDSend(THDTensorDescriptor* desc, int dst_rank);
+THD_API void THDReceive(THDTensorDescriptor* desc, int src_rank);
diff --git a/torch/lib/build_all.sh b/torch/lib/build_all.sh
index f5144da4e4..3699f35bef 100755
--- a/torch/lib/build_all.sh
+++ b/torch/lib/build_all.sh
@@ -68,6 +68,7 @@ mkdir -p tmp_install
build TH
build THS
build THNN
+build THD
if [[ "$1" == "--with-cuda" ]]; then
build THC