-rw-r--r--  setup.py                                         |   9
-rw-r--r--  torch/csrc/Module.cpp                            |   7
-rw-r--r--  torch/csrc/distributed/Module.cpp                | 196
-rw-r--r--  torch/csrc/distributed/THDP.h                    |  10
-rw-r--r--  torch/csrc/distributed/utils.cpp                 |   9
-rw-r--r--  torch/csrc/distributed/utils.h                   |   6
-rw-r--r--  torch/distributed/__init__.py                    |  49
-rw-r--r--  torch/lib/THD/THD.h                              |   3
-rw-r--r--  torch/lib/THD/base/Tensor.hpp                    |   2
-rw-r--r--  torch/lib/THD/base/TensorDescriptor.cpp          |  25
-rw-r--r--  torch/lib/THD/base/TensorDescriptor.h            |  18
-rw-r--r--  torch/lib/THD/base/TensorDescriptor.hpp          |   2
-rw-r--r--  torch/lib/THD/base/tensors/THTensor.hpp          |   3
-rw-r--r--  torch/lib/THD/base/tensors/generic/THTensor.cpp  |  13
-rw-r--r--  torch/lib/THD/master_worker/worker/Worker.h      |   2
-rw-r--r--  torch/lib/THD/process_group/Collectives.cpp      |  14
-rw-r--r--  torch/lib/THD/process_group/Collectives.h        |  12
-rwxr-xr-x  torch/lib/build_all.sh                           |   1
18 files changed, 348 insertions, 33 deletions
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -14,6 +14,7 @@ from tools.setup_helpers.env import check_env_flag
 from tools.setup_helpers.cuda import WITH_CUDA, CUDA_HOME
 from tools.setup_helpers.cudnn import WITH_CUDNN, CUDNN_LIB_DIR, CUDNN_INCLUDE_DIR
 DEBUG = check_env_flag('DEBUG')
+WITH_DISTRIBUTED = check_env_flag('WITH_DISTRIBUTED')
 
 ################################################################################
 # Monkey-patch setuptools to compile in parallel
@@ -230,6 +231,14 @@ try:
 except ImportError:
     WITH_NUMPY = False
 
+if WITH_DISTRIBUTED:
+    extra_compile_args += ['-DWITH_DISTRIBUTED']
+    main_sources += [
+        "torch/csrc/distributed/Module.cpp",
+        "torch/csrc/distributed/utils.cpp"
+    ]
+    main_libraries += ['THD']
+
 if WITH_CUDA:
     cuda_lib_dirs = ['lib64', 'lib']
     cuda_include_path = os.path.join(CUDA_HOME, 'include')
diff --git a/torch/csrc/Module.cpp b/torch/csrc/Module.cpp
index 3538a0574c..912d27fb36 100644
--- a/torch/csrc/Module.cpp
+++ b/torch/csrc/Module.cpp
@@ -699,6 +699,10 @@ bool THCSPByteTensor_init(PyObject *module);
 
 static std::vector<PyMethodDef> methods;
 
+#ifdef WITH_DISTRIBUTED
+PyMethodDef* THDPModule_methods();
+#endif
+
 #if PY_MAJOR_VERSION == 2
 PyMODINIT_FUNC init_C()
 #else
@@ -716,6 +720,9 @@ PyMODINIT_FUNC PyInit__C()
 #ifdef WITH_CUDNN
   THPUtils_addPyMethodDefs(methods, THCUDNN_methods());
 #endif
+#ifdef WITH_DISTRIBUTED
+  THPUtils_addPyMethodDefs(methods, THDPModule_methods());
+#endif
 
 #if PY_MAJOR_VERSION == 2
   ASSERT_TRUE(module = Py_InitModule("torch._C", methods.data()));
diff --git a/torch/csrc/distributed/Module.cpp b/torch/csrc/distributed/Module.cpp
new file mode 100644
index 0000000000..af87152c88
--- /dev/null
+++ b/torch/csrc/distributed/Module.cpp
@@ -0,0 +1,196 @@
+#include <Python.h>
+
+#include <unordered_map>
+
+#include "THDP.h"
+
+static std::unordered_map<std::string, THDChannelType> name2channel_type = {
+    {"mpi", THDChannelMPI},
+    {"tcp", THDChannelTCP},
+};
+
+static std::unordered_map<PyObject*, THDReduceOp> obj2reduceop;
+
+static THPObjectPtr _ensureBytes(PyObject *obj)
+{
+#if PY_MAJOR_VERSION == 2
+  if (PyString_Check(obj)) {
+#elif PY_MAJOR_VERSION == 3
+  if (PyBytes_Check(obj)) {
+#endif
+    Py_INCREF(obj);
+    return obj;
+  }
+  if (PyUnicode_Check(obj)) {
+    return PyUnicode_AsASCIIString(obj);
+  }
+  return NULL;
+}
+
+PyObject* THDPModule_initProcessGroup(PyObject *_unused, PyObject *_backend)
+{
+  HANDLE_TH_ERRORS
+  THPObjectPtr backend_bytes = _ensureBytes(_backend);
+  THPUtils_assert(backend_bytes, "backend argument has to be a string/bytes "
+      "object, but got %s", THPUtils_typename(_backend));
+  char *backend_name = THPUtils_bytesAsString(backend_bytes.get());
+  THDChannelType channel_type = name2channel_type.at(backend_name);
+  THPUtils_assert(THDProcessGroupInit(channel_type), "failed to initialize "
+      "distributed library (THD)");
+  Py_RETURN_NONE;
+  END_HANDLE_TH_ERRORS
+}
+
+PyObject* THDPModule_getRank(PyObject *_unused)
+{
+  HANDLE_TH_ERRORS
+  return PyInt_FromLong(THDGetRank());
+  END_HANDLE_TH_ERRORS
+}
+
+PyObject* THDPModule_getNumProcesses(PyObject *_unused)
+{
+  HANDLE_TH_ERRORS
+  return PyInt_FromLong(THDGetNumProcesses());
+  END_HANDLE_TH_ERRORS
+}
+
+static THDTensorDescriptor* _makeDescriptor(PyObject *obj)
+{
+  PyObject *type = (PyObject*)Py_TYPE(obj);
+#define REGISTER_TH_DESCRIPTOR(TYPE)                                          \
+  if (type == THP##TYPE##Class)                                               \
+    return THDTensorDescriptor_newFromTH##TYPE(((THP##TYPE*)obj)->cdata);
+  REGISTER_TH_DESCRIPTOR(DoubleTensor);
+  REGISTER_TH_DESCRIPTOR(FloatTensor);
+  REGISTER_TH_DESCRIPTOR(LongTensor);
+  REGISTER_TH_DESCRIPTOR(IntTensor);
+  REGISTER_TH_DESCRIPTOR(ShortTensor);
+  REGISTER_TH_DESCRIPTOR(CharTensor);
+  REGISTER_TH_DESCRIPTOR(ByteTensor);
+#undef REGISTER_TH_DESCRIPTOR
+  throw std::runtime_error(std::string("don't know how to create a THDTensorDescriptor for "
+      "type ") + std::string(THPUtils_typename(obj)));
+}
+
+static THDReduceOp _getReduceOp(PyObject *obj)
+{
+  auto it = obj2reduceop.find(obj);
+  if (it == obj2reduceop.end()) {
+    throw std::runtime_error("op should be a constant from "
+        "torch.distributed.reduce_op");
+  }
+  return it->second;
+}
+
+PyObject* THDPModule_send(PyObject *_unused, PyObject *args)
+{
+  HANDLE_TH_ERRORS
+  if (PyTuple_GET_SIZE(args) != 2 || !THPModule_isTensor(PyTuple_GET_ITEM(args, 0)) ||
+      !THPUtils_checkLong(PyTuple_GET_ITEM(args, 1))) {
+    THPUtils_invalidArguments(args, "send", 1, "(tensor input, int dst_rank)");
+    return NULL;
+  }
+
+  THDPTensorDesc desc = _makeDescriptor(PyTuple_GET_ITEM(args, 0));
+  int dst_rank = THPUtils_unpackLong(PyTuple_GET_ITEM(args, 1));
+  THDSend(desc, dst_rank);
+  Py_RETURN_NONE;
+  END_HANDLE_TH_ERRORS
+}
+
+PyObject* THDPModule_recv(PyObject *_unused, PyObject *args)
+{
+  HANDLE_TH_ERRORS
+  if (PyTuple_GET_SIZE(args) != 2 || !THPModule_isTensor(PyTuple_GET_ITEM(args, 0)) ||
+      !THPUtils_checkLong(PyTuple_GET_ITEM(args, 1))) {
+    THPUtils_invalidArguments(args, "recv", 1, "(tensor output, int src_rank)");
+    return NULL;
+  }
+
+  THDPTensorDesc desc = _makeDescriptor(PyTuple_GET_ITEM(args, 0));
+  int src_rank = THPUtils_unpackLong(PyTuple_GET_ITEM(args, 1));
+  THDReceive(desc, src_rank);
+  Py_RETURN_NONE;
+  END_HANDLE_TH_ERRORS
+}
+
+PyObject* THDPModule_allReduce(PyObject *_unused, PyObject *args)
+{
+  HANDLE_TH_ERRORS
+  if (PyTuple_GET_SIZE(args) != 2 || !THPModule_isTensor(PyTuple_GET_ITEM(args, 0))) {
+    THPUtils_invalidArguments(args, "all_reduce", 1, "(tensor in_out, reduce_op op)");
+    return NULL;
+  }
+
+  THDReduceOp op = _getReduceOp(PyTuple_GET_ITEM(args, 1));
+  THDPTensorDesc desc = _makeDescriptor(PyTuple_GET_ITEM(args, 0));
+  THDAllReduce(desc, op);
+  Py_RETURN_NONE;
+  END_HANDLE_TH_ERRORS
+}
+
+PyObject* THDPModule_reduce(PyObject *_unused, PyObject *args)
+{
+  HANDLE_TH_ERRORS
+  if (PyTuple_GET_SIZE(args) != 3 || !THPModule_isTensor(PyTuple_GET_ITEM(args, 0)) ||
+      !THPUtils_checkLong(PyTuple_GET_ITEM(args, 1))) {
+    THPUtils_invalidArguments(args, "reduce", 1,
+        "(tensor reduced, int dst_rank, reduce_op op)");
+    return NULL;
+  }
+
+  THDReduceOp op = _getReduceOp(PyTuple_GET_ITEM(args, 2));
+  THDPTensorDesc desc = _makeDescriptor(PyTuple_GET_ITEM(args, 0));
+  int dst_rank = THPUtils_unpackLong(PyTuple_GET_ITEM(args, 1));
+  THDReduce(desc, op, dst_rank);
+  Py_RETURN_NONE;
+  END_HANDLE_TH_ERRORS
+}
+
+PyObject* THDPModule_broadcast(PyObject *_unused, PyObject *args)
+{
+  HANDLE_TH_ERRORS
+  if (PyTuple_GET_SIZE(args) != 2 || !THPModule_isTensor(PyTuple_GET_ITEM(args, 0)) ||
+      !THPUtils_checkLong(PyTuple_GET_ITEM(args, 1))) {
+    THPUtils_invalidArguments(args, "broadcast", 1, "(tensor src_dst, int src_rank)");
+    return NULL;
+  }
+
+  THDPTensorDesc desc = _makeDescriptor(PyTuple_GET_ITEM(args, 0));
+  int src_rank = THPUtils_unpackLong(PyTuple_GET_ITEM(args, 1));
+  THDBroadcast(desc, src_rank);
+  Py_RETURN_NONE;
+  END_HANDLE_TH_ERRORS
+}
+
+PyObject* THDPModule_initExtension(PyObject *_unused, PyObject *reduce_op_obj) {
+  THPObjectPtr reduce_op;
+#define REGISTER_REDUCE_OP(NAME)                                              \
+  reduce_op = PyObject_GetAttrString(reduce_op_obj, #NAME);                   \
+  THPUtils_assert(reduce_op, "Missing object for reduce op " #NAME);          \
+  obj2reduceop.emplace(reduce_op.get(), THDReduce##NAME);
+  REGISTER_REDUCE_OP(SUM);
+  REGISTER_REDUCE_OP(PRODUCT);
+  REGISTER_REDUCE_OP(MIN);
+  REGISTER_REDUCE_OP(MAX);
+#undef REGISTER_REDUCE_OP
+  Py_RETURN_TRUE;
+}
+
+static struct PyMethodDef _THDPModule_methods[] = {
+  {"_dist_init_extension", (PyCFunction)THDPModule_initExtension, METH_O, NULL},
+  {"_dist_init_process_group", (PyCFunction)THDPModule_initProcessGroup, METH_O, NULL},
+  {"_dist_get_rank", (PyCFunction)THDPModule_getRank, METH_NOARGS, NULL},
+  {"_dist_get_num_processes", (PyCFunction)THDPModule_getNumProcesses, METH_NOARGS, NULL},
+  {"_dist_send", (PyCFunction)THDPModule_send, METH_VARARGS, NULL},
+  {"_dist_recv", (PyCFunction)THDPModule_recv, METH_VARARGS, NULL},
+  {"_dist_all_reduce", (PyCFunction)THDPModule_allReduce, METH_VARARGS, NULL},
+  {"_dist_reduce", (PyCFunction)THDPModule_reduce, METH_VARARGS, NULL},
+  {"_dist_broadcast", (PyCFunction)THDPModule_broadcast, METH_VARARGS, NULL},
+  {NULL}
+};
+
+PyMethodDef* THDPModule_methods() {
+  return _THDPModule_methods;
+}
diff --git a/torch/csrc/distributed/THDP.h b/torch/csrc/distributed/THDP.h
new file mode 100644
index 0000000000..7acc1f10d7
--- /dev/null
+++ b/torch/csrc/distributed/THDP.h
@@ -0,0 +1,10 @@
+#ifndef THDP_H
+#define THDP_H
+
+#include <THD/THD.h>
+
+#include "torch/csrc/THP.h"
+#include "Module.h"
+#include "utils.h"
+
+#endif
diff --git a/torch/csrc/distributed/utils.cpp b/torch/csrc/distributed/utils.cpp
new file mode 100644
index 0000000000..ff06c02f67
--- /dev/null
+++ b/torch/csrc/distributed/utils.cpp
@@ -0,0 +1,9 @@
+#include "utils.h"
+
+template<>
+void THPPointer<THDTensorDescriptor>::free() {
+  if (ptr)
+    THDTensorDescriptor_free(ptr);
+}
+
+template class THPPointer<THDTensorDescriptor>;
diff --git a/torch/csrc/distributed/utils.h b/torch/csrc/distributed/utils.h
new file mode 100644
index 0000000000..5a73fa93dd
--- /dev/null
+++ b/torch/csrc/distributed/utils.h
@@ -0,0 +1,6 @@
+#include <Python.h>
+
+#include "THDP.h"
+
+typedef THPPointer<THDTensorDescriptor> THDPTensorDesc;
+
diff --git a/torch/distributed/__init__.py b/torch/distributed/__init__.py
new file mode 100644
index 0000000000..3d566e25d9
--- /dev/null
+++ b/torch/distributed/__init__.py
@@ -0,0 +1,49 @@
+import torch
+
+
+_initialized = False
+
+
+def init_process_group(backend):
+    global _initialized
+    if _initialized:
+        raise RuntimeError("trying to initialize torch.distributed twice!")
+    torch._C._dist_init_process_group(backend)
+    _initialized = True
+
+
+class reduce_op(object):
+    SUM = object()
+    PRODUCT = object()
+    MAX = object()
+    MIN = object()
+
+def get_rank():
+    return torch._C._dist_get_rank()
+
+
+def get_num_processes():
+    return torch._C._dist_get_num_processes()
+
+
+def send(tensor, dst_rank):
+    return torch._C._dist_send(tensor, dst_rank)
+
+
+def recv(tensor, src_rank):
+    return torch._C._dist_recv(tensor, src_rank)
+
+
+def broadcast(tensor, src_rank):
+    return torch._C._dist_broadcast(tensor, src_rank)
+
+
+def all_reduce(tensor, op=reduce_op.SUM):
+    return torch._C._dist_all_reduce(tensor, op)
+
+
+def reduce(tensor, dst_rank, op=reduce_op.SUM):
+    return torch._C._dist_reduce(tensor, dst_rank, op)
+
+
+assert torch._C._dist_init_extension(reduce_op)
diff --git a/torch/lib/THD/THD.h b/torch/lib/THD/THD.h
index f546ba0557..1653ffb863 100644
--- a/torch/lib/THD/THD.h
+++ b/torch/lib/THD/THD.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#ifdef __cplusplus__
+#ifdef __cplusplus
 #define THD_API extern "C"
 #else
 #define THD_API
@@ -14,6 +14,7 @@
 #include "base/ChannelType.h"
#include "process_group/General.h" +#include "process_group/Collectives.h" #include "master_worker/master/Master.h" #include "master_worker/master/State.h" diff --git a/torch/lib/THD/base/Tensor.hpp b/torch/lib/THD/base/Tensor.hpp index 8abcdc2f17..ebd1cca054 100644 --- a/torch/lib/THD/base/Tensor.hpp +++ b/torch/lib/THD/base/Tensor.hpp @@ -140,6 +140,8 @@ struct Tensor { virtual bool isContiguous() const = 0; virtual void* data() = 0; virtual const void* data() const = 0; + virtual Tensor& retain() = 0; + virtual Tensor& free() = 0; virtual Tensor& resize(const std::initializer_list<long>& new_size) = 0; virtual Tensor& resize(const std::vector<long>& new_size) = 0; diff --git a/torch/lib/THD/base/TensorDescriptor.cpp b/torch/lib/THD/base/TensorDescriptor.cpp index 0575f3f62f..11f54c68fd 100644 --- a/torch/lib/THD/base/TensorDescriptor.cpp +++ b/torch/lib/THD/base/TensorDescriptor.cpp @@ -1,30 +1,41 @@ #include "TensorDescriptor.hpp" -THDTensorDescriptor THDTensorDescriptor_newFromTHDoubleTensor(THDoubleTensor *tensor) { +THDTensorDescriptor* THDTensorDescriptor_newFromTHDoubleTensor(THDoubleTensor *tensor) { + THDoubleTensor_retain(tensor); return new THTensor<double>(tensor); } -THDTensorDescriptor THDTensorDescriptor_newFromTHFloatTensor(THFloatTensor *tensor) { +THDTensorDescriptor* THDTensorDescriptor_newFromTHFloatTensor(THFloatTensor *tensor) { + THFloatTensor_retain(tensor); return new THTensor<float>(tensor); } -THDTensorDescriptor THDTensorDescriptor_newFromTHLongTensor(THLongTensor *tensor) { +THDTensorDescriptor* THDTensorDescriptor_newFromTHLongTensor(THLongTensor *tensor) { + THLongTensor_retain(tensor); return new THTensor<long>(tensor); } -THDTensorDescriptor THDTensorDescriptor_newFromTHIntTensor(THIntTensor *tensor) { +THDTensorDescriptor* THDTensorDescriptor_newFromTHIntTensor(THIntTensor *tensor) { + THIntTensor_retain(tensor); return new THTensor<int>(tensor); } -THDTensorDescriptor THDTensorDescriptor_newFromTHShortTensor(THShortTensor *tensor) { +THDTensorDescriptor* THDTensorDescriptor_newFromTHShortTensor(THShortTensor *tensor) { + THShortTensor_retain(tensor); return new THTensor<short>(tensor); } -THDTensorDescriptor THDTensorDescriptor_newFromTHCharTensor(THCharTensor *tensor) { +THDTensorDescriptor* THDTensorDescriptor_newFromTHCharTensor(THCharTensor *tensor) { + THCharTensor_retain(tensor); return new THTensor<char>(tensor); } -THDTensorDescriptor THDTensorDescriptor_newFromTHByteTensor(THByteTensor *tensor) { +THDTensorDescriptor* THDTensorDescriptor_newFromTHByteTensor(THByteTensor *tensor) { + THByteTensor_retain(tensor); return new THTensor<unsigned char>(tensor); } + +THD_API void THDTensorDescriptor_free(THDTensorDescriptor* desc) { + delete desc; +} diff --git a/torch/lib/THD/base/TensorDescriptor.h b/torch/lib/THD/base/TensorDescriptor.h index f801cd0693..e129bebc58 100644 --- a/torch/lib/THD/base/TensorDescriptor.h +++ b/torch/lib/THD/base/TensorDescriptor.h @@ -1,16 +1,18 @@ #pragma once +#include "../THD.h" #include <TH/TH.h> #ifndef _THD_CORE struct _THDTensorDescriptor; -typedef struct _THDTensorDescriptor* THDTensorDescriptor; +typedef struct _THDTensorDescriptor THDTensorDescriptor; #endif -THDTensorDescriptor THDTensorDescriptor_newFromTHDoubleTensor(THDoubleTensor *tensor); -THDTensorDescriptor THDTensorDescriptor_newFromTHFloatTensor(THFloatTensor *tensor); -THDTensorDescriptor THDTensorDescriptor_newFromTHLongTensor(THFloatTensor *tensor); -THDTensorDescriptor THDTensorDescriptor_newFromTHIntTensor(THFloatTensor *tensor); 
-THDTensorDescriptor THDTensorDescriptor_newFromTHShortTensor(THFloatTensor *tensor);
-THDTensorDescriptor THDTensorDescriptor_newFromTHCharTensor(THFloatTensor *tensor);
-THDTensorDescriptor THDTensorDescriptor_newFromTHByteTensor(THFloatTensor *tensor);
+THD_API THDTensorDescriptor* THDTensorDescriptor_newFromTHDoubleTensor(THDoubleTensor *tensor);
+THD_API THDTensorDescriptor* THDTensorDescriptor_newFromTHFloatTensor(THFloatTensor *tensor);
+THD_API THDTensorDescriptor* THDTensorDescriptor_newFromTHLongTensor(THLongTensor *tensor);
+THD_API THDTensorDescriptor* THDTensorDescriptor_newFromTHIntTensor(THIntTensor *tensor);
+THD_API THDTensorDescriptor* THDTensorDescriptor_newFromTHShortTensor(THShortTensor *tensor);
+THD_API THDTensorDescriptor* THDTensorDescriptor_newFromTHCharTensor(THCharTensor *tensor);
+THD_API THDTensorDescriptor* THDTensorDescriptor_newFromTHByteTensor(THByteTensor *tensor);
+THD_API void THDTensorDescriptor_free(THDTensorDescriptor* desc);
diff --git a/torch/lib/THD/base/TensorDescriptor.hpp b/torch/lib/THD/base/TensorDescriptor.hpp
index 48df37a82d..9badaa67cb 100644
--- a/torch/lib/THD/base/TensorDescriptor.hpp
+++ b/torch/lib/THD/base/TensorDescriptor.hpp
@@ -1,7 +1,7 @@
 #pragma once
 
 #include "tensors/THTensor.hpp"
 
-using THDTensorDescriptor = Tensor*;
+using THDTensorDescriptor = Tensor;
 
 #include "TensorDescriptor.h"
diff --git a/torch/lib/THD/base/tensors/THTensor.hpp b/torch/lib/THD/base/tensors/THTensor.hpp
index 70ddce0c62..bbe13bbb88 100644
--- a/torch/lib/THD/base/tensors/THTensor.hpp
+++ b/torch/lib/THD/base/tensors/THTensor.hpp
@@ -40,6 +40,8 @@ public:
   virtual bool isContiguous() const override;
   virtual void* data() override;
   virtual const void* data() const override;
+  virtual THTensor& retain() override;
+  virtual THTensor& free() override;
 
   virtual THTensor& resize(const std::initializer_list<long>& new_size) override;
   virtual THTensor& resize(const std::vector<long>& new_size) override;
@@ -48,6 +50,7 @@ public:
   virtual THTensor& add(const Tensor& source, scalar_type scalar) override;
 
   virtual thd::TensorType type() const override;
+
 private:
   template<typename iterator>
   THTensor& resize(const iterator& begin, const iterator& end);
diff --git a/torch/lib/THD/base/tensors/generic/THTensor.cpp b/torch/lib/THD/base/tensors/generic/THTensor.cpp
index 556cd0ca80..b6c1691213 100644
--- a/torch/lib/THD/base/tensors/generic/THTensor.cpp
+++ b/torch/lib/THD/base/tensors/generic/THTensor.cpp
@@ -105,13 +105,24 @@ auto THTensor<real>::fill(scalar_type value) -> THTensor& {
   return *this;
 }
 
+template<>
+auto THTensor<real>::retain() -> THTensor& {
+  THTensor_(retain)(tensor);
+  return *this;
+}
+
+template<>
+auto THTensor<real>::free() -> THTensor& {
+  THTensor_(free)(tensor);
+  return *this;
+}
+
 #define non_const_cast(tensor) const_cast<THTensor&>(dynamic_cast<const THTensor&>(tensor))
 
 template<>
 auto THTensor<real>::add(const Tensor &source, scalar_type value) -> THTensor& {
   THTensor &source_t = non_const_cast(source);
   THTensor_(add)(tensor, source_t.tensor, value);
-
   return *this;
 }
 
diff --git a/torch/lib/THD/master_worker/worker/Worker.h b/torch/lib/THD/master_worker/worker/Worker.h
index c469266118..1a2f708ed4 100644
--- a/torch/lib/THD/master_worker/worker/Worker.h
+++ b/torch/lib/THD/master_worker/worker/Worker.h
@@ -1,5 +1,5 @@
 #pragma once
 
-#include "THD.h"
+#include "../../THD.h"
 
 THD_API void THDWorkerMain();
diff --git a/torch/lib/THD/process_group/Collectives.cpp b/torch/lib/THD/process_group/Collectives.cpp
index 67f11db429..cb668a9469 100644
--- a/torch/lib/THD/process_group/Collectives.cpp
+++ b/torch/lib/THD/process_group/Collectives.cpp
@@ -1,7 +1,7 @@
 #include "Collectives.hpp"
 #include "General.hpp"
 
-namespace thd {
+using namespace thd;
 
 int THDGetRank() {
   return dataChannel->getRank();
@@ -11,24 +11,22 @@ int THDGetNumProcesses() {
   return dataChannel->getNumProcesses();
 }
 
-void THDAllReduce(THDTensorDescriptor desc, THDReduceOp operation) {
+void THDAllReduce(THDTensorDescriptor* desc, THDReduceOp operation) {
   dataChannel->allReduce(*desc, operation);
 }
 
-void THDReduce(THDTensorDescriptor desc, THDReduceOp operation, int dst_rank) {
+void THDReduce(THDTensorDescriptor* desc, THDReduceOp operation, int dst_rank) {
   dataChannel->reduce(*desc, operation, dst_rank);
 }
 
-void THDBroadcast(THDTensorDescriptor desc, int src_rank) {
+void THDBroadcast(THDTensorDescriptor* desc, int src_rank) {
   dataChannel->broadcast(*desc, src_rank);
 }
 
-void THDSend(THDTensorDescriptor desc, int dst_rank) {
+void THDSend(THDTensorDescriptor* desc, int dst_rank) {
   dataChannel->send(*desc, dst_rank);
 }
 
-void THDReceive(THDTensorDescriptor desc, int src_rank) {
+void THDReceive(THDTensorDescriptor* desc, int src_rank) {
   dataChannel->receive(*desc, src_rank);
 }
-
-} // namespace thd
diff --git a/torch/lib/THD/process_group/Collectives.h b/torch/lib/THD/process_group/Collectives.h
index 10a4636eb6..b8168028ca 100644
--- a/torch/lib/THD/process_group/Collectives.h
+++ b/torch/lib/THD/process_group/Collectives.h
@@ -1,12 +1,12 @@
 #pragma once
 
 #include "../THD.h"
-#include "base/DataChannel.h"
+#include "../base/DataChannel.h"
 
 THD_API int THDGetRank();
 THD_API int THDGetNumProcesses();
 
-THD_API void THDAllReduce(THDTensorDescriptor desc, THDReduceOp operation);
-THD_API void THDReduce(THDTensorDescriptor desc, THDReduceOp operation, int dst_rank);
-THD_API void THDBroadcast(THDTensorDescriptor desc, int src_rank);
-THD_API void THDSend(THDTensorDescriptor desc, int dst_rank);
-THD_API void THDReceive(THDTensorDescriptor desc, int src_rank);
+THD_API void THDAllReduce(THDTensorDescriptor* desc, THDReduceOp operation);
+THD_API void THDReduce(THDTensorDescriptor* desc, THDReduceOp operation, int dst_rank);
+THD_API void THDBroadcast(THDTensorDescriptor* desc, int src_rank);
+THD_API void THDSend(THDTensorDescriptor* desc, int dst_rank);
+THD_API void THDReceive(THDTensorDescriptor* desc, int src_rank);
diff --git a/torch/lib/build_all.sh b/torch/lib/build_all.sh
index f5144da4e4..3699f35bef 100755
--- a/torch/lib/build_all.sh
+++ b/torch/lib/build_all.sh
@@ -68,6 +68,7 @@ mkdir -p tmp_install
 build TH
 build THS
 build THNN
+build THD
 
 if [[ "$1" == "--with-cuda" ]]; then
   build THC
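
For context, here is a minimal usage sketch of the torch.distributed Python API introduced by this change. The script is hypothetical: it assumes PyTorch was built with the WITH_DISTRIBUTED environment flag set (e.g. WITH_DISTRIBUTED=1 python setup.py install, per the setup.py hunk above) and that one process is launched per rank, e.g. via mpirun for the "mpi" backend:

    import torch
    import torch.distributed as dist

    # "tcp" and "mpi" are the only backends registered in
    # name2channel_type, so any other name will fail.
    dist.init_process_group("mpi")

    rank = dist.get_rank()
    nprocs = dist.get_num_processes()
    assert nprocs >= 2  # the point-to-point exchange below needs two ranks

    t = torch.ones(5)

    # Point-to-point: rank 0 sends its tensor, rank 1 receives into t.
    if rank == 0:
        dist.send(t, 1)
    elif rank == 1:
        dist.recv(t, 0)

    # Collectives modify the passed tensor in place.
    dist.broadcast(t, 0)                       # copy rank 0's t to every rank
    dist.all_reduce(t, op=dist.reduce_op.SUM)  # element-wise sum across ranks
    dist.reduce(t, 0, op=dist.reduce_op.MAX)   # element-wise max, result on rank 0

Note that the reduce_op constants are plain sentinel objects: the _dist_init_extension call at the bottom of torch/distributed/__init__.py maps each of them to the corresponding THDReduce* value in obj2reduceop, which is why all_reduce and reduce accept only those exact objects.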