author     Tongzhou Wang <SsnL@users.noreply.github.com>  2018-01-27 04:54:25 -0500
committer  Soumith Chintala <soumith@gmail.com>           2018-01-27 04:54:25 -0500
commit     6420c6b224e943c40952852069e6a006fe88a338 (patch)
tree       633cde3c677ca8026abc1fe0aa63aad6d537e0ff
parent     fe9121ff59484c529e85fa139fc7b945722a1941 (diff)
Improve `torch.cuda.empty_cache` documentation (#4879)
* add doc that empty_cache won't increase the amount of memory available
* typo
-rw-r--r--  docs/source/notes/cuda.rst |  9
-rw-r--r--  torch/cuda/__init__.py     | 60
2 files changed, 46 insertions(+), 23 deletions(-)
diff --git a/docs/source/notes/cuda.rst b/docs/source/notes/cuda.rst
index f3702986f6..a435edfdbd 100644
--- a/docs/source/notes/cuda.rst
+++ b/docs/source/notes/cuda.rst
@@ -92,6 +92,8 @@ ensure proper synchronization.
.. _CUDA stream: http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#streams
+.. _cuda-memory-management:
+
Memory management
-----------------
@@ -102,9 +104,10 @@ unused memory managed by the allocator will still show as if used in
:meth:`~torch.cuda.max_memory_allocated` to monitor memory occupied by
tensors, and use :meth:`~torch.cuda.memory_cached` and
:meth:`~torch.cuda.max_memory_cached` to monitor memory managed by the caching
-allocator. Calling :meth:`~torch.cuda.empty_cache` can release all unused cached
-memory from PyTorch so that those can be used by other GPU applications.
-
+allocator. Calling :meth:`~torch.cuda.empty_cache` releases all **unused**
+cached memory from PyTorch so that it can be used by other GPU applications.
+However, GPU memory occupied by tensors will not be freed, so this cannot
+increase the amount of GPU memory available to PyTorch.
Best practices
--------------
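A minimal sketch of how these counters behave around ``empty_cache`` (assuming a CUDA-capable machine; exact byte counts depend on the allocator):

    import torch

    x = torch.randn(1024, 1024).cuda()    # served by the caching allocator
    print(torch.cuda.memory_allocated())  # bytes occupied by tensors (> 0)
    print(torch.cuda.memory_cached())     # bytes held by the allocator (>= allocated)

    del x
    print(torch.cuda.memory_allocated())  # drops back: the tensor is gone...
    print(torch.cuda.memory_cached())     # ...but the cache still holds the block

    torch.cuda.empty_cache()              # hand unused cache back to the driver;
    print(torch.cuda.memory_cached())     # the freed memory now shows in nvidia-smi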
diff --git a/torch/cuda/__init__.py b/torch/cuda/__init__.py
index 411a3e25b4..a5e36fc857 100644
--- a/torch/cuda/__init__.py
+++ b/torch/cuda/__init__.py
@@ -1,4 +1,4 @@
-"""
+r"""
This package adds support for CUDA tensor types, which implement the same
functions as CPU tensors but utilize GPUs for computation.
@@ -42,7 +42,7 @@ def find_cuda_windows_lib():
def is_available():
- """Returns a bool indicating if CUDA is currently available."""
+ r"""Returns a bool indicating if CUDA is currently available."""
if (not hasattr(torch._C, '_cuda_isDriverSufficient') or
not torch._C._cuda_isDriverSufficient()):
return False
@@ -123,7 +123,7 @@ class DeferredCudaCallError(Exception):
def init():
- """Initialize PyTorch's CUDA state. You may need to call
+ r"""Initialize PyTorch's CUDA state. You may need to call
this explicitly if you are interacting with PyTorch via
its C API, as Python bindings for CUDA functionality will not
be available until this initialization takes place. Ordinary users
@@ -201,7 +201,7 @@ def check_error(res):
class device(object):
- """Context-manager that changes the selected device.
+ r"""Context-manager that changes the selected device.
Arguments:
idx (int): device index to select. It's a no-op if this argument
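As a usage sketch (this assumes the machine exposes at least two GPUs):

    import torch

    with torch.cuda.device(1):
        x = torch.randn(10).cuda()  # allocated on GPU 1, the selected device
    assert x.get_device() == 1      # the previous device is restored on exit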
@@ -227,7 +227,7 @@ class device(object):
class device_of(device):
- """Context-manager that changes the current device to that of given object.
+ r"""Context-manager that changes the current device to that of given object.
You can use both tensors and storages as arguments. If a given object is
not allocated on a GPU, this is a no-op.
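For example, to allocate a new tensor on the same GPU as an existing one (the two-GPU setup is an assumption):

    import torch

    y = torch.randn(10).cuda(1)     # an existing tensor on GPU 1
    with torch.cuda.device_of(y):
        z = torch.randn(10).cuda()  # the current device is y's device here
    assert z.get_device() == y.get_device()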
@@ -242,7 +242,7 @@ class device_of(device):
def set_device(device):
- """Sets the current device.
+ r"""Sets the current device.
Usage of this function is discouraged in favor of :any:`device`. In most
cases it's better to use ``CUDA_VISIBLE_DEVICES`` environmental variable.
@@ -256,7 +256,7 @@ def set_device(device):
def get_device_name(device):
- """Gets the name of a device.
+ r"""Gets the name of a device.
Arguments:
device (int): device for which to return the name. This function is a
@@ -267,7 +267,7 @@ def get_device_name(device):
def get_device_capability(device):
- """Gets the cuda capability of a device.
+ r"""Gets the cuda capability of a device.
Arguments:
device (int): device for which to return the capability. This function is a
@@ -281,7 +281,7 @@ def get_device_capability(device):
@contextlib.contextmanager
def stream(stream):
- """Context-manager that selects a given stream.
+ r"""Context-manager that selects a given stream.
All CUDA kernels queued within its context will be enqueued on a selected
stream.
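A short sketch of selecting a stream (the matrix multiply is only a stand-in kernel):

    import torch

    s = torch.cuda.Stream()           # a new stream on the current device
    x = torch.randn(512, 512).cuda()
    with torch.cuda.stream(s):
        y = x.mm(x)                   # enqueued on s, not the default stream
    torch.cuda.synchronize()          # wait for all streams before using y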
@@ -315,37 +315,43 @@ def device_count():
def current_device():
- """Returns the index of a currently selected device."""
+ r"""Returns the index of a currently selected device."""
_lazy_init()
return torch._C._cuda_getDevice()
def synchronize():
- """Waits for all kernels in all streams on current device to complete."""
+ r"""Waits for all kernels in all streams on current device to complete."""
_lazy_init()
return torch._C._cuda_synchronize()
def current_stream():
- """Returns a currently selected :class:`Stream`."""
+ r"""Returns a currently selected :class:`Stream`."""
_lazy_init()
return torch.cuda.Stream(_cdata=torch._C._cuda_getCurrentStream())
def current_blas_handle():
- """Returns cublasHandle_t pointer to current cuBLAS handle"""
+ r"""Returns cublasHandle_t pointer to current cuBLAS handle"""
return torch._C._cuda_getCurrentBlasHandle()
def empty_cache():
- """Releases all unoccupied cached memory currently held by the caching
+ r"""Releases all unoccupied cached memory currently held by the caching
allocator so that it can be used by other GPU applications and is visible in
- `nvidia-smi`."""
+ `nvidia-smi`.
+
+ .. note::
+ :meth:`~torch.cuda.empty_cache` doesn't increase the amount of GPU
+ memory available for PyTorch. See :ref:`cuda-memory-management` for
+ more details about GPU memory management.
+ """
return torch._C._cuda_emptyCache()
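To illustrate the note above with a sketch (sizes are arbitrary): a freed tensor's block returns to the cache and is reused by PyTorch without any driver call, so releasing it via ``empty_cache`` changes what other applications and `nvidia-smi` see, not what PyTorch itself can allocate.

    import torch

    x = torch.randn(1024, 1024).cuda()
    del x                               # block returns to PyTorch's cache
    y = torch.randn(1024, 1024).cuda()  # reuses the cached block directly
    del y

    torch.cuda.empty_cache()            # block goes back to the driver; it now
                                        # shows as free in nvidia-smi, but the
                                        # total PyTorch can obtain is unchanged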
def memory_allocated(device=None):
- """Returns the current GPU memory usage by tensors in bytes for a given
+ r"""Returns the current GPU memory usage by tensors in bytes for a given
device.
Arguments:
@@ -356,14 +362,16 @@ def memory_allocated(device=None):
.. note:: This is likely less than the amount shown in `nvidia-smi` since
some unused memory can be held by the caching allocator and some context
- needs to be created on GPU. """
+ needs to be created on GPU. See :ref:`cuda-memory-management` for more
+ details about GPU memory management.
+ """
if device is None:
device = current_device()
return torch._C._cuda_memoryAllocated(device)
def max_memory_allocated(device=None):
- """Returns the maxium GPU memory usage by tensors in bytes for a given
+ r"""Returns the maxium GPU memory usage by tensors in bytes for a given
device.
Arguments:
@@ -371,6 +379,10 @@ def max_memory_allocated(device=None):
current device, given by
:meth:`~torch.cuda.current_device`, if
:attr:`device` is ``None`` (default).
+
+ .. note::
+ See :ref:`cuda-memory-management` for more details about GPU memory
+ management.
"""
if device is None:
device = current_device()
@@ -378,7 +390,7 @@ def max_memory_allocated(device=None):
def memory_cached(device=None):
- """Returns the current GPU memory managed by the caching allocator in bytes
+ r"""Returns the current GPU memory managed by the caching allocator in bytes
for a given device.
Arguments:
@@ -386,6 +398,10 @@ def memory_cached(device=None):
current device, given by
:meth:`~torch.cuda.current_device`, if
:attr:`device` is ``None`` (default).
+
+ .. note::
+ See :ref:`cuda-memory-management` for more details about GPU memory
+ management.
"""
if device is None:
device = current_device()
@@ -393,7 +409,7 @@ def memory_cached(device=None):
def max_memory_cached(device=None):
- """Returns the maximum GPU memory managed by the caching allocator in bytes
+ r"""Returns the maximum GPU memory managed by the caching allocator in bytes
for a given device.
Arguments:
@@ -401,6 +417,10 @@ def max_memory_cached(device=None):
current device, given by
:meth:`~torch.cuda.current_device`, if
:attr:`device` is ``None`` (default).
+
+ .. note::
+ See :ref:`cuda-memory-management` for more details about GPU memory
+ management.
"""
if device is None:
device = current_device()