author     Tongzhou Wang <SsnL@users.noreply.github.com>  2018-01-27 04:54:25 -0500
committer  Soumith Chintala <soumith@gmail.com>           2018-01-27 04:54:25 -0500
commit     6420c6b224e943c40952852069e6a006fe88a338 (patch)
tree       633cde3c677ca8026abc1fe0aa63aad6d537e0ff
parent     fe9121ff59484c529e85fa139fc7b945722a1941 (diff)
Improve `torch.cuda.empty_cache` documentation (#4879)
* add doc that empty_cache won't increase the amount of memory available
* typo
-rw-r--r--  docs/source/notes/cuda.rst |  9
-rw-r--r--  torch/cuda/__init__.py     | 60
2 files changed, 46 insertions(+), 23 deletions(-)
diff --git a/docs/source/notes/cuda.rst b/docs/source/notes/cuda.rst
index f3702986f6..a435edfdbd 100644
--- a/docs/source/notes/cuda.rst
+++ b/docs/source/notes/cuda.rst
@@ -92,6 +92,8 @@ ensure proper synchronization.
.. _CUDA stream: http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#streams
+.. _cuda-memory-management:
+
Memory management
-----------------
@@ -102,9 +104,10 @@ unused memory managed by the allocator will still show as if used in
:meth:`~torch.cuda.max_memory_allocated` to monitor memory occupied by
tensors, and use :meth:`~torch.cuda.memory_cached` and
:meth:`~torch.cuda.max_memory_cached` to monitor memory managed by the caching
-allocator. Calling :meth:`~torch.cuda.empty_cache` can release all unused cached
-memory from PyTorch so that those can be used by other GPU applications.
-
+allocator. Calling :meth:`~torch.cuda.empty_cache` releases all **unused**
+cached memory from PyTorch so that it can be used by other GPU applications.
+However, GPU memory occupied by tensors will not be freed, so this cannot
+increase the amount of GPU memory available to PyTorch.
Best practices
--------------
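A minimal sketch of how these counters behave around ``empty_cache`` (assuming a CUDA-capable machine; exact byte counts depend on the allocator):

    import torch

    x = torch.randn(1024, 1024).cuda()    # served by the caching allocator
    print(torch.cuda.memory_allocated())  # bytes occupied by tensors (> 0)
    print(torch.cuda.memory_cached())     # bytes held by the allocator (>= allocated)

    del x
    print(torch.cuda.memory_allocated())  # drops back: the tensor is gone...
    print(torch.cuda.memory_cached())     # ...but the cache still holds the block

    torch.cuda.empty_cache()              # hand unused cache back to the driver;
    print(torch.cuda.memory_cached())     # the freed memory now shows in nvidia-smi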
diff --git a/torch/cuda/__init__.py b/torch/cuda/__init__.py
index 411a3e25b4..a5e36fc857 100644
--- a/torch/cuda/__init__.py
+++ b/torch/cuda/__init__.py
@@ -1,4 +1,4 @@
-"""
+r"""
This package adds support for CUDA tensor types, which implement the same
functions as CPU tensors but utilize GPUs for computation.
@@ -42,7 +42,7 @@ def find_cuda_windows_lib():
def is_available():
- """Returns a bool indicating if CUDA is currently available."""
+ r"""Returns a bool indicating if CUDA is currently available."""
if (not hasattr(torch._C, '_cuda_isDriverSufficient') or
not torch._C._cuda_isDriverSufficient()):
return False
@@ -123,7 +123,7 @@ class DeferredCudaCallError(Exception):
def init():
- """Initialize PyTorch's CUDA state. You may need to call
+ r"""Initialize PyTorch's CUDA state. You may need to call
this explicitly if you are interacting with PyTorch via
its C API, as Python bindings for CUDA functionality will not
be available until this initialization takes place. Ordinary users
@@ -201,7 +201,7 @@ def check_error(res):
class device(object):
- """Context-manager that changes the selected device.
+ r"""Context-manager that changes the selected device.
Arguments:
idx (int): device index to select. It's a no-op if this argument
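As a usage sketch (this assumes the machine exposes at least two GPUs):

    import torch

    with torch.cuda.device(1):
        x = torch.randn(10).cuda()  # allocated on GPU 1, the selected device
    assert x.get_device() == 1      # the previous device is restored on exit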
@@ -227,7 +227,7 @@ class device(object):
class device_of(device):
- """Context-manager that changes the current device to that of given object.
+ r"""Context-manager that changes the current device to that of given object.
You can use both tensors and storages as arguments. If a given object is
not allocated on a GPU, this is a no-op.
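For example, to allocate a new tensor on the same GPU as an existing one (the two-GPU setup is an assumption):

    import torch

    y = torch.randn(10).cuda(1)     # an existing tensor on GPU 1
    with torch.cuda.device_of(y):
        z = torch.randn(10).cuda()  # the current device is y's device here
    assert z.get_device() == y.get_device()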
@@ -242,7 +242,7 @@ class device_of(device):
def set_device(device):
- """Sets the current device.
+ r"""Sets the current device.
Usage of this function is discouraged in favor of :any:`device`. In most
cases it's better to use ``CUDA_VISIBLE_DEVICES`` environmental variable.
@@ -256,7 +256,7 @@ def set_device(device):
def get_device_name(device):
- """Gets the name of a device.
+ r"""Gets the name of a device.
Arguments:
device (int): device for which to return the name. This function is a
@@ -267,7 +267,7 @@ def get_device_name(device):
def get_device_capability(device):
- """Gets the cuda capability of a device.
+ r"""Gets the cuda capability of a device.
Arguments:
device (int): device for which to return the capability. This function is a
@@ -281,7 +281,7 @@ def get_device_capability(device):
@contextlib.contextmanager
def stream(stream):
- """Context-manager that selects a given stream.
+ r"""Context-manager that selects a given stream.
All CUDA kernels queued within its context will be enqueued on a selected
stream.
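A short sketch of selecting a stream (the matrix multiply is only a stand-in kernel):

    import torch

    s = torch.cuda.Stream()           # a new stream on the current device
    x = torch.randn(512, 512).cuda()
    with torch.cuda.stream(s):
        y = x.mm(x)                   # enqueued on s, not the default stream
    torch.cuda.synchronize()          # wait for all streams before using y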
@@ -315,37 +315,43 @@ def device_count():
def current_device():
- """Returns the index of a currently selected device."""
+ r"""Returns the index of a currently selected device."""
_lazy_init()
return torch._C._cuda_getDevice()
def synchronize():
- """Waits for all kernels in all streams on current device to complete."""
+ r"""Waits for all kernels in all streams on current device to complete."""
_lazy_init()
return torch._C._cuda_synchronize()
def current_stream():
- """Returns a currently selected :class:`Stream`."""
+ r"""Returns a currently selected :class:`Stream`."""
_lazy_init()
return torch.cuda.Stream(_cdata=torch._C._cuda_getCurrentStream())
def current_blas_handle():
- """Returns cublasHandle_t pointer to current cuBLAS handle"""
+ r"""Returns cublasHandle_t pointer to current cuBLAS handle"""
return torch._C._cuda_getCurrentBlasHandle()
def empty_cache():
- """Releases all unoccupied cached memory currently held by the caching
+ r"""Releases all unoccupied cached memory currently held by the caching
allocator so that it can be used by other GPU applications and is visible in
- `nvidia-smi`."""
+ `nvidia-smi`.
+
+ .. note::
+ :meth:`~torch.cuda.empty_cache` doesn't increase the amount of GPU
+ memory available for PyTorch. See :ref:`cuda-memory-management` for
+ more details about GPU memory management.
+ """
return torch._C._cuda_emptyCache()
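To illustrate the note above with a sketch (sizes are arbitrary): a freed tensor's block returns to the cache and is reused by PyTorch without any driver call, so releasing it via ``empty_cache`` changes what other applications and `nvidia-smi` see, not what PyTorch itself can allocate.

    import torch

    x = torch.randn(1024, 1024).cuda()
    del x                               # block returns to PyTorch's cache
    y = torch.randn(1024, 1024).cuda()  # reuses the cached block directly
    del y

    torch.cuda.empty_cache()            # block goes back to the driver; it now
                                        # shows as free in nvidia-smi, but the
                                        # total PyTorch can obtain is unchanged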
def memory_allocated(device=None):
- """Returns the current GPU memory usage by tensors in bytes for a given
+ r"""Returns the current GPU memory usage by tensors in bytes for a given
device.
Arguments:
@@ -356,14 +362,16 @@ def memory_allocated(device=None):
.. note:: This is likely less than the amount shown in `nvidia-smi` since
some unused memory can be held by the caching allocator and some context
- needs to be created on GPU. """
+ needs to be created on GPU. See :ref:`cuda-memory-management` for more
+ details about GPU memory management.
+ """
if device is None:
device = current_device()
return torch._C._cuda_memoryAllocated(device)
def max_memory_allocated(device=None):
- """Returns the maxium GPU memory usage by tensors in bytes for a given
+ r"""Returns the maxium GPU memory usage by tensors in bytes for a given
device.
Arguments:
@@ -371,6 +379,10 @@ def max_memory_allocated(device=None):
current device, given by
:meth:`~torch.cuda.current_device`, if
:attr:`device` is ``None`` (default).
+
+ .. note::
+ See :ref:`cuda-memory-management` for more details about GPU memory
+ management.
"""
if device is None:
device = current_device()
@@ -378,7 +390,7 @@ def max_memory_allocated(device=None):
def memory_cached(device=None):
- """Returns the current GPU memory managed by the caching allocator in bytes
+ r"""Returns the current GPU memory managed by the caching allocator in bytes
for a given device.
Arguments:
@@ -386,6 +398,10 @@ def memory_cached(device=None):
current device, given by
:meth:`~torch.cuda.current_device`, if
:attr:`device` is ``None`` (default).
+
+ .. note::
+ See :ref:`cuda-memory-management` for more details about GPU memory
+ management.
"""
if device is None:
device = current_device()
@@ -393,7 +409,7 @@ def memory_cached(device=None):
def max_memory_cached(device=None):
- """Returns the maximum GPU memory managed by the caching allocator in bytes
+ r"""Returns the maximum GPU memory managed by the caching allocator in bytes
for a given device.
Arguments:
@@ -401,6 +417,10 @@ def max_memory_cached(device=None):
current device, given by
:meth:`~torch.cuda.current_device`, if
:attr:`device` is ``None`` (default).
+
+ .. note::
+ See :ref:`cuda-memory-management` for more details about GPU memory
+ management.
"""
if device is None:
device = current_device()