Add environment collection script (#6635)

* Add environment collection script

  Fixes #6111. This should make it easier for users to report bugs by giving them a script to collect system environment information.

  Changes include:
  - Refactor the environment-collecting code out of utils.bottleneck
  - Add script (collect_env.py)
  - Clean up the issue template so that it suggests using the script and is more readable.

  Testing: added expect tests to go with 4 CI configurations. Whenever one of these configurations gets updated, the test will fail until the test also gets updated.

* Expect tests
* Update issue template
* Fix random space
* Minor improvement to issue template; fix expect test
* Skip expect test if BUILD_ENVIRONMENT not found; test fix; split off smoke/expect test
author: Richard Zou <zou3519@users.noreply.github.com> 2018-04-22 15:18:14 -0400
committer: Edward Z. Yang <ezyang@mit.edu> 2018-04-22 15:18:14 -0400
commit: 7a3c38ab595ea78f16935df788c4982a0ec56966 (patch)
tree: 95fcfe023746c9063e59551df071658d23a03c74 /torch
parent: 56567fe47dafcb77222c366cca0ac73f557bf4df (diff)
download: pytorch-7a3c38ab595ea78f16935df788c4982a0ec56966.tar.gz
pytorch-7a3c38ab595ea78f16935df788c4982a0ec56966.tar.bz2
pytorch-7a3c38ab595ea78f16935df788c4982a0ec56966.zip
2 files changed, 340 insertions, 61 deletions
diff --git a/torch/utils/bottleneck/__main__.py b/torch/utils/bottleneck/__main__.py
index 7a620ec92b..951e42a892 100644
--- a/torch/utils/bottleneck/__main__.py
+++ b/torch/utils/bottleneck/__main__.py
@@ -9,67 +9,16 @@ import contextlib
 
 import torch
 from torch.autograd import profiler
-
-PY3 = sys.version_info >= (3, 0)
-
-
-def run(command):
-    """Returns (return-code, stdout, stderr)"""
-    p = subprocess.Popen(command, stdout=subprocess.PIPE,
-                         stderr=subprocess.PIPE, shell=True)
-    output, err = p.communicate()
-    rc = p.returncode
-    if PY3:
-        output = output.decode("ascii")
-        err = err.decode("ascii")
-    return (rc, output, err)
+from torch.utils.collect_env import get_env_info
 
 
 def redirect_argv(new_argv):
     sys.argv[:] = new_argv[:]
 
 
-def check_running_cuda_version():
-    (rc, out, err) = run('nvcc --version')
-    if rc is not 0:
-        return None
-    m = re.search(r'V(.*)$', out)
-    assert m is not None
-    return m.group(1)
-
-
-def check_pip_packages():
-    # People generally have `pip` as `pip` or `pip3`
-    def run_with_pip(pip):
-        rc, out, _ = run(pip + ' list --format=legacy | grep torch')
-        if rc is 0:
-            return out
-        return None
-
-    if not PY3:
-        return 'pip', run_with_pip('pip')
-
-    # Try to figure out if the user is running pip or pip3.
-    out2 = run_with_pip('pip')
-    out3 = run_with_pip('pip3')
-
-    num_pips = len([x for x in [out2, out3] if x is not None])
-    if num_pips is 0:
-        return 'pip', out2
-
-    if num_pips == 1:
-        if out2 is not None:
-            return 'pip', out2
-        return 'pip3', out3
-
-    # num_pips is 2. Return pip3 by default b/c that most likely
-    # is the one associated with Python 3
-    return 'pip3', out3
-
-
-def compiled_with_cuda():
-    if torch.version.cuda:
-        return 'compiled w/ CUDA {}'.format(torch.version.cuda)
+def compiled_with_cuda(sysinfo):
+    if sysinfo.cuda_compiled_version:
+        return 'compiled w/ CUDA {}'.format(sysinfo.cuda_compiled_version)
     return 'not compiled w/ CUDA'
 
 
@@ -87,28 +36,31 @@ Running with Python {py_version} and {cuda_runtime}
 
 def run_env_analysis():
     print('Running environment analysis...')
+    info = get_env_info()
+
     result = []
 
     debug_str = ''
-    if torch.version.debug:
+    if info.is_debug_build:
         debug_str = ' DEBUG'
 
     cuda_avail = ''
-    if torch.cuda.is_available():
-        cuda = check_running_cuda_version()
+    if info.is_cuda_available:
+        cuda = info.cuda_runtime_version
         if cuda is not None:
             cuda_avail = 'CUDA ' + cuda
     else:
         cuda = 'CUDA unavailable'
 
-    pip_version, pip_list_output = check_pip_packages()
+    pip_version = info.pip_version
+    pip_list_output = info.pip_packages
     if pip_list_output is None:
         pip_list_output = 'Unable to fetch'
 
     result = {
         'debug_str': debug_str,
-        'pytorch_version': torch.__version__,
-        'cuda_compiled': compiled_with_cuda(),
+        'pytorch_version': info.torch_version,
+        'cuda_compiled': compiled_with_cuda(info),
         'py_version': '{}.{}'.format(sys.version_info[0], sys.version_info[1]),
         'cuda_runtime': cuda_avail,
         'pip_version': pip_version,
diff --git a/torch/utils/collect_env.py b/torch/utils/collect_env.py
new file mode 100644
index 0000000000..3c98a96316
--- /dev/null
+++ b/torch/utils/collect_env.py
@@ -0,0 +1,327 @@
+# This script outputs relevant system environment info
+# Run it with `python collect_env.py`.
+import re
+import subprocess
+import sys
+import time
+import datetime
+import os
+from collections import namedtuple
+
+import torch
+
+PY3 = sys.version_info >= (3, 0)
+
+# System Environment Information
+SystemEnv = namedtuple('SystemEnv', [
+    'torch_version',
+    'is_debug_build',
+    'cuda_compiled_version',
+    'gcc_version',
+    'cmake_version',
+    'os',
+    'python_version',
+    'is_cuda_available',
+    'cuda_runtime_version',
+    'nvidia_driver_version',
+    'nvidia_gpu_models',
+    'cudnn_version',
+    'pip_version',  # 'pip' or 'pip3'
+    'pip_packages',
+    'conda_packages',
+])
+
+
+def run(command):
+    """Returns (return-code, stdout, stderr)"""
+    p = subprocess.Popen(command, stdout=subprocess.PIPE,
+                         stderr=subprocess.PIPE, shell=True)
+    output, err = p.communicate()
+    rc = p.returncode
+    if PY3:
+        output = output.decode("ascii")
+        err = err.decode("ascii")
+    return rc, output.strip(), err.strip()
+
+
+def run_and_read_all(run_lambda, command):
+    """Runs command using run_lambda; reads and returns entire output if rc is 0"""
+    rc, out, _ = run_lambda(command)
+    if rc is not 0:
+        return None
+    return out
+
+
+def run_and_parse_first_match(run_lambda, command, regex):
+    """Runs command using run_lambda, returns the first regex match if it exists"""
+    rc, out, _ = run_lambda(command)
+    if rc is not 0:
+        return None
+    match = re.search(regex, out)
+    if match is None:
+        return None
+    return match.group(1)
+
+
+def get_conda_packages(run_lambda):
+    out = run_and_read_all(run_lambda, 'conda list | grep "torch\|soumith"')
+    if out is None:
+        return out
+    # Comment starting at beginning of line
+    comment_regex = re.compile(r'^#.*\n')
+    return re.sub(comment_regex, '', out)
+
+
+def get_gcc_version(run_lambda):
+    return run_and_parse_first_match(run_lambda, 'gcc --version', r'gcc (.*)')
+
+
+def get_cmake_version(run_lambda):
+    return run_and_parse_first_match(run_lambda, 'cmake --version', r'cmake (.*)')
+
+
+def get_nvidia_driver_version(run_lambda):
+    return run_and_parse_first_match(run_lambda, 'nvidia-smi', r'Driver Version: (.*?) ')
+
+
+def get_gpu_info(run_lambda):
+    uuid_regex = re.compile(' \(UUID: .+?\)')
+    rc, out, _ = run_lambda('nvidia-smi -L')
+    if rc is not 0:
+        return None
+    # Anonymize GPUs by removing their UUID
+    return re.sub(uuid_regex, '', out)
+
+
+def get_running_cuda_version(run_lambda):
+    return run_and_parse_first_match(run_lambda, 'nvcc --version', r'V(.*)$')
+
+
+def get_cudnn_version(run_lambda):
+    """This will return a list of libcudnn.so; it's hard to tell which one is being used"""
+    rc, out, _ = run_lambda('find /usr/local /usr/lib -type f -name "libcudnn*" 2> /dev/null')
+    # find will return 1 if there are permission errors or if not found
+    if len(out) == 0:
+        return None
+    if rc != 1 and rc != 0:
+        return None
+    # Alphabetize the result because the order is non-deterministic otherwise
+    result = '\n'.join(sorted(out.split('\n')))
+    return 'Probably one of the following:\n{}'.format(result)
+
+
+def get_platform():
+    if sys.platform.startswith('linux'):
+        return 'linux'
+    elif sys.platform.startswith('win32'):
+        return 'win32'
+    elif sys.platform.startswith('cygwin'):
+        return 'cygwin'
+    elif sys.platform.startswith('darwin'):
+        return 'darwin'
+    else:
+        return sys.platform
+
+
+def get_mac_version(run_lambda):
+    return run_and_parse_first_match(run_lambda, 'sw_vers -productVersion', r'(.*)')
+
+
+def get_windows_version(run_lambda):
+    return run_and_read_all(run_lambda, 'wmic os get Caption | findstr /v Caption')
+
+
+def get_lsb_version(run_lambda):
+    return run_and_parse_first_match(run_lambda, 'lsb_release -a', r'Description:\t(.*)')
+
+
+def check_release_file(run_lambda):
+    return run_and_parse_first_match(run_lambda, 'cat /etc/*-release',
+                                     r'PRETTY_NAME="(.*)"')
+
+
+def get_os(run_lambda):
+    platform = get_platform()
+
+    if platform is 'win32' or platform is 'cygwin':
+        return get_windows_version(run_lambda)
+
+    if platform == 'darwin':
+        version = get_mac_version(run_lambda)
+        if version is None:
+            return None
+        return 'Mac OSX {}'.format(version)
+
+    if platform == 'linux':
+        # Ubuntu/Debian based
+        desc = get_lsb_version(run_lambda)
+        if desc is not None:
+            return desc
+
+        # Try reading /etc/*-release
+        desc = check_release_file(run_lambda)
+        if desc is not None:
+            return desc
+
+        return platform
+
+    # Unknown platform
+    return platform
+
+
+def get_pip_packages(run_lambda):
+    # People generally have `pip` as `pip` or `pip3`
+    def run_with_pip(pip):
+        return run_and_read_all(run_lambda, pip + ' list --format=legacy | grep "torch\|numpy"')
+
+    if not PY3:
+        return 'pip', run_with_pip('pip')
+
+    # Try to figure out if the user is running pip or pip3.
+    out2 = run_with_pip('pip')
+    out3 = run_with_pip('pip3')
+
+    num_pips = len([x for x in [out2, out3] if x is not None])
+    if num_pips is 0:
+        return 'pip', out2
+
+    if num_pips == 1:
+        if out2 is not None:
+            return 'pip', out2
+        return 'pip3', out3
+
+    # num_pips is 2. Return pip3 by default b/c that most likely
+    # is the one associated with Python 3
+    return 'pip3', out3
+
+
+def get_env_info():
+    run_lambda = run
+    pip_version, pip_list_output = get_pip_packages(run_lambda)
+
+    return SystemEnv(
+        torch_version=torch.__version__,
+        is_debug_build=torch.version.debug,
+        python_version='{}.{}'.format(sys.version_info[0], sys.version_info[1]),
+        is_cuda_available=torch.cuda.is_available(),
+        cuda_compiled_version=torch.version.cuda,
+        cuda_runtime_version=get_running_cuda_version(run_lambda),
+        nvidia_gpu_models=get_gpu_info(run_lambda),
+        nvidia_driver_version=get_nvidia_driver_version(run_lambda),
+        cudnn_version=get_cudnn_version(run_lambda),
+        pip_version=pip_version,
+        pip_packages=pip_list_output,
+        conda_packages=get_conda_packages(run_lambda),
+        os=get_os(run_lambda),
+        gcc_version=get_gcc_version(run_lambda),
+        cmake_version=get_cmake_version(run_lambda),
+    )
+
+env_info_fmt = """
+PyTorch version: {torch_version}
+Is debug build: {is_debug_build}
+CUDA used to build PyTorch: {cuda_compiled_version}
+
+OS: {os}
+GCC version: {gcc_version}
+CMake version: {cmake_version}
+
+Python version: {python_version}
+Is CUDA available: {is_cuda_available}
+CUDA runtime version: {cuda_runtime_version}
+GPU models and configuration: {nvidia_gpu_models}
+Nvidia driver version: {nvidia_driver_version}
+cuDNN version: {cudnn_version}
+
+Versions of relevant libraries:
+{pip_packages}
+{conda_packages}
+""".strip()
+
+
+def pretty_str(envinfo):
+    def replace_nones(dct, replacement='Could not collect'):
+        for key in dct.keys():
+            if dct[key] is not None:
+                continue
+            dct[key] = replacement
+        return dct
+
+    def replace_bools(dct, true='Yes', false='No'):
+        for key in dct.keys():
+            if dct[key] is True:
+                dct[key] = true
+            elif dct[key] is False:
+                dct[key] = false
+        return dct
+
+    def prepend(text, tag='[prepend]'):
+        lines = text.split('\n')
+        updated_lines = [tag + line for line in lines]
+        return '\n'.join(updated_lines)
+
+    def replace_if_empty(text, replacement='No relevant packages'):
+        if text is not None and len(text) == 0:
+            return replacement
+        return text
+
+    def maybe_start_on_next_line(string):
+        # If `string` is multiline, prepend a \n to it.
+        if string is not None and len(string.split('\n')) > 1:
+            return '\n{}\n'.format(string)
+        return string
+
+    mutable_dict = envinfo._asdict()
+
+    # If nvidia_gpu_models is multiline, start on the next line
+    mutable_dict['nvidia_gpu_models'] = \
+        maybe_start_on_next_line(envinfo.nvidia_gpu_models)
+
+    # If the machine doesn't have CUDA, report some fields as 'No CUDA'
+    dynamic_cuda_fields = [
+        'cuda_runtime_version',
+        'nvidia_gpu_models',
+        'nvidia_driver_version',
+    ]
+    all_cuda_fields = dynamic_cuda_fields + ['cudnn_version']
+    all_dynamic_cuda_fields_missing = all(
+        mutable_dict[field] is None for field in dynamic_cuda_fields)
+    if not torch.cuda.is_available() and all_dynamic_cuda_fields_missing:
+        for field in all_cuda_fields:
+            mutable_dict[field] = 'No CUDA'
+        if envinfo.cuda_compiled_version is None:
+            mutable_dict['cuda_compiled_version'] = 'None'
+
+    # Replace True with Yes, False with No
+    mutable_dict = replace_bools(mutable_dict)
+
+    # Replace all None objects with 'Could not collect'
+    mutable_dict = replace_nones(mutable_dict)
+
+    # If either of these are '', replace with 'No relevant packages'
+    mutable_dict['pip_packages'] = replace_if_empty(mutable_dict['pip_packages'])
+    mutable_dict['conda_packages'] = replace_if_empty(mutable_dict['conda_packages'])
+
+    # Tag conda and pip packages with a prefix
+    # If they were previously None, they'll show up as ie '[conda] Could not collect'
+    if mutable_dict['pip_packages']:
+        mutable_dict['pip_packages'] = prepend(mutable_dict['pip_packages'],
+                                               '[{}] '.format(envinfo.pip_version))
+    if mutable_dict['conda_packages']:
+        mutable_dict['conda_packages'] = prepend(mutable_dict['conda_packages'],
+                                                 '[conda] ')
+    return env_info_fmt.format(**mutable_dict)
+
+
+def get_pretty_env_info():
+    return pretty_str(get_env_info())
+
+
+def main():
+    print("Collecting environment information...")
+    output = get_pretty_env_info()
+    print(output)
+
+
+if __name__ == '__main__':
+    main()
author	Richard Zou <zou3519@users.noreply.github.com>	2018-04-22 15:18:14 -0400
committer	Edward Z. Yang <ezyang@mit.edu>	2018-04-22 15:18:14 -0400
commit	7a3c38ab595ea78f16935df788c4982a0ec56966 (patch)
tree	95fcfe023746c9063e59551df071658d23a03c74 /torch
parent	56567fe47dafcb77222c366cca0ac73f557bf4df (diff)
download	pytorch-7a3c38ab595ea78f16935df788c4982a0ec56966.tar.gz pytorch-7a3c38ab595ea78f16935df788c4982a0ec56966.tar.bz2 pytorch-7a3c38ab595ea78f16935df788c4982a0ec56966.zip