summaryrefslogtreecommitdiff
path: root/torch
diff options
context:
space:
mode:
authorRichard Zou <zou3519@users.noreply.github.com>2018-04-22 15:18:14 -0400
committerEdward Z. Yang <ezyang@mit.edu>2018-04-22 15:18:14 -0400
commit7a3c38ab595ea78f16935df788c4982a0ec56966 (patch)
tree95fcfe023746c9063e59551df071658d23a03c74 /torch
parent56567fe47dafcb77222c366cca0ac73f557bf4df (diff)
downloadpytorch-7a3c38ab595ea78f16935df788c4982a0ec56966.tar.gz
pytorch-7a3c38ab595ea78f16935df788c4982a0ec56966.tar.bz2
pytorch-7a3c38ab595ea78f16935df788c4982a0ec56966.zip
Add environment collection script (#6635)
* Add environment collection script Fixes #6111. This should make it easier for users to report bugs by giving them a script to collect system environment information. Changes include: - Refactor out the environment collecting code from utils.bottleneck - Add script (collect_env.py) - Cleaned up the issues template so that it suggests using the script and is more readable. Testing: added expect tests to go with 4 CI configurations. Whenever one of these configurations gets updated, the test will fail until the test also gets updated. * Expect tests * Update issue template * Fix random space * Minor improvement to issue template; fix expect test * Skip expect test if BUILD_ENVIRONMENT not found; test fix; split off smoke/expect test
Diffstat (limited to 'torch')
-rw-r--r--torch/utils/bottleneck/__main__.py74
-rw-r--r--torch/utils/collect_env.py327
2 files changed, 340 insertions, 61 deletions
diff --git a/torch/utils/bottleneck/__main__.py b/torch/utils/bottleneck/__main__.py
index 7a620ec92b..951e42a892 100644
--- a/torch/utils/bottleneck/__main__.py
+++ b/torch/utils/bottleneck/__main__.py
@@ -9,67 +9,16 @@ import contextlib
import torch
from torch.autograd import profiler
-
-PY3 = sys.version_info >= (3, 0)
-
-
-def run(command):
- """Returns (return-code, stdout, stderr)"""
- p = subprocess.Popen(command, stdout=subprocess.PIPE,
- stderr=subprocess.PIPE, shell=True)
- output, err = p.communicate()
- rc = p.returncode
- if PY3:
- output = output.decode("ascii")
- err = err.decode("ascii")
- return (rc, output, err)
+from torch.utils.collect_env import get_env_info
def redirect_argv(new_argv):
sys.argv[:] = new_argv[:]
-def check_running_cuda_version():
- (rc, out, err) = run('nvcc --version')
- if rc is not 0:
- return None
- m = re.search(r'V(.*)$', out)
- assert m is not None
- return m.group(1)
-
-
-def check_pip_packages():
- # People generally have `pip` as `pip` or `pip3`
- def run_with_pip(pip):
- rc, out, _ = run(pip + ' list --format=legacy | grep torch')
- if rc is 0:
- return out
- return None
-
- if not PY3:
- return 'pip', run_with_pip('pip')
-
- # Try to figure out if the user is running pip or pip3.
- out2 = run_with_pip('pip')
- out3 = run_with_pip('pip3')
-
- num_pips = len([x for x in [out2, out3] if x is not None])
- if num_pips is 0:
- return 'pip', out2
-
- if num_pips == 1:
- if out2 is not None:
- return 'pip', out2
- return 'pip3', out3
-
- # num_pips is 2. Return pip3 by default b/c that most likely
- # is the one associated with Python 3
- return 'pip3', out3
-
-
-def compiled_with_cuda():
- if torch.version.cuda:
- return 'compiled w/ CUDA {}'.format(torch.version.cuda)
+def compiled_with_cuda(sysinfo):
+ if sysinfo.cuda_compiled_version:
+ return 'compiled w/ CUDA {}'.format(sysinfo.cuda_compiled_version)
return 'not compiled w/ CUDA'
@@ -87,28 +36,31 @@ Running with Python {py_version} and {cuda_runtime}
def run_env_analysis():
print('Running environment analysis...')
+ info = get_env_info()
+
result = []
debug_str = ''
- if torch.version.debug:
+ if info.is_debug_build:
debug_str = ' DEBUG'
cuda_avail = ''
- if torch.cuda.is_available():
- cuda = check_running_cuda_version()
+ if info.is_cuda_available:
+ cuda = info.cuda_runtime_version
if cuda is not None:
cuda_avail = 'CUDA ' + cuda
else:
cuda = 'CUDA unavailable'
- pip_version, pip_list_output = check_pip_packages()
+ pip_version = info.pip_version
+ pip_list_output = info.pip_packages
if pip_list_output is None:
pip_list_output = 'Unable to fetch'
result = {
'debug_str': debug_str,
- 'pytorch_version': torch.__version__,
- 'cuda_compiled': compiled_with_cuda(),
+ 'pytorch_version': info.torch_version,
+ 'cuda_compiled': compiled_with_cuda(info),
'py_version': '{}.{}'.format(sys.version_info[0], sys.version_info[1]),
'cuda_runtime': cuda_avail,
'pip_version': pip_version,
diff --git a/torch/utils/collect_env.py b/torch/utils/collect_env.py
new file mode 100644
index 0000000000..3c98a96316
--- /dev/null
+++ b/torch/utils/collect_env.py
@@ -0,0 +1,327 @@
+# This script outputs relevant system environment info
+# Run it with `python collect_env.py`.
+import re
+import subprocess
+import sys
+import time
+import datetime
+import os
+from collections import namedtuple
+
+import torch
+
+PY3 = sys.version_info >= (3, 0)
+
+# System Environment Information
+SystemEnv = namedtuple('SystemEnv', [
+ 'torch_version',
+ 'is_debug_build',
+ 'cuda_compiled_version',
+ 'gcc_version',
+ 'cmake_version',
+ 'os',
+ 'python_version',
+ 'is_cuda_available',
+ 'cuda_runtime_version',
+ 'nvidia_driver_version',
+ 'nvidia_gpu_models',
+ 'cudnn_version',
+ 'pip_version', # 'pip' or 'pip3'
+ 'pip_packages',
+ 'conda_packages',
+])
+
+
+def run(command):
+ """Returns (return-code, stdout, stderr)"""
+ p = subprocess.Popen(command, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE, shell=True)
+ output, err = p.communicate()
+ rc = p.returncode
+ if PY3:
+ output = output.decode("ascii")
+ err = err.decode("ascii")
+ return rc, output.strip(), err.strip()
+
+
+def run_and_read_all(run_lambda, command):
+ """Runs command using run_lambda; reads and returns entire output if rc is 0"""
+ rc, out, _ = run_lambda(command)
+ if rc is not 0:
+ return None
+ return out
+
+
+def run_and_parse_first_match(run_lambda, command, regex):
+ """Runs command using run_lambda, returns the first regex match if it exists"""
+ rc, out, _ = run_lambda(command)
+ if rc is not 0:
+ return None
+ match = re.search(regex, out)
+ if match is None:
+ return None
+ return match.group(1)
+
+
+def get_conda_packages(run_lambda):
+ out = run_and_read_all(run_lambda, 'conda list | grep "torch\|soumith"')
+ if out is None:
+ return out
+ # Comment starting at beginning of line
+ comment_regex = re.compile(r'^#.*\n')
+ return re.sub(comment_regex, '', out)
+
+
+def get_gcc_version(run_lambda):
+ return run_and_parse_first_match(run_lambda, 'gcc --version', r'gcc (.*)')
+
+
+def get_cmake_version(run_lambda):
+ return run_and_parse_first_match(run_lambda, 'cmake --version', r'cmake (.*)')
+
+
+def get_nvidia_driver_version(run_lambda):
+ return run_and_parse_first_match(run_lambda, 'nvidia-smi', r'Driver Version: (.*?) ')
+
+
+def get_gpu_info(run_lambda):
+ uuid_regex = re.compile(' \(UUID: .+?\)')
+ rc, out, _ = run_lambda('nvidia-smi -L')
+ if rc is not 0:
+ return None
+ # Anonymize GPUs by removing their UUID
+ return re.sub(uuid_regex, '', out)
+
+
+def get_running_cuda_version(run_lambda):
+ return run_and_parse_first_match(run_lambda, 'nvcc --version', r'V(.*)$')
+
+
+def get_cudnn_version(run_lambda):
+ """This will return a list of libcudnn.so; it's hard to tell which one is being used"""
+ rc, out, _ = run_lambda('find /usr/local /usr/lib -type f -name "libcudnn*" 2> /dev/null')
+ # find will return 1 if there are permission errors or if not found
+ if len(out) == 0:
+ return None
+ if rc != 1 and rc != 0:
+ return None
+ # Alphabetize the result because the order is non-deterministic otherwise
+ result = '\n'.join(sorted(out.split('\n')))
+ return 'Probably one of the following:\n{}'.format(result)
+
+
+def get_platform():
+ if sys.platform.startswith('linux'):
+ return 'linux'
+ elif sys.platform.startswith('win32'):
+ return 'win32'
+ elif sys.platform.startswith('cygwin'):
+ return 'cygwin'
+ elif sys.platform.startswith('darwin'):
+ return 'darwin'
+ else:
+ return sys.platform
+
+
+def get_mac_version(run_lambda):
+ return run_and_parse_first_match(run_lambda, 'sw_vers -productVersion', r'(.*)')
+
+
+def get_windows_version(run_lambda):
+ return run_and_read_all(run_lambda, 'wmic os get Caption | findstr /v Caption')
+
+
+def get_lsb_version(run_lambda):
+ return run_and_parse_first_match(run_lambda, 'lsb_release -a', r'Description:\t(.*)')
+
+
+def check_release_file(run_lambda):
+ return run_and_parse_first_match(run_lambda, 'cat /etc/*-release',
+ r'PRETTY_NAME="(.*)"')
+
+
+def get_os(run_lambda):
+ platform = get_platform()
+
+ if platform is 'win32' or platform is 'cygwin':
+ return get_windows_version(run_lambda)
+
+ if platform == 'darwin':
+ version = get_mac_version(run_lambda)
+ if version is None:
+ return None
+ return 'Mac OSX {}'.format(version)
+
+ if platform == 'linux':
+ # Ubuntu/Debian based
+ desc = get_lsb_version(run_lambda)
+ if desc is not None:
+ return desc
+
+ # Try reading /etc/*-release
+ desc = check_release_file(run_lambda)
+ if desc is not None:
+ return desc
+
+ return platform
+
+ # Unknown platform
+ return platform
+
+
+def get_pip_packages(run_lambda):
+ # People generally have `pip` as `pip` or `pip3`
+ def run_with_pip(pip):
+ return run_and_read_all(run_lambda, pip + ' list --format=legacy | grep "torch\|numpy"')
+
+ if not PY3:
+ return 'pip', run_with_pip('pip')
+
+ # Try to figure out if the user is running pip or pip3.
+ out2 = run_with_pip('pip')
+ out3 = run_with_pip('pip3')
+
+ num_pips = len([x for x in [out2, out3] if x is not None])
+ if num_pips is 0:
+ return 'pip', out2
+
+ if num_pips == 1:
+ if out2 is not None:
+ return 'pip', out2
+ return 'pip3', out3
+
+ # num_pips is 2. Return pip3 by default b/c that most likely
+ # is the one associated with Python 3
+ return 'pip3', out3
+
+
+def get_env_info():
+ run_lambda = run
+ pip_version, pip_list_output = get_pip_packages(run_lambda)
+
+ return SystemEnv(
+ torch_version=torch.__version__,
+ is_debug_build=torch.version.debug,
+ python_version='{}.{}'.format(sys.version_info[0], sys.version_info[1]),
+ is_cuda_available=torch.cuda.is_available(),
+ cuda_compiled_version=torch.version.cuda,
+ cuda_runtime_version=get_running_cuda_version(run_lambda),
+ nvidia_gpu_models=get_gpu_info(run_lambda),
+ nvidia_driver_version=get_nvidia_driver_version(run_lambda),
+ cudnn_version=get_cudnn_version(run_lambda),
+ pip_version=pip_version,
+ pip_packages=pip_list_output,
+ conda_packages=get_conda_packages(run_lambda),
+ os=get_os(run_lambda),
+ gcc_version=get_gcc_version(run_lambda),
+ cmake_version=get_cmake_version(run_lambda),
+ )
+
+env_info_fmt = """
+PyTorch version: {torch_version}
+Is debug build: {is_debug_build}
+CUDA used to build PyTorch: {cuda_compiled_version}
+
+OS: {os}
+GCC version: {gcc_version}
+CMake version: {cmake_version}
+
+Python version: {python_version}
+Is CUDA available: {is_cuda_available}
+CUDA runtime version: {cuda_runtime_version}
+GPU models and configuration: {nvidia_gpu_models}
+Nvidia driver version: {nvidia_driver_version}
+cuDNN version: {cudnn_version}
+
+Versions of relevant libraries:
+{pip_packages}
+{conda_packages}
+""".strip()
+
+
+def pretty_str(envinfo):
+ def replace_nones(dct, replacement='Could not collect'):
+ for key in dct.keys():
+ if dct[key] is not None:
+ continue
+ dct[key] = replacement
+ return dct
+
+ def replace_bools(dct, true='Yes', false='No'):
+ for key in dct.keys():
+ if dct[key] is True:
+ dct[key] = true
+ elif dct[key] is False:
+ dct[key] = false
+ return dct
+
+ def prepend(text, tag='[prepend]'):
+ lines = text.split('\n')
+ updated_lines = [tag + line for line in lines]
+ return '\n'.join(updated_lines)
+
+ def replace_if_empty(text, replacement='No relevant packages'):
+ if text is not None and len(text) == 0:
+ return replacement
+ return text
+
+ def maybe_start_on_next_line(string):
+ # If `string` is multiline, prepend a \n to it.
+ if string is not None and len(string.split('\n')) > 1:
+ return '\n{}\n'.format(string)
+ return string
+
+ mutable_dict = envinfo._asdict()
+
+ # If nvidia_gpu_models is multiline, start on the next line
+ mutable_dict['nvidia_gpu_models'] = \
+ maybe_start_on_next_line(envinfo.nvidia_gpu_models)
+
+ # If the machine doesn't have CUDA, report some fields as 'No CUDA'
+ dynamic_cuda_fields = [
+ 'cuda_runtime_version',
+ 'nvidia_gpu_models',
+ 'nvidia_driver_version',
+ ]
+ all_cuda_fields = dynamic_cuda_fields + ['cudnn_version']
+ all_dynamic_cuda_fields_missing = all(
+ mutable_dict[field] is None for field in dynamic_cuda_fields)
+ if not torch.cuda.is_available() and all_dynamic_cuda_fields_missing:
+ for field in all_cuda_fields:
+ mutable_dict[field] = 'No CUDA'
+ if envinfo.cuda_compiled_version is None:
+ mutable_dict['cuda_compiled_version'] = 'None'
+
+ # Replace True with Yes, False with No
+ mutable_dict = replace_bools(mutable_dict)
+
+ # Replace all None objects with 'Could not collect'
+ mutable_dict = replace_nones(mutable_dict)
+
+ # If either of these are '', replace with 'No relevant packages'
+ mutable_dict['pip_packages'] = replace_if_empty(mutable_dict['pip_packages'])
+ mutable_dict['conda_packages'] = replace_if_empty(mutable_dict['conda_packages'])
+
+ # Tag conda and pip packages with a prefix
+ # If they were previously None, they'll show up as ie '[conda] Could not collect'
+ if mutable_dict['pip_packages']:
+ mutable_dict['pip_packages'] = prepend(mutable_dict['pip_packages'],
+ '[{}] '.format(envinfo.pip_version))
+ if mutable_dict['conda_packages']:
+ mutable_dict['conda_packages'] = prepend(mutable_dict['conda_packages'],
+ '[conda] ')
+ return env_info_fmt.format(**mutable_dict)
+
+
+def get_pretty_env_info():
+ return pretty_str(get_env_info())
+
+
+def main():
+ print("Collecting environment information...")
+ output = get_pretty_env_info()
+ print(output)
+
+
+if __name__ == '__main__':
+ main()