diff options
author | Richard Zou <zou3519@users.noreply.github.com> | 2018-04-22 15:18:14 -0400 |
---|---|---|
committer | Edward Z. Yang <ezyang@mit.edu> | 2018-04-22 15:18:14 -0400 |
commit | 7a3c38ab595ea78f16935df788c4982a0ec56966 (patch) | |
tree | 95fcfe023746c9063e59551df071658d23a03c74 | |
parent | 56567fe47dafcb77222c366cca0ac73f557bf4df (diff) | |
download | pytorch-7a3c38ab595ea78f16935df788c4982a0ec56966.tar.gz pytorch-7a3c38ab595ea78f16935df788c4982a0ec56966.tar.bz2 pytorch-7a3c38ab595ea78f16935df788c4982a0ec56966.zip |
Add environment collection script (#6635)
* Add environment collection script
Fixes #6111. This should make it easier for users to report bugs by giving
them a script to collect system environment information.
Changes include:
- Refactor out the environment collecting code from utils.bottleneck
- Add script (collect_env.py)
- Clean up the issue template so that it suggests using the script
and is more readable.
Testing: added expect tests for 4 CI configurations. Whenever one
of these configurations gets updated, the corresponding test will fail
until its expect file is also updated.
* Expect tests
* Update issue template
* Fix random space
* Minor improvement to issue template; fix expect test
* Skip expect test if BUILD_ENVIRONMENT is not set; fix test; split into separate smoke and expect tests
-rw-r--r-- | .github/ISSUE_TEMPLATE.md | 38 | ||||
-rw-r--r-- | test/expect/TestCollectEnv.test_pytorch_linux_trusty_py27.expect | 19 | ||||
-rw-r--r-- | test/expect/TestCollectEnv.test_pytorch_linux_xenial_cuda9_cudnn7_py3.expect | 25 | ||||
-rw-r--r-- | test/expect/TestCollectEnv.test_pytorch_macos_1013_py3.expect | 19 | ||||
-rw-r--r-- | test/expect/TestCollectEnv.test_pytorch_win_ws2016_cuda9_cudnn7_py3.expect | 18 | ||||
-rw-r--r-- | test/test_utils.py | 48 | ||||
-rw-r--r-- | torch/utils/bottleneck/__main__.py | 74 | ||||
-rw-r--r-- | torch/utils/collect_env.py | 327 |
8 files changed, 494 insertions, 74 deletions
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index d993127645..3447f75846 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -1,24 +1,38 @@ -PyTorch GitHub Issues Guidelines --------------------------------- - -We like to limit our issues to bug reports and feature requests. If you have a question or would like help and support, please visit our forums: https://discuss.pytorch.org/ +If you have a question or would like help and support, please ask at our +[forums](https://discuss.pytorch.org/). If you are submitting a feature request, please preface the title with [feature request]. +If you are submitting a bug report, please fill in the following details. + +## Issue description + +Provide a short description. + +## Code example + +Please try to provide a minimal example to repro the bug. +Error messages and stack traces are also helpful. + +## System Info +Please copy and paste the output from our +[environment collection script](https://github.com/pytorch/pytorch/tree/master/torch/utils/collect_env.py) +(or fill out the checklist below manually). + +You can get the script and run it with: +``` +wget https://github.com/pytorch/pytorch/tree/master/torch/utils/collect_env.py +# For security purposes, please check the contents of collect_env.py before running it. 
+python collect_env.py +``` -When submitting a bug report, please include the following information (where relevant): - PyTorch or Caffe2: +- How you installed PyTorch (conda, pip, source): +- Build command you used (if compiling from source): - OS: - PyTorch version: -- How you installed PyTorch (conda, pip, source): - Python version: - CUDA/cuDNN version: - GPU models and configuration: - GCC version (if compiling from source): - CMake version: -- Build command you used (if compiling from source): - Versions of any other relevant libraries: - -In addition, including the following information will also be very helpful for us to diagnose the problem: -- A script to reproduce the bug. Please try to provide as minimal of a test case as possible. -- Error messages and/or stack traces of the bug -- Context around what you are trying to do diff --git a/test/expect/TestCollectEnv.test_pytorch_linux_trusty_py27.expect b/test/expect/TestCollectEnv.test_pytorch_linux_trusty_py27.expect new file mode 100644 index 0000000000..8571b8c294 --- /dev/null +++ b/test/expect/TestCollectEnv.test_pytorch_linux_trusty_py27.expect @@ -0,0 +1,19 @@ +PyTorch version: 0.4.0a0 +Is debug build: No +CUDA used to build PyTorch: None + +OS: Ubuntu 14.04.5 LTS +GCC version: (Ubuntu 4.8.4-2ubuntu1~14.04.4) 4.8.4 +CMake version: version 3.5.1 + +Python version: 2.7 +Is CUDA available: No +CUDA runtime version: No CUDA +GPU models and configuration: No CUDA +Nvidia driver version: No CUDA +cuDNN version: No CUDA + +Versions of relevant libraries: +[pip] numpy (1.14.2) +[pip] torch (0.4.0a0) +[conda] Could not collect diff --git a/test/expect/TestCollectEnv.test_pytorch_linux_xenial_cuda9_cudnn7_py3.expect b/test/expect/TestCollectEnv.test_pytorch_linux_xenial_cuda9_cudnn7_py3.expect new file mode 100644 index 0000000000..95c6b7bf52 --- /dev/null +++ b/test/expect/TestCollectEnv.test_pytorch_linux_xenial_cuda9_cudnn7_py3.expect @@ -0,0 +1,25 @@ +PyTorch version: 0.4.0a0 +Is debug build: No +CUDA 
used to build PyTorch: 9.0.176 + +OS: Ubuntu 16.04.4 LTS +GCC version: (Ubuntu 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609 +CMake version: version 3.9.4 + +Python version: 3.6 +Is CUDA available: Yes +CUDA runtime version: 9.0.176 +GPU models and configuration: +GPU 0: Tesla M60 +GPU 1: Tesla M60 + +Nvidia driver version: 384.111 +cuDNN version: Probably one of the following: +/usr/lib/x86_64-linux-gnu/libcudnn.so.7.1.2 +/usr/lib/x86_64-linux-gnu/libcudnn_static_v7.a + +Versions of relevant libraries: +[pip] numpy (1.14.2) +[pip] torch (0.4.0a0) +[conda] magma-cuda90 2.3.0 1 soumith +[conda] torch 0.4.0a0 <pip> diff --git a/test/expect/TestCollectEnv.test_pytorch_macos_1013_py3.expect b/test/expect/TestCollectEnv.test_pytorch_macos_1013_py3.expect new file mode 100644 index 0000000000..51d96b9376 --- /dev/null +++ b/test/expect/TestCollectEnv.test_pytorch_macos_1013_py3.expect @@ -0,0 +1,19 @@ +PyTorch version: 0.4.0a0 +Is debug build: No +CUDA used to build PyTorch: None + +OS: Mac OSX 10.13.3 +GCC version: Could not collect +CMake version: version 3.9.4 + +Python version: 3.6 +Is CUDA available: No +CUDA runtime version: No CUDA +GPU models and configuration: No CUDA +Nvidia driver version: No CUDA +cuDNN version: No CUDA + +Versions of relevant libraries: +[pip] numpy (1.14.2) +[pip] torch (0.4.0a0) +[conda] torch 0.4.0a0 <pip> diff --git a/test/expect/TestCollectEnv.test_pytorch_win_ws2016_cuda9_cudnn7_py3.expect b/test/expect/TestCollectEnv.test_pytorch_win_ws2016_cuda9_cudnn7_py3.expect new file mode 100644 index 0000000000..48130595f8 --- /dev/null +++ b/test/expect/TestCollectEnv.test_pytorch_win_ws2016_cuda9_cudnn7_py3.expect @@ -0,0 +1,18 @@ +PyTorch version: 0.4.0a0 +Is debug build: No +CUDA used to build PyTorch: 9.0 + +OS: Microsoft Windows Server 2012 R2 Standard +GCC version: Could not collect +CMake version: version 3.10.2 + +Python version: 3.6 +Is CUDA available: Yes +CUDA runtime version: 9.0.176 +GPU models and configuration: Could not collect 
+Nvidia driver version: Could not collect +cuDNN version: Could not collect + +Versions of relevant libraries: +[pip] numpy (1.14.2) +[conda] Could not collect diff --git a/test/test_utils.py b/test/test_utils.py index 5ede8a92d3..c18cbc53c2 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -515,7 +515,6 @@ class TestBottleneck(TestCase): return (rc, output, err) def _run_bottleneck(self, test_file, scriptargs=''): - import os curdir = os.path.dirname(os.path.abspath(__file__)) filepath = '{}/{}'.format(curdir, test_file) if scriptargs != '': @@ -596,6 +595,53 @@ class TestBottleneck(TestCase): self._check_cuda(out) +from torch.utils.collect_env import get_pretty_env_info + + +class TestCollectEnv(TestCase): + + def _build_env_to_expect(self, build_env): + return 'expect/TestCollectEnv.test_{}.expect'.format( + build_env.replace('.', '').replace('-', '_')) + + def _preprocess_info_for_test(self, info_output): + # Remove the version hash + version_hash_regex = re.compile(r'(a\d+)\+.......') + return re.sub(version_hash_regex, r'\1', info_output).strip() + + def assertExpectedOutput(self, info_output, build_env): + processed_info = self._preprocess_info_for_test(info_output) + expect_filename = self._build_env_to_expect(build_env) + + ci_warning = ('This test will error out if the CI config was recently ' + 'updated. 
If this is the case, please update the expect ' + 'files to match the CI machines\' system config.') + + with open(expect_filename, 'r') as f: + expected_info = f.read().strip() + self.assertEqual(processed_info, expected_info, ci_warning) + + def test_smoke(self): + info_output = get_pretty_env_info() + self.assertTrue(info_output.count('\n') >= 17) + + @unittest.skipIf('BUILD_ENVIRONMENT' not in os.environ.keys(), 'CI-only test') + def test_expect(self): + info_output = get_pretty_env_info() + + ci_build_envs = [ + 'pytorch-linux-trusty-py2.7', + 'pytorch-linux-xenial-cuda9-cudnn7-py3', + 'pytorch-macos-10.13-py3', + 'pytorch-win-ws2016-cuda9-cudnn7-py3' + ] + build_env = os.environ['BUILD_ENVIRONMENT'] + if build_env not in ci_build_envs: + return + + self.assertExpectedOutput(info_output, build_env) + + class TestONNXUtils(TestCase): def test_prepare_onnx_paddings(self): sizes = [2, 3, 4] diff --git a/torch/utils/bottleneck/__main__.py b/torch/utils/bottleneck/__main__.py index 7a620ec92b..951e42a892 100644 --- a/torch/utils/bottleneck/__main__.py +++ b/torch/utils/bottleneck/__main__.py @@ -9,67 +9,16 @@ import contextlib import torch from torch.autograd import profiler - -PY3 = sys.version_info >= (3, 0) - - -def run(command): - """Returns (return-code, stdout, stderr)""" - p = subprocess.Popen(command, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, shell=True) - output, err = p.communicate() - rc = p.returncode - if PY3: - output = output.decode("ascii") - err = err.decode("ascii") - return (rc, output, err) +from torch.utils.collect_env import get_env_info def redirect_argv(new_argv): sys.argv[:] = new_argv[:] -def check_running_cuda_version(): - (rc, out, err) = run('nvcc --version') - if rc is not 0: - return None - m = re.search(r'V(.*)$', out) - assert m is not None - return m.group(1) - - -def check_pip_packages(): - # People generally have `pip` as `pip` or `pip3` - def run_with_pip(pip): - rc, out, _ = run(pip + ' list --format=legacy | grep 
torch') - if rc is 0: - return out - return None - - if not PY3: - return 'pip', run_with_pip('pip') - - # Try to figure out if the user is running pip or pip3. - out2 = run_with_pip('pip') - out3 = run_with_pip('pip3') - - num_pips = len([x for x in [out2, out3] if x is not None]) - if num_pips is 0: - return 'pip', out2 - - if num_pips == 1: - if out2 is not None: - return 'pip', out2 - return 'pip3', out3 - - # num_pips is 2. Return pip3 by default b/c that most likely - # is the one associated with Python 3 - return 'pip3', out3 - - -def compiled_with_cuda(): - if torch.version.cuda: - return 'compiled w/ CUDA {}'.format(torch.version.cuda) +def compiled_with_cuda(sysinfo): + if sysinfo.cuda_compiled_version: + return 'compiled w/ CUDA {}'.format(sysinfo.cuda_compiled_version) return 'not compiled w/ CUDA' @@ -87,28 +36,31 @@ Running with Python {py_version} and {cuda_runtime} def run_env_analysis(): print('Running environment analysis...') + info = get_env_info() + result = [] debug_str = '' - if torch.version.debug: + if info.is_debug_build: debug_str = ' DEBUG' cuda_avail = '' - if torch.cuda.is_available(): - cuda = check_running_cuda_version() + if info.is_cuda_available: + cuda = info.cuda_runtime_version if cuda is not None: cuda_avail = 'CUDA ' + cuda else: cuda = 'CUDA unavailable' - pip_version, pip_list_output = check_pip_packages() + pip_version = info.pip_version + pip_list_output = info.pip_packages if pip_list_output is None: pip_list_output = 'Unable to fetch' result = { 'debug_str': debug_str, - 'pytorch_version': torch.__version__, - 'cuda_compiled': compiled_with_cuda(), + 'pytorch_version': info.torch_version, + 'cuda_compiled': compiled_with_cuda(info), 'py_version': '{}.{}'.format(sys.version_info[0], sys.version_info[1]), 'cuda_runtime': cuda_avail, 'pip_version': pip_version, diff --git a/torch/utils/collect_env.py b/torch/utils/collect_env.py new file mode 100644 index 0000000000..3c98a96316 --- /dev/null +++ 
b/torch/utils/collect_env.py @@ -0,0 +1,327 @@ +# This script outputs relevant system environment info +# Run it with `python collect_env.py`. +import re +import subprocess +import sys +import time +import datetime +import os +from collections import namedtuple + +import torch + +PY3 = sys.version_info >= (3, 0) + +# System Environment Information +SystemEnv = namedtuple('SystemEnv', [ + 'torch_version', + 'is_debug_build', + 'cuda_compiled_version', + 'gcc_version', + 'cmake_version', + 'os', + 'python_version', + 'is_cuda_available', + 'cuda_runtime_version', + 'nvidia_driver_version', + 'nvidia_gpu_models', + 'cudnn_version', + 'pip_version', # 'pip' or 'pip3' + 'pip_packages', + 'conda_packages', +]) + + +def run(command): + """Returns (return-code, stdout, stderr)""" + p = subprocess.Popen(command, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, shell=True) + output, err = p.communicate() + rc = p.returncode + if PY3: + output = output.decode("ascii") + err = err.decode("ascii") + return rc, output.strip(), err.strip() + + +def run_and_read_all(run_lambda, command): + """Runs command using run_lambda; reads and returns entire output if rc is 0""" + rc, out, _ = run_lambda(command) + if rc is not 0: + return None + return out + + +def run_and_parse_first_match(run_lambda, command, regex): + """Runs command using run_lambda, returns the first regex match if it exists""" + rc, out, _ = run_lambda(command) + if rc is not 0: + return None + match = re.search(regex, out) + if match is None: + return None + return match.group(1) + + +def get_conda_packages(run_lambda): + out = run_and_read_all(run_lambda, 'conda list | grep "torch\|soumith"') + if out is None: + return out + # Comment starting at beginning of line + comment_regex = re.compile(r'^#.*\n') + return re.sub(comment_regex, '', out) + + +def get_gcc_version(run_lambda): + return run_and_parse_first_match(run_lambda, 'gcc --version', r'gcc (.*)') + + +def get_cmake_version(run_lambda): + return 
run_and_parse_first_match(run_lambda, 'cmake --version', r'cmake (.*)') + + +def get_nvidia_driver_version(run_lambda): + return run_and_parse_first_match(run_lambda, 'nvidia-smi', r'Driver Version: (.*?) ') + + +def get_gpu_info(run_lambda): + uuid_regex = re.compile(' \(UUID: .+?\)') + rc, out, _ = run_lambda('nvidia-smi -L') + if rc is not 0: + return None + # Anonymize GPUs by removing their UUID + return re.sub(uuid_regex, '', out) + + +def get_running_cuda_version(run_lambda): + return run_and_parse_first_match(run_lambda, 'nvcc --version', r'V(.*)$') + + +def get_cudnn_version(run_lambda): + """This will return a list of libcudnn.so; it's hard to tell which one is being used""" + rc, out, _ = run_lambda('find /usr/local /usr/lib -type f -name "libcudnn*" 2> /dev/null') + # find will return 1 if there are permission errors or if not found + if len(out) == 0: + return None + if rc != 1 and rc != 0: + return None + # Alphabetize the result because the order is non-deterministic otherwise + result = '\n'.join(sorted(out.split('\n'))) + return 'Probably one of the following:\n{}'.format(result) + + +def get_platform(): + if sys.platform.startswith('linux'): + return 'linux' + elif sys.platform.startswith('win32'): + return 'win32' + elif sys.platform.startswith('cygwin'): + return 'cygwin' + elif sys.platform.startswith('darwin'): + return 'darwin' + else: + return sys.platform + + +def get_mac_version(run_lambda): + return run_and_parse_first_match(run_lambda, 'sw_vers -productVersion', r'(.*)') + + +def get_windows_version(run_lambda): + return run_and_read_all(run_lambda, 'wmic os get Caption | findstr /v Caption') + + +def get_lsb_version(run_lambda): + return run_and_parse_first_match(run_lambda, 'lsb_release -a', r'Description:\t(.*)') + + +def check_release_file(run_lambda): + return run_and_parse_first_match(run_lambda, 'cat /etc/*-release', + r'PRETTY_NAME="(.*)"') + + +def get_os(run_lambda): + platform = get_platform() + + if platform is 'win32' or 
platform is 'cygwin': + return get_windows_version(run_lambda) + + if platform == 'darwin': + version = get_mac_version(run_lambda) + if version is None: + return None + return 'Mac OSX {}'.format(version) + + if platform == 'linux': + # Ubuntu/Debian based + desc = get_lsb_version(run_lambda) + if desc is not None: + return desc + + # Try reading /etc/*-release + desc = check_release_file(run_lambda) + if desc is not None: + return desc + + return platform + + # Unknown platform + return platform + + +def get_pip_packages(run_lambda): + # People generally have `pip` as `pip` or `pip3` + def run_with_pip(pip): + return run_and_read_all(run_lambda, pip + ' list --format=legacy | grep "torch\|numpy"') + + if not PY3: + return 'pip', run_with_pip('pip') + + # Try to figure out if the user is running pip or pip3. + out2 = run_with_pip('pip') + out3 = run_with_pip('pip3') + + num_pips = len([x for x in [out2, out3] if x is not None]) + if num_pips is 0: + return 'pip', out2 + + if num_pips == 1: + if out2 is not None: + return 'pip', out2 + return 'pip3', out3 + + # num_pips is 2. 
Return pip3 by default b/c that most likely + # is the one associated with Python 3 + return 'pip3', out3 + + +def get_env_info(): + run_lambda = run + pip_version, pip_list_output = get_pip_packages(run_lambda) + + return SystemEnv( + torch_version=torch.__version__, + is_debug_build=torch.version.debug, + python_version='{}.{}'.format(sys.version_info[0], sys.version_info[1]), + is_cuda_available=torch.cuda.is_available(), + cuda_compiled_version=torch.version.cuda, + cuda_runtime_version=get_running_cuda_version(run_lambda), + nvidia_gpu_models=get_gpu_info(run_lambda), + nvidia_driver_version=get_nvidia_driver_version(run_lambda), + cudnn_version=get_cudnn_version(run_lambda), + pip_version=pip_version, + pip_packages=pip_list_output, + conda_packages=get_conda_packages(run_lambda), + os=get_os(run_lambda), + gcc_version=get_gcc_version(run_lambda), + cmake_version=get_cmake_version(run_lambda), + ) + +env_info_fmt = """ +PyTorch version: {torch_version} +Is debug build: {is_debug_build} +CUDA used to build PyTorch: {cuda_compiled_version} + +OS: {os} +GCC version: {gcc_version} +CMake version: {cmake_version} + +Python version: {python_version} +Is CUDA available: {is_cuda_available} +CUDA runtime version: {cuda_runtime_version} +GPU models and configuration: {nvidia_gpu_models} +Nvidia driver version: {nvidia_driver_version} +cuDNN version: {cudnn_version} + +Versions of relevant libraries: +{pip_packages} +{conda_packages} +""".strip() + + +def pretty_str(envinfo): + def replace_nones(dct, replacement='Could not collect'): + for key in dct.keys(): + if dct[key] is not None: + continue + dct[key] = replacement + return dct + + def replace_bools(dct, true='Yes', false='No'): + for key in dct.keys(): + if dct[key] is True: + dct[key] = true + elif dct[key] is False: + dct[key] = false + return dct + + def prepend(text, tag='[prepend]'): + lines = text.split('\n') + updated_lines = [tag + line for line in lines] + return '\n'.join(updated_lines) + + def 
replace_if_empty(text, replacement='No relevant packages'): + if text is not None and len(text) == 0: + return replacement + return text + + def maybe_start_on_next_line(string): + # If `string` is multiline, prepend a \n to it. + if string is not None and len(string.split('\n')) > 1: + return '\n{}\n'.format(string) + return string + + mutable_dict = envinfo._asdict() + + # If nvidia_gpu_models is multiline, start on the next line + mutable_dict['nvidia_gpu_models'] = \ + maybe_start_on_next_line(envinfo.nvidia_gpu_models) + + # If the machine doesn't have CUDA, report some fields as 'No CUDA' + dynamic_cuda_fields = [ + 'cuda_runtime_version', + 'nvidia_gpu_models', + 'nvidia_driver_version', + ] + all_cuda_fields = dynamic_cuda_fields + ['cudnn_version'] + all_dynamic_cuda_fields_missing = all( + mutable_dict[field] is None for field in dynamic_cuda_fields) + if not torch.cuda.is_available() and all_dynamic_cuda_fields_missing: + for field in all_cuda_fields: + mutable_dict[field] = 'No CUDA' + if envinfo.cuda_compiled_version is None: + mutable_dict['cuda_compiled_version'] = 'None' + + # Replace True with Yes, False with No + mutable_dict = replace_bools(mutable_dict) + + # Replace all None objects with 'Could not collect' + mutable_dict = replace_nones(mutable_dict) + + # If either of these are '', replace with 'No relevant packages' + mutable_dict['pip_packages'] = replace_if_empty(mutable_dict['pip_packages']) + mutable_dict['conda_packages'] = replace_if_empty(mutable_dict['conda_packages']) + + # Tag conda and pip packages with a prefix + # If they were previously None, they'll show up as ie '[conda] Could not collect' + if mutable_dict['pip_packages']: + mutable_dict['pip_packages'] = prepend(mutable_dict['pip_packages'], + '[{}] '.format(envinfo.pip_version)) + if mutable_dict['conda_packages']: + mutable_dict['conda_packages'] = prepend(mutable_dict['conda_packages'], + '[conda] ') + return env_info_fmt.format(**mutable_dict) + + +def 
get_pretty_env_info(): + return pretty_str(get_env_info()) + + +def main(): + print("Collecting environment information...") + output = get_pretty_env_info() + print(output) + + +if __name__ == '__main__': + main() |