1
0

env.py 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731
  1. # ruff: noqa
  2. # code borrowed from https://github.com/pytorch/pytorch/blob/main/torch/utils/collect_env.py
  3. # and https://github.com/vllm-project/vllm/blob/e14fb22e59a1a9aa745b2a72211973838f6a5993/collect_env.py
  4. # Unlike the rest of PyTorch, this file must be Python 2 compliant.
  5. # This script outputs relevant system environment info
  6. # Run it with `python collect_env.py` or `python -m torch.utils.collect_env`
  7. import datetime
  8. import locale
  9. import os
  10. import re
  11. import subprocess
  12. import sys
  13. from collections import namedtuple
  # torch is optional; TORCH_AVAILABLE records whether the import succeeded.
  try:
      import torch
  except (ImportError, NameError, AttributeError, OSError):
      TORCH_AVAILABLE = False
  else:
      TORCH_AVAILABLE = True
  19. # System Environment Information
  # Record type holding every value that ends up in the environment report.
  # Field order is part of the tuple layout, so it is kept as-is.
  SystemEnv = namedtuple(
      'SystemEnv',
      (
          # PyTorch build
          'torch_version',
          'is_debug_build',
          'cuda_compiled_version',
          # Toolchain and operating system
          'gcc_version',
          'clang_version',
          'cmake_version',
          'os',
          'libc_version',
          'python_version',
          'python_platform',
          # CUDA runtime
          'is_cuda_available',
          'cuda_runtime_version',
          'cuda_module_loading',
          'nvidia_driver_version',
          'nvidia_gpu_models',
          'cudnn_version',
          # Python packages
          'pip_version',  # 'pip' or 'pip3'
          'pip_packages',
          'conda_packages',
          # HIP / ROCm build
          'hip_compiled_version',
          'hip_runtime_version',
          'miopen_runtime_version',
          'caching_allocator_config',
          'is_xnnpack_available',
          'cpu_info',
          # aphrodite specific fields
          'rocm_version',
          'neuron_sdk_version',
          'aphrodite_version',
          'aphrodite_build_flags',
          'gpu_topo',
      ),
  )
  # Substrings that mark a `conda list` line as relevant to the report.
  DEFAULT_CONDA_PATTERNS = {
      "cudatoolkit",
      "magma",
      "mkl",
      "nccl",
      "numpy",
      "optree",
      "soumith",
      "torch",
      "transformers",
      "triton",
      "zmq",
  }
  # Substrings that mark a `pip list` line as relevant to the report.
  DEFAULT_PIP_PATTERNS = {
      "flake8",
      "mypy",
      "nccl",
      "numpy",
      "onnx",
      "optree",
      "torch",
      "transformers",
      "triton",
      "zmq",
  }
  def run(command):
      """Run `command` and return (return-code, stdout, stderr).

      A string command is executed through the shell; anything else (for
      example an argument list) is handed to ``subprocess.Popen`` as-is.
      Both output streams are decoded and stripped of surrounding whitespace.
      """
      # isinstance() also accepts str subclasses, unlike an exact type() check.
      shell = isinstance(command, str)
      p = subprocess.Popen(command,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE,
                           shell=shell)
      raw_output, raw_err = p.communicate()
      rc = p.returncode
      if get_platform() == 'win32':
          enc = 'oem'
      else:
          enc = locale.getpreferredencoding()
      # A stray undecodable byte must not abort the whole report, so such
      # bytes are replaced instead of raising UnicodeDecodeError.
      output = raw_output.decode(enc, 'replace')
      err = raw_err.decode(enc, 'replace')
      return rc, output.strip(), err.strip()
  def run_and_read_all(run_lambda, command):
      """Execute `command` via `run_lambda`; return its output, or None if the return code is non-zero."""
      returncode, stdout, _ = run_lambda(command)
      return stdout if returncode == 0 else None
  def run_and_parse_first_match(run_lambda, command, regex):
      """Execute `command` via `run_lambda` and return group 1 of the first `regex` match.

      Returns None when the command fails or the pattern is not found.
      """
      returncode, stdout, _ = run_lambda(command)
      found = re.search(regex, stdout) if returncode == 0 else None
      if found is not None:
          return found.group(1)
      return None
  def run_and_return_first_line(run_lambda, command):
      """Execute `command` via `run_lambda` and return the first line of its output.

      Returns None when the return code is non-zero.
      """
      returncode, stdout, _ = run_lambda(command)
      if returncode == 0:
          # Everything before the first newline (the whole text if there is none).
          return stdout.partition('\n')[0]
      return None
  def get_conda_packages(run_lambda, patterns=None):
      """Return the `conda list` lines that mention any of `patterns`.

      `patterns` defaults to DEFAULT_CONDA_PATTERNS. The conda executable is
      taken from the CONDA_EXE environment variable, falling back to `conda`.
      Returns None when the command fails.
      """
      wanted = DEFAULT_CONDA_PATTERNS if patterns is None else patterns
      conda_exe = os.environ.get('CONDA_EXE', 'conda')
      listing = run_and_read_all(run_lambda, "{} list".format(conda_exe))
      if listing is None:
          return None
      kept = []
      for line in listing.splitlines():
          # Lines starting with '#' are header comments, not packages.
          if line.startswith("#"):
              continue
          if any(name in line for name in wanted):
              kept.append(line)
      return "\n".join(kept)
  def get_gcc_version(run_lambda):
      """Return the text following `gcc ` in `gcc --version`, or None if unavailable."""
      return run_and_parse_first_match(
          run_lambda,
          command='gcc --version',
          regex=r'gcc (.*)',
      )
  def get_clang_version(run_lambda):
      """Return the text following `clang version ` in `clang --version`, or None if unavailable."""
      return run_and_parse_first_match(
          run_lambda,
          command='clang --version',
          regex=r'clang version (.*)',
      )
  def get_cmake_version(run_lambda):
      """Return the text following `cmake ` in `cmake --version`, or None if unavailable."""
      return run_and_parse_first_match(
          run_lambda,
          command='cmake --version',
          regex=r'cmake (.*)',
      )
  def get_nvidia_driver_version(run_lambda):
      """Return the NVIDIA driver version string, or None if it cannot be found.

      On macOS the version is parsed from `kextstat`; elsewhere it is parsed
      from the output of nvidia-smi.
      """
      if get_platform() != 'darwin':
          return run_and_parse_first_match(run_lambda, get_nvidia_smi(),
                                           r'Driver Version: (.*?) ')
      return run_and_parse_first_match(run_lambda,
                                       'kextstat | grep -i cuda',
                                       r'com[.]nvidia[.]CUDA [(](.*?)[)]')
  def get_gpu_info(run_lambda):
      """Return a description of the installed GPU(s), or None if unavailable.

      On macOS and on HIP builds of torch the device name comes from torch
      (for HIP, followed by the GCN architecture name when torch exposes it).
      Otherwise the output of `nvidia-smi -L` is returned with UUIDs removed.
      """
      ask_torch = get_platform() == 'darwin'
      if not ask_torch:
          ask_torch = (TORCH_AVAILABLE and hasattr(torch.version, 'hip')
                       and torch.version.hip is not None)
      if ask_torch:
          if not (TORCH_AVAILABLE and torch.cuda.is_available()):
              return None
          arch_suffix = ""
          if torch.version.hip is not None:
              props = torch.cuda.get_device_properties(0)
              if hasattr(props, "gcnArchName"):
                  arch_suffix = " ({})".format(props.gcnArchName)
              else:
                  arch_suffix = "NoGCNArchNameOnOldPyTorch"
          return torch.cuda.get_device_name(None) + arch_suffix

      rc, listing, _ = run_lambda(get_nvidia_smi() + ' -L')
      if rc != 0:
          return None
      # Anonymize GPUs by removing their UUID
      return re.sub(r' \(UUID: .+?\)', '', listing)
  def get_running_cuda_version(run_lambda):
      """Return the version after `V` on the release line of `nvcc --version`, or None."""
      return run_and_parse_first_match(
          run_lambda,
          command='nvcc --version',
          regex=r'release .+ V(.*)',
      )
  def get_cudnn_version(run_lambda):
      """Locate cuDNN libraries; it is hard to tell which one is actually in use.

      Returns a single resolved path, a 'Probably one of the following' text
      listing several paths, or None when nothing is found. If the search
      command yields nothing usable, the CUDNN_LIBRARY environment variable is
      tried instead.
      """
      platform_name = get_platform()
      if platform_name == 'win32':
          system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
          cuda_path = os.environ.get('CUDA_PATH', "%CUDA_PATH%")
          where_cmd = os.path.join(system_root, 'System32', 'where')
          cudnn_cmd = '{} /R "{}\\bin" cudnn*.dll'.format(where_cmd, cuda_path)
      elif platform_name == 'darwin':
          # CUDA libraries and drivers can be found in /usr/local/cuda/. See
          # https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#install
          # https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installmac
          # Use CUDNN_LIBRARY when cudnn library is installed elsewhere.
          cudnn_cmd = 'ls /usr/local/cuda/lib/libcudnn*'
      else:
          cudnn_cmd = 'ldconfig -p | grep libcudnn | rev | cut -d" " -f1 | rev'

      rc, out, _ = run_lambda(cudnn_cmd)
      # A return code of 1 is tolerated as long as some output was produced.
      if not out or rc not in (0, 1):
          env_lib = os.environ.get('CUDNN_LIBRARY')
          if env_lib is not None and os.path.isfile(env_lib):
              return os.path.realpath(env_lib)
          return None

      # realpath() eliminates symbolic links; the set drops duplicates.
      resolved = {os.path.realpath(fn) for fn in out.split('\n')}
      # Sort the result because the order is non-deterministic otherwise.
      files = sorted(path for path in resolved if os.path.isfile(path))
      if not files:
          return None
      if len(files) == 1:
          return files[0]
      return 'Probably one of the following:\n{}'.format('\n'.join(files))
  def get_nvidia_smi():
      """Return the command used to invoke nvidia-smi.

      On Windows the first existing candidate path is returned wrapped in
      double quotes; otherwise the bare name `nvidia-smi` is returned.
      """
      # Note: nvidia-smi is currently available only on Windows and Linux
      smi = 'nvidia-smi'
      if get_platform() != 'win32':
          return smi
      system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
      program_files_root = os.environ.get('PROGRAMFILES', 'C:\\Program Files')
      candidates = [
          # Newer location first, then the legacy NVSMI directory.
          os.path.join(system_root, 'System32', smi),
          os.path.join(program_files_root, 'NVIDIA Corporation', 'NVSMI', smi),
      ]
      for candidate_smi in candidates:
          if os.path.exists(candidate_smi):
              return '"{}"'.format(candidate_smi)
      return smi
  def get_rocm_version(run_lambda):
      """Return the `HIP version` reported by `hipcc --version`, or None if unavailable."""
      return run_and_parse_first_match(
          run_lambda,
          command='hipcc --version',
          regex=r'HIP version: (\S+)',
      )
  def get_neuron_sdk_version(run_lambda):
      """Return the output of `neuron-ls`, or 'N/A' when it cannot be obtained.

      `run_lambda` must return a (return-code, stdout, stderr) tuple. 'N/A' is
      returned when the return code is non-zero or when running the command
      raises (for example because the executable is not installed).
      """
      try:
          rc, out, _ = run_lambda(["neuron-ls"])
      except Exception:
          # Deliberately best-effort: this field is optional in the report.
          return 'N/A'
      # Return only the command output; the previous code returned the whole
      # (rc, out, err) tuple, which was then printed verbatim in the report.
      return out if rc == 0 else 'N/A'
  def get_aphrodite_version():
      """Return `aphrodite.__version__`, or 'N/A' when aphrodite cannot be imported."""
      try:
          import aphrodite
      except ImportError:
          return 'N/A'
      return aphrodite.__version__
  def summarize_aphrodite_build_flags():
      """Summarize build-related environment variables in one line.

      Reads TORCH_CUDA_ARCH_LIST, ROCM_HOME and NEURON_CORES from the
      environment; the latter two are reported as Enabled when set to a
      non-empty value.
      """
      cuda_archs = os.environ.get('TORCH_CUDA_ARCH_LIST', 'Not Set')
      rocm_state = 'Enabled' if os.environ.get('ROCM_HOME') else 'Disabled'
      neuron_state = 'Enabled' if os.environ.get('NEURON_CORES') else 'Disabled'
      return 'CUDA Archs: {}; ROCm: {}; Neuron: {}'.format(
          cuda_archs, rocm_state, neuron_state)
  def get_gpu_topo(run_lambda):
      """Return the output of `nvidia-smi topo -m` on Linux; None elsewhere or on failure."""
      if get_platform() != 'linux':
          return None
      return run_and_read_all(run_lambda, 'nvidia-smi topo -m')
  245. # example outputs of CPU infos
  246. # * linux
  247. # Architecture: x86_64
  248. # CPU op-mode(s): 32-bit, 64-bit
  249. # Address sizes: 46 bits physical, 48 bits virtual
  250. # Byte Order: Little Endian
  251. # CPU(s): 128
  252. # On-line CPU(s) list: 0-127
  253. # Vendor ID: GenuineIntel
  254. # Model name: Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
  255. # CPU family: 6
  256. # Model: 106
  257. # Thread(s) per core: 2
  258. # Core(s) per socket: 32
  259. # Socket(s): 2
  260. # Stepping: 6
  261. # BogoMIPS: 5799.78
  262. # Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr
  263. # sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl
  264. # xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq monitor ssse3 fma cx16
  265. # pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand
  266. # hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced
  267. # fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid avx512f avx512dq rdseed adx smap
  268. # avx512ifma clflushopt clwb avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1
  269. # xsaves wbnoinvd ida arat avx512vbmi pku ospke avx512_vbmi2 gfni vaes vpclmulqdq
  270. # avx512_vnni avx512_bitalg tme avx512_vpopcntdq rdpid md_clear flush_l1d arch_capabilities
  271. # Virtualization features:
  272. # Hypervisor vendor: KVM
  273. # Virtualization type: full
  274. # Caches (sum of all):
  275. # L1d: 3 MiB (64 instances)
  276. # L1i: 2 MiB (64 instances)
  277. # L2: 80 MiB (64 instances)
  278. # L3: 108 MiB (2 instances)
  279. # NUMA:
  280. # NUMA node(s): 2
  281. # NUMA node0 CPU(s): 0-31,64-95
  282. # NUMA node1 CPU(s): 32-63,96-127
  283. # Vulnerabilities:
  284. # Itlb multihit: Not affected
  285. # L1tf: Not affected
  286. # Mds: Not affected
  287. # Meltdown: Not affected
  288. # Mmio stale data: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown
  289. # Retbleed: Not affected
  290. # Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp
  291. # Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization
  292. # Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence
  293. # Srbds: Not affected
  294. # Tsx async abort: Not affected
  295. # * win32
  296. # Architecture=9
  297. # CurrentClockSpeed=2900
  298. # DeviceID=CPU0
  299. # Family=179
  300. # L2CacheSize=40960
  301. # L2CacheSpeed=
  302. # Manufacturer=GenuineIntel
  303. # MaxClockSpeed=2900
  304. # Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
  305. # ProcessorType=3
  306. # Revision=27142
  307. #
  308. # Architecture=9
  309. # CurrentClockSpeed=2900
  310. # DeviceID=CPU1
  311. # Family=179
  312. # L2CacheSize=40960
  313. # L2CacheSpeed=
  314. # Manufacturer=GenuineIntel
  315. # MaxClockSpeed=2900
  316. # Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
  317. # ProcessorType=3
  318. # Revision=27142
  def get_cpu_info(run_lambda):
      """Return a textual description of the CPU(s).

      Runs `lscpu` on Linux, `wmic cpu get ...` on Windows and
      `sysctl -n machdep.cpu.brand_string` on macOS. Returns the command's
      stdout on success and its stderr on failure. On any other platform no
      command is run and an empty string is returned.
      """
      platform_name = get_platform()
      if platform_name == 'linux':
          command = 'lscpu'
      elif platform_name == 'win32':
          # Built from adjacent literals so that no source indentation leaks
          # into the command (the old backslash continuation inside the
          # string literal embedded it).
          command = ('wmic cpu get Name,Manufacturer,Family,Architecture,'
                     'ProcessorType,DeviceID,CurrentClockSpeed,MaxClockSpeed,'
                     'L2CacheSize,L2CacheSpeed,Revision /VALUE')
      elif platform_name == 'darwin':
          command = "sysctl -n machdep.cpu.brand_string"
      else:
          return ''
      rc, out, err = run_lambda(command)
      return out if rc == 0 else err
  def get_platform():
      """Return a normalized platform name.

      One of 'linux', 'win32', 'cygwin' or 'darwin' when `sys.platform` starts
      with it; otherwise `sys.platform` unchanged.
      """
      for known in ('linux', 'win32', 'cygwin', 'darwin'):
          if sys.platform.startswith(known):
              return known
      return sys.platform
  def get_mac_version(run_lambda):
      """Return the first line printed by `sw_vers -productVersion`, or None on failure."""
      return run_and_parse_first_match(
          run_lambda,
          command='sw_vers -productVersion',
          regex=r'(.*)',
      )
  def get_windows_version(run_lambda):
      """Return the Windows caption reported by `wmic os get Caption`, or None on failure.

      The header line is removed by piping through `findstr /v Caption`.
      """
      system32 = os.path.join(os.environ.get('SYSTEMROOT', 'C:\\Windows'),
                              'System32')
      wmic_cmd = os.path.join(system32, 'Wbem', 'wmic')
      findstr_cmd = os.path.join(system32, 'findstr')
      command = '{} os get Caption | {} /v Caption'.format(wmic_cmd, findstr_cmd)
      return run_and_read_all(run_lambda, command)
  def get_lsb_version(run_lambda):
      """Return the `Description:` value from `lsb_release -a`, or None if unavailable."""
      return run_and_parse_first_match(
          run_lambda,
          command='lsb_release -a',
          regex=r'Description:\t(.*)',
      )
  def check_release_file(run_lambda):
      """Return the PRETTY_NAME value found in /etc/*-release, or None if unavailable."""
      return run_and_parse_first_match(
          run_lambda,
          command='cat /etc/*-release',
          regex=r'PRETTY_NAME="(.*)"',
      )
  def get_os(run_lambda):
      """Return a description of the operating system.

      Windows/cygwin: the Windows caption. macOS: 'macOS <version> (<machine>)'
      or None if the version is unknown. Linux: the distribution description
      (or the platform name as a last resort) followed by the machine type.
      Any other platform: the platform name.
      """
      from platform import machine
      platform = get_platform()
      if platform in ('win32', 'cygwin'):
          return get_windows_version(run_lambda)
      if platform == 'darwin':
          version = get_mac_version(run_lambda)
          if version is None:
              return None
          return 'macOS {} ({})'.format(version, machine())
      if platform != 'linux':
          # Unknown platform
          return platform
      # Try lsb_release first (Ubuntu/Debian based), then /etc/*-release.
      for probe in (get_lsb_version, check_release_file):
          desc = probe(run_lambda)
          if desc is not None:
              return '{} ({})'.format(desc, machine())
      return '{} ({})'.format(platform, machine())
  def get_python_platform():
      """Return the platform string reported by `platform.platform()`."""
      from platform import platform as describe_platform
      return describe_platform()
  def get_libc_version():
      """Return '<lib>-<version>' from `platform.libc_ver()` on Linux, 'N/A' elsewhere."""
      import platform
      if get_platform() == 'linux':
          return '-'.join(platform.libc_ver())
      return 'N/A'
  def get_pip_packages(run_lambda, patterns=None):
      """Return (pip_version, packages) for the running interpreter.

      `packages` holds the `pip list --format=freeze` lines that mention any
      of `patterns` (default DEFAULT_PIP_PATTERNS), joined by newlines, or
      None when the pip command fails. `pip_version` is 'pip3' on Python 3
      and 'pip' otherwise.

      Note: will also find conda-installed pytorch and numpy packages.
      """
      if patterns is None:
          patterns = DEFAULT_PIP_PATTERNS
      pip_version = 'pip3' if sys.version_info[0] == 3 else 'pip'
      # People generally have `pip` as `pip` or `pip3`
      # But here it is invoked as `python -mpip`
      out = run_and_read_all(
          run_lambda, [sys.executable, '-mpip', 'list', '--format=freeze'])
      if out is None:
          # pip is missing or failed: report "nothing collected" instead of
          # raising AttributeError on None.splitlines().
          return pip_version, None
      relevant = "\n".join(line for line in out.splitlines()
                           if any(name in line for name in patterns))
      return pip_version, relevant
  def get_cachingallocator_config():
      """Return the value of PYTORCH_CUDA_ALLOC_CONF, or '' when it is not set."""
      return os.environ.get('PYTORCH_CUDA_ALLOC_CONF', '')
  def get_cuda_module_loading_config():
      """Return CUDA_MODULE_LOADING from the environment, or "N/A" without usable CUDA.

      When torch reports CUDA as available, `torch.cuda.init()` is called
      before the variable is read.
      """
      if not (TORCH_AVAILABLE and torch.cuda.is_available()):
          return "N/A"
      torch.cuda.init()
      return os.environ.get('CUDA_MODULE_LOADING', '')
  def is_xnnpack_available():
      """Return `str(torch.backends.xnnpack.enabled)`, or "N/A" when torch is unavailable."""
      if not TORCH_AVAILABLE:
          return "N/A"
      import torch.backends.xnnpack
      return str(torch.backends.xnnpack.enabled)  # type: ignore[attr-defined]
  def get_env_info():
      """Collect every field of the report and return them as a SystemEnv.

      Torch-derived fields are 'N/A' when torch could not be imported; the HIP
      fields are 'N/A' unless torch is a HIP build.
      """
      run_lambda = run
      pip_version, pip_list_output = get_pip_packages(run_lambda)

      # Defaults, overwritten below where torch can provide a real value.
      version_str = debug_mode_str = cuda_available_str = cuda_version_str = 'N/A'
      hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A'
      if TORCH_AVAILABLE:
          version_str = torch.__version__
          debug_mode_str = str(torch.version.debug)
          cuda_available_str = str(torch.cuda.is_available())
          cuda_version_str = torch.version.cuda
          if getattr(torch.version, 'hip', None) is not None:  # HIP version

              def get_version_or_na(cfg, prefix):
                  # Last whitespace-separated token of the first matching line.
                  for entry in cfg:
                      if prefix in entry:
                          return entry.rsplit(None, 1)[-1]
                  return 'N/A'

              cfg = torch._C._show_config().split('\n')
              hip_runtime_version = get_version_or_na(cfg, 'HIP Runtime')
              miopen_runtime_version = get_version_or_na(cfg, 'MIOpen')
              cuda_version_str = 'N/A'
              hip_compiled_version = torch.version.hip

      sys_version = sys.version.replace("\n", " ")
      python_version = '{} ({}-bit runtime)'.format(
          sys_version, sys.maxsize.bit_length() + 1)

      conda_packages = get_conda_packages(run_lambda)

      # aphrodite specific fields
      rocm_version = get_rocm_version(run_lambda)
      neuron_sdk_version = get_neuron_sdk_version(run_lambda)
      aphrodite_version = get_aphrodite_version()
      aphrodite_build_flags = summarize_aphrodite_build_flags()
      gpu_topo = get_gpu_topo(run_lambda)

      return SystemEnv(
          torch_version=version_str,
          is_debug_build=debug_mode_str,
          python_version=python_version,
          python_platform=get_python_platform(),
          is_cuda_available=cuda_available_str,
          cuda_compiled_version=cuda_version_str,
          cuda_runtime_version=get_running_cuda_version(run_lambda),
          cuda_module_loading=get_cuda_module_loading_config(),
          nvidia_gpu_models=get_gpu_info(run_lambda),
          nvidia_driver_version=get_nvidia_driver_version(run_lambda),
          cudnn_version=get_cudnn_version(run_lambda),
          hip_compiled_version=hip_compiled_version,
          hip_runtime_version=hip_runtime_version,
          miopen_runtime_version=miopen_runtime_version,
          pip_version=pip_version,
          pip_packages=pip_list_output,
          conda_packages=conda_packages,
          os=get_os(run_lambda),
          libc_version=get_libc_version(),
          gcc_version=get_gcc_version(run_lambda),
          clang_version=get_clang_version(run_lambda),
          cmake_version=get_cmake_version(run_lambda),
          caching_allocator_config=get_cachingallocator_config(),
          is_xnnpack_available=is_xnnpack_available(),
          cpu_info=get_cpu_info(run_lambda),
          rocm_version=rocm_version,
          neuron_sdk_version=neuron_sdk_version,
          aphrodite_version=aphrodite_version,
          aphrodite_build_flags=aphrodite_build_flags,
          gpu_topo=gpu_topo,
      )
  # Report template; each placeholder is a SystemEnv field name. The general
  # section comes first, followed by the aphrodite specific section, with a
  # single newline between every line.
  env_info_fmt = "\n".join((
      "PyTorch version: {torch_version}",
      "Is debug build: {is_debug_build}",
      "CUDA used to build PyTorch: {cuda_compiled_version}",
      "ROCM used to build PyTorch: {hip_compiled_version}",
      "OS: {os}",
      "GCC version: {gcc_version}",
      "Clang version: {clang_version}",
      "CMake version: {cmake_version}",
      "Libc version: {libc_version}",
      "Python version: {python_version}",
      "Python platform: {python_platform}",
      "Is CUDA available: {is_cuda_available}",
      "CUDA runtime version: {cuda_runtime_version}",
      "CUDA_MODULE_LOADING set to: {cuda_module_loading}",
      "GPU models and configuration: {nvidia_gpu_models}",
      "Nvidia driver version: {nvidia_driver_version}",
      "cuDNN version: {cudnn_version}",
      "HIP runtime version: {hip_runtime_version}",
      "MIOpen runtime version: {miopen_runtime_version}",
      "Is XNNPACK available: {is_xnnpack_available}",
      "CPU:",
      "{cpu_info}",
      "Versions of relevant libraries:",
      "{pip_packages}",
      "{conda_packages}",
      # aphrodite specific section
      "ROCM Version: {rocm_version}",
      "Neuron SDK Version: {neuron_sdk_version}",
      "Aphrodite Version: {aphrodite_version}",
      "Aphrodite Build Flags:",
      "{aphrodite_build_flags}",
      "GPU Topology:",
      "{gpu_topo}",
  ))
  def pretty_str(envinfo):
      """Render a SystemEnv as the human-readable report text.

      True/False become 'Yes'/'No', None becomes 'Could not collect', package
      listings are tagged line by line, and the result is substituted into
      `env_info_fmt`.
      """
      fields = envinfo._asdict()

      # If nvidia_gpu_models is multiline, start on the next line
      gpu_models = envinfo.nvidia_gpu_models
      if gpu_models is not None and len(gpu_models.split('\n')) > 1:
          gpu_models = '\n{}\n'.format(gpu_models)
      fields['nvidia_gpu_models'] = gpu_models

      # If the machine doesn't have CUDA, report some fields as 'No CUDA'
      dynamic_cuda_fields = [
          'cuda_runtime_version',
          'nvidia_gpu_models',
          'nvidia_driver_version',
      ]
      all_cuda_fields = dynamic_cuda_fields + ['cudnn_version']
      nothing_detected = all(fields[name] is None
                             for name in dynamic_cuda_fields)
      if (TORCH_AVAILABLE and not torch.cuda.is_available()
              and nothing_detected):
          for name in all_cuda_fields:
              fields[name] = 'No CUDA'
          if envinfo.cuda_compiled_version is None:
              fields['cuda_compiled_version'] = 'None'

      # Replace True with Yes, False with No, and None with 'Could not collect'
      for key in list(fields):
          value = fields[key]
          if value is True:
              fields[key] = 'Yes'
          elif value is False:
              fields[key] = 'No'
          elif value is None:
              fields[key] = 'Could not collect'

      # An empty listing becomes 'No relevant packages'; every line is then
      # tagged with a prefix. If the listing was previously None, it shows up
      # as e.g. '[conda] Could not collect'.
      package_tags = (
          ('pip_packages', '[{}] '.format(envinfo.pip_version)),
          ('conda_packages', '[conda] '),
      )
      for key, tag in package_tags:
          text = fields[key]
          if text is not None and len(text) == 0:
              text = 'No relevant packages'
          if text:
              text = '\n'.join(tag + line for line in text.split('\n'))
          fields[key] = text

      fields['cpu_info'] = envinfo.cpu_info
      return env_info_fmt.format(**fields)
  def get_pretty_env_info():
      """Collect the environment information and return it as formatted text."""
      env = get_env_info()
      return pretty_str(env)
  def main():
      """Print the environment report; on Linux also point at the newest minidump.

      The minidump hint is written to stderr and only when torch exposes
      `torch.utils._crash_handler` and its minidump directory contains at
      least one entry.
      """
      print("Collecting environment information...")
      output = get_pretty_env_info()
      print(output)
      if TORCH_AVAILABLE and hasattr(torch, 'utils') and hasattr(
              torch.utils, '_crash_handler'):
          minidump_dir = torch.utils._crash_handler.DEFAULT_MINIDUMP_DIR
          if sys.platform == "linux" and os.path.exists(minidump_dir):
              dumps = [
                  os.path.join(minidump_dir, dump)
                  for dump in os.listdir(minidump_dir)
              ]
              # max() raises ValueError on an empty sequence, so an existing
              # but empty minidump directory must be skipped.
              if dumps:
                  latest = max(dumps, key=os.path.getctime)
                  ctime = os.path.getctime(latest)
                  creation_time = datetime.datetime.fromtimestamp(
                      ctime).strftime('%Y-%m-%d %H:%M:%S')
                  msg = "\n*** Detected a minidump at {} created on {}, ".format(latest, creation_time) + \
                        "if this is related to your bug please include it when you file a report ***"
                  print(msg, file=sys.stderr)
  # Run the collector only when this file is executed as a script.
  if __name__ == '__main__':
      main()