From 32b31fd578ae0d5680b5b1affd080f1ce8cd1d36 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Sun, 24 Jul 2022 15:31:41 +0800 Subject: [PATCH] fix(mgb): change the check method of cuda sm code GitOrigin-RevId: 23dbc9b5747128813efdb6f7d4f557004a5cbf95 --- imperative/python/megengine/__init__.py | 21 -------- src/core/impl/comp_node/cuda/comp_node.cpp | 62 ++++++++++++++++++++++ 2 files changed, 62 insertions(+), 21 deletions(-) diff --git a/imperative/python/megengine/__init__.py b/imperative/python/megengine/__init__.py index 093f22aae..f71c96b96 100644 --- a/imperative/python/megengine/__init__.py +++ b/imperative/python/megengine/__init__.py @@ -96,27 +96,6 @@ from .utils.persistent_cache import PersistentCacheOnServer as _PersistentCacheO from .version import __version__ -def _check_sm_version(): - cur_logger = get_logger(__name__) - ngpus = get_device_count("gpu") - supported_sm_versions = re.findall(r"sm_(\d+)", _get_supported_sm_versions()) - for idx in range(ngpus): - prop = get_cuda_device_property(idx) - cur_sm = str(prop.major * 10 + prop.minor) - if not cur_sm in supported_sm_versions: - cur_logger.warning( - "{} with CUDA capability sm_{} is not compatible with the current MegEngine installation. The current MegEngine install supports CUDA {} {}. 
If you want to use the {} with MegEngine, please check the instructions at https://github.com/MegEngine/MegEngine/blob/master/scripts/cmake-build/BUILD_README.md".format( - prop.name, - cur_sm, - "capabilities" if len(supported_sm_versions) > 1 else "capability", - " ".join(["sm_" + v for v in supported_sm_versions]), - prop.name, - ) - ) - - -_check_sm_version() - _exit_handlers = [] diff --git a/src/core/impl/comp_node/cuda/comp_node.cpp b/src/core/impl/comp_node/cuda/comp_node.cpp index 3a6957563..fe107ccd3 100644 --- a/src/core/impl/comp_node/cuda/comp_node.cpp +++ b/src/core/impl/comp_node/cuda/comp_node.cpp @@ -8,10 +8,15 @@ using namespace mgb; #if MGB_CUDA +#if MEGDNN_WITH_CUDA +#include "cuda_sm_gen.h" +#endif + #include "megbrain/comp_node/alloc.h" #include #include +#include <regex> #include @@ -417,6 +422,63 @@ void CudaCompNodeImpl::init(const Locator& locator, const Locator& locator_logic m_env.init_cuda_async( locator.device, make_comp_node_from_impl(this), {on_succ, on_error}); +#if MEGDNN_WITH_CUDA + auto cur_prop = CudaCompNode::get_device_prop(locator.device); + auto cur_sm = + std::string("sm_") + std::to_string(cur_prop.major * 10 + cur_prop.minor); + const std::string mge_gen_code = MGE_CUDA_GENCODE; + std::regex re("sm_([0-9]+)"); + std::vector<std::string> build_sm( + std::sregex_token_iterator(mge_gen_code.begin(), mge_gen_code.end(), re), + std::sregex_token_iterator()); + + if (std::find(build_sm.begin(), build_sm.end(), cur_sm) == build_sm.end()) { + std::string build_sm_info = ""; + for (auto&& s : build_sm) { + build_sm_info += std::string(" ") + s; + } + + std::vector<int> support_gpu; + for (int i = 0; i < get_device_count(); i++) { + auto prop = CudaCompNode::get_device_prop(i); + auto sm = std::string("sm_") + std::to_string(prop.major * 10 + prop.minor); + if (std::find(build_sm.begin(), build_sm.end(), sm) != build_sm.end()) { + support_gpu.emplace_back(i); + } + } + + if (support_gpu.size() == 0) { + mgb_throw( + MegBrainError, + "%s(gpu%d) with CUDA 
capability %s is not compatible with the " + "current MegEngine installation. The current MegEngine install " + "supports CUDA capabilities%s. If you want to use the %s(gpu%d) " + "with MegEngine, please check the instructions at " + "https://github.com/MegEngine/MegEngine/blob/master/scripts/" + "cmake-build/BUILD_README.md", + cur_prop.name.c_str(), locator.device, cur_sm.c_str(), + build_sm_info.c_str(), cur_prop.name.c_str(), locator.device); + } else { + std::string support_gpu_info = ""; + for (auto&& g : support_gpu) { + support_gpu_info += std::string(" gpu") + std::to_string(g); + } + mgb_throw( + MegBrainError, + "%s(gpu%d) with CUDA capability %s is not compatible with the " + "current MegEngine installation. The current MegEngine install " + "supports CUDA capabilities%s. You can try to use%s instead or " + "configure CUDA_VISIBLE_DEVICES to choose another cuda card. If you " + "really want to use the %s(gpu%d) with MegEngine, please check the " + "instructions at " + "https://github.com/MegEngine/MegEngine/blob/master/scripts/" + "cmake-build/BUILD_README.md", + cur_prop.name.c_str(), locator.device, cur_sm.c_str(), + build_sm_info.c_str(), support_gpu_info.c_str(), + cur_prop.name.c_str(), locator.device); + } + } +#endif } void CudaCompNodeImpl::fini() { -- GitLab