From 32b31fd578ae0d5680b5b1affd080f1ce8cd1d36 Mon Sep 17 00:00:00 2001
From: Megvii Engine Team <megengine@megvii.com>
Date: Sun, 24 Jul 2022 15:31:41 +0800
Subject: [PATCH] fix(mgb): change the check method of cuda sm code

GitOrigin-RevId: 23dbc9b5747128813efdb6f7d4f557004a5cbf95
---
 imperative/python/megengine/__init__.py    | 21 --------
 src/core/impl/comp_node/cuda/comp_node.cpp | 62 ++++++++++++++++++++++
 2 files changed, 62 insertions(+), 21 deletions(-)
diff --git a/imperative/python/megengine/__init__.py b/imperative/python/megengine/__init__.py
index 093f22aae..f71c96b96 100644
--- a/imperative/python/megengine/__init__.py
+++ b/imperative/python/megengine/__init__.py
@@ -96,27 +96,6 @@ from .utils.persistent_cache import PersistentCacheOnServer as _PersistentCacheO
 from .version import __version__
 
 
-def _check_sm_version():
-    cur_logger = get_logger(__name__)
-    ngpus = get_device_count("gpu")
-    supported_sm_versions = re.findall(r"sm_(\d+)", _get_supported_sm_versions())
-    for idx in range(ngpus):
-        prop = get_cuda_device_property(idx)
-        cur_sm = str(prop.major * 10 + prop.minor)
-        if not cur_sm in supported_sm_versions:
-            cur_logger.warning(
-                "{} with CUDA capability sm_{} is not compatible with the current MegEngine installation. The current MegEngine install supports CUDA {} {}. If you want to use the {} with MegEngine, please check the instructions at https://github.com/MegEngine/MegEngine/blob/master/scripts/cmake-build/BUILD_README.md".format(
-                    prop.name,
-                    cur_sm,
-                    "capabilities" if len(supported_sm_versions) > 1 else "capability",
-                    " ".join(["sm_" + v for v in supported_sm_versions]),
-                    prop.name,
-                )
-            )
-
-
-_check_sm_version()
-
 _exit_handlers = []
 
 
diff --git a/src/core/impl/comp_node/cuda/comp_node.cpp b/src/core/impl/comp_node/cuda/comp_node.cpp
index 3a6957563..fe107ccd3 100644
--- a/src/core/impl/comp_node/cuda/comp_node.cpp
+++ b/src/core/impl/comp_node/cuda/comp_node.cpp
@@ -8,10 +8,15 @@ using namespace mgb;
 
 #if MGB_CUDA
 
+#if MEGDNN_WITH_CUDA
+#include "cuda_sm_gen.h"
+#endif
+
 #include "megbrain/comp_node/alloc.h"
 
 #include <cctype>
 #include <cstdio>
+#include <regex>
 
 #include <thread>
 
@@ -417,6 +422,63 @@ void CudaCompNodeImpl::init(const Locator& locator, const Locator& locator_logic
 
     m_env.init_cuda_async(
             locator.device, make_comp_node_from_impl(this), {on_succ, on_error});
+#if MEGDNN_WITH_CUDA  // reject devices whose sm_XY is not in MGE_CUDA_GENCODE
+    auto cur_prop = CudaCompNode::get_device_prop(locator.device);
+    auto cur_sm =
+            std::string("sm_") + std::to_string(cur_prop.major * 10 + cur_prop.minor);
+    const std::string mge_gen_code = MGE_CUDA_GENCODE;
+    std::regex re("sm_([0-9]+)");  // whole matches (e.g. "sm_75") are collected below
+    std::vector<std::string> build_sm(
+            std::sregex_token_iterator(mge_gen_code.begin(), mge_gen_code.end(), re),
+            std::sregex_token_iterator());
+
+    if (std::find(build_sm.begin(), build_sm.end(), cur_sm) == build_sm.end()) {
+        std::string build_sm_info = "";  // space-prefixed list for the message
+        for (auto&& s : build_sm) {
+            build_sm_info += std::string(" ") + s;
+        }
+
+        std::vector<int> support_gpu;  // devices whose sm is in build_sm
+        for (int i = 0; i < get_device_count(); i++) {
+            auto prop = CudaCompNode::get_device_prop(i);
+            auto sm = std::string("sm_") + std::to_string(prop.major * 10 + prop.minor);
+            if (std::find(build_sm.begin(), build_sm.end(), sm) != build_sm.end()) {
+                support_gpu.emplace_back(i);
+            }
+        }
+
+        if (support_gpu.empty()) {  // no alternative device to suggest
+            mgb_throw(
+                    MegBrainError,
+                    "%s(gpu%d) with CUDA capability %s is not compatible with the "
+                    "current MegEngine installation. The current MegEngine install "
+                    "supports CUDA capabilities%s. If you want to use the %s(gpu%d) "
+                    "with MegEngine, please check the instructions at "
+                    "https://github.com/MegEngine/MegEngine/blob/master/scripts/"
+                    "cmake-build/BUILD_README.md",
+                    cur_prop.name.c_str(), locator.device, cur_sm.c_str(),
+                    build_sm_info.c_str(), cur_prop.name.c_str(), locator.device);
+        } else {  // name the devices that would work with this build
+            std::string support_gpu_info = "";
+            for (auto&& g : support_gpu) {
+                support_gpu_info += std::string(" gpu") + std::to_string(g);
+            }
+            mgb_throw(
+                    MegBrainError,
+                    "%s(gpu%d) with CUDA capability %s is not compatible with the "
+                    "current MegEngine installation. The current MegEngine install "
+                    "supports CUDA capabilities%s. You can try to use%s instead or "
+                    "config CUDA_VISIBLE_DEVICES to choose another cuda card. If you "
+                    "really want to use the %s(gpu%d) with MegEngine, please check the "
+                    "instructions at "
+                    "https://github.com/MegEngine/MegEngine/blob/master/scripts/"
+                    "cmake-build/BUILD_README.md",
+                    cur_prop.name.c_str(), locator.device, cur_sm.c_str(),
+                    build_sm_info.c_str(), support_gpu_info.c_str(),
+                    cur_prop.name.c_str(), locator.device);
+        }
+    }
+#endif  // MEGDNN_WITH_CUDA
 }
 
 void CudaCompNodeImpl::fini() {
-- 
GitLab