提交 32b31fd5 编写于 作者: M Megvii Engine Team

fix(mgb): change the check method of cuda sm code

GitOrigin-RevId: 23dbc9b5747128813efdb6f7d4f557004a5cbf95
上级 5f863682
......@@ -96,27 +96,6 @@ from .utils.persistent_cache import PersistentCacheOnServer as _PersistentCacheO
from .version import __version__
def _check_sm_version():
    """Log a warning for each GPU whose SM version this build does not list.

    The supported versions are the digit groups of every ``sm_<digits>``
    occurrence in the string returned by ``_get_supported_sm_versions()``.
    A device's version is ``major * 10 + minor`` of its CUDA device property,
    compared as a string against that list.

    Returns:
        None. The only effect is the warnings emitted through the module logger.
    """
    logger = get_logger(__name__)
    gpu_count = get_device_count("gpu")
    built_sms = re.findall(r"sm_(\d+)", _get_supported_sm_versions())
    template = "{} with CUDA capability sm_{} is not compatible with the current MegEngine installation. The current MegEngine install supports CUDA {} {}. If you want to use the {} with MegEngine, please check the instructions at https://github.com/MegEngine/MegEngine/blob/master/scripts/cmake-build/BUILD_README.md"
    for gpu_idx in range(gpu_count):
        device = get_cuda_device_property(gpu_idx)
        device_sm = str(device.major * 10 + device.minor)
        if device_sm in built_sms:
            # This device is covered by the build; nothing to report.
            continue
        # Pick singular/plural so the sentence reads correctly for one entry.
        noun = "capabilities" if len(built_sms) > 1 else "capability"
        sm_listing = " ".join("sm_" + sm for sm in built_sms)
        logger.warning(
            template.format(device.name, device_sm, noun, sm_listing, device.name)
        )
# Run the SM compatibility check as a top-level statement of this module.
_check_sm_version()
# NOTE(review): presumably collects callbacks to run at exit — confirm against
# the code that fills this list.
_exit_handlers = []
......
......@@ -8,10 +8,15 @@ using namespace mgb;
#if MGB_CUDA
#if MEGDNN_WITH_CUDA
#include "cuda_sm_gen.h"
#endif
#include "megbrain/comp_node/alloc.h"
#include <cctype>
#include <cstdio>
#include <regex>
#include <thread>
......@@ -417,6 +422,63 @@ void CudaCompNodeImpl::init(const Locator& locator, const Locator& locator_logic
m_env.init_cuda_async(
locator.device, make_comp_node_from_impl(this), {on_succ, on_error});
#if MEGDNN_WITH_CUDA
    // Reject a device whose compute capability has no matching "sm_NN" entry
    // in MGE_CUDA_GENCODE. The failure is raised through
    // mgb_throw(MegBrainError, ...) with a message that lists the supported
    // SM versions and, when there are any, the other GPUs that do match.
    auto cur_prop = CudaCompNode::get_device_prop(locator.device);
    auto cur_sm =
            std::string("sm_") + std::to_string(cur_prop.major * 10 + cur_prop.minor);
    const std::string mge_gen_code = MGE_CUDA_GENCODE;
    // With the default sub-match index (0) the token iterator yields each
    // complete "sm_NN" match, so entries compare directly against cur_sm.
    std::regex re("sm_([0-9]+)");
    std::vector<std::string> build_sm(
            std::sregex_token_iterator(mge_gen_code.begin(), mge_gen_code.end(), re),
            std::sregex_token_iterator());
    if (std::find(build_sm.begin(), build_sm.end(), cur_sm) == build_sm.end()) {
        // Every entry is prefixed with a space because the text is spliced
        // immediately after the word "capabilities" in the messages below.
        std::string build_sm_info;
        for (auto&& s : build_sm) {
            build_sm_info += ' ';
            build_sm_info += s;
        }
        // Collect the indices of devices whose SM version is in the build
        // list, so the message can suggest a usable alternative.
        std::vector<int> support_gpu;
        for (int i = 0; i < get_device_count(); i++) {
            auto prop = CudaCompNode::get_device_prop(i);
            auto sm = std::string("sm_") + std::to_string(prop.major * 10 + prop.minor);
            if (std::find(build_sm.begin(), build_sm.end(), sm) != build_sm.end()) {
                support_gpu.emplace_back(i);
            }
        }
        if (support_gpu.empty()) {
            // No device matches: point the user at the build instructions.
            mgb_throw(
                    MegBrainError,
                    "%s(gpu%d) with CUDA capability %s is not compatible with the "
                    "current MegEngine installation. The current MegEngine install "
                    "supports CUDA capabilities%s. If you want to use the %s(gpu%d) "
                    "with MegEngine, please check the instructions at "
                    "https://github.com/MegEngine/MegEngine/blob/master/scripts/"
                    "cmake-build/BUILD_README.md",
                    cur_prop.name.c_str(), locator.device, cur_sm.c_str(),
                    build_sm_info.c_str(), cur_prop.name.c_str(), locator.device);
        } else {
            // At least one other device matches: name those devices first.
            std::string support_gpu_info;
            for (auto&& g : support_gpu) {
                support_gpu_info += " gpu";
                support_gpu_info += std::to_string(g);
            }
            mgb_throw(
                    MegBrainError,
                    "%s(gpu%d) with CUDA capability %s is not compatible with the "
                    "current MegEngine installation. The current MegEngine install "
                    "supports CUDA capabilities%s. You can try to use%s instead or "
                    "config CUDA_VISIBLE_DEVICES to choose another cuda card. If you "
                    "really want to use the %s(gpu%d) with MegEngine, please check the "
                    "instructions at "
                    "https://github.com/MegEngine/MegEngine/blob/master/scripts/"
                    "cmake-build/BUILD_README.md",
                    cur_prop.name.c_str(), locator.device, cur_sm.c_str(),
                    build_sm_info.c_str(), support_gpu_info.c_str(),
                    cur_prop.name.c_str(), locator.device);
        }
    }
#endif
}
void CudaCompNodeImpl::fini() {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册