提交 fca4ae57 编写于 作者: M Megvii Engine Team

fix(mge/dist): give more error messages when the device check fails

GitOrigin-RevId: 5601aca458157404244fa16c9623428722544308
上级 53a76526
@@ -153,13 +153,15 @@ def synchronized(func: Callable):
 def _check_device_initialized(device_type: str, rank: int):
     try:
         test = Tensor(1, device=(device_type + str(rank)))
-        inited = False
         del test
-    except:
-        inited = True
-    errmsg = "The cuda env is set before the forked thread starts. Please do not use any cuda function or variable before forking."
-    if inited:
-        raise RuntimeError(errmsg)
+    except Exception as e:
+        errmsg = (
+            "Device initialization check failed, which may be caused "
+            "by using CUDA before forking the thread. Please review "
+            "the code to ensure that no CUDA functions or variables "
+            "are used before forking."
+        )
+        raise RuntimeError(errmsg) from e
 
 
 def _check_interpreter_status():
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册