MegEngine 天元 / MegEngine

Commit 04b1a45a
Authored Apr 06, 2021 by Megvii Engine Team
fix(dnn): fix cudnn crash when finalize called after cudnn dtor
GitOrigin-RevId: b0ad639921e8ba1e370696f16a9d87024a83f4c9
Parent: 14a089c4
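How the fix works (one reading of this commit, hedged): `CompNode::finalize` tears down the per-device cuDNN/cuBLAS handles, and when it ran only after cuDNN's own destructor at process exit, those destroy calls hit an already-torn-down library and crashed. The change registers `CompNode::finalize` with `atexit()` during CUDA/ROCm async initialization (`src/core/impl/comp_node_env.cpp` below); since the C++ runtime runs `atexit` handlers and static destructors as one reverse-ordered sequence, a finalizer registered during init is guaranteed to run before teardown hooks installed earlier. A minimal standalone sketch of that ordering follows the `comp_node_env.cpp` diff. The same commit also flips `MGE_WITH_CUDNN_SHARED` to ON by default, introduces an `MGE_WITH_CUBLAS_SHARED` option, and adjusts the wheel scripts and tests accordingly.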
Changes: 7 files changed, 46 insertions(+), 14 deletions(−)

- CMakeLists.txt (+23 −4)
- dnn/src/cuda/handle.cpp (+6 −0)
- imperative/python/test/integration/test_dp_correctness.py (+1 −1)
- imperative/python/test/unit/utils/test_network_node.py (+9 −3)
- scripts/whl/manylinux2014/build_wheel_common.sh (+3 −3)
- src/core/impl/comp_node_env.cpp (+4 −0)
- src/gopt/test/inference.cpp (+0 −3)
CMakeLists.txt

```diff
@@ -40,7 +40,8 @@ option(MGE_CUDA_USE_STATIC "Enable MegEngine CUDA static linking." ON)
 option(MGE_WITH_TRT "Build MegEngine with TensorRT." ON)
 option(MGE_WITH_CUDA_STUB "Build MegEngine with CUDA stub." ON)
 option(MGE_WITH_NVRTC_STUB "Build MegEngine with NVRTC stub." OFF)
-option(MGE_WITH_CUDNN_SHARED "Build MegEngine with CUDNN shared." OFF)
+option(MGE_WITH_CUDNN_SHARED "Build MegEngine with CUDNN shared." ON)
+option(MGE_WITH_CUBLAS_SHARED "Build MegEngine with CUBLAS shared." OFF)
 option(MGE_USE_SYSTEM_LIB "Build MegEngine with system libraries." OFF)
 option(MGB_WITH_FLATBUFFERS "Build MegBrain with FlatBuffers serialization support." ON)
 option(MGE_WITH_CAMBRICON "Build MegEngine with Cambricon support" OFF)
@@ -60,6 +61,11 @@ option(MGE_WITH_ROCM "Enable ROCM support" OFF)
 option(MGE_WITH_LARGE_ARCHIVE "Enable big archive link support" OFF)
 
+if(MSVC OR WIN32)
+    message(STATUS "windows force cudnn static link")
+    set(MGE_WITH_CUDNN_SHARED OFF)
+endif()
+
 if(MGE_WITH_NVRTC_STUB OR MGE_WITH_CUDA_STUB)
     set(MGE_WITH_ANY_CUDA_STUB ON)
 else()
@@ -472,15 +478,28 @@ if(MGE_WITH_CUDA)
         endif()
     endif()
     if(MSVC OR WIN32)
-        list(APPEND MGE_CUDA_LIBS cusolver.lib cublas.lib curand.lib cudart_static.lib cusparse.lib)
+        list(APPEND MGE_CUDA_LIBS cusolver.lib curand.lib cudart_static.lib cusparse.lib)
     else()
-        list(APPEND MGE_CUDA_LIBS cusolver_static cublas_static curand_static culibos cudart_static cusparse_static)
+        list(APPEND MGE_CUDA_LIBS cusolver_static curand_static culibos cudart_static cusparse_static)
+    endif()
+    if(MSVC OR WIN32)
+        list(APPEND MGE_CUDA_LIBS cublas.lib)
+    else()
+        if(MGE_WITH_CUBLAS_SHARED)
+            list(APPEND MGE_CUDA_LIBS cublas)
+        else()
+            list(APPEND MGE_CUDA_LIBS cublas_static)
+        endif()
     endif()
     if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.1.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.1.0")
         if(MSVC OR WIN32)
             list(APPEND MGE_CUDA_LIBS cublasLt.lib)
         else()
-            list(APPEND MGE_CUDA_LIBS cublasLt_static)
+            if(MGE_WITH_CUBLAS_SHARED)
+                list(APPEND MGE_CUDA_LIBS cublasLt)
+            else()
+                list(APPEND MGE_CUDA_LIBS cublasLt_static)
+            endif()
         endif()
     endif()
     if((${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.0.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.0.0") AND NOT MSVC AND NOT WIN32)
```
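Net effect of the CMakeLists.txt change: `MGE_WITH_CUDNN_SHARED` now defaults to ON (except on Windows, where cuDNN is forced back to static linking), and the new `MGE_WITH_CUBLAS_SHARED` option (default OFF) selects between `cublas`/`cublasLt` and their `_static` variants on non-Windows builds. cuBLAS is split out of the combined CUDA library lists so that choice can be made independently of the other CUDA libraries; a consumer would toggle it by passing `-DMGE_WITH_CUBLAS_SHARED=ON` at configure time.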
dnn/src/cuda/handle.cpp

```diff
@@ -54,6 +54,12 @@ HandleImpl::HandleImpl(megcoreComputingHandle_t comp_handle):
 #if CUDA_VERSION >= 10010
     megdnn_assert(cublasLtGetVersion() >= 10010,
                   "cuda library version is too low to run cublasLt");
 #endif
+#if CUDNN_VERSION >= 8000
+    megdnn_log_warn(R"(
+        Cudnn8 will jit ptx code with cache. You can set
+        CUDA_CACHE_MAXSIZE and CUDA_CACHE_PATH environment var to avoid repeat jit(very slow).
+        For example `export CUDA_CACHE_MAXSIZE=2147483647` and `export CUDA_CACHE_PATH=/data/.cuda_cache`)");
+#endif
     cudnn_check(cudnnCreate(&m_cudnn_handle));
     cublas_check(cublasCreate(&m_cublas_handle));
```
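As a usage note for the new warning: `CUDA_CACHE_MAXSIZE` and `CUDA_CACHE_PATH` control the driver's JIT compilation cache, so they should be in the environment before the first CUDA call. A minimal sketch of setting them from C++ rather than the shell (POSIX `setenv`; the values are just the examples from the log message, not requirements):

```cpp
// Sketch: put the CUDA JIT-cache variables into the environment before any
// CUDA/cuDNN initialization, mirroring the `export` commands the warning
// suggests. POSIX-only (setenv); values are the warning's own examples.
#include <cstdlib>

int main() {
    setenv("CUDA_CACHE_MAXSIZE", "2147483647", /*overwrite=*/1);
    setenv("CUDA_CACHE_PATH", "/data/.cuda_cache", /*overwrite=*/1);
    // ... cudnnCreate()/cublasCreate() etc. would follow here ...
    return 0;
}
```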
imperative/python/test/integration/test_dp_correctness.py

```diff
@@ -199,4 +199,4 @@ def test_dp_correctness():
     model_name = "mnist_model_with_test.mge"
     model_path = os.path.join(os.path.dirname(__file__), model_name)
     set_execution_strategy("HEURISTIC_REPRODUCIBLE")
-    run_test(model_path, False, False, max_err=1e-5)
+    run_test(model_path, False, False, max_err=5e-5)
```
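The only change here loosens the comparison tolerance from `max_err=1e-5` to `5e-5`, presumably to absorb the small numeric differences of the newly covered CUDA builds.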
imperative/python/test/unit/utils/test_network_node.py

```diff
@@ -22,7 +22,7 @@ from megengine.utils.comp_graph_tools import GraphInference
 from megengine.utils.network import Network as Net
 
-def check_pygraph_dump(trace_func, inp_data, expect_results):
+def check_pygraph_dump(trace_func, inp_data, expect_results, max_err=None):
     orig_model = io.BytesIO()
     inp_size = len(inp_data)
     out_size = len(expect_results)
@@ -46,7 +46,12 @@ def check_pygraph_dump(trace_func, inp_data, expect_results):
     results = graph.run(inp_dict=inp_dict)
     for ind, tensor in enumerate(expect_results):
-        np.testing.assert_equal(tensor.numpy(), results[output_names[ind]])
+        if max_err:
+            np.testing.assert_almost_equal(
+                tensor.numpy(), results[output_names[ind]], max_err
+            )
+        else:
+            np.testing.assert_equal(tensor.numpy(), results[output_names[ind]])
         assert tensor.dtype == results[output_names[ind]].dtype
@@ -178,7 +183,8 @@ def test_convtranspose():
     data = Tensor(np.random.random((1, 32, 32, 32)))
     result = fwd(data)
-    check_pygraph_dump(fwd, [data], [result])
+    # cu111 has 1e-7 diff
+    check_pygraph_dump(fwd, [data], [result], 5)
 
 
 @pytest.mark.skip(reason="pytest aborted")
```
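`check_pygraph_dump` gains an optional `max_err` parameter: when set, outputs are compared with `np.testing.assert_almost_equal`, whose third positional argument is `decimal` (agreement to that many decimal places). The `check_pygraph_dump(fwd, [data], [result], 5)` call in `test_convtranspose` therefore tolerates the ~1e-7 difference noted for cu111 builds while still requiring 5-decimal agreement.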
scripts/whl/manylinux2014/build_wheel_common.sh

```diff
@@ -31,7 +31,7 @@ echo "Build with ${SDK_NAME}"
 if [ $SDK_NAME == "cu101" ];then
     CUDA_COPY_LIB_LIST="${CUDA_LIB_DIR}/libnvrtc.so.10.1"
-    EXTRA_CMAKE_FLAG=" -DMGE_WITH_CUDNN_SHARED=OFF"
+    EXTRA_CMAKE_FLAG=" -DMGE_WITH_CUDNN_SHARED=OFF -DMGE_WITH_CUBLAS_SHARED=OFF"
     BUILD_GCC8="ON"
     REQUIR_CUDA_VERSION="10010"
     REQUIR_CUDNN_VERSION="7.6.3"
@@ -49,7 +49,7 @@ elif [ $SDK_NAME == "cu111" ];then
         ${CUDNN_LIB_DIR}/libcudnn_ops_infer.so.8:\
         ${CUDNN_LIB_DIR}/libcudnn_ops_train.so.8:\
         ${CUDNN_LIB_DIR}/libcudnn.so.8"
-    EXTRA_CMAKE_FLAG=" -DMGE_WITH_CUDNN_SHARED=ON \
+    EXTRA_CMAKE_FLAG=" -DMGE_WITH_CUDNN_SHARED=ON -DMGE_WITH_CUBLAS_SHARED=ON \
        -gencode arch=compute_61,code=sm_61 \
                 arch=compute_70,code=sm_70 \
                 arch=compute_75,code=sm_75 \
@@ -72,7 +72,7 @@ elif [ $SDK_NAME == "cu112" ];then
         ${CUDNN_LIB_DIR}/libcudnn_ops_infer.so.8:\
         ${CUDNN_LIB_DIR}/libcudnn_ops_train.so.8:\
         ${CUDNN_LIB_DIR}/libcudnn.so.8"
-    EXTRA_CMAKE_FLAG=" -DMGE_WITH_CUDNN_SHARED=ON \
+    EXTRA_CMAKE_FLAG=" -DMGE_WITH_CUDNN_SHARED=ON -DMGE_WITH_CUBLAS_SHARED=ON \
        -gencode arch=compute_61,code=sm_61 \
                 arch=compute_70,code=sm_70 \
                 arch=compute_75,code=sm_75 \
```
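The wheel-build script forwards the new CMake option per SDK: `-DMGE_WITH_CUBLAS_SHARED=OFF` for cu101 and `=ON` for cu111/cu112, matching the shared-cuDNN settings already passed for each build.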
src/core/impl/comp_node_env.cpp

```diff
@@ -214,6 +214,8 @@ void CompNodeEnv::init_cuda_async(int dev, CompNode comp_node,
         mgb_assert(m_property.mem_alignment ==
                    MegDNNHandle::get(*this).handle()->alignment_requirement());
+        auto err = atexit(&CompNode::finalize);
+        mgb_assert(!err, "failed to register CompNode::finalize at exit");
     }
     MGB_CATCH(std::exception& exc, {
         mgb_log_error("async cuda init failed: %s", exc.what());
@@ -304,6 +306,8 @@ void CompNodeEnv::init_rocm_async(int dev, CompNode comp_node,
         mgb_assert(m_property.mem_alignment ==
                    MegDNNHandle::get(*this).handle()->alignment_requirement());
+        auto err = atexit(&CompNode::finalize);
+        mgb_assert(!err, "failed to register CompNode::finalize at exit");
     }
     MGB_CATCH(std::exception& exc, {
         mgb_log_error("async rocm init failed: %s", exc.what());
```
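Why registering the finalizer with `atexit()` during initialization is enough: `atexit` handlers run in reverse order of registration (and are interleaved in the same reverse sequence with static-storage destructors), so a handler registered during CUDA init, after the CUDA/cuDNN libraries have set up their own teardown, runs before that teardown at exit. A minimal standalone sketch of the guarantee, with illustrative names only (not MegEngine API):

```cpp
// Sketch: atexit() handlers run last-in, first-out, so a finalizer registered
// during library initialization runs *before* teardown hooks the library
// registered earlier. Names are illustrative stand-ins, not MegEngine's.
#include <cstdio>
#include <cstdlib>

void library_teardown() { std::puts("2. library teardown (stands in for cuDNN's dtor)"); }
void finalize()         { std::puts("1. finalize: destroy handles while the library is alive"); }

int main() {
    std::atexit(library_teardown); // registered first  -> runs last
    std::atexit(finalize);         // registered second -> runs first
    return 0;                      // prints line "1." then line "2."
}
```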
src/gopt/test/inference.cpp

```diff
@@ -1850,8 +1850,6 @@ TEST(TestEnableTensorCore, SmallInputShape) {
     MGB_ASSERT_TENSOR_EQ(host_y, host_y_opt);
 }
 
-//! close for cu111 ci, reopen it when bug fixed
-#if CUDA_VERSION < 11000
 TEST(TestEnableTensorCore, Nchw4Nchw) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -1957,7 +1955,6 @@ TEST(TestEnableTensorCore, Nchw4Nchw) {
         MGB_ASSERT_TENSOR_EQ(host_y, host_y_opt);
     }
 }
-#endif
 
 TEST(TestEnableTensorCore, ConvBiasWithZ) {
     REQUIRE_GPU(1);
```
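Finally, the `#if CUDA_VERSION < 11000` guard around `TestEnableTensorCore.Nchw4Nchw` (with its "close for cu111 ci, reopen it when bug fixed" note) is removed, re-enabling the test on CUDA 11 builds now that the referenced bug is fixed.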