Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
36b1ba05
MegEngine
项目概览
MegEngine 天元
/
MegEngine
大约 1 年 前同步成功
通知
399
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
36b1ba05
编写于
3月 08, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix(mgb/dnn): fix cudnn8.0.4 convbias with z
GitOrigin-RevId: 09453d8a12f8773e1422f9f9b36bd2068b621ee8
上级
dadd5086
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
35 additions
and
19 deletions
+35
-19
src/gopt/impl/inference.cpp
src/gopt/impl/inference.cpp
+11
-0
src/gopt/test/inference.cpp
src/gopt/test/inference.cpp
+24
-19
未找到文件。
src/gopt/impl/inference.cpp
浏览文件 @
36b1ba05
...
...
@@ -36,6 +36,9 @@
#if MGB_ENABLE_TENSOR_RT
#include "megbrain/tensorrt/tensorrt_opr.h"
#endif
#if MGB_CUDA
#include <cudnn.h>
#endif
#include "megbrain/gopt/misc.h"
...
...
@@ -1999,6 +2002,11 @@ void FuseConvBiasZPass::apply(OptState& state) const {
// Predicate: the z input may only be fused into the ConvBias when its
// dtype matches the dtype of the ConvBias output (elementwise add of z
// requires identical quantized/float types).
auto check_fuse_dtype = [&](opr::ConvBias* conv_bias, VarNode* z) -> bool {
    const auto out_dtype_enum = conv_bias->output(0)->dtype().enumv();
    const auto z_dtype_enum = z->dtype().enumv();
    return out_dtype_enum == z_dtype_enum;
};
#if MGB_CUDA && (CUDNN_MAJOR == 8)
// Predicate (cuDNN 8 only): refuse the fusion when z aliases the
// convolution's own input(0) — cuDNN 8.0.x convbias-with-z gives wrong
// results in that self-referential case, so the pass must skip it.
auto check_fuse_param = [&](opr::ConvBias* conv_bias, VarNode* z) -> bool {
    VarNode* conv_input = conv_bias->input(0);
    return z != conv_input;
};
#endif
auto
get_convbias_nonline_mode
=
[
&
](
OperatorNodeBase
*
opr
)
->
NonlineMode
{
if
(
opr
->
same_type
<
opr
::
Elemwise
>
())
{
auto
elem
=
try_cast_as_op
<
opr
::
Elemwise
>
(
opr
);
...
...
@@ -2037,6 +2045,9 @@ void FuseConvBiasZPass::apply(OptState& state) const {
if
(
conv_bias
&&
check_conv_bias
(
conv_bias
)
&&
check_fuse_shape
(
conv_bias
,
z_inp
)
&&
#if MGB_CUDA && (CUDNN_MAJOR == 8)
check_fuse_param
(
conv_bias
,
z_inp
)
&&
#endif
check_fuse_dtype
(
conv_bias
,
z_inp
))
{
auto
param
=
conv_bias
->
param
();
param
.
nonlineMode
=
get_convbias_nonline_mode
(
opr
);
...
...
src/gopt/test/inference.cpp
浏览文件 @
36b1ba05
...
...
@@ -36,6 +36,10 @@
#include <random>
#if MGB_CUDA
#include <cudnn.h>
#endif
using
namespace
mgb
;
namespace
{
...
...
@@ -2211,8 +2215,6 @@ TEST(TestGoptInference, EnableTensorCore) {
MGB_ASSERT_TENSOR_EQ
(
host_y
,
host_y_opt
);
}
//! close for cu111 ci, reopen it when bug fixed
#if CUDA_VERSION < 11000
TEST
(
FuseConvBiasZPass
,
BlockFuse
)
{
REQUIRE_GPU
(
1
);
auto
cn
=
CompNode
::
load
(
"gpu0"
);
...
...
@@ -2284,6 +2286,25 @@ TEST(FuseConvBiasZPass, BlockFuse) {
OperatorNodeConfig
{
dtype
::
QuantizedS8
(
2.5
f
)});
z
=
opr
::
TypeCvt
::
make
(
z
,
dtype
::
Float32
());
SymbolVar
z_fuse
;
{
auto
options
=
gopt
::
OptimizeForInferenceOptions
{};
options
.
enable_fuse_conv_bias_nonlinearity
()
.
enable_fuse_conv_bias_with_z
();
unpack_vector
(
gopt
::
optimize_for_inference
({
z
},
options
),
z_fuse
);
}
graph
->
compile
({{
z_fuse
,
{}}})
->
to_json
()
->
writeto_fpath
(
output_file
(
"FuseConvBiasZPass.BlockFuse_fuse.json"
));
auto
nr_elem_multi_type
=
find_opr_num
<
mgb
::
opr
::
ElemwiseMultiType
>
(
z_fuse
);
MGB_MARK_USED_VAR
(
nr_elem_multi_type
);
#if MGB_CUDA && (CUDNN_MAJOR == 8)
ASSERT_EQ
(
2u
,
nr_elem_multi_type
);
#else
ASSERT_EQ
(
1u
,
nr_elem_multi_type
);
//! fuse z manually
auto
z0
=
opr
::
ConvBias
::
make
(
x
,
w1
,
b1
,
param
,
{},
...
...
@@ -2299,42 +2320,26 @@ TEST(FuseConvBiasZPass, BlockFuse) {
OperatorNodeConfig
{
dtype
::
QuantizedS8
(
2.5
f
)});
z4
=
opr
::
TypeCvt
::
make
(
z4
,
dtype
::
Float32
());
SymbolVar
z_fuse
;
SymbolVar
z_nonfuse
;
{
auto
options
=
gopt
::
OptimizeForInferenceOptions
{};
options
.
enable_fuse_conv_bias_nonlinearity
()
.
enable_fuse_conv_bias_with_z
();
unpack_vector
(
gopt
::
optimize_for_inference
({
z
},
options
),
z_fuse
);
}
{
auto
options
=
gopt
::
OptimizeForInferenceOptions
{};
options
.
enable_fuse_conv_bias_nonlinearity
();
unpack_vector
(
gopt
::
optimize_for_inference
({
z4
},
options
),
z_nonfuse
);
}
auto
nr_elem_multi_type
=
find_opr_num
<
mgb
::
opr
::
ElemwiseMultiType
>
(
z_fuse
);
MGB_MARK_USED_VAR
(
nr_elem_multi_type
);
ASSERT_EQ
(
1u
,
nr_elem_multi_type
);
graph
->
compile
({{
z_fuse
,
{}}})
->
to_json
()
->
writeto_fpath
(
output_file
(
"FuseConvBiasZPass.BlockFuse_fuse.json"
));
graph
->
compile
({{
z_nonfuse
,
{}}})
->
to_json
()
->
writeto_fpath
(
output_file
(
"FuseConvBiasZPass.BlockFuse_nonfuse.json"
));
HostTensorND
host_z_fuse
,
host_z_nonfuse
;
auto
func
=
graph
->
compile
({
make_callback_copy
(
z_nonfuse
,
host_z_nonfuse
),
make_callback_copy
(
z_fuse
,
host_z_fuse
)});
func
->
execute
();
MGB_ASSERT_TENSOR_EQ
(
host_z_fuse
,
host_z_nonfuse
);
#endif
}
}
#endif
TEST
(
TestEnableTensorCore
,
ShuffleMerge
)
{
REQUIRE_GPU
(
1
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录