Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
722aecd4
MegEngine
项目概览
MegEngine 天元
/
MegEngine
10 个月 前同步成功
通知
392
Star
4702
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
722aecd4
编写于
8月 18, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(mgb): support fp16 nhwc backward
GitOrigin-RevId: 954ac6405a2e7b8b6719916c57e31a80f623b0c1
上级
7a9f2ed9
变更
6
隐藏空白更改
内联
并排
Showing
6 changed files
with
112 additions
and
21 deletions
+112
-21
dnn/src/cuda/convolution/backward_data/cudnn.cpp
dnn/src/cuda/convolution/backward_data/cudnn.cpp
+9
-19
dnn/src/cuda/convolution/backward_filter/cudnn.cpp
dnn/src/cuda/convolution/backward_filter/cudnn.cpp
+9
-1
dnn/src/cuda/convolution/helper.cpp
dnn/src/cuda/convolution/helper.cpp
+2
-1
dnn/test/common/convolution.cpp
dnn/test/common/convolution.cpp
+10
-0
dnn/test/common/convolution.h
dnn/test/common/convolution.h
+1
-0
dnn/test/cuda/convolution.cpp
dnn/test/cuda/convolution.cpp
+81
-0
未找到文件。
dnn/src/cuda/convolution/backward_data/cudnn.cpp
浏览文件 @
722aecd4
...
...
@@ -14,6 +14,7 @@
#include "src/cuda/utils.h"
#include "src/cuda/cudnn_wrapper.h"
#include "src/cuda/convolution/helper.h"
#include "src/cuda/conv_bias/helper.h"
using
namespace
megdnn
;
using
namespace
cuda
;
...
...
@@ -31,27 +32,16 @@ bool ConvolutionBackwardDataImpl::AlgoCUDNN::is_available(
CUDNNBwdDataDescs
D
;
if
(
!
is_cudnn_supported
(
args
.
as_fwd_args
()))
TensorLayout
bias_layout
,
z_layout
;
conv_bias
::
CanonizedFilterMeta
meta
;
meta
.
copy_from
(
args
.
filter_meta
);
conv_bias
::
BiasForwardSizeArgs
bias_args
{
args
.
handle
,
args
.
grad_layout
,
args
.
filter_layout
,
&
bias_layout
,
&
z_layout
,
meta
,
args
.
diff_layout
,
param
::
ConvBias
::
NonlineMode
::
IDENTITY
,
};
if
(
!
conv_bias
::
is_cudnn_supported
(
bias_args
))
return
false
;
#if CUDNN_VERSION >= 7500
// As in cuda10.0 and cudnn7.5, algo CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 with
// TensorCore operations produces incorrect result. So we disable
// this algo. Please remove the following code, when
// nvidia has fixed this issue.
// incorrect case:
// inp={2x8x18x18}, kern={8x8x2x2}, pad_h=pad_w=2, stride_h=stride_w=2,
// dtype=float16
if
(
args
.
filter_meta
.
dtype
==
dtype
::
Float16
())
{
const
char
*
algo_1
=
"CUDNN_CONVOLUTION_BWD_DATA_ALGO_1"
;
auto
cmp_len
=
strlen
(
algo_1
);
if
(
is_compute_capability_required
(
7
,
0
)
&&
strncmp
(
name
(),
algo_1
,
cmp_len
)
==
0
)
{
return
false
;
}
}
#endif
auto
&
cudnn
=
args
.
handle
->
cudnn
();
args
.
init_desc
(
D
);
size_t
workspace_size
;
...
...
dnn/src/cuda/convolution/backward_filter/cudnn.cpp
浏览文件 @
722aecd4
...
...
@@ -14,6 +14,7 @@
#include "src/cuda/utils.h"
#include "src/cuda/cudnn_wrapper.h"
#include "src/cuda/convolution/helper.h"
#include "src/cuda/conv_bias/helper.h"
using
namespace
megdnn
;
using
namespace
cuda
;
...
...
@@ -31,7 +32,14 @@ bool ConvolutionBackwardFilterImpl::AlgoCUDNN::is_available(
auto
&
cudnn
=
args
.
handle
->
cudnn
();
CUDNNBwdFilterDescs
D
;
if
(
!
is_cudnn_supported
(
args
.
as_fwd_args
()))
TensorLayout
bias_layout
,
z_layout
;
conv_bias
::
CanonizedFilterMeta
meta
;
meta
.
copy_from
(
args
.
grad_filter_meta
);
conv_bias
::
BiasForwardSizeArgs
bias_args
{
args
.
handle
,
args
.
src_layout
,
args
.
grad_layout
,
&
bias_layout
,
&
z_layout
,
meta
,
args
.
diff_layout
,
param
::
ConvBias
::
NonlineMode
::
IDENTITY
,
};
if
(
!
conv_bias
::
is_cudnn_supported
(
bias_args
))
return
false
;
args
.
init_desc
(
D
);
...
...
dnn/src/cuda/convolution/helper.cpp
浏览文件 @
722aecd4
...
...
@@ -33,7 +33,8 @@ bool convolution::is_cudnn_supported(const ForwardSizeArgs &args) {
args
.
dst_layout
->
dtype
.
enumv
()
!=
DTypeEnum
::
QuantizedS8
)
{
return
false
;
}
}
else
if
(
args
.
filter_meta
.
format
!=
param
::
Convolution
::
Format
::
NCHW
)
{
}
else
if
(
args
.
filter_meta
.
format
!=
param
::
Convolution
::
Format
::
NCHW
&&
args
.
filter_meta
.
format
!=
param
::
Convolution
::
Format
::
NHWC
)
{
return
false
;
}
auto
&
fm
=
args
.
filter_meta
;
...
...
dnn/test/common/convolution.cpp
浏览文件 @
722aecd4
...
...
@@ -284,6 +284,16 @@ std::vector<TestArg> convolution::get_args_cudnn_5_1_failures() {
return
args
;
}
std
::
vector
<
TestArg
>
convolution
::
get_args_cudnn_5_1_backward
()
{
std
::
vector
<
TestArg
>
args
;
args
.
emplace_back
(
param
::
Convolution
{
param
::
Convolution
::
Mode
::
CROSS_CORRELATION
,
2
,
2
,
2
,
2
},
TensorShape
{
2
,
8
,
18
,
18
},
TensorShape
{
8
,
8
,
2
,
2
});
return
args
;
}
std
::
vector
<
TestArg
>
convolution
::
get_args_x86_winograd_algorithm
()
{
std
::
vector
<
TestArg
>
args
;
for
(
size_t
ic_size
:
{
8
,
16
})
{
...
...
dnn/test/common/convolution.h
浏览文件 @
722aecd4
...
...
@@ -40,6 +40,7 @@ std::vector<TestArg> get_args_x86_direct_case_2();
std
::
vector
<
TestArg
>
get_args_fallback_templated_impl
();
std
::
vector
<
TestArg
>
get_args_fallback_non_templated_impl
();
std
::
vector
<
TestArg
>
get_args_cudnn_5_1_failures
();
std
::
vector
<
TestArg
>
get_args_cudnn_5_1_backward
();
std
::
vector
<
TestArg
>
get_args_x86_winograd_algorithm
();
std
::
vector
<
TestArg
>
get_args_BRAIN_481
();
std
::
vector
<
TestArg
>
get_args
();
...
...
dnn/test/cuda/convolution.cpp
浏览文件 @
722aecd4
...
...
@@ -238,6 +238,87 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA) {
}
}
TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_FP16_CUDNN7_5) {
    // algo CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 with
    // TensorCore operations produces incorrect result.
    // Maybe nvidia has fixed this issue
    // There is a test using incorrect case:
    // inp={2x8x18x18}, kern={8x8x2x2}, pad_h=pad_w=2, stride_h=stride_w=2,
    // dtype=float16
    using namespace convolution;
    std::vector<TestArg> args = get_args_cudnn_5_1_backward();
    Checker<ConvolutionBackwardData> checker(handle_cuda());
    // NOTE(review): removed an unused local `NormalRNG default_rng;` that the
    // original declared here — only `rng` below is ever used.
    for (auto&& arg : args) {
        // Scale the random inputs down by the fan-in so fp16 accumulation
        // stays in a representable range; clamp to at least 1.
        float scale =
                128.f / sqrt(arg.filter[0] * arg.filter[2] * arg.filter[3]);
        scale = std::max(scale, 1.f);
        UniformFloatRNG rng(scale, 2 * scale);
        // Convert the NCHW test case into NHWC layouts.
        arg.param.format = param::Convolution::Format::NHWC;
        arg.src = cvt_src_or_dst_nchw2nhwc(arg.src);
        arg.filter = cvt_filter_nchw2nhwc(arg.filter);
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        {
            // Deduce the forward output layout; backward-data consumes it as
            // the diff layout (exec order below is {filter, dst(=diff), src}).
            auto opr = handle_cuda()->create_operator<Convolution>();
            opr->param() = arg.param;
            opr->deduce_layout(src, filter, dst);
        }
        // Pass 1: fp16 tensors with fp32 accumulation (ComputeMode::FLOAT32).
        src.dtype = dst.dtype = filter.dtype = dtype::Float16();
        arg.param.compute_mode = param::Convolution::ComputeMode::FLOAT32;
        checker.set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_epsilon(1e-2)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
        // Pass 2: the same case in plain fp32 with the default compute mode.
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
        arg.param.compute_mode = param::Convolution::ComputeMode::DEFAULT;
        checker.set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_epsilon(1e-2)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
    }
}
TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_NHWC) {
    // Runs the generic backward-data cases in NHWC format, once as fp16 with
    // fp32 accumulation and once as plain fp32.
    using namespace convolution;
    std::vector<TestArg> args = get_args_cuda_conv_bwd_data();
    Checker<ConvolutionBackwardData> checker(handle_cuda());
    // NOTE(review): removed an unused local `NormalRNG default_rng;` that the
    // original declared here — only `rng` below is ever used.
    for (auto&& arg : args) {
        // Scale the random inputs down by the fan-in so accumulation stays in
        // a representable range for fp16.
        float scale =
                64.f / sqrt(arg.filter[0] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        // Convert the NCHW test case into NHWC layouts.
        arg.param.format = param::Convolution::Format::NHWC;
        arg.src = cvt_src_or_dst_nchw2nhwc(arg.src);
        arg.filter = cvt_filter_nchw2nhwc(arg.filter);
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        {
            // Deduce the forward output layout; backward-data consumes it as
            // the diff layout (exec order below is {filter, dst(=diff), src}).
            auto opr = handle_cuda()->create_operator<Convolution>();
            opr->param() = arg.param;
            opr->deduce_layout(src, filter, dst);
        }
        // Pass 1: fp16 tensors with fp32 accumulation (ComputeMode::FLOAT32).
        src.dtype = dst.dtype = filter.dtype = dtype::Float16();
        arg.param.compute_mode = param::Convolution::ComputeMode::FLOAT32;
        checker.set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_epsilon(1e-2)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
        // Pass 2: the same case in plain fp32 with the default compute mode.
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
        arg.param.compute_mode = param::Convolution::ComputeMode::DEFAULT;
        checker.set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_epsilon(1e-2)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
    }
}
TEST_F
(
CUDA
,
CONVOLUTION_BACKWARD_DATA_CUDNN
)
{
if
(
cuda
::
is_compute_capability_required
(
7
,
0
))
return
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录