MegEngine 天元 / MegEngine
b8febaf9
编写于
1月 21, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor(megdnn): refactor bfloat16 convolutionbackwardfilter to recursive inteface
GitOrigin-RevId: 37c08a5b8b2484df300acf71c651640eca041144
上级
f14e0c17
Showing 6 changed files with 91 additions and 61 deletions (+91, -61)
dnn/src/cuda/convolution/backward_data/bfloat16.cpp      +2   -2
dnn/src/cuda/convolution/backward_filter/algo.cpp        +2   -6
dnn/src/cuda/convolution/backward_filter/algo.h          +8   -14
dnn/src/cuda/convolution/backward_filter/bfloat16.cpp    +62  -39
dnn/src/cuda/convolution/opr_impl.h                      +11  -0
dnn/test/cuda/convolution.cpp                            +6   -0
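What the refactor does, in one sentence: instead of registering one AlgoBFloat16 wrapper per underlying float algorithm (the bfloat16_refhold loop removed in algo.cpp), a single AlgoBFloat16 now reports its Float32 sub-operator through get_subopr_list(), and the chosen sub-algorithm travels in execution_policy().sub_policy. The following self-contained sketch illustrates that recursive selection idea with simplified stand-in types; none of it is MegDNN code, and every name in it is hypothetical.

// Minimal, self-contained sketch of "recursive" algorithm selection.
// Every type and function below is a simplified stand-in, not the MegDNN API.
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct SearchItem {            // a sub-problem an algorithm wants solved for it
    std::string opr_type;      // e.g. "CONVOLUTION_BACKWARD_FILTER"
    std::string param;         // serialized param of the sub-operator
};

struct ExecutionPolicy {       // chosen algorithm plus policies for its sub-operators
    std::string algo;
    std::vector<ExecutionPolicy> sub_policy;
};

struct Algorithm {
    virtual ~Algorithm() = default;
    virtual std::string name() const = 0;
    // A wrapper algorithm (such as the BFloat16 one) lists the sub-operators it
    // will create at exec time; a leaf algorithm returns an empty list.
    virtual std::vector<SearchItem> get_subopr_list() const { return {}; }
};

struct MatmulAlgo : Algorithm {            // leaf: no sub-operators
    std::string name() const override { return "MATMUL"; }
};

struct BFloat16Wrapper : Algorithm {       // wrapper: one Float32 sub-operator
    std::string name() const override {
        return "CONVOLUTION_BACKWARD_FILTER_BFLOAT16";
    }
    std::vector<SearchItem> get_subopr_list() const override {
        return {{"CONVOLUTION_BACKWARD_FILTER", "compute_mode=DEFAULT"}};
    }
};

// Pretend heuristic: MegDNN would search the algorithms of the sub-operator
// described by `item`; here we always hand back MATMUL.
std::unique_ptr<Algorithm> pick_algo_for(const SearchItem& /*item*/) {
    return std::unique_ptr<Algorithm>(new MatmulAlgo);
}

// Recursive selection: choose an algorithm, then recurse into every
// sub-operator it declares, filling sub_policy in the same order.
ExecutionPolicy select_recursively(const Algorithm& algo) {
    ExecutionPolicy policy{algo.name(), {}};
    for (const SearchItem& item : algo.get_subopr_list()) {
        policy.sub_policy.push_back(select_recursively(*pick_algo_for(item)));
    }
    return policy;
}

int main() {
    BFloat16Wrapper top;
    ExecutionPolicy policy = select_recursively(top);
    // Prints: CONVOLUTION_BACKWARD_FILTER_BFLOAT16 -> MATMUL
    std::cout << policy.algo << " -> " << policy.sub_policy[0].algo << "\n";
    return 0;
}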
dnn/src/cuda/convolution/backward_data/bfloat16.cpp
@@ -60,7 +60,7 @@ bool ConvolutionBackwardDataImpl::AlgoBFloat16::is_available(
     auto&& config = sub_opr_config(
             {*args.filter_layout, *args.diff_layout, *args.grad_layout},
             args.opr);
     conv_back_data_opr->param() = config.second;
     return args.diff_layout->dtype == args.filter_layout->dtype &&
            args.diff_layout->dtype == dtype::BFloat16() &&
            get_algorithm(static_cast<ConvolutionBackwardDataImpl*>(
@@ -80,7 +80,7 @@ WorkspaceBundle ConvolutionBackwardDataImpl::AlgoBFloat16::get_workspace_bundle(
     auto&& config = sub_opr_config(
             {*args.filter_layout, *args.diff_layout, *args.grad_layout},
             args.opr);
     conv_back_data_opr->param() = config.second;
     SmallVector<size_t> sizes;
     auto get_workspace = [&sizes](const TensorLayout& src,
                                   const TensorLayout& dst) {
dnn/src/cuda/convolution/backward_filter/algo.cpp
@@ -43,12 +43,8 @@ ConvolutionBackwardFilterImpl::AlgoPack::AlgoPack() {
     megdnn_assert(all_algos_data == all_algos.data());
     non_cudnn_algos.push_back(all_algos.rbegin()[0]);  // group matmul
 
-    size_t algo_size = all_algos.size();
-    for (size_t i = 0; i < algo_size; ++i) {
-        bfloat16_refhold.emplace_back(new AlgoBFloat16(all_algos[i]));
-        all_algos.push_back(bfloat16_refhold.back().get());
-        bfloat16_algos.push_back(bfloat16_refhold.back().get());
-    }
+    all_algos.push_back(&bfloat16);
+    bfloat16_algos.push_back(&bfloat16);
 
     for (auto&& algo : all_algos) {
         m_all_algos_map.emplace(algo->info().desc, algo);
dnn/src/cuda/convolution/backward_filter/algo.h
@@ -158,27 +158,21 @@ public:
 class ConvolutionBackwardFilterImpl::AlgoBFloat16 final : public AlgoBase {
 public:
-    AlgoBFloat16(ConvolutionBackwardFilterImpl::AlgoBase*);
     bool is_available(const SizeArgs& args) const override;
     size_t get_workspace_in_bytes(const SizeArgs& args) const override;
     void exec(const ExecArgs& args) const override;
-    const char* name() const override { return m_name.c_str(); }
-    bool is_reproducible() const override { return true; }
-    MEGDNN_DECL_ALGO_TYPE(CUDA_BFLOAT16)
+    std::vector<SearchItem> get_subopr_list(
+            const TensorLayoutArray& layouts,
+            const OperatorBase* opr) const override;
 
-    std::string param() const override {
-        std::string ret;
-        serialize_write_pod(m_algorithm, ret);
-        return ret;
-    }
+    const char* name() const override {
+        return "CONVOLUTION_BACKWARD_FILTER_BFLOAT16";
+    }
+    bool is_reproducible() const override { return true; }
+    MEGDNN_DECL_ALGO_TYPE(CUDA_BFLOAT16)
 
 private:
-    std::string m_name;
-    ConvolutionBackwardFilterImpl::AlgoBase* m_algorithm = nullptr;
-    SizeArgs float_args(const SizeArgs& args,
-                        ConvolutionBackwardFilterImpl* opr, TensorLayout& fsrc,
-                        TensorLayout& ffilter, TensorLayout& fdst) const;
     WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const;
 };
@@ -225,7 +219,7 @@ public:
     AlgoChanwise chanwise;
     std::vector<AlgoGroupConvGeneral> gconv;
     std::unordered_map<AlgoBase*, AlgoGroupConvGeneral*> algo2gconv;
-    std::vector<std::unique_ptr<AlgoBFloat16>> bfloat16_refhold;
+    AlgoBFloat16 bfloat16;
 
     std::vector<AlgoBase*>
             //! all algorithms
dnn/src/cuda/convolution/backward_filter/bfloat16.cpp
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 #include "./algo.h"
@@ -17,33 +18,39 @@ using namespace megdnn;
 using namespace cuda;
 using namespace convolution;
 
-ConvolutionBackwardFilterImpl::AlgoBFloat16::AlgoBFloat16(
-        ConvolutionBackwardFilterImpl::AlgoBase* algorithm)
-        : m_algorithm(algorithm) {
-    megdnn_assert_internal(algorithm);
-    m_name = ssprintf("CONVOLUTION_BACKWARD_Filter_BFLOAT16:%s",
-                      m_algorithm->name());
-}
-
-ConvolutionBackwardFilterImpl::AlgoBase::SizeArgs
-ConvolutionBackwardFilterImpl::AlgoBFloat16::float_args(
-        const SizeArgs& args, ConvolutionBackwardFilterImpl* opr,
-        TensorLayout& fsrc, TensorLayout& fdiff, TensorLayout& fgrad) const {
-    fsrc = *args.src_layout;
-    fdiff = *args.diff_layout;
-    fgrad = *args.grad_layout;
+namespace {
+std::pair<TensorLayoutArray, ConvolutionBackwardFilterImpl::Param>
+sub_opr_config(const TensorLayoutArray& layouts,
+               const ConvolutionBackwardFilterImpl* opr) {
+    megdnn_assert(layouts.size() >= 3);
+    std::pair<TensorLayoutArray, ConvolutionBackwardFilterImpl::Param> ret;
+    ret.first = layouts;
     auto change_dtype = [](TensorLayout& layout) {
         if (layout.dtype == dtype::BFloat16()) {
             layout.dtype = dtype::Float32();
         }
     };
-    change_dtype(fsrc);
-    change_dtype(fdiff);
-    change_dtype(fgrad);
-    opr->param() = args.opr->param();
-    opr->param().compute_mode = Param::ComputeMode::DEFAULT;
-    opr->execution_policy() = {m_algorithm->desc(), {}};
-    return SizeArgs(opr, fsrc, fdiff, fgrad);
+    change_dtype(ret.first[0]);
+    change_dtype(ret.first[1]);
+    change_dtype(ret.first[2]);
+
+    ret.second = opr->param();
+    ret.second.compute_mode =
+            ConvolutionBackwardFilter::Param::ComputeMode::DEFAULT;
+    return ret;
 }
+}  // namespace
+
+std::vector<Algorithm::SearchItem>
+ConvolutionBackwardFilterImpl::AlgoBFloat16::get_subopr_list(
+        const TensorLayoutArray& layouts, const OperatorBase* opr) const {
+    auto&& config = sub_opr_config(
+            layouts, static_cast<const ConvolutionBackwardFilterImpl*>(opr));
+
+    std::string param_str;
+    Algorithm::serialize_write_pod(config.second, param_str);
+    return {{Algorithm::OprType::CONVOLUTION_BACKWARD_FILTER, param_str,
+             config.first}};
+}
+
 bool ConvolutionBackwardFilterImpl::AlgoBFloat16::is_available(
@@ -51,25 +58,33 @@ bool ConvolutionBackwardFilterImpl::AlgoBFloat16::is_available(
-    TensorLayout fsrc, fdiff, fgrad;
     auto conv_back_filter_opr =
             args.handle->create_operator<ConvolutionBackwardFilter>();
-    SizeArgs fargs = float_args(
-            args,
-            static_cast<ConvolutionBackwardFilterImpl*>(conv_back_filter_opr.get()),
-            fsrc, fdiff, fgrad);
+    auto&& config = sub_opr_config(
+            {*args.src_layout, *args.diff_layout, *args.grad_layout},
+            args.opr);
+    conv_back_filter_opr->param() = config.second;
     return args.src_layout->dtype == args.diff_layout->dtype &&
            args.src_layout->dtype == dtype::BFloat16() &&
-           m_algorithm->is_available(fargs);
+           get_algorithm(static_cast<ConvolutionBackwardFilterImpl*>(
+                                 conv_back_filter_opr.get()),
+                         config.first[0], config.first[1], config.first[2]);
 }
 
 WorkspaceBundle ConvolutionBackwardFilterImpl::AlgoBFloat16::get_workspace_bundle(
         void* ptr, const SizeArgs& args) const {
-    TensorLayout fsrc, fdiff, fgrad;
     auto conv_back_filter_opr =
             args.handle->create_operator<ConvolutionBackwardFilter>();
-    SizeArgs fargs = float_args(
-            args,
-            static_cast<ConvolutionBackwardFilterImpl*>(conv_back_filter_opr.get()),
-            fsrc, fdiff, fgrad);
+    if (args.opr->execution_policy().algo.valid()) {
+        megdnn_assert(args.opr->execution_policy().sub_policy.size() == 1);
+        conv_back_filter_opr->execution_policy() =
+                args.opr->execution_policy().sub_policy[0];
+    }
+
+    auto&& config = sub_opr_config(
+            {*args.src_layout, *args.diff_layout, *args.grad_layout},
+            args.opr);
+    conv_back_filter_opr->param() = config.second;
     SmallVector<size_t> sizes;
     auto get_workspace = [&sizes](const TensorLayout& src,
                                   const TensorLayout& dst) {
@@ -77,11 +92,14 @@ ConvolutionBackwardFilterImpl::AlgoBFloat16::get_workspace_bundle(
             sizes.push_back(dst.span().dist_byte());
         }
     };
-    get_workspace(*args.src_layout, fsrc);
-    get_workspace(*args.diff_layout, fdiff);
-    get_workspace(*args.grad_layout, fgrad);
-    sizes.push_back(m_algorithm->get_workspace_in_bytes(fargs));
-    return {ptr, std::move(sizes)};
+    get_workspace(*args.src_layout, config.first[0]);
+    get_workspace(*args.diff_layout, config.first[1]);
+    get_workspace(*args.grad_layout, config.first[2]);
+    sizes.push_back(conv_back_filter_opr->get_workspace_in_bytes(
+            config.first[0], config.first[1], config.first[2]));
+    auto ret = WorkspaceBundle{ptr, std::move(sizes)};
+    return ret;
 }
 
 size_t ConvolutionBackwardFilterImpl::AlgoBFloat16::get_workspace_in_bytes(
@@ -107,7 +125,12 @@ void ConvolutionBackwardFilterImpl::AlgoBFloat16::exec(
     conv_back_filter_opr->param() = args.opr->param();
     conv_back_filter_opr->param().compute_mode = Param::ComputeMode::DEFAULT;
-    conv_back_filter_opr->execution_policy() = {m_algorithm->desc(), {}};
+    if (args.opr->execution_policy().algo.valid()) {
+        megdnn_assert(args.opr->execution_policy().sub_policy.size() == 1);
+        conv_back_filter_opr->execution_policy() =
+                args.opr->execution_policy().sub_policy[0];
+    }
     conv_back_filter_opr->exec(fsrc_tensor, fdiff_tensor, fgrad_tensor,
                                cvter.workspace());
 }
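Note on the new if (args.opr->execution_policy().algo.valid()) blocks above: when the caller has pinned an algorithm on the BFloat16 operator, exactly one nested policy is expected and it is copied onto the internally created Float32 operator; otherwise the sub-operator chooses its own algorithm. A minimal stand-in sketch of that contract follows; the types and names are simplified assumptions, not the MegDNN classes.

// Stand-in sketch of the sub-policy forwarding added in get_workspace_bundle()
// and exec().  Simplified assumption for illustration, not MegDNN code.
#include <cassert>
#include <string>
#include <vector>

struct ExecutionPolicy {
    std::string algo;                         // empty == "not fixed, use heuristic"
    std::vector<ExecutionPolicy> sub_policy;  // one entry per sub-operator
};

void forward_sub_policy(const ExecutionPolicy& caller_policy,
                        ExecutionPolicy& float32_subopr_policy) {
    if (!caller_policy.algo.empty()) {        // mirrors execution_policy().algo.valid()
        assert(caller_policy.sub_policy.size() == 1);
        float32_subopr_policy = caller_policy.sub_policy[0];
    }
    // Otherwise the sub-operator keeps an empty policy and picks its own algorithm.
}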
dnn/src/cuda/convolution/opr_impl.h
@@ -152,6 +152,17 @@ public:
                 ->info();
     }
 
+    AlgorithmInfo get_algorithm_info_heuristic(
+            const TensorLayout& filter, const TensorLayout& diff,
+            const TensorLayout& grad, size_t workspace_limit_in_bytes,
+            bool reproducible) {
+        return get_algorithm_heuristic(filter, diff, grad,
+                                       workspace_limit_in_bytes, reproducible)
+                ->info();
+    }
+
     const char* get_algorithm_set_name() const override;
 
     class AlgoBase;
dnn/test/cuda/convolution.cpp
@@ -328,12 +328,18 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_FILTER)
             .set_epsilon(1e-1)
             .set_param(arg.param)
             .exec(TensorLayoutArray{src, dst, filter});
+    checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardFilter>(
+            ExecutionPolicyAlgoName{"CONVOLUTION_BACKWARD_FILTER_BFLOAT16",
+                                    {{"MATMUL", {}}}}));
     src.dtype = dst.dtype = filter.dtype = dtype::BFloat16();
     checker.set_rng(0, &rng)
             .set_rng(1, &rng)
             .set_epsilon(1e-1)
             .set_param(arg.param)
             .exec(TensorLayoutArray{src, dst, filter});
+    checker.reset_before_exec_callback();
+    checker.opr()->execution_policy() = {};
 }
 }
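The test pins the whole recursive choice by name: the outer entry selects the BFloat16 wrapper and each inner entry names the algorithm for the corresponding sub-operator reported by get_subopr_list(), here a single Float32 convolution solved via MATMUL. A stand-in illustration of that nesting follows; the struct is a simplified assumption, not the real ExecutionPolicyAlgoName.

// Stand-in illustration of the nested algorithm-name policy used by the test.
// Simplified assumption for illustration, not the MegDNN test utilities.
#include <string>
#include <vector>

struct PolicyName {
    std::string name;                    // algorithm name at this level
    std::vector<PolicyName> sub_policy;  // one entry per sub-operator
};

int main() {
    PolicyName policy{"CONVOLUTION_BACKWARD_FILTER_BFLOAT16",  // wrapper algorithm
                      {{"MATMUL", {}}}};                       // its Float32 sub-operator
    (void)policy;
    return 0;
}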