Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
659217ac
MegEngine
项目概览
MegEngine 天元
/
MegEngine
大约 1 年 前同步成功
通知
399
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
659217ac
编写于
1月 24, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor(megdnn): refactor bfloat16 convbias to recursive interface
GitOrigin-RevId: 378194fb7f5482f72eb95eaf23610ceec9c9c554
上级
4a1d52c9
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
104 addition
and
69 deletion
+104
-69
dnn/src/cuda/conv_bias/algo.cpp
dnn/src/cuda/conv_bias/algo.cpp
+2
-6
dnn/src/cuda/conv_bias/algo.h
dnn/src/cuda/conv_bias/algo.h
+6
-18
dnn/src/cuda/conv_bias/bfloat16.cpp
dnn/src/cuda/conv_bias/bfloat16.cpp
+64
-43
dnn/src/naive/conv_bias/opr_impl.cpp
dnn/src/naive/conv_bias/opr_impl.cpp
+3
-0
dnn/test/cuda/conv_bias.cpp
dnn/test/cuda/conv_bias.cpp
+27
-0
dnn/test/cuda/convolution.cpp
dnn/test/cuda/convolution.cpp
+2
-2
未找到文件。
dnn/src/cuda/conv_bias/algo.cpp
浏览文件 @
659217ac
...
...
@@ -63,12 +63,8 @@ ConvBiasForwardImpl::AlgoPack::AlgoPack() {
non_cudnn_algos
.
push_back
(
all_algos
.
rbegin
()[
1
]);
// group batched_matmul
non_cudnn_algos
.
push_back
(
all_algos
.
rbegin
()[
0
]);
// group 1x1
algo_size
=
all_algos
.
size
();
for
(
size_t
i
=
0
;
i
<
algo_size
;
++
i
)
{
bfloat16_refhold
.
emplace_back
(
new
AlgoBFloat16
(
all_algos
[
i
]));
all_algos
.
push_back
(
bfloat16_refhold
.
back
().
get
());
bfloat16_algos
.
push_back
(
bfloat16_refhold
.
back
().
get
());
}
all_algos
.
push_back
(
&
bfloat16
);
bfloat16_algos
.
push_back
(
&
bfloat16
);
size_t
all_algo_size
=
all_algos
.
size
();
#if CUDA_VERSION >= 10000
...
...
dnn/src/cuda/conv_bias/algo.h
浏览文件 @
659217ac
...
...
@@ -702,32 +702,20 @@ private:
class
ConvBiasForwardImpl
::
AlgoBFloat16
final
:
public
AlgoBase
{
public:
AlgoBFloat16
(
AlgoBase
*
impl
);
bool
is_available
(
const
SizeArgs
&
args
)
const
override
;
size_t
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
override
;
void
exec
(
const
ExecArgs
&
args
)
const
override
;
const
char
*
name
()
const
override
{
return
m_name
.
c_str
();
}
std
::
vector
<
SearchItem
>
get_subopr_list
(
const
TensorLayoutArray
&
layouts
,
const
OperatorBase
*
opr
)
const
override
;
bool
is_reproducible
()
const
override
{
return
m_impl
->
is_reproducible
();
}
const
char
*
name
()
const
override
{
return
"CONVBIAS_BFLOAT16"
;
}
bool
is_reproducible
()
const
override
{
return
true
;
}
MEGDNN_DECL_ALGO_TYPE
(
CUDA_BFLOAT16
)
std
::
string
param
()
const
override
{
std
::
string
ret
;
serialize_write_pod
(
m_impl
,
ret
);
return
ret
;
}
private:
SizeArgs
float_args
(
const
SizeArgs
&
args
,
ConvBiasForwardImpl
*
opr
,
TensorLayout
&
fsrc
,
TensorLayout
&
ffilter
,
TensorLayout
&
fbias
,
TensorLayout
&
fz
,
TensorLayout
&
fdst
)
const
;
WorkspaceBundle
get_workspace_bundle
(
void
*
ptr
,
const
SizeArgs
&
args
)
const
;
AlgoBase
*
m_impl
;
std
::
string
m_name
;
};
...
...
@@ -766,7 +754,7 @@ public:
std
::
vector
<
AlgoInt8NCHW32IMMAImplicitGemm
>
int8_nchw32_imma
;
#endif
std
::
vector
<
std
::
unique_ptr
<
AlgoGroupConvGeneral
>>
gconv_refhold
;
std
::
vector
<
std
::
unique_ptr
<
AlgoBFloat16
>>
bfloat16_refhold
;
AlgoBFloat16
bfloat16
;
std
::
unordered_map
<
AlgoBase
*
,
AlgoGroupConvGeneral
*>
algo2gconv
;
AlgoBase
*
cudnn_conv_bias_act_from_enum
(
cudnnConvolutionFwdAlgo_t
algo
);
...
...
dnn/src/cuda/conv_bias/bfloat16.cpp
浏览文件 @
659217ac
...
...
@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/cuda/conv_bias/algo.h"
...
...
@@ -18,58 +19,70 @@ using namespace megdnn;
using
namespace
cuda
;
using
namespace
conv_bias
;
ConvBiasForwardImpl
::
AlgoBFloat16
::
AlgoBFloat16
(
ConvBiasForwardImpl
::
AlgoBase
*
algorithm
)
:
m_impl
(
algorithm
)
{
megdnn_assert_internal
(
algorithm
);
m_name
=
ssprintf
(
"BFLOAT16:%s"
,
m_impl
->
name
());
}
ConvBiasForwardImpl
::
AlgoBase
::
SizeArgs
ConvBiasForwardImpl
::
AlgoBFloat16
::
float_args
(
const
SizeArgs
&
args
,
ConvBiasForwardImpl
*
opr
,
TensorLayout
&
fsrc
,
TensorLayout
&
ffilter
,
TensorLayout
&
fbias
,
TensorLayout
&
fz
,
TensorLayout
&
fdst
)
const
{
fsrc
=
*
args
.
src_layout
;
ffilter
=
*
args
.
filter_layout
;
fbias
=
*
args
.
bias_layout
;
fz
=
*
args
.
z_layout
;
fdst
=
*
args
.
dst_layout
;
namespace
{
std
::
pair
<
TensorLayoutArray
,
ConvBiasForwardImpl
::
Param
>
sub_opr_config
(
const
TensorLayoutArray
&
layouts
,
const
ConvBiasForwardImpl
*
opr
)
{
megdnn_assert
(
layouts
.
size
()
>=
3
);
std
::
pair
<
TensorLayoutArray
,
ConvBiasForwardImpl
::
Param
>
ret
;
ret
.
first
=
layouts
;
auto
change_dtype
=
[](
TensorLayout
&
layout
)
{
if
(
layout
.
dtype
==
dtype
::
BFloat16
())
{
layout
.
dtype
=
dtype
::
Float32
();
}
};
change_dtype
(
fsrc
);
change_dtype
(
ffilter
);
change_dtype
(
fbias
);
change_dtype
(
fz
);
change_dtype
(
fdst
);
opr
->
param
()
=
args
.
opr
->
param
();
opr
->
param
().
compute_mode
=
Param
::
ComputeMode
::
DEFAULT
;
opr
->
execution_policy
()
=
{
m_impl
->
desc
(),
{}};
return
SizeArgs
(
opr
,
fsrc
,
ffilter
,
fbias
,
fz
,
fdst
);
change_dtype
(
ret
.
first
[
0
]);
change_dtype
(
ret
.
first
[
1
]);
change_dtype
(
ret
.
first
[
2
]);
change_dtype
(
ret
.
first
[
3
]);
change_dtype
(
ret
.
first
[
4
]);
ret
.
second
=
opr
->
param
();
ret
.
second
.
compute_mode
=
ConvBiasForwardImpl
::
Param
::
ComputeMode
::
DEFAULT
;
return
ret
;
}
}
// namespace
std
::
vector
<
Algorithm
::
SearchItem
>
ConvBiasForwardImpl
::
AlgoBFloat16
::
get_subopr_list
(
const
TensorLayoutArray
&
layouts
,
const
OperatorBase
*
opr
)
const
{
auto
&&
config
=
sub_opr_config
(
layouts
,
static_cast
<
const
ConvBiasForwardImpl
*>
(
opr
));
std
::
string
param_str
;
Algorithm
::
serialize_write_pod
(
config
.
second
,
param_str
);
return
{{
Algorithm
::
OprType
::
CONVBIAS_FORWARD
,
param_str
,
config
.
first
}};
}
bool
ConvBiasForwardImpl
::
AlgoBFloat16
::
is_available
(
const
SizeArgs
&
args
)
const
{
TensorLayout
fsrc
,
ffilter
,
fbias
,
fz
,
fdst
;
auto
convbias_opr
=
args
.
handle
->
create_operator
<
ConvBias
>
();
SizeArgs
fargs
=
float_args
(
args
,
static_cast
<
ConvBiasForwardImpl
*>
(
convbias_opr
.
get
()),
fsrc
,
ffilter
,
fbias
,
fz
,
fdst
);
auto
&&
config
=
sub_opr_config
(
{
*
args
.
src_layout
,
*
args
.
filter_layout
,
*
args
.
bias_layout
,
*
args
.
z_layout
,
*
args
.
dst_layout
},
args
.
opr
);
convbias_opr
->
param
()
=
config
.
second
;
return
args
.
src_layout
->
dtype
==
args
.
filter_layout
->
dtype
&&
args
.
src_layout
->
dtype
==
dtype
::
BFloat16
()
&&
m_impl
->
is_available
(
fargs
);
get_algorithm
(
static_cast
<
ConvBiasForwardImpl
*>
(
convbias_opr
.
get
()),
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
],
config
.
first
[
3
],
config
.
first
[
4
]);
}
WorkspaceBundle
ConvBiasForwardImpl
::
AlgoBFloat16
::
get_workspace_bundle
(
void
*
ptr
,
const
SizeArgs
&
args
)
const
{
TensorLayout
fsrc
,
ffilter
,
fbias
,
fz
,
fdst
;
auto
convbias_opr
=
args
.
handle
->
create_operator
<
ConvBias
>
();
SizeArgs
fargs
=
float_args
(
args
,
static_cast
<
ConvBiasForwardImpl
*>
(
convbias_opr
.
get
()),
fsrc
,
ffilter
,
fbias
,
fz
,
fdst
);
if
(
args
.
opr
->
execution_policy
().
algo
.
valid
())
{
megdnn_assert
(
args
.
opr
->
execution_policy
().
sub_policy
.
size
()
==
1
);
convbias_opr
->
execution_policy
()
=
args
.
opr
->
execution_policy
().
sub_policy
[
0
];
}
auto
&&
config
=
sub_opr_config
(
{
*
args
.
src_layout
,
*
args
.
filter_layout
,
*
args
.
bias_layout
,
*
args
.
z_layout
,
*
args
.
dst_layout
},
args
.
opr
);
convbias_opr
->
param
()
=
config
.
second
;
SmallVector
<
size_t
>
sizes
;
auto
get_workspace
=
[
&
sizes
](
const
TensorLayout
&
src
,
const
TensorLayout
&
dst
)
{
...
...
@@ -77,12 +90,15 @@ WorkspaceBundle ConvBiasForwardImpl::AlgoBFloat16::get_workspace_bundle(
sizes
.
push_back
(
dst
.
span
().
dist_byte
());
}
};
get_workspace
(
*
args
.
src_layout
,
fsrc
);
get_workspace
(
*
args
.
filter_layout
,
ffilter
);
get_workspace
(
*
args
.
bias_layout
,
fbias
);
get_workspace
(
*
args
.
z_layout
,
fz
);
get_workspace
(
*
args
.
dst_layout
,
fdst
);
sizes
.
push_back
(
m_impl
->
get_workspace_in_bytes
(
fargs
));
get_workspace
(
*
args
.
src_layout
,
config
.
first
[
0
]);
get_workspace
(
*
args
.
filter_layout
,
config
.
first
[
1
]);
get_workspace
(
*
args
.
bias_layout
,
config
.
first
[
2
]);
get_workspace
(
*
args
.
z_layout
,
config
.
first
[
3
]);
get_workspace
(
*
args
.
dst_layout
,
config
.
first
[
4
]);
sizes
.
push_back
(
convbias_opr
->
get_workspace_in_bytes
(
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
],
config
.
first
[
3
],
config
.
first
[
4
],
nullptr
));
return
{
ptr
,
std
::
move
(
sizes
)};
}
...
...
@@ -110,7 +126,12 @@ void ConvBiasForwardImpl::AlgoBFloat16::exec(const ExecArgs& args) const {
auto
convbias_opr
=
args
.
handle
->
create_operator
<
ConvBias
>
();
convbias_opr
->
param
()
=
args
.
opr
->
param
();
convbias_opr
->
param
().
compute_mode
=
Param
::
ComputeMode
::
DEFAULT
;
convbias_opr
->
execution_policy
()
=
{
m_impl
->
desc
(),
{}};
if
(
args
.
opr
->
execution_policy
().
algo
.
valid
())
{
megdnn_assert
(
args
.
opr
->
execution_policy
().
sub_policy
.
size
()
==
1
);
convbias_opr
->
execution_policy
()
=
args
.
opr
->
execution_policy
().
sub_policy
[
0
];
}
convbias_opr
->
exec
(
fsrc_tensor
,
ffilter_tensor
,
fbias_tensor
,
fz_tensor
,
fdst_tensor
,
nullptr
,
cvter
.
workspace
());
}
...
...
dnn/src/naive/conv_bias/opr_impl.cpp
浏览文件 @
659217ac
...
...
@@ -214,6 +214,9 @@ void ConvBiasForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter,
DISPATCH_RAW
(
Float16
,
Float16
,
Float16
,
FLOAT32
,
(
convolution
::
forward_bias
<
dt_float16
,
dt_float16
,
dt_float16
,
dt_float32
>
))
DISPATCH_RAW
(
BFloat16
,
BFloat16
,
BFloat16
,
FLOAT32
,
(
convolution
::
forward_bias
<
dt_bfloat16
,
dt_bfloat16
,
dt_bfloat16
,
dt_float32
>
))
#endif
else
{
megdnn_throw
(
ssprintf
(
...
...
dnn/test/cuda/conv_bias.cpp
浏览文件 @
659217ac
...
...
@@ -8,6 +8,7 @@
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "megdnn/dtype.h"
#include "test/cuda/fixture.h"
#include "megdnn/opr_param_defs.h"
...
...
@@ -108,6 +109,32 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_F32) {
}
}
// Runs ConvBiasForward on the CUDA handle with every tensor dtype set to
// BFloat16. The before-exec callback is an AlgoChecker built from the
// algorithm name "CONVBIAS_BFLOAT16" with the sub-algorithm name "MATMUL".
// Each case from get_args() has its compute mode set to FLOAT32 and is run
// with an epsilon of 2e-2.
TEST_F(CUDA, CONV_BIAS_FORWARD_BF16) {
    using namespace conv_bias;

    std::vector<TestArg> test_cases = get_args();
    Checker<ConvBiasForward> checker(handle_cuda());

    AlgoChecker<ConvBiasForward> algo_checker(
            ExecutionPolicyAlgoName{"CONVBIAS_BFLOAT16", {{"MATMUL", {}}}});
    checker.set_before_exec_callback(algo_checker);

    // One RNG instance is shared by tensors 0, 1 and 2.
    NormalRNG normal_rng;

    for (auto& test_case : test_cases) {
        test_case.param.compute_mode =
                param::Convolution::ComputeMode::FLOAT32;

        checker.set_dtype(0, dtype::BFloat16())
                .set_dtype(1, dtype::BFloat16())
                .set_dtype(2, dtype::BFloat16())
                .set_dtype(3, dtype::BFloat16())
                .set_dtype(4, dtype::BFloat16())
                .set_rng(0, &normal_rng)
                .set_rng(1, &normal_rng)
                .set_rng(2, &normal_rng)
                .set_epsilon(2e-2)
                .set_param(test_case.param)
                // The last two entries are passed as empty shapes.
                .execs({test_case.src,
                        test_case.filter,
                        test_case.bias,
                        {},
                        {}});
    }
}
TEST_F
(
CUDA
,
CONV_BIAS_FORWARD_QS8
)
{
require_compute_capability
(
6
,
1
);
...
...
dnn/test/cuda/convolution.cpp
浏览文件 @
659217ac
...
...
@@ -80,7 +80,8 @@ TEST_F(CUDA, CONVOLUTION_FORWARD)
Checker
<
ConvolutionForward
>
checker
(
handle_cuda
());
NormalRNG
default_rng
;
for
(
auto
&&
arg
:
args
)
{
float
scale
=
1.0
f
/
sqrt
(
arg
.
filter
[
1
]
*
arg
.
filter
[
2
]
*
arg
.
filter
[
3
]);
float
scale
=
1.0
f
/
sqrt
(
arg
.
filter
[
1
]
*
arg
.
filter
[
2
]
*
arg
.
filter
[
3
]);
UniformFloatRNG
rng
(
scale
,
2
*
scale
);
checker
.
set_dtype
(
0
,
dtype
::
Float32
()).
...
...
@@ -115,7 +116,6 @@ TEST_F(CUDA, CONVOLUTION_FORWARD)
.
set_epsilon
(
1e-1
)
.
set_param
(
arg
.
param
)
.
execs
({
arg
.
src
,
arg
.
filter
,
{}});
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录