Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
4a1d52c9
MegEngine
项目概览
MegEngine 天元
/
MegEngine
大约 1 年 前同步成功
通知
396
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
4a1d52c9
编写于
1月 22, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor(megdnn): refactor bfloat16 matmul to recursive inteface
GitOrigin-RevId: 641c508aecab700c5c7c8cb758900ab605a29620
上级
b8febaf9
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
76 addition
and
42 deletion
+76
-42
dnn/src/cuda/matrix_mul/algos.cpp
dnn/src/cuda/matrix_mul/algos.cpp
+1
-2
dnn/src/cuda/matrix_mul/algos.h
dnn/src/cuda/matrix_mul/algos.h
+8
-13
dnn/src/cuda/matrix_mul/bfloat16.cpp
dnn/src/cuda/matrix_mul/bfloat16.cpp
+60
-27
dnn/test/cuda/matrix_mul.cpp
dnn/test/cuda/matrix_mul.cpp
+7
-0
未找到文件。
dnn/src/cuda/matrix_mul/algos.cpp
浏览文件 @
4a1d52c9
...
...
@@ -31,8 +31,7 @@ MatrixMulForwardImpl::AlgoPack::AlgoPack() {
#endif
all_algos
.
push_back
(
&
naive
);
#if !MEGDNN_DISABLE_FLOAT16
cublas_bfloat16
=
std
::
make_unique
<
AlgoBFloat16
>
(
&
cublas
);
all_algos
.
push_back
(
cublas_bfloat16
.
get
());
all_algos
.
push_back
(
&
bfloat16
);
#endif
for
(
auto
&&
algo
:
all_algos
)
{
...
...
dnn/src/cuda/matrix_mul/algos.h
浏览文件 @
4a1d52c9
...
...
@@ -148,25 +148,20 @@ public:
#if !MEGDNN_DISABLE_FLOAT16
class
MatrixMulForwardImpl
::
AlgoBFloat16
final
:
public
AlgoBase
{
public:
AlgoBFloat16
(
MatrixMulForwardImpl
::
AlgoBase
*
);
bool
is_available
(
const
SizeArgs
&
args
)
const
override
;
size_t
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
override
;
const
char
*
name
()
const
override
{
return
m_name
.
c_str
();
}
void
exec
(
const
ExecArgs
&
args
)
const
override
;
bool
is_reproducible
()
const
override
{
return
true
;
}
MEGDNN_DECL_ALGO_TYPE
(
CUDA_NAIVE
)
MEGDNN_DECL_ALGO_TYPE
(
CUDA_BFLOAT16
)
std
::
string
param
()
const
override
{
std
::
string
ret
;
serialize_write_pod
(
m_algorithm
,
ret
);
return
ret
;
}
std
::
vector
<
SearchItem
>
get_subopr_list
(
const
TensorLayoutArray
&
layouts
,
const
OperatorBase
*
opr
)
const
override
;
const
char
*
name
()
const
override
{
return
"MATMUL_BFLOAT16"
;
}
bool
is_reproducible
()
const
override
{
return
true
;
}
private:
MatrixMulForwardImpl
::
AlgoBase
*
m_algorithm
=
nullptr
;
std
::
string
m_name
;
WorkspaceBundle
get_workspace_bundle
(
void
*
ptr
,
const
SizeArgs
&
args
)
const
;
SizeArgs
float_args
(
const
SizeArgs
&
args
)
const
;
};
#endif
...
...
@@ -185,7 +180,7 @@ public:
AlgoCuBlasLt
cublas_lt
;
#endif
#if !MEGDNN_DISABLE_FLOAT16
std
::
unique_ptr
<
AlgoBFloat16
>
cublas_
bfloat16
;
AlgoBFloat16
bfloat16
;
#endif
std
::
vector
<
AlgoBase
*>
all_algos
;
...
...
dnn/src/cuda/matrix_mul/bfloat16.cpp
浏览文件 @
4a1d52c9
...
...
@@ -6,59 +6,87 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/cuda/handle.h"
#include "src/cuda/matrix_mul/algos.h"
#include "src/cuda/utils.h"
#include "src/common/algo_chooser.h"
using
namespace
megdnn
;
using
namespace
cuda
;
MatrixMulForwardImpl
::
AlgoBFloat16
::
AlgoBFloat16
(
MatrixMulForwardImpl
::
AlgoBase
*
algorithm
)
:
m_algorithm
(
algorithm
)
{
megdnn_assert_internal
(
algorithm
);
m_name
=
ssprintf
(
"MATMUL_BFLOAT16:%s"
,
m_algorithm
->
name
());
}
MatrixMulForwardImpl
::
AlgoBase
::
SizeArgs
MatrixMulForwardImpl
::
AlgoBFloat16
::
float_args
(
const
SizeArgs
&
args
)
const
{
auto
new_args
=
args
;
namespace
{
std
::
pair
<
TensorLayoutArray
,
MatrixMulForwardImpl
::
Param
>
sub_opr_config
(
const
TensorLayoutArray
&
layouts
,
const
MatrixMulForwardImpl
*
opr
)
{
megdnn_assert
(
layouts
.
size
()
==
3
);
std
::
pair
<
TensorLayoutArray
,
MatrixMulForwardImpl
::
Param
>
ret
;
ret
.
first
=
layouts
;
auto
change_dtype
=
[](
TensorLayout
&
layout
)
{
if
(
layout
.
dtype
==
dtype
::
BFloat16
())
{
layout
.
dtype
=
dtype
::
Float32
();
}
};
change_dtype
(
new_args
.
layout_a
);
change_dtype
(
new_args
.
layout_b
);
change_dtype
(
new_args
.
layout_c
);
return
new_args
;
change_dtype
(
ret
.
first
[
0
]);
change_dtype
(
ret
.
first
[
1
]);
change_dtype
(
ret
.
first
[
2
]);
ret
.
second
=
opr
->
param
();
ret
.
second
.
compute_mode
=
MatrixMulForwardImpl
::
Param
::
ComputeMode
::
DEFAULT
;
return
ret
;
}
}
// namespace
std
::
vector
<
Algorithm
::
SearchItem
>
MatrixMulForwardImpl
::
AlgoBFloat16
::
get_subopr_list
(
const
TensorLayoutArray
&
layouts
,
const
OperatorBase
*
opr
)
const
{
auto
&&
config
=
sub_opr_config
(
layouts
,
static_cast
<
const
MatrixMulForwardImpl
*>
(
opr
));
std
::
string
param_str
;
Algorithm
::
serialize_write_pod
(
config
.
second
,
param_str
);
return
{{
Algorithm
::
OprType
::
MATRIX_MUL_FORWARD
,
param_str
,
config
.
first
}};
}
bool
MatrixMulForwardImpl
::
AlgoBFloat16
::
is_available
(
const
SizeArgs
&
args
)
const
{
auto
fargs
=
float_args
(
args
);
auto
&&
config
=
sub_opr_config
(
{
args
.
layout_a
,
args
.
layout_b
,
args
.
layout_c
},
args
.
opr
);
auto
matmul_opr
=
args
.
opr
->
handle
()
->
create_operator
<
MatrixMulForward
>
();
matmul_opr
->
param
()
=
config
.
second
;
return
args
.
layout_a
.
dtype
==
dtype
::
BFloat16
()
&&
m_algorithm
->
is_available
(
fargs
);
get_algorithm
(
static_cast
<
MatrixMulForwardImpl
*>
(
matmul_opr
.
get
()),
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
]);
}
WorkspaceBundle
MatrixMulForwardImpl
::
AlgoBFloat16
::
get_workspace_bundle
(
void
*
ptr
,
const
SizeArgs
&
args
)
const
{
auto
fargs
=
float_args
(
args
);
auto
matmul_opr
=
args
.
opr
->
handle
()
->
create_operator
<
MatrixMulForward
>
();
if
(
args
.
opr
->
execution_policy
().
algo
.
valid
())
{
megdnn_assert
(
args
.
opr
->
execution_policy
().
sub_policy
.
size
()
==
1
);
matmul_opr
->
execution_policy
()
=
args
.
opr
->
execution_policy
().
sub_policy
[
0
];
}
auto
&&
config
=
sub_opr_config
(
{
args
.
layout_a
,
args
.
layout_b
,
args
.
layout_c
},
args
.
opr
);
matmul_opr
->
param
()
=
config
.
second
;
SmallVector
<
size_t
>
sizes
;
auto
get_workspace
=
[
&
sizes
](
const
TensorLayout
&
src
)
{
TensorLayout
dst
=
src
;
if
(
dst
.
dtype
==
dtype
::
BFloat16
())
{
dst
.
dtype
=
dtype
::
Float32
();
auto
get_workspace
=
[
&
sizes
](
const
TensorLayout
&
src
,
const
TensorLayout
&
dst
)
{
if
(
src
.
dtype
!=
dst
.
dtype
)
{
sizes
.
push_back
(
dst
.
span
().
dist_byte
());
}
};
get_workspace
(
args
.
layout_a
);
get_workspace
(
args
.
layout_b
);
get_workspace
(
args
.
layout_c
);
sizes
.
push_back
(
m_algorithm
->
get_workspace_in_bytes
(
fargs
));
get_workspace
(
args
.
layout_a
,
config
.
first
[
0
]);
get_workspace
(
args
.
layout_b
,
config
.
first
[
1
]);
get_workspace
(
args
.
layout_c
,
config
.
first
[
2
]);
sizes
.
push_back
(
matmul_opr
->
get_workspace_in_bytes
(
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
]));
return
{
ptr
,
std
::
move
(
sizes
)};
}
...
...
@@ -82,7 +110,12 @@ void MatrixMulForwardImpl::AlgoBFloat16::exec(const ExecArgs& args) const {
args
.
opr
->
handle
()
->
create_operator
<
MatrixMulForward
>
();
matmul_opr
->
param
()
=
args
.
opr
->
param
();
matmul_opr
->
param
().
compute_mode
=
Param
::
ComputeMode
::
DEFAULT
;
matmul_opr
->
execution_policy
()
=
{
m_algorithm
->
desc
(),
{}};
if
(
args
.
opr
->
execution_policy
().
algo
.
valid
())
{
megdnn_assert
(
args
.
opr
->
execution_policy
().
sub_policy
.
size
()
==
1
);
matmul_opr
->
execution_policy
()
=
args
.
opr
->
execution_policy
().
sub_policy
[
0
];
}
matmul_opr
->
exec
(
a
,
b
,
c
,
ctypecvt
.
workspace
());
}
ctypecvt
.
comp_to_dst_type
(
c
,
args
.
tensor_c
);
...
...
dnn/test/cuda/matrix_mul.cpp
浏览文件 @
4a1d52c9
...
...
@@ -218,6 +218,9 @@ TEST_F(CUDA, MATRIX_MUL) {
B
=
TensorShape
{
k
,
n
};
if
(
dtype
==
dtype
::
BFloat16
())
{
param
.
compute_mode
=
param
::
MatrixMul
::
ComputeMode
::
FLOAT32
;
checker
.
set_before_exec_callback
(
AlgoChecker
<
MatrixMulForward
>
(
ExecutionPolicyAlgoName
{
"MATMUL_BFLOAT16"
,
{{
"CUBLAS"
,
{}}}}));
}
checker
.
set_param
(
param
)
.
set_dtype
(
0
,
stype
)
...
...
@@ -228,6 +231,10 @@ TEST_F(CUDA, MATRIX_MUL) {
?
5e-2
:
5e-3
)
.
execs
({
A
,
B
,
{}});
if
(
dtype
==
dtype
::
BFloat16
())
{
checker
.
reset_before_exec_callback
();
checker
.
opr
()
->
execution_policy
()
=
{};
}
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录