Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
08ff62de
MegEngine
项目概览
MegEngine 天元
/
MegEngine
大约 1 年 前同步成功
通知
396
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
08ff62de
编写于
1月 26, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor(megdnn): refactor batched matmul algo in conv bias
GitOrigin-RevId: 64fda611ff39c3f6ab46761d70daef8433375688
上级
8773926e
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
119 addition
and
52 deletion
+119
-52
dnn/src/cuda/conv_bias/algo.h
dnn/src/cuda/conv_bias/algo.h
+6
-4
dnn/src/cuda/conv_bias/batched_matmul.cpp
dnn/src/cuda/conv_bias/batched_matmul.cpp
+102
-42
dnn/test/common/opr_proxy.h
dnn/test/common/opr_proxy.h
+3
-0
dnn/test/cuda/conv_bias.cpp
dnn/test/cuda/conv_bias.cpp
+8
-6
未找到文件。
dnn/src/cuda/conv_bias/algo.h
浏览文件 @
08ff62de
...
...
@@ -361,9 +361,6 @@ private:
};
class
ConvBiasForwardImpl
::
AlgoBatchedMatmul
final
:
public
AlgoBase
{
static
void
extract_matmul_layouts
(
const
SizeArgs
&
args
,
TensorLayout
&
A
,
TensorLayout
&
B
,
TensorLayout
&
C
);
public:
bool
is_available
(
const
SizeArgs
&
args
)
const
override
;
size_t
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
override
;
...
...
@@ -372,10 +369,15 @@ public:
//! Returns the display name of this algorithm as a C string.
//!
//! The name is built on the first call (when m_name is still empty) via
//! ConvBiasForward::algo_name<ConvBiasForward::MatmulParam> and stored in
//! m_name; later calls return the stored string. The returned pointer refers
//! to m_name's buffer.
const char* name() const override {
    if (m_name.empty()) {
        // Only the post-commit spelling is kept: the stale "BATCHEDMATMUL"
        // argument line left over from the diff made the statement invalid.
        m_name = ConvBiasForward::algo_name<ConvBiasForward::MatmulParam>(
                "BATCHED_MATMUL", {});
    }
    return m_name.c_str();
}
//! Declaration only; the body is defined out of line.
//! Given the operand layouts and the owning operator, returns the list of
//! SearchItem entries describing the sub-operators this algorithm relies on.
std
::
vector
<
SearchItem
>
get_subopr_list
(
const
TensorLayoutArray
&
layouts
,
const
OperatorBase
*
opr
)
const
override
;
//! Reports this algorithm as reproducible: unconditionally returns true.
bool
is_reproducible
()
const
override
{
return
true
;
}
MEGDNN_DECL_ALGO_TYPE
(
CUDA_BATCHED_MATMUL
)
...
...
dnn/src/cuda/conv_bias/batched_matmul.cpp
浏览文件 @
08ff62de
...
...
@@ -6,10 +6,13 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/common/algo_chooser.h"
#include "src/common/conv_bias.h"
#include "src/cuda/batched_matrix_mul/algo.h"
#include "src/cuda/conv_bias/algo.h"
#include "src/cuda/handle.h"
#include "src/cuda/utils.cuh"
...
...
@@ -18,18 +21,72 @@ using namespace megdnn;
using
namespace
cuda
;
using
namespace
conv_bias
;
namespace
{
//! Derives the operand layouts and the matmul param used to run a conv-bias
//! problem as a batched matrix multiplication.
//!
//! \param fm          canonized filter meta; supplies ocpg, icpg and dtype for A
//! \param src_layout  convolution source layout; indexed as 4 dims here
//! \param             (unnamed) filter layout, not read by this function
//! \param dst_layout  convolution destination layout; supplies C's first two
//!                    dims and dtype
//! \param opr         conv-bias operator whose compute_mode is inspected
//! \return {{A, B, C}, param} where
//!         A is {N, OC, IC}, B is {N, IC, H * W}, C is {N, OC, H * W}
std::pair<TensorLayoutArray, MatrixMulForward::Param> sub_opr_config(
        const ConvBiasForwardImpl::CanonizedFilterMeta& fm,
        const TensorLayout& src_layout, const TensorLayout&,
        const TensorLayout& dst_layout, const ConvBiasForwardImpl* opr) {
    const size_t batch_size = src_layout.shape[0];

    // B {N, IC, H * W}: shapes and strides are written by hand so that the
    // batch/channel strides of the source are carried over unchanged while
    // the two spatial axes are folded into one unit-stride axis.
    TensorLayout src_mat;
    src_mat.dtype = src_layout.dtype;
    src_mat.ndim = 3;
    src_mat.shape[0] = batch_size;
    src_mat.shape[1] = src_layout.shape[1];
    src_mat.shape[2] = src_layout.shape[2] * src_layout.shape[3];
    src_mat.stride[0] = src_layout.stride[0];
    src_mat.stride[1] = src_layout.stride[1];
    src_mat.stride[2] = 1;

    // A {N, OC, IC}: batch stride is forced to 0 (presumably so that a single
    // filter matrix is addressed by every batch entry -- confirm with callers).
    TensorLayout filter_mat;
    filter_mat = {{batch_size, fm.ocpg, fm.icpg}, fm.dtype};
    filter_mat.stride[0] = 0;

    // C {N, OC, H * W}
    TensorLayout dst_mat;
    dst_mat = {{dst_layout.shape[0], dst_layout.shape[1], src_mat.shape[2]},
               dst_layout.dtype};

    // Only FLOAT32 compute mode is forwarded; otherwise the default-constructed
    // param is returned untouched.
    MatrixMulForward::Param mm_param;
    const bool wants_fp32 = opr->param().compute_mode ==
                            param::Convolution::ComputeMode::FLOAT32;
    if (wants_fp32) {
        mm_param.compute_mode = param::MatrixMul::ComputeMode::FLOAT32;
    }

    return {{filter_mat, src_mat, dst_mat}, mm_param};
}
}
// namespace
std
::
vector
<
Algorithm
::
SearchItem
>
ConvBiasForwardImpl
::
AlgoBatchedMatmul
::
get_subopr_list
(
const
TensorLayoutArray
&
layouts
,
const
OperatorBase
*
opr
)
const
{
const
ConvBiasForwardImpl
*
conv_bias_opr
=
static_cast
<
const
ConvBiasForwardImpl
*>
(
opr
);
CanonizedFilterMeta
fm
=
conv_bias_opr
->
check_layout_fwd
(
layouts
[
0
],
layouts
[
1
],
layouts
[
4
]);
auto
&&
config
=
sub_opr_config
(
fm
,
layouts
[
0
],
layouts
[
1
],
layouts
[
4
],
conv_bias_opr
);
std
::
string
param_str
;
Algorithm
::
serialize_write_pod
(
config
.
second
,
param_str
);
return
{{
Algorithm
::
OprType
::
BATCHED_MATRIX_MUL_FORWARD
,
param_str
,
config
.
first
}};
}
bool
ConvBiasForwardImpl
::
AlgoBatchedMatmul
::
is_available
(
const
SizeArgs
&
args
)
const
{
if
(
args
.
z_layout
->
ndim
>
0
)
return
false
;
//! cudnn batched matmul with discontinuous stride has many bugs, so disable
//! here.
TensorLayout
A
,
B
,
C
;
extract_matmul_layouts
(
args
,
A
,
B
,
C
);
if
(
!
B
.
is_contiguous
())
{
return
false
;
auto
bmatmul_opr
=
args
.
handle
->
create_operator
<
BatchedMatrixMulForward
>
();
if
(
args
.
opr
->
execution_policy
().
algo
.
valid
()
&&
!
args
.
opr
->
execution_policy
().
sub_policy
.
empty
())
{
megdnn_assert
(
args
.
opr
->
execution_policy
().
sub_policy
.
size
()
==
1
);
bmatmul_opr
->
execution_policy
()
=
args
.
opr
->
execution_policy
().
sub_policy
[
0
]
;
}
auto
&&
config
=
sub_opr_config
(
args
.
filter_meta
,
*
args
.
src_layout
,
*
args
.
filter_layout
,
*
args
.
dst_layout
,
args
.
opr
);
bmatmul_opr
->
param
()
=
config
.
second
;
auto
&&
fm
=
args
.
filter_meta
;
return
fm
.
format
==
Param
::
Format
::
NCHW
&&
(
fm
.
dtype
.
enumv
()
==
DTypeEnum
::
Float32
||
...
...
@@ -37,29 +94,10 @@ bool ConvBiasForwardImpl::AlgoBatchedMatmul::is_available(
fm
.
spatial_ndim
==
2
&&
fm
.
group
==
1
&&
fm
.
dilation
[
0
]
==
1
&&
fm
.
dilation
[
1
]
==
1
&&
fm
.
spatial
[
0
]
==
1
&&
fm
.
spatial
[
1
]
==
1
&&
fm
.
padding
[
0
]
==
0
&&
fm
.
padding
[
1
]
==
0
&&
fm
.
stride
[
0
]
==
1
&&
fm
.
stride
[
1
]
==
1
;
}
void
ConvBiasForwardImpl
::
AlgoBatchedMatmul
::
extract_matmul_layouts
(
const
SizeArgs
&
args
,
TensorLayout
&
A
,
TensorLayout
&
B
,
TensorLayout
&
C
)
{
auto
&&
fm
=
args
.
filter_meta
;
// A {N, OC, IC}
// B {N, IC, H * W}
// C {N, OC, H * W}
size_t
batched
=
args
.
src_layout
->
shape
[
0
];
A
=
{{
batched
,
fm
.
ocpg
,
fm
.
icpg
},
fm
.
dtype
};
A
.
stride
[
0
]
=
0
;
B
.
ndim
=
3
;
B
.
shape
[
1
]
=
args
.
src_layout
->
shape
[
1
];
B
.
shape
[
2
]
=
args
.
src_layout
->
shape
[
2
]
*
args
.
src_layout
->
shape
[
3
];
B
.
shape
[
0
]
=
batched
;
B
.
stride
[
2
]
=
1
;
B
.
stride
[
1
]
=
args
.
src_layout
->
stride
[
1
];
B
.
stride
[
0
]
=
args
.
src_layout
->
stride
[
0
];
B
.
dtype
=
args
.
src_layout
->
dtype
;
C
=
{{
args
.
dst_layout
->
shape
[
0
],
args
.
dst_layout
->
shape
[
1
],
B
.
shape
[
2
]},
args
.
dst_layout
->
dtype
};
fm
.
stride
[
1
]
==
1
&&
get_algorithm
(
static_cast
<
BatchedMatrixMulForwardImpl
*>
(
bmatmul_opr
.
get
()),
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
]);
}
WorkspaceBundle
ConvBiasForwardImpl
::
AlgoBatchedMatmul
::
get_workspace_bundle
(
...
...
@@ -76,11 +114,23 @@ WorkspaceBundle ConvBiasForwardImpl::AlgoBatchedMatmul::get_workspace_bundle(
SizeArgs
conv_args
=
args
;
conv_args
.
dst_layout
=
&
dst_layout
;
TensorLayout
A
,
B
,
C
;
extract_matmul_layouts
(
conv_args
,
A
,
B
,
C
);
sizes
.
insert
(
sizes
.
begin
(),
args
.
handle
->
batched_matrix_mul
()
->
get_workspace_in_bytes
(
A
,
B
,
C
));
auto
bmatmul_opr
=
args
.
handle
->
create_operator
<
BatchedMatrixMulForward
>
();
if
(
args
.
opr
->
execution_policy
().
algo
.
valid
()
&&
!
args
.
opr
->
execution_policy
().
sub_policy
.
empty
())
{
megdnn_assert
(
args
.
opr
->
execution_policy
().
sub_policy
.
size
()
==
1
);
bmatmul_opr
->
execution_policy
()
=
args
.
opr
->
execution_policy
().
sub_policy
[
0
];
}
auto
&&
config
=
sub_opr_config
(
args
.
filter_meta
,
*
args
.
src_layout
,
*
args
.
filter_layout
,
*
args
.
dst_layout
,
args
.
opr
);
bmatmul_opr
->
param
()
=
config
.
second
;
sizes
.
insert
(
sizes
.
begin
(),
args
.
handle
->
batched_matrix_mul
()
->
get_workspace_in_bytes
(
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
]));
return
{
ptr
,
std
::
move
(
sizes
)};
}
...
...
@@ -104,13 +154,23 @@ void ConvBiasForwardImpl::AlgoBatchedMatmul::exec(const ExecArgs& args) const {
conv_args
.
dst_tensor
=
&
conv_dst_tensor
;
conv_args
.
dst_layout
=
&
conv_dst_tensor
.
layout
;
{
TensorND
A
,
B
,
C
;
extract_matmul_layouts
(
args
,
A
.
layout
,
B
.
layout
,
C
.
layout
);
A
.
raw_ptr
=
args
.
filter_tensor
->
raw_ptr
;
B
.
raw_ptr
=
args
.
src_tensor
->
raw_ptr
;
C
.
raw_ptr
=
args
.
dst_tensor
->
raw_ptr
;
auto
mm
=
args
.
handle
->
batched_matrix_mul
();
mm
->
exec
(
A
,
B
,
C
,
bundle
.
get_workspace
(
0
));
auto
bmatmul_opr
=
args
.
handle
->
create_operator
<
BatchedMatrixMulForward
>
();
if
(
args
.
opr
->
execution_policy
().
algo
.
valid
())
{
megdnn_assert
(
args
.
opr
->
execution_policy
().
sub_policy
.
size
()
==
1
);
bmatmul_opr
->
execution_policy
()
=
args
.
opr
->
execution_policy
().
sub_policy
[
0
];
}
auto
&&
config
=
sub_opr_config
(
args
.
filter_meta
,
*
args
.
src_layout
,
*
args
.
filter_layout
,
*
args
.
dst_layout
,
args
.
opr
);
bmatmul_opr
->
param
()
=
config
.
second
;
TensorND
A
{
args
.
filter_tensor
->
raw_ptr
,
config
.
first
[
0
]},
B
{
args
.
src_tensor
->
raw_ptr
,
config
.
first
[
1
]},
C
{
args
.
dst_tensor
->
raw_ptr
,
config
.
first
[
2
]};
bmatmul_opr
->
exec
(
A
,
B
,
C
,
bundle
.
get_workspace
(
0
));
}
handle_bias_and_nonlinear
(
args
.
handle
,
args
.
nonlinear_mode
,
&
conv_dst_tensor
,
args
.
dst_tensor
,
...
...
dnn/test/common/opr_proxy.h
浏览文件 @
08ff62de
...
...
@@ -46,6 +46,7 @@ struct OprTypeFromOprTrait;
}
cb
(
MATRIX_MUL_FORWARD
,
MatrixMulForward
);
cb
(
BATCHED_MATRIX_MUL_FORWARD
,
BatchedMatrixMulForward
);
cb
(
CONVOLUTION_FORWARD
,
ConvolutionForward
);
cb
(
CONVOLUTION_BACKWARD_DATA
,
ConvolutionBackwardData
);
cb
(
CONVOLUTION_BACKWARD_FILTER
,
ConvolutionBackwardFilter
);
...
...
@@ -66,6 +67,7 @@ cb(CONVBIAS_FORWARD, ConvBiasForward);
// clang-format off
#define FOREACH_OPR_TYPE(cb) \
cb(MATRIX_MUL_FORWARD) \
cb(BATCHED_MATRIX_MUL_FORWARD) \
cb(CONVOLUTION_FORWARD) \
cb(CONVOLUTION_BACKWARD_DATA) \
cb(CONVOLUTION_BACKWARD_FILTER) \
...
...
@@ -83,6 +85,7 @@ cb(CONVBIAS_FORWARD, ConvBiasForward);
#define FOREACH_OPR_TYPE_WITH_STMT(cb, stmt) \
cb(MATRIX_MUL_FORWARD, stmt) \
cb(BATCHED_MATRIX_MUL_FORWARD, stmt) \
cb(CONVOLUTION_FORWARD, stmt) \
cb(CONVOLUTION_BACKWARD_DATA, stmt) \
cb(CONVOLUTION_BACKWARD_FILTER, stmt) \
...
...
dnn/test/cuda/conv_bias.cpp
浏览文件 @
08ff62de
...
...
@@ -821,7 +821,7 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_MATMUL_NCHW4) {
{{
8
,
64
,
12
,
12
,
4
},
{
256
,
64
,
3
,
3
,
4
},
{
1
,
64
,
1
,
1
,
4
},
{},
{}});
}
TEST_F
(
CUDA
,
CONV_BIAS_FORWARD_
MATMUL_1x1
)
{
TEST_F
(
CUDA
,
CONV_BIAS_FORWARD_
BATCHED_MATMUL
)
{
using
namespace
conv_bias
;
std
::
vector
<
TestArg
>
args
=
get_args_1x1
();
Checker
<
ConvBiasForward
>
checker
(
handle_cuda
());
...
...
@@ -834,13 +834,15 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_MATMUL_1x1) {
.
set_rng
(
1
,
&
default_rng
)
.
set_rng
(
2
,
&
default_rng
)
.
set_epsilon
(
1e-3
);
checker
.
set_before_exec_callback
(
AlgoChecker
<
ConvBiasForward
>
(
ExecutionPolicyAlgoName
{
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
MatmulParam
>
(
"BATCHED_MATMUL"
,
{})
.
c_str
(),
{{
"CUBLAS"
,
{}}}}));
for
(
auto
&&
arg
:
args
)
{
checker
.
set_param
(
arg
.
param
);
checker
.
set_before_exec_callback
(
conv_bias
::
ConvBiasAlgoChecker
<
ConvBias
>
(
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
MatmulParam
>
(
"BATCHEDMATMUL"
,
{})
.
c_str
()));
checker
.
execs
({
arg
.
src
,
arg
.
filter
,
arg
.
bias
,
{},
{}});
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录