Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
6ce212d2
MegEngine
项目概览
MegEngine 天元
/
MegEngine
大约 1 年 前同步成功
通知
399
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
6ce212d2
编写于
6月 30, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor(mgb): refactor group conv
GitOrigin-RevId: 7afd3126900938661927c047aac87cb4aeaa9f13
上级
febd0b17
变更
14
隐藏空白更改
内联
并排
Showing
14 changed file
with
484 addition
and
393 deletion
+484
-393
dnn/src/cuda/conv_bias/algo.cpp
dnn/src/cuda/conv_bias/algo.cpp
+3
-16
dnn/src/cuda/conv_bias/algo.h
dnn/src/cuda/conv_bias/algo.h
+16
-21
dnn/src/cuda/conv_bias/group_conv.cpp
dnn/src/cuda/conv_bias/group_conv.cpp
+84
-53
dnn/src/cuda/conv_bias/opr_impl.cpp
dnn/src/cuda/conv_bias/opr_impl.cpp
+7
-15
dnn/src/cuda/convolution/backward_data/algo.cpp
dnn/src/cuda/convolution/backward_data/algo.cpp
+4
-20
dnn/src/cuda/convolution/backward_data/algo.h
dnn/src/cuda/convolution/backward_data/algo.h
+18
-26
dnn/src/cuda/convolution/backward_data/group_conv.cpp
dnn/src/cuda/convolution/backward_data/group_conv.cpp
+99
-44
dnn/src/cuda/convolution/backward_filter/algo.cpp
dnn/src/cuda/convolution/backward_filter/algo.cpp
+4
-19
dnn/src/cuda/convolution/backward_filter/algo.h
dnn/src/cuda/convolution/backward_filter/algo.h
+21
-24
dnn/src/cuda/convolution/backward_filter/group_conv.cpp
dnn/src/cuda/convolution/backward_filter/group_conv.cpp
+103
-49
dnn/src/cuda/convolution/opr_impl.cpp
dnn/src/cuda/convolution/opr_impl.cpp
+13
-45
dnn/src/cuda/convolution/opr_impl.h
dnn/src/cuda/convolution/opr_impl.h
+0
-38
dnn/test/cuda/conv_bias.cpp
dnn/test/cuda/conv_bias.cpp
+3
-2
dnn/test/cuda/group_conv.cpp
dnn/test/cuda/group_conv.cpp
+109
-21
未找到文件。
dnn/src/cuda/conv_bias/algo.cpp
浏览文件 @
6ce212d2
...
...
@@ -42,24 +42,11 @@ ConvBiasForwardImpl::AlgoPack::AlgoPack() {
conv_algos
.
push_back
(
&
matmul
);
conv_algos
.
push_back
(
&
matmul8x8x32
);
conv_algos
.
push_back
(
&
batched_matmul
);
conv_algos
.
reserve
(
conv_algos
.
size
()
*
2
);
//! add gconv algos by AlgoGroupConvGeneral
size_t
algo_size
=
conv_algos
.
size
();
for
(
size_t
i
=
3
;
i
<
algo_size
;
++
i
)
{
gconv_refhold
.
emplace_back
(
new
AlgoGroupConvGeneral
(
conv_algos
[
i
]));
algo2gconv
[
conv_algos
[
i
]]
=
gconv_refhold
.
back
().
get
();
conv_algos
.
push_back
(
gconv_refhold
.
back
().
get
());
}
conv_algos
.
push_back
(
&
group
);
for
(
auto
&&
algo
:
conv_algos
)
{
all_algos
.
push_back
(
algo
);
}
non_cudnn_algos
.
push_back
(
all_algos
.
rbegin
()[
4
]);
// group inplace_matmul
non_cudnn_algos
.
push_back
(
all_algos
.
rbegin
()[
3
]);
// group matmul
non_cudnn_algos
.
push_back
(
all_algos
.
rbegin
()[
2
]);
// group matmul_8x8x32
non_cudnn_algos
.
push_back
(
all_algos
.
rbegin
()[
1
]);
// group batched_matmul
non_cudnn_algos
.
push_back
(
all_algos
.
rbegin
()[
0
]);
// group 1x1
all_algos
.
push_back
(
&
bfloat16
);
bfloat16_algos
.
push_back
(
&
bfloat16
);
...
...
@@ -118,7 +105,7 @@ ConvBiasForwardImpl::AlgoPack ConvBiasForwardImpl::sm_algo_pack;
MEGDNN_DEF_GET_ALGO_FROM_DESC
(
ConvBiasForwardImpl
)
ConvBiasForwardImpl
::
AlgoBase
::
SizeArgs
::
SizeArgs
(
ConvBiasForwardImpl
*
o
,
const
TensorLayout
&
src
,
const
ConvBiasForwardImpl
*
o
,
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
bias
,
const
TensorLayout
&
z
,
const
TensorLayout
&
dst
,
const
PreprocessedFilter
*
preprocessed_filter
)
...
...
@@ -127,7 +114,7 @@ ConvBiasForwardImpl::AlgoBase::SizeArgs::SizeArgs(
dst
,
preprocessed_filter
)
{}
ConvBiasForwardImpl
::
AlgoBase
::
SizeArgs
::
SizeArgs
(
ConvBiasForwardImpl
*
o
,
const
TensorLayout
&
src
,
const
ConvBiasForwardImpl
*
o
,
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
CanonizedFilterMeta
&
filter_meta
,
const
TensorLayout
&
bias
,
const
TensorLayout
&
z
,
const
TensorLayout
&
dst
,
const
PreprocessedFilter
*
preprocessed_filter
)
...
...
dnn/src/cuda/conv_bias/algo.h
浏览文件 @
6ce212d2
...
...
@@ -78,15 +78,15 @@ public:
AlgoBase
()
:
Algorithm
()
{
m_handle_type
=
Handle
::
HandleType
::
CUDA
;
}
struct
SizeArgs
:
public
conv_bias
::
BiasForwardSizeArgs
{
ConvBiasForwardImpl
*
opr
;
const
ConvBiasForwardImpl
*
opr
;
const
PreprocessedFilter
*
preprocessed_filter
;
std
::
string
to_string
()
const
;
SizeArgs
(
ConvBiasForwardImpl
*
opr
,
const
TensorLayout
&
src
,
SizeArgs
(
const
ConvBiasForwardImpl
*
opr
,
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
bias
,
const
TensorLayout
&
z
,
const
TensorLayout
&
dst
,
const
PreprocessedFilter
*
preprocessed_filter
=
nullptr
);
SizeArgs
(
ConvBiasForwardImpl
*
opr
,
const
TensorLayout
&
src
,
SizeArgs
(
const
ConvBiasForwardImpl
*
opr
,
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
CanonizedFilterMeta
&
filter_meta
,
const
TensorLayout
&
bias
,
const
TensorLayout
&
z
,
...
...
@@ -434,27 +434,24 @@ private:
//! implement group conv by another algo
class
ConvBiasForwardImpl
::
AlgoGroupConvGeneral
final
:
public
AlgoBase
{
public:
AlgoGroupConvGeneral
(
AlgoBase
*
impl
);
bool
is_available
(
const
SizeArgs
&
args
)
const
override
;
size_t
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
override
;
void
exec
(
const
ExecArgs
&
args
)
const
override
;
const
char
*
name
()
const
override
{
return
m_name
.
c_str
();
}
std
::
vector
<
SearchItem
>
get_subopr_list
(
const
TensorLayoutArray
&
layouts
,
const
OperatorBase
*
opr
)
const
override
;
AlgoAttribute
attribute
()
const
override
{
auto
ret
=
AlgoAttribute
::
DEFAULT
;
#define cb(attr) \
if (m_impl->contain_attribute_all(attr)) { \
ret |= attr; \
const
char
*
name
()
const
override
{
if
(
m_name
.
empty
())
{
m_name
=
ConvBiasForward
::
algo_name
<
DirectParam
>
(
"CUDA:GROUP_CONV"
,
{});
}
return
m_name
.
c_str
();
}
MEGDNN_FOREACH_ALGO_ATTRIBUTE_INHERITABLE
(
cb
)
#undef cb
if
(
m_impl
->
contain_attribute_all
(
AlgoAttribute
::
REPRODUCIBLE
))
{
ret
|=
AlgoAttribute
::
REPRODUCIBLE
;
}
return
ret
;
AlgoAttribute
attribute
()
const
override
{
return
AlgoAttribute
::
REPRODUCIBLE
;
}
static
void
modify_size_args
(
SizeArgs
&
args
,
TensorLayout
&
src_pg
,
...
...
@@ -463,8 +460,7 @@ public:
private:
WorkspaceBundle
get_workspace_bundle
(
void
*
ptr
,
const
SizeArgs
&
args
)
const
;
AlgoBase
*
m_impl
;
std
::
string
m_name
;
mutable
std
::
string
m_name
;
};
#if CUDA_VERSION >= 10000
...
...
@@ -1087,9 +1083,8 @@ public:
std
::
vector
<
AlgoInt4Int4NHWCIMMAImplicitGemm
>
int4_int4_nhwc_imma
;
std
::
vector
<
AlgoUInt4Int4NHWCIMMAImplicitGemm
>
uint4_int4_nhwc_imma
;
#endif
std
::
vector
<
std
::
unique_ptr
<
AlgoGroupConvGeneral
>>
gconv_refhold
;
AlgoGroupConvGeneral
group
;
AlgoBFloat16
bfloat16
;
std
::
unordered_map
<
AlgoBase
*
,
AlgoGroupConvGeneral
*>
algo2gconv
;
AlgoBase
*
cudnn_conv_bias_act_from_enum
(
cudnnConvolutionFwdAlgo_t
algo
);
...
...
dnn/src/cuda/conv_bias/group_conv.cpp
浏览文件 @
6ce212d2
...
...
@@ -9,6 +9,7 @@
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include <utility>
#include "src/common/conv_bias.h"
#include "src/cuda/conv_bias/algo.h"
...
...
@@ -16,36 +17,80 @@ using namespace megdnn;
using
namespace
cuda
;
using
namespace
conv_bias
;
void
ConvBiasForwardImpl
::
AlgoGroupConvGeneral
::
modify_size_args
(
ConvBiasForwardImpl
::
AlgoBase
::
SizeArgs
&
args
,
TensorLayout
&
src_pg
,
TensorLayout
&
dst_pg
,
TensorLayout
&
bias_pg
)
{
src_pg
=
*
args
.
src_layout
;
dst_pg
=
*
args
.
dst_layout
;
bias_pg
=
*
args
.
bias_layout
;
namespace
{
std
::
pair
<
TensorLayoutArray
,
ConvBiasForwardImpl
::
Param
>
sub_opr_config
(
const
ConvBiasForwardImpl
::
AlgoBase
::
SizeArgs
&
args
)
{
TensorLayout
src_pg
=
*
args
.
src_layout
;
SmallVector
<
size_t
>
flt_shape
(
0
);
std
::
vector
<
ptrdiff_t
>
flt_stride
(
0
);
size_t
idx
=
0
;
// check if the first dim is group
if
(
args
.
filter_layout
->
ndim
>
args
.
src_layout
->
ndim
)
++
idx
;
for
(;
idx
<
args
.
filter_layout
->
ndim
;
++
idx
)
{
flt_shape
.
push_back
(
args
.
filter_layout
->
shape
[
idx
]);
flt_stride
.
push_back
(
args
.
filter_layout
->
stride
[
idx
]);
}
TensorLayout
filter_pg
(
flt_shape
,
flt_stride
,
args
.
filter_layout
->
dtype
,
args
.
filter_layout
->
format
);
TensorLayout
bias_pg
=
*
args
.
bias_layout
;
TensorLayout
z_pg
=
*
args
.
z_layout
;
TensorLayout
dst_pg
=
*
args
.
dst_layout
;
auto
nr_grp
=
args
.
filter_meta
.
group
;
args
.
filter_meta
.
group
=
1
;
size_t
c_pos
;
if
(
args
.
filter_meta
.
format
==
Param
::
Format
::
NCHW
||
args
.
filter_meta
.
format
==
Param
::
Format
::
NCHW4
)
{
if
(
args
.
filter_meta
.
format
==
megdnn
::
param
::
ConvBias
::
Format
::
NCHW
||
args
.
filter_meta
.
format
==
megdnn
::
param
::
ConvBias
::
Format
::
NCHW4
)
{
c_pos
=
1
;
}
else
{
megdnn_assert
(
args
.
filter_meta
.
format
==
Param
::
Format
::
NHWC
,
megdnn_assert
(
args
.
filter_meta
.
format
==
megdnn
::
param
::
ConvBias
::
Format
::
NHWC
,
"invalid conv format"
);
c_pos
=
3
;
}
src_pg
.
shape
[
c_pos
]
/=
nr_grp
;
dst_pg
.
shape
[
c_pos
]
/=
nr_grp
;
bias_pg
.
ndim
=
0
;
args
.
src_layout
=
&
src_pg
;
args
.
dst_layout
=
&
dst_pg
;
args
.
bias_layout
=
&
bias_pg
;
args
.
nonlinear_mode
=
Param
::
NonlineMode
::
IDENTITY
;
dst_pg
.
shape
[
c_pos
]
/=
nr_grp
;
megdnn
::
param
::
ConvBias
param
=
args
.
opr
->
param
();
param
.
sparse
=
megdnn
::
param
::
ConvBias
::
Sparse
::
DENSE
;
param
.
nonlineMode
=
megdnn
::
param
::
ConvBias
::
NonlineMode
::
IDENTITY
;
std
::
pair
<
TensorLayoutArray
,
ConvBiasForwardImpl
::
Param
>
ret
;
ret
.
first
=
{
src_pg
,
filter_pg
,
bias_pg
,
z_pg
,
dst_pg
};
ret
.
second
=
param
;
return
ret
;
}
ConvBiasForwardImpl
::
AlgoGroupConvGeneral
::
AlgoGroupConvGeneral
(
AlgoBase
*
impl
)
:
m_impl
{
impl
}
{
m_name
=
ConvBiasForward
::
algo_name
<
DirectParam
>
(
ssprintf
(
"%s:%s"
,
"CUDA:GROUP_CONV"
,
impl
->
name
()),
{});
std
::
pair
<
TensorLayoutArray
,
std
::
unique_ptr
<
ConvBiasForward
>>
prepare_sub_opr
(
const
ConvBiasForwardImpl
::
AlgoBase
::
SizeArgs
&
args
)
{
auto
convbias_opr
=
args
.
handle
->
create_operator
<
ConvBias
>
();
set_execution_policy
<
ConvBiasForward
,
ConvBiasForward
*>
(
args
.
opr
,
convbias_opr
.
get
());
auto
&&
config
=
sub_opr_config
(
args
);
convbias_opr
->
param
()
=
config
.
second
;
return
{
config
.
first
,
std
::
move
(
convbias_opr
)};
}
}
// namespace
std
::
vector
<
Algorithm
::
SearchItem
>
ConvBiasForwardImpl
::
AlgoGroupConvGeneral
::
get_subopr_list
(
const
TensorLayoutArray
&
layouts
,
const
OperatorBase
*
opr
)
const
{
AlgoBase
::
SizeArgs
args
{
static_cast
<
const
ConvBiasForwardImpl
*>
(
opr
),
layouts
[
0
],
layouts
[
1
],
layouts
[
2
],
layouts
[
3
],
layouts
[
4
]};
auto
&&
config
=
sub_opr_config
(
args
);
std
::
string
param_str
;
Algorithm
::
serialize_write_pod
(
config
.
second
,
param_str
);
return
{{
Algorithm
::
OprType
::
CONVBIAS_FORWARD
,
param_str
,
config
.
first
}};
}
bool
ConvBiasForwardImpl
::
AlgoGroupConvGeneral
::
is_available
(
...
...
@@ -62,10 +107,10 @@ bool ConvBiasForwardImpl::AlgoGroupConvGeneral::is_available(
param
.
format
==
param
::
ConvBias
::
Format
::
NCHW32
)
return
false
;
auto
sub_args
=
args
;
TensorLayout
src_pg
,
dst_pg
,
bias_pg
;
modify_size_args
(
sub_args
,
src_pg
,
dst_pg
,
bias_pg
);
return
m_impl
->
is_available
(
sub_args
);
auto
config
=
prepare_sub_opr
(
args
)
;
return
get_algorithm
(
static_cast
<
ConvBiasForwardImpl
*>
(
config
.
second
.
get
()),
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
],
config
.
first
[
3
],
config
.
first
[
4
]
);
}
WorkspaceBundle
ConvBiasForwardImpl
::
AlgoGroupConvGeneral
::
get_workspace_bundle
(
...
...
@@ -80,12 +125,12 @@ WorkspaceBundle ConvBiasForwardImpl::AlgoGroupConvGeneral::get_workspace_bundle(
sizes
.
push_back
(
dst_layout
.
span
().
dist_byte
());
}
auto
sub_args
=
args
;
s
ub_args
.
dst_layout
=
&
dst_layout
;
TensorLayout
src_pg
,
dst_pg
,
bias_pg
;
modify_size_args
(
sub_args
,
src_pg
,
dst_pg
,
bias_pg
);
sizes
.
insert
(
sizes
.
begin
(),
m_impl
->
get_workspace_in_bytes
(
sub_args
)
);
auto
config
=
prepare_sub_opr
(
args
)
;
s
ize_t
mm_ws
=
config
.
second
->
get_workspace_in_bytes
(
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
],
config
.
first
[
3
],
config
.
first
[
4
],
nullptr
);
sizes
.
insert
(
sizes
.
begin
(),
mm_ws
);
return
{
ptr
,
std
::
move
(
sizes
)};
}
...
...
@@ -109,28 +154,13 @@ void ConvBiasForwardImpl::AlgoGroupConvGeneral::exec(
auto
sub_args
=
args
;
sub_args
.
dst_tensor
=
&
conv_dst_tensor
;
sub_args
.
dst_layout
=
&
conv_dst_tensor
.
layout
;
TensorND
tsrc
{
*
args
.
src_tensor
},
tdst
{
conv_dst_tensor
},
tbias
{
*
args
.
bias_tensor
};
SmallVector
<
size_t
>
flt_shape
(
0
);
std
::
vector
<
ptrdiff_t
>
flt_stride
(
0
);
size_t
idx
=
0
;
// check if the first dim is group
if
(
args
.
filter_tensor
->
layout
.
ndim
>
args
.
src_layout
->
ndim
)
++
idx
;
for
(;
idx
<
args
.
filter_tensor
->
layout
.
ndim
;
++
idx
)
{
flt_shape
.
push_back
(
args
.
filter_tensor
->
layout
[
idx
]);
flt_stride
.
push_back
(
args
.
filter_tensor
->
layout
.
stride
[
idx
]);
}
TensorND
tflt
{
args
.
filter_tensor
->
raw_ptr
,
TensorLayout
{
flt_shape
,
flt_stride
,
args
.
filter_tensor
->
layout
.
dtype
,
args
.
filter_tensor
->
layout
.
format
}};
modify_size_args
(
sub_args
,
tsrc
.
layout
,
tdst
.
layout
,
tbias
.
layout
);
sub_args
.
src_tensor
=
&
tsrc
;
sub_args
.
dst_tensor
=
&
tdst
;
sub_args
.
filter_tensor
=
&
tflt
;
sub_args
.
bias_tensor
=
&
tbias
;
auto
config
=
prepare_sub_opr
(
sub_args
);
TensorND
tsrc
{
args
.
src_tensor
->
raw_ptr
,
config
.
first
[
0
]};
TensorND
tfilter
{
args
.
filter_tensor
->
raw_ptr
,
config
.
first
[
1
]};
TensorND
tbias
{
args
.
bias_tensor
->
raw_ptr
,
config
.
first
[
2
]};
TensorND
tz
{
args
.
z_tensor
->
raw_ptr
,
config
.
first
[
3
]};
TensorND
tdst
{
conv_dst_tensor
.
raw_ptr
,
config
.
first
[
4
]};
size_t
c_pos
;
if
(
args
.
filter_meta
.
format
==
Param
::
Format
::
NCHW
||
...
...
@@ -150,16 +180,17 @@ void ConvBiasForwardImpl::AlgoGroupConvGeneral::exec(
strd_dst
=
tdst
.
layout
.
stride
[
c_pos
]
*
fm
.
ocpg
*
tdst
.
layout
.
dtype
.
size
(),
strd_flt
=
fm
.
icpg
*
fm
.
ocpg
*
fm
.
spatial
[
0
]
*
fm
.
spatial
[
1
]
*
tf
lt
.
layout
.
dtype
.
size
();
tf
ilter
.
layout
.
dtype
.
size
();
if
(
args
.
filter_meta
.
format
==
Param
::
Format
::
NCHW4
)
{
strd_src
>>=
2
;
strd_dst
>>=
2
;
}
for
(
uint32_t
g
=
0
;
g
<
grp
;
++
g
)
{
m_impl
->
exec
(
sub_args
);
config
.
second
->
exec
(
tsrc
,
tfilter
,
tbias
,
tz
,
tdst
,
nullptr
,
bundle
.
get_workspace
(
0
));
incr_voidp
(
tsrc
.
raw_ptr
,
strd_src
);
incr_voidp
(
tdst
.
raw_ptr
,
strd_dst
);
incr_voidp
(
tf
lt
.
raw_ptr
,
strd_flt
);
incr_voidp
(
tf
ilter
.
raw_ptr
,
strd_flt
);
}
}
handle_bias_and_nonlinear
(
args
.
handle
,
args
.
nonlinear_mode
,
...
...
dnn/src/cuda/conv_bias/opr_impl.cpp
浏览文件 @
6ce212d2
...
...
@@ -193,25 +193,17 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic(
return
algo
;
}
if
(
args
.
filter_meta
.
group
>
1
)
{
auto
orig_args
=
conv_args
;
TensorLayout
src
,
dst
,
bias
;
AlgoGroupConvGeneral
::
modify_size_args
(
conv_args
,
src
,
dst
,
bias
);
if
(
auto
algo
=
get_1x1_algo
(
conv_args
))
{
return
sm_algo_pack
.
algo2gconv
.
at
(
algo
);
}
if
(
is_cudnn_supported
(
conv_args
))
{
if
(
auto
algo
=
get_cudnn_algo
(
cudnn_conv_from_enum_wrapper
))
{
return
sm_algo_pack
.
algo2gconv
.
at
(
algo
);
}
}
conv_args
=
orig_args
;
}
if
(
auto
algo
=
get_1x1_algo
(
args
))
{
return
algo
;
}
if
(
args
.
filter_meta
.
group
>
1
)
{
if
(
auto
algo
=
megdnn
::
get_algo_match_attribute
<
ConvBiasForwardImpl
>
(
&
sm_algo_pack
.
group
,
positive_attr
,
negative_attr
)){
return
algo
;
}
}
if
(
sm_algo_pack
.
fallback_nchw_qs8
.
is_available_attribute
(
args
,
positive_attr
,
negative_attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
fallback_nchw_qs8
;
...
...
dnn/src/cuda/convolution/backward_data/algo.cpp
浏览文件 @
6ce212d2
...
...
@@ -39,25 +39,9 @@ ConvolutionBackwardDataImpl::AlgoPack::AlgoPack() {
int8_algos
.
push_back
(
&
int8_nchw_dotprod
);
all_algos
.
push_back
(
&
int8_nchw_dotprod
);
all_algos
.
reserve
(
all_algos
.
size
()
*
2
);
// add gconv algos by AlgoGroupConvGeneral
auto
all_algos_data
=
all_algos
.
data
();
size_t
group_algo_start
=
2
;
for
(
size_t
i
=
group_algo_start
;
i
<
all_algos
.
size
();
++
i
)
{
gconv
.
push_back
({
all_algos
[
i
]});
}
for
(
size_t
i
=
group_algo_start
;
i
<
all_algos
.
size
();
++
i
)
{
algo2gconv
[
all_algos
[
i
]]
=
&
gconv
[
i
-
group_algo_start
];
}
for
(
auto
&&
i
:
gconv
)
{
all_algos
.
push_back
(
&
i
);
}
megdnn_assert
(
all_algos_data
==
all_algos
.
data
());
non_cudnn_algos
.
push_back
(
all_algos
.
rbegin
()[
0
]);
// group matmul
all_algos
.
push_back
(
&
bfloat16
);
bfloat16_algos
.
push_back
(
&
bfloat16
);
all_algos
.
push_back
(
&
group
);
for
(
auto
&&
algo
:
all_algos
)
{
m_all_algos_map
.
emplace
(
algo
->
info
().
desc
,
algo
);
...
...
@@ -80,13 +64,13 @@ ConvolutionBackwardDataImpl::AlgoPack::cudnn_from_enum(
ConvolutionBackwardDataImpl
::
AlgoPack
ConvolutionBackwardDataImpl
::
sm_algo_pack
;
ConvolutionBackwardDataImpl
::
AlgoBase
::
SizeArgs
::
SizeArgs
(
ConvolutionBackwardDataImpl
*
o
,
const
TensorLayout
&
filter
,
const
ConvolutionBackwardDataImpl
*
o
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
)
:
SizeArgs
(
o
,
filter
,
o
->
make_canonized_filter_meta
(
grad
.
ndim
,
filter
),
diff
,
grad
)
{}
ConvolutionBackwardDataImpl
::
AlgoBase
::
SizeArgs
::
SizeArgs
(
ConvolutionBackwardDataImpl
*
o
,
const
TensorLayout
&
filter
,
const
ConvolutionBackwardDataImpl
*
o
,
const
TensorLayout
&
filter
,
const
CanonizedFilterMeta
&
filter_meta
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
)
:
handle
{
concrete_handle
(
o
->
handle
())},
...
...
@@ -97,7 +81,7 @@ ConvolutionBackwardDataImpl::AlgoBase::SizeArgs::SizeArgs(
opr
{
o
}
{}
ConvolutionBackwardDataImpl
::
AlgoBase
::
ExecArgs
::
ExecArgs
(
ConvolutionBackwardDataImpl
*
opr
,
_megdnn_tensor_in
filter
,
const
ConvolutionBackwardDataImpl
*
opr
,
_megdnn_tensor_in
filter
,
_megdnn_tensor_in
diff
,
_megdnn_tensor_out
grad
,
_megdnn_workspace
workspace
)
:
SizeArgs
(
opr
,
filter
.
layout
,
diff
.
layout
,
grad
.
layout
),
...
...
dnn/src/cuda/convolution/backward_data/algo.h
浏览文件 @
6ce212d2
...
...
@@ -49,15 +49,17 @@ public:
HandleImpl
*
handle
;
CanonizedFilterMeta
filter_meta
;
const
TensorLayout
*
diff_layout
,
*
grad_layout
,
*
filter_layout
;
ConvolutionBackwardDataImpl
*
opr
;
const
ConvolutionBackwardDataImpl
*
opr
;
std
::
string
to_string
()
const
;
void
init_desc
(
convolution
::
CUDNNBwdDataDescs
&
desc
)
const
{
desc
.
set
(
filter_meta
,
*
diff_layout
,
*
grad_layout
,
opr
->
param
());
}
SizeArgs
(
ConvolutionBackwardDataImpl
*
opr
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
);
SizeArgs
(
ConvolutionBackwardDataImpl
*
opr
,
const
TensorLayout
&
filter
,
SizeArgs
(
const
ConvolutionBackwardDataImpl
*
opr
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
);
SizeArgs
(
const
ConvolutionBackwardDataImpl
*
opr
,
const
TensorLayout
&
filter
,
const
CanonizedFilterMeta
&
filter_meta
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
);
...
...
@@ -70,7 +72,7 @@ public:
const
TensorND
*
filter_tensor
,
*
diff_tensor
,
*
grad_tensor
;
Workspace
workspace
;
ExecArgs
(
ConvolutionBackwardDataImpl
*
opr
,
_megdnn_tensor_in
filter
,
ExecArgs
(
const
ConvolutionBackwardDataImpl
*
opr
,
_megdnn_tensor_in
filter
,
_megdnn_tensor_in
diff
,
_megdnn_tensor_out
grad
,
_megdnn_workspace
workspace
);
};
...
...
@@ -219,35 +221,26 @@ private:
//! implement group conv by another algo
class
ConvolutionBackwardDataImpl
::
AlgoGroupConvGeneral
final
:
public
AlgoBase
{
AlgoBase
*
m_impl
;
std
::
string
m_name
;
public:
AlgoGroupConvGeneral
(
AlgoBase
*
impl
);
bool
is_available
(
const
SizeArgs
&
args
)
const
override
;
size_t
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
override
;
void
exec
(
const
ExecArgs
&
args
)
const
override
;
const
char
*
name
()
const
override
{
return
m_name
.
c_str
();
}
std
::
vector
<
SearchItem
>
get_subopr_list
(
const
TensorLayoutArray
&
layouts
,
const
OperatorBase
*
opr
)
const
override
;
const
char
*
name
()
const
override
{
return
"CUDA:GROUP_CONV_BACKWARD_DATA"
;
}
static
void
modify_size_args
(
SizeArgs
&
args
,
TensorLayout
&
diff_pg
,
TensorLayout
&
grad_pg
);
MEGDNN_DECL_ALGO_TYPE
(
CUDA_GROUP_CONV_GENERAL
)
AlgoAttribute
attribute
()
const
override
{
auto
ret
=
AlgoAttribute
::
DEFAULT
;
#define cb(attr) \
if (m_impl->contain_attribute_all(attr)) { \
ret |= attr; \
}
MEGDNN_FOREACH_ALGO_ATTRIBUTE_INHERITABLE
(
cb
)
#undef cb
if
(
m_impl
->
contain_attribute_all
(
AlgoAttribute
::
REPRODUCIBLE
))
{
ret
|=
AlgoAttribute
::
REPRODUCIBLE
;
}
return
ret
;
return
AlgoAttribute
::
REPRODUCIBLE
;
}
private:
WorkspaceBundle
get_workspace_bundle
(
void
*
ptr
,
const
SizeArgs
&
args
)
const
;
};
class
ConvolutionBackwardDataImpl
::
AlgoInt8NCHW4DotProdImplicitGemm
final
...
...
@@ -319,9 +312,8 @@ public:
AlgoMatmul
matmul
;
AlgoChanwise
chanwise
;
AlgoChanwiseSmall
chanwise_small
;
std
::
vector
<
AlgoGroupConvGeneral
>
gconv
;
std
::
unordered_map
<
AlgoBase
*
,
AlgoGroupConvGeneral
*>
algo2gconv
;
AlgoBFloat16
bfloat16
;
AlgoGroupConvGeneral
group
;
std
::
vector
<
AlgoInt8NCHW4DotProdImplicitGemm
>
int8_nchw4_dotprod
;
AlgoInt8NCHWDotProdImplicitGemm
int8_nchw_dotprod
;
...
...
dnn/src/cuda/convolution/backward_data/group_conv.cpp
浏览文件 @
6ce212d2
...
...
@@ -16,24 +16,63 @@ using namespace megdnn;
using
namespace
cuda
;
using
namespace
convolution
;
void
ConvolutionBackwardDataImpl
::
AlgoGroupConvGeneral
::
modify_size_args
(
ConvolutionBackwardDataImpl
::
AlgoBase
::
SizeArgs
&
args
,
TensorLayout
&
diff_pg
,
TensorLayout
&
grad_pg
)
{
diff_pg
=
*
args
.
diff_layout
;
grad_pg
=
*
args
.
grad_layout
;
namespace
{
std
::
pair
<
TensorLayoutArray
,
Convolution
::
Param
>
sub_opr_config
(
const
ConvolutionBackwardDataImpl
::
AlgoBase
::
SizeArgs
&
args
)
{
SmallVector
<
size_t
>
flt_shape
(
0
);
std
::
vector
<
ptrdiff_t
>
flt_stride
(
0
);
size_t
idx
=
0
;
// check if the first dim is group
if
(
args
.
filter_layout
->
ndim
>
args
.
diff_layout
->
ndim
)
++
idx
;
for
(;
idx
<
args
.
filter_layout
->
ndim
;
++
idx
)
{
flt_shape
.
push_back
(
args
.
filter_layout
->
shape
[
idx
]);
flt_stride
.
push_back
(
args
.
filter_layout
->
stride
[
idx
]);
}
TensorLayout
filter_pg
(
flt_shape
,
flt_stride
,
args
.
filter_layout
->
dtype
,
args
.
filter_layout
->
format
);
TensorLayout
diff_pg
=
*
args
.
diff_layout
;
TensorLayout
grad_pg
=
*
args
.
grad_layout
;
auto
nr_grp
=
args
.
filter_meta
.
group
;
args
.
filter_meta
.
group
=
1
;
diff_pg
.
shape
[
1
]
/=
nr_grp
;
grad_pg
.
shape
[
1
]
/=
nr_grp
;
args
.
diff_layout
=
&
diff_pg
;
args
.
grad_layout
=
&
grad_pg
;
size_t
c_pos
=
1
;
diff_pg
.
shape
[
c_pos
]
/=
nr_grp
;
grad_pg
.
shape
[
c_pos
]
/=
nr_grp
;
megdnn
::
param
::
Convolution
param
=
args
.
opr
->
param
();
param
.
sparse
=
megdnn
::
param
::
ConvBias
::
Sparse
::
DENSE
;
std
::
pair
<
TensorLayoutArray
,
ConvolutionBackwardDataImpl
::
Param
>
ret
;
ret
.
first
=
{
filter_pg
,
diff_pg
,
grad_pg
};
ret
.
second
=
param
;
return
ret
;
}
ConvolutionBackwardDataImpl
::
AlgoGroupConvGeneral
::
AlgoGroupConvGeneral
(
AlgoBase
*
impl
)
:
m_impl
{
impl
}
{
m_name
=
"group_conv:"
;
m_name
+=
impl
->
name
();
std
::
pair
<
TensorLayoutArray
,
std
::
unique_ptr
<
ConvolutionBackwardData
>>
prepare_sub_opr
(
const
ConvolutionBackwardDataImpl
::
AlgoBase
::
SizeArgs
&
args
)
{
auto
conv_bwd_data_opr
=
args
.
handle
->
create_operator
<
ConvolutionBackwardData
>
();
set_execution_policy
<
ConvolutionBackwardData
,
ConvolutionBackwardData
*>
(
args
.
opr
,
conv_bwd_data_opr
.
get
());
auto
&&
config
=
sub_opr_config
(
args
);
conv_bwd_data_opr
->
param
()
=
config
.
second
;
return
{
config
.
first
,
std
::
move
(
conv_bwd_data_opr
)};
}
}
// namespace
std
::
vector
<
Algorithm
::
SearchItem
>
ConvolutionBackwardDataImpl
::
AlgoGroupConvGeneral
::
get_subopr_list
(
const
TensorLayoutArray
&
layouts
,
const
OperatorBase
*
opr
)
const
{
AlgoBase
::
SizeArgs
args
{
static_cast
<
const
ConvolutionBackwardDataImpl
*>
(
opr
),
layouts
[
0
],
layouts
[
1
],
layouts
[
2
]};
auto
&&
config
=
sub_opr_config
(
args
);
std
::
string
param_str
;
Algorithm
::
serialize_write_pod
(
config
.
second
,
param_str
);
return
{{
Algorithm
::
OprType
::
CONVOLUTION_BACKWARD_DATA
,
param_str
,
config
.
first
}};
}
bool
ConvolutionBackwardDataImpl
::
AlgoGroupConvGeneral
::
is_available
(
...
...
@@ -46,44 +85,60 @@ bool ConvolutionBackwardDataImpl::AlgoGroupConvGeneral::is_available(
}
if
(
args
.
filter_meta
.
group
<=
1
)
return
false
;
auto
sub_args
=
args
;
TensorLayout
diff_pg
,
grad_pg
;
modify_size_args
(
sub_args
,
diff_pg
,
grad_pg
);
return
m_impl
->
is_available
(
sub_args
);
if
(
args
.
filter_meta
.
format
!=
megdnn
::
param
::
Convolution
::
Format
::
NCHW
)
{
return
false
;
}
auto
config
=
prepare_sub_opr
(
args
);
return
get_algorithm
(
static_cast
<
ConvolutionBackwardDataImpl
*>
(
config
.
second
.
get
()),
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
]);
}
WorkspaceBundle
ConvolutionBackwardDataImpl
::
AlgoGroupConvGeneral
::
get_workspace_bundle
(
void
*
ptr
,
const
SizeArgs
&
args
)
const
{
auto
config
=
prepare_sub_opr
(
args
);
size_t
sizes
=
config
.
second
->
get_workspace_in_bytes
(
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
]);
return
{
ptr
,
{
sizes
}};
}
size_t
ConvolutionBackwardDataImpl
::
AlgoGroupConvGeneral
::
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
{
auto
sub_args
=
args
;
TensorLayout
diff_pg
,
grad_pg
;
modify_size_args
(
sub_args
,
diff_pg
,
grad_pg
);
return
m_impl
->
get_workspace_in_bytes
(
sub_args
);
return
get_workspace_bundle
(
nullptr
,
args
).
total_size_in_bytes
();
}
void
ConvolutionBackwardDataImpl
::
AlgoGroupConvGeneral
::
exec
(
const
ExecArgs
&
args
)
const
{
auto
sub_args
=
args
;
TensorND
tflt
{
*
args
.
filter_tensor
},
tdiff
{
*
args
.
diff_tensor
},
tgrad
{
*
args
.
grad_tensor
};
modify_size_args
(
sub_args
,
tdiff
.
layout
,
tgrad
.
layout
);
sub_args
.
filter_tensor
=
&
tflt
;
sub_args
.
diff_tensor
=
&
tdiff
;
sub_args
.
grad_tensor
=
&
tgrad
;
auto
grp
=
args
.
filter_meta
.
group
;
auto
&&
fm
=
args
.
filter_meta
;
auto
strd_flt
=
(
fm
.
icpg
*
fm
.
ocpg
*
fm
.
spatial
[
0
]
*
fm
.
spatial
[
1
]
*
tflt
.
layout
.
dtype
.
size
()),
strd_diff
=
(
tdiff
.
layout
.
stride
[
1
]
*
fm
.
ocpg
*
tdiff
.
layout
.
dtype
.
size
()),
strd_grad
=
(
tgrad
.
layout
.
stride
[
1
]
*
fm
.
icpg
*
tgrad
.
layout
.
dtype
.
size
());
for
(
uint32_t
g
=
0
;
g
<
grp
;
++
g
)
{
m_impl
->
exec
(
sub_args
);
incr_voidp
(
tflt
.
raw_ptr
,
strd_flt
);
incr_voidp
(
tdiff
.
raw_ptr
,
strd_diff
);
incr_voidp
(
tgrad
.
raw_ptr
,
strd_grad
);
auto
bundle
=
get_workspace_bundle
(
args
.
workspace
.
raw_ptr
,
args
);
{
auto
config
=
prepare_sub_opr
(
args
);
TensorND
tfilter
{
args
.
filter_tensor
->
raw_ptr
,
config
.
first
[
0
]};
TensorND
tdiff
{
args
.
diff_tensor
->
raw_ptr
,
config
.
first
[
1
]};
TensorND
tgrad
{
args
.
grad_tensor
->
raw_ptr
,
config
.
first
[
2
]};
size_t
c_pos
=
1
;
auto
&&
fm
=
args
.
filter_meta
;
auto
strd_flt
=
fm
.
icpg
*
fm
.
ocpg
*
fm
.
spatial
[
0
]
*
fm
.
spatial
[
1
]
*
tfilter
.
layout
.
dtype
.
size
(),
strd_diff
=
tdiff
.
layout
.
stride
[
c_pos
]
*
fm
.
ocpg
*
tdiff
.
layout
.
dtype
.
size
(),
strd_grad
=
(
tgrad
.
layout
.
stride
[
c_pos
]
*
fm
.
icpg
*
tgrad
.
layout
.
dtype
.
size
());
auto
grp
=
args
.
filter_meta
.
group
;
for
(
uint32_t
g
=
0
;
g
<
grp
;
++
g
)
{
config
.
second
->
exec
(
tfilter
,
tdiff
,
tgrad
,
bundle
.
get_workspace
(
0
));
incr_voidp
(
tfilter
.
raw_ptr
,
strd_flt
);
incr_voidp
(
tdiff
.
raw_ptr
,
strd_diff
);
incr_voidp
(
tgrad
.
raw_ptr
,
strd_grad
);
}
}
}
...
...
dnn/src/cuda/convolution/backward_filter/algo.cpp
浏览文件 @
6ce212d2
...
...
@@ -26,23 +26,8 @@ ConvolutionBackwardFilterImpl::AlgoPack::AlgoPack() {
all_algos
.
push_back
(
&
i
);
}
all_algos
.
push_back
(
&
matmul
);
all_algos
.
push_back
(
&
group
);
all_algos
.
reserve
(
all_algos
.
size
()
*
2
);
// add gconv algos by AlgoGroupConvGeneral
auto
all_algos_data
=
all_algos
.
data
();
for
(
size_t
i
=
1
;
i
<
all_algos
.
size
();
++
i
)
{
gconv
.
push_back
({
all_algos
[
i
]});
}
for
(
size_t
i
=
1
;
i
<
all_algos
.
size
();
++
i
)
{
algo2gconv
[
all_algos
[
i
]]
=
&
gconv
[
i
-
1
];
}
for
(
auto
&&
i
:
gconv
)
{
all_algos
.
push_back
(
&
i
);
}
megdnn_assert
(
all_algos_data
==
all_algos
.
data
());
non_cudnn_algos
.
push_back
(
all_algos
.
rbegin
()[
0
]);
// group matmul
all_algos
.
push_back
(
&
bfloat16
);
bfloat16_algos
.
push_back
(
&
bfloat16
);
...
...
@@ -68,7 +53,7 @@ ConvolutionBackwardFilterImpl::AlgoPack
ConvolutionBackwardFilterImpl
::
sm_algo_pack
;
ConvolutionBackwardFilterImpl
::
AlgoBase
::
SizeArgs
::
SizeArgs
(
ConvolutionBackwardFilterImpl
*
o
,
const
ConvolutionBackwardFilterImpl
*
o
,
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
)
:
SizeArgs
(
o
,
src
,
diff
,
grad
,
o
->
make_canonized_filter_meta
(
src
.
ndim
,
grad
))
...
...
@@ -76,7 +61,7 @@ ConvolutionBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs(
}
ConvolutionBackwardFilterImpl
::
AlgoBase
::
SizeArgs
::
SizeArgs
(
ConvolutionBackwardFilterImpl
*
o
,
const
TensorLayout
&
src
,
const
ConvolutionBackwardFilterImpl
*
o
,
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
const
CanonizedFilterMeta
&
grad_meta
)
:
handle
{
concrete_handle
(
o
->
handle
())},
...
...
@@ -87,7 +72,7 @@ ConvolutionBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs(
opr
{
o
}
{}
ConvolutionBackwardFilterImpl
::
AlgoBase
::
ExecArgs
::
ExecArgs
(
ConvolutionBackwardFilterImpl
*
opr
,
const
ConvolutionBackwardFilterImpl
*
opr
,
_megdnn_tensor_in
src
,
_megdnn_tensor_in
diff
,
_megdnn_tensor_out
grad
,
...
...
dnn/src/cuda/convolution/backward_filter/algo.h
浏览文件 @
6ce212d2
...
...
@@ -45,16 +45,18 @@ public:
HandleImpl
*
handle
;
const
TensorLayout
*
src_layout
,
*
diff_layout
,
*
grad_layout
;
CanonizedFilterMeta
grad_filter_meta
;
ConvolutionBackwardFilterImpl
*
opr
;
const
ConvolutionBackwardFilterImpl
*
opr
;
std
::
string
to_string
()
const
;
void
init_desc
(
convolution
::
CUDNNBwdFilterDescs
&
desc
)
const
{
desc
.
set
(
*
src_layout
,
*
diff_layout
,
grad_filter_meta
,
opr
->
param
());
}
SizeArgs
(
ConvolutionBackwardFilterImpl
*
opr
,
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
);
SizeArgs
(
ConvolutionBackwardFilterImpl
*
opr
,
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
SizeArgs
(
const
ConvolutionBackwardFilterImpl
*
opr
,
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
);
SizeArgs
(
const
ConvolutionBackwardFilterImpl
*
opr
,
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
const
CanonizedFilterMeta
&
grad_meta
);
convolution
::
ForwardSizeArgs
as_fwd_args
()
const
{
...
...
@@ -66,9 +68,9 @@ public:
const
TensorND
*
src_tensor
,
*
diff_tensor
,
*
grad_tensor
;
Workspace
workspace
;
ExecArgs
(
ConvolutionBackwardFilterImpl
*
opr
,
_megdnn_tensor_in
src
,
_megdnn_tensor_in
diff
,
_megdnn_tensor_out
grad
,
_megdnn_workspace
workspace
);
ExecArgs
(
const
ConvolutionBackwardFilterImpl
*
opr
,
_megdnn_tensor_in
src
,
_megdnn_tensor_in
diff
,
_megdnn_
tensor_out
grad
,
_megdnn_
workspace
workspace
);
};
virtual
bool
is_available
(
const
SizeArgs
&
args
)
const
=
0
;
virtual
size_t
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
=
0
;
...
...
@@ -203,29 +205,25 @@ private:
//! implement group conv by another algo
class
ConvolutionBackwardFilterImpl
::
AlgoGroupConvGeneral
final
:
public
AlgoBase
{
AlgoBase
*
m_impl
;
std
::
string
m_name
;
public:
AlgoGroupConvGeneral
(
AlgoBase
*
impl
);
bool
is_available
(
const
SizeArgs
&
args
)
const
override
;
size_t
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
override
;
void
exec
(
const
ExecArgs
&
args
)
const
override
;
std
::
vector
<
SearchItem
>
get_subopr_list
(
const
TensorLayoutArray
&
layouts
,
const
OperatorBase
*
opr
)
const
override
;
const
char
*
name
()
const
override
{
return
m_name
.
c_str
();
}
static
void
modify_size_args
(
SizeArgs
&
args
,
TensorLayout
&
src_pg
,
TensorLayout
&
diff_pg
);
const
char
*
name
()
const
override
{
return
"CUDA:GROUP_CONV_BACKWARD_FILTER"
;
}
MEGDNN_DECL_ALGO_TYPE
(
CUDA_GROUP_CONV_GENERAL
)
AlgoAttribute
attribute
()
const
override
{
auto
ret
=
static_cast
<
AlgoAttribute
>
(
0
);
if
(
m_impl
->
contain_attribute_all
(
AlgoAttribute
::
REPRODUCIBLE
))
{
ret
|=
AlgoAttribute
::
REPRODUCIBLE
;
}
return
ret
;
return
AlgoAttribute
::
REPRODUCIBLE
;
}
private:
WorkspaceBundle
get_workspace_bundle
(
void
*
ptr
,
const
SizeArgs
&
args
)
const
;
};
class
ConvolutionBackwardFilterImpl
::
AlgoPack
:
NonCopyableObj
{
...
...
@@ -240,8 +238,7 @@ public:
std
::
vector
<
AlgoCUDNN
>
cudnn
;
AlgoMatmul
matmul
;
AlgoChanwise
chanwise
;
std
::
vector
<
AlgoGroupConvGeneral
>
gconv
;
std
::
unordered_map
<
AlgoBase
*
,
AlgoGroupConvGeneral
*>
algo2gconv
;
AlgoGroupConvGeneral
group
;
AlgoBFloat16
bfloat16
;
std
::
vector
<
AlgoBase
*>
...
...
dnn/src/cuda/convolution/backward_filter/group_conv.cpp
浏览文件 @
6ce212d2
...
...
@@ -15,25 +15,63 @@ using namespace megdnn;
using
namespace
cuda
;
using
namespace
convolution
;
void
ConvolutionBackwardFilterImpl
::
AlgoGroupConvGeneral
::
modify_size_args
(
ConvolutionBackwardFilterImpl
::
AlgoBase
::
SizeArgs
&
args
,
TensorLayout
&
src_pg
,
TensorLayout
&
diff_pg
)
{
src_pg
=
*
args
.
src_layout
;
diff_pg
=
*
args
.
diff_layout
;
namespace
{
std
::
pair
<
TensorLayoutArray
,
Convolution
::
Param
>
sub_opr_config
(
const
ConvolutionBackwardFilterImpl
::
AlgoBase
::
SizeArgs
&
args
)
{
SmallVector
<
size_t
>
flt_shape
(
0
);
std
::
vector
<
ptrdiff_t
>
flt_stride
(
0
);
size_t
idx
=
0
;
// check if the first dim is group
if
(
args
.
grad_layout
->
ndim
>
args
.
diff_layout
->
ndim
)
++
idx
;
for
(;
idx
<
args
.
grad_layout
->
ndim
;
++
idx
)
{
flt_shape
.
push_back
(
args
.
grad_layout
->
shape
[
idx
]);
flt_stride
.
push_back
(
args
.
grad_layout
->
stride
[
idx
]);
}
TensorLayout
filter_pg
(
flt_shape
,
flt_stride
,
args
.
grad_layout
->
dtype
,
args
.
grad_layout
->
format
);
TensorLayout
src_pg
=
*
args
.
src_layout
;
TensorLayout
diff_pg
=
*
args
.
diff_layout
;
auto
nr_grp
=
args
.
grad_filter_meta
.
group
;
args
.
grad_filter_meta
.
group
=
1
;
src_pg
.
shape
[
1
]
/=
nr_grp
;
diff_pg
.
shape
[
1
]
/=
nr_grp
;
args
.
src_layout
=
&
src_pg
;
args
.
diff_layout
=
&
diff_pg
;
size_t
c_pos
=
1
;
src_pg
.
shape
[
c_pos
]
/=
nr_grp
;
diff_pg
.
shape
[
c_pos
]
/=
nr_grp
;
megdnn
::
param
::
Convolution
param
=
args
.
opr
->
param
();
param
.
sparse
=
megdnn
::
param
::
ConvBias
::
Sparse
::
DENSE
;
std
::
pair
<
TensorLayoutArray
,
ConvolutionBackwardFilterImpl
::
Param
>
ret
;
ret
.
first
=
{
src_pg
,
diff_pg
,
filter_pg
};
ret
.
second
=
param
;
return
ret
;
}
ConvolutionBackwardFilterImpl
::
AlgoGroupConvGeneral
::
AlgoGroupConvGeneral
(
AlgoBase
*
impl
)
:
m_impl
{
impl
}
{
m_name
=
"group_conv:"
;
m_name
+=
impl
->
name
();
std
::
pair
<
TensorLayoutArray
,
std
::
unique_ptr
<
ConvolutionBackwardFilter
>>
prepare_sub_opr
(
const
ConvolutionBackwardFilterImpl
::
AlgoBase
::
SizeArgs
&
args
)
{
auto
conv_bwd_filter_opr
=
args
.
handle
->
create_operator
<
ConvolutionBackwardFilter
>
();
set_execution_policy
<
ConvolutionBackwardFilter
,
ConvolutionBackwardFilter
*>
(
args
.
opr
,
conv_bwd_filter_opr
.
get
());
auto
&&
config
=
sub_opr_config
(
args
);
conv_bwd_filter_opr
->
param
()
=
config
.
second
;
return
{
config
.
first
,
std
::
move
(
conv_bwd_filter_opr
)};
}
}
// namespace
std
::
vector
<
Algorithm
::
SearchItem
>
ConvolutionBackwardFilterImpl
::
AlgoGroupConvGeneral
::
get_subopr_list
(
const
TensorLayoutArray
&
layouts
,
const
OperatorBase
*
opr
)
const
{
AlgoBase
::
SizeArgs
args
{
static_cast
<
const
ConvolutionBackwardFilterImpl
*>
(
opr
),
layouts
[
0
],
layouts
[
1
],
layouts
[
2
]};
auto
&&
config
=
sub_opr_config
(
args
);
std
::
string
param_str
;
Algorithm
::
serialize_write_pod
(
config
.
second
,
param_str
);
return
{{
Algorithm
::
OprType
::
CONVOLUTION_BACKWARD_FILTER
,
param_str
,
config
.
first
}};
}
bool
ConvolutionBackwardFilterImpl
::
AlgoGroupConvGeneral
::
is_available
(
...
...
@@ -44,44 +82,60 @@ bool ConvolutionBackwardFilterImpl::AlgoGroupConvGeneral::is_available(
}
if
(
args
.
grad_filter_meta
.
group
<=
1
)
return
false
;
auto
sub_args
=
args
;
TensorLayout
src_pg
,
diff_pg
;
modify_size_args
(
sub_args
,
src_pg
,
diff_pg
);
return
m_impl
->
is_available
(
sub_args
);
if
(
args
.
grad_filter_meta
.
format
!=
megdnn
::
param
::
Convolution
::
Format
::
NCHW
)
{
return
false
;
}
auto
config
=
prepare_sub_opr
(
args
);
return
get_algorithm
(
static_cast
<
ConvolutionBackwardFilterImpl
*>
(
config
.
second
.
get
()),
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
]);
}
size_t
ConvolutionBackwardFilterImpl
::
AlgoGroupConvGeneral
::
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
{
auto
sub_args
=
args
;
TensorLayout
src_pg
,
diff_pg
;
modify_size_args
(
sub_args
,
src_pg
,
diff_pg
);
return
m_impl
->
get_workspace_in_bytes
(
sub_args
);
WorkspaceBundle
ConvolutionBackwardFilterImpl
::
AlgoGroupConvGeneral
::
get_workspace_bundle
(
void
*
ptr
,
const
SizeArgs
&
args
)
const
{
auto
config
=
prepare_sub_opr
(
args
);
size_t
sizes
=
config
.
second
->
get_workspace_in_bytes
(
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
]);
return
{
ptr
,
{
sizes
}};
}
size_t
ConvolutionBackwardFilterImpl
::
AlgoGroupConvGeneral
::
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
{
return
get_workspace_bundle
(
nullptr
,
args
).
total_size_in_bytes
();
}
void
ConvolutionBackwardFilterImpl
::
AlgoGroupConvGeneral
::
exec
(
const
ExecArgs
&
args
)
const
{
auto
sub_args
=
args
;
TensorND
tsrc
{
*
args
.
src_tensor
},
tdiff
{
*
args
.
diff_tensor
},
tgrad
{
*
args
.
grad_tensor
};
modify_size_args
(
sub_args
,
tsrc
.
layout
,
tdiff
.
layout
);
sub_args
.
src_tensor
=
&
tsrc
;
sub_args
.
diff_tensor
=
&
tdiff
;
sub_args
.
grad_tensor
=
&
tgrad
;
auto
&&
fm
=
args
.
grad_filter_meta
;
auto
grp
=
fm
.
group
;
auto
strd_src
=
(
tsrc
.
layout
.
stride
[
1
]
*
fm
.
icpg
*
tsrc
.
layout
.
dtype
.
size
()),
strd_diff
=
(
tdiff
.
layout
.
stride
[
1
]
*
fm
.
ocpg
*
tdiff
.
layout
.
dtype
.
size
()),
strd_grad
=
(
fm
.
icpg
*
fm
.
ocpg
*
fm
.
spatial
[
0
]
*
fm
.
spatial
[
1
]
*
tgrad
.
layout
.
dtype
.
size
());
for
(
uint32_t
g
=
0
;
g
<
grp
;
++
g
)
{
m_impl
->
exec
(
sub_args
);
incr_voidp
(
tsrc
.
raw_ptr
,
strd_src
);
incr_voidp
(
tdiff
.
raw_ptr
,
strd_diff
);
incr_voidp
(
tgrad
.
raw_ptr
,
strd_grad
);
const
ExecArgs
&
args
)
const
{
auto
bundle
=
get_workspace_bundle
(
args
.
workspace
.
raw_ptr
,
args
);
{
auto
config
=
prepare_sub_opr
(
args
);
TensorND
tsrc
{
args
.
src_tensor
->
raw_ptr
,
config
.
first
[
0
]};
TensorND
tdiff
{
args
.
diff_tensor
->
raw_ptr
,
config
.
first
[
1
]};
TensorND
tgrad
{
args
.
grad_tensor
->
raw_ptr
,
config
.
first
[
2
]};
size_t
c_pos
=
1
;
auto
&&
fm
=
args
.
grad_filter_meta
;
auto
strd_src
=
tsrc
.
layout
.
stride
[
c_pos
]
*
fm
.
icpg
*
tsrc
.
layout
.
dtype
.
size
(),
strd_diff
=
tdiff
.
layout
.
stride
[
c_pos
]
*
fm
.
ocpg
*
tdiff
.
layout
.
dtype
.
size
(),
strd_grad
=
fm
.
icpg
*
fm
.
ocpg
*
fm
.
spatial
[
0
]
*
fm
.
spatial
[
1
]
*
tgrad
.
layout
.
dtype
.
size
();
auto
grp
=
fm
.
group
;
for
(
uint32_t
g
=
0
;
g
<
grp
;
++
g
)
{
config
.
second
->
exec
(
tsrc
,
tdiff
,
tgrad
,
bundle
.
get_workspace
(
0
));
incr_voidp
(
tsrc
.
raw_ptr
,
strd_src
);
incr_voidp
(
tdiff
.
raw_ptr
,
strd_diff
);
incr_voidp
(
tgrad
.
raw_ptr
,
strd_grad
);
}
}
}
...
...
dnn/src/cuda/convolution/opr_impl.cpp
浏览文件 @
6ce212d2
...
...
@@ -104,19 +104,7 @@ ConvolutionBackwardDataImpl::get_algorithm_heuristic(
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
positive_attr
,
const
AlgoAttribute
&
negative_attr
)
{
auto
fm
=
check_layout_fwd
(
grad
,
filter
,
diff
);
return
get_algorithm_heuristic
(
filter
,
fm
,
diff
,
grad
,
workspace_limit_in_bytes
,
positive_attr
,
negative_attr
);
}
ConvolutionBackwardDataImpl
::
Algorithm
*
ConvolutionBackwardDataImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
filter
,
const
CanonizedFilterMeta
&
filter_meta
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
positive_attr
,
const
AlgoAttribute
&
negative_attr
)
{
AlgoBase
::
SizeArgs
args
(
this
,
filter
,
filter_meta
,
diff
,
grad
);
AlgoBase
::
SizeArgs
args
(
this
,
filter
,
diff
,
grad
);
if
(
args
.
filter_meta
.
group
>
1
&&
sm_algo_pack
.
chanwise
.
is_available_attribute
(
...
...
@@ -186,14 +174,11 @@ ConvolutionBackwardDataImpl::get_algorithm_heuristic(const TensorLayout& filter,
}
if
(
args
.
filter_meta
.
group
>
1
)
{
auto
orig_args
=
args
;
TensorLayout
a
,
b
;
AlgoGroupConvGeneral
::
modify_size_args
(
args
,
a
,
b
);
if
(
is_cudnn_supported
(
args
.
as_fwd_args
()))
{
if
(
auto
algo
=
get_cudnn_algo
())
return
sm_algo_pack
.
algo2gconv
.
at
(
algo
);
if
(
auto
algo
=
megdnn
::
get_algo_match_attribute
<
ConvolutionBackwardDataImpl
>
(
&
sm_algo_pack
.
group
,
positive_attr
,
negative_attr
))
{
return
algo
;
}
args
=
orig_args
;
}
if
(
args
.
filter_layout
->
dtype
.
enumv
()
!=
...
...
@@ -212,7 +197,7 @@ size_t ConvolutionBackwardDataImpl::get_workspace_in_bytes(
const
TensorLayout
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
)
{
AlgoBase
::
SizeArgs
args
(
this
,
filter
,
diff
,
grad
);
return
get_algorithm
(
this
,
filter
,
args
.
filter_meta
,
diff
,
grad
)
return
get_algorithm
(
this
,
filter
,
diff
,
grad
)
->
get_workspace_in_bytes
(
args
);
}
...
...
@@ -227,8 +212,7 @@ void ConvolutionBackwardFilterImpl::exec(_megdnn_tensor_in src,
_megdnn_tensor_out
grad
,
_megdnn_workspace
workspace
)
{
AlgoBase
::
ExecArgs
args
(
this
,
src
,
diff
,
grad
,
workspace
);
auto
algo
=
get_algorithm
(
this
,
src
.
layout
,
diff
.
layout
,
grad
.
layout
,
args
.
grad_filter_meta
);
auto
algo
=
get_algorithm
(
this
,
src
.
layout
,
diff
.
layout
,
grad
.
layout
);
algo
->
check_workspace
(
args
,
workspace
).
exec
(
args
);
}
...
...
@@ -246,20 +230,7 @@ ConvolutionBackwardFilterImpl::get_algorithm_heuristic(
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
positive_attr
,
const
AlgoAttribute
&
negative_attr
)
{
auto
fm
=
check_layout_fwd
(
src
,
grad
,
diff
);
return
get_algorithm_heuristic
(
src
,
diff
,
grad
,
fm
,
workspace_limit_in_bytes
,
positive_attr
,
negative_attr
);
}
ConvolutionBackwardFilterImpl
::
Algorithm
*
ConvolutionBackwardFilterImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
const
CanonizedFilterMeta
&
grad_meta
,
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
positive_attr
,
const
AlgoAttribute
&
negative_attr
)
{
AlgoBase
::
SizeArgs
args
(
this
,
src
,
diff
,
grad
,
grad_meta
);
AlgoBase
::
SizeArgs
args
(
this
,
src
,
diff
,
grad
);
if
(
args
.
grad_filter_meta
.
group
>
1
&&
sm_algo_pack
.
chanwise
.
is_available_attribute
(
...
...
@@ -332,14 +303,11 @@ ConvolutionBackwardFilterImpl::get_algorithm_heuristic(
}
if
(
args
.
grad_filter_meta
.
group
>
1
)
{
auto
orig_args
=
args
;
TensorLayout
a
,
b
;
AlgoGroupConvGeneral
::
modify_size_args
(
args
,
a
,
b
);
if
(
is_cudnn_supported
(
args
.
as_fwd_args
()))
{
if
(
auto
algo
=
get_cudnn_algo
())
return
sm_algo_pack
.
algo2gconv
.
at
(
algo
);
if
(
auto
algo
=
megdnn
::
get_algo_match_attribute
<
ConvolutionBackwardFilterImpl
>
(
&
sm_algo_pack
.
group
,
positive_attr
,
negative_attr
))
{
return
algo
;
}
args
=
orig_args
;
}
if
(
args
.
src_layout
->
dtype
.
enumv
()
!=
DTypeTrait
<
dtype
::
BFloat16
>::
enumv
)
{
...
...
@@ -357,7 +325,7 @@ size_t ConvolutionBackwardFilterImpl::get_workspace_in_bytes(
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
)
{
AlgoBase
::
SizeArgs
args
(
this
,
src
,
diff
,
grad
);
return
get_algorithm
(
this
,
src
,
diff
,
grad
,
args
.
grad_filter_meta
)
return
get_algorithm
(
this
,
src
,
diff
,
grad
)
->
get_workspace_in_bytes
(
args
);
}
...
...
dnn/src/cuda/convolution/opr_impl.h
浏览文件 @
6ce212d2
...
...
@@ -74,17 +74,6 @@ public:
using
ConvolutionBackwardData
::
ConvolutionBackwardData
;
void
exec
(
_megdnn_tensor_in
filter
,
_megdnn_tensor_in
diff
,
_megdnn_tensor_out
grad
,
_megdnn_workspace
workspace
)
override
;
AlgorithmInfo
get_algorithm_info_heuristic
(
const
TensorLayout
&
filter
,
const
CanonizedFilterMeta
&
filter_meta
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
positive_attr
,
const
AlgoAttribute
&
negative_attr
)
{
return
get_algorithm_heuristic
(
filter
,
filter_meta
,
diff
,
grad
,
workspace_limit_in_bytes
,
positive_attr
,
negative_attr
)
->
info
();
}
AlgorithmInfo
get_algorithm_info_heuristic
(
const
TensorLayout
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
...
...
@@ -128,14 +117,6 @@ protected:
const
AlgoAttribute
&
negative_attr
)
override
;
private:
Algorithm
*
get_algorithm_heuristic
(
const
TensorLayout
&
filter
,
const
CanonizedFilterMeta
&
filter_meta
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
positive_attr
,
const
AlgoAttribute
&
negative_attr
);
static
AlgoPack
sm_algo_pack
;
};
...
...
@@ -147,17 +128,6 @@ public:
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
)
override
;
AlgorithmInfo
get_algorithm_info_heuristic
(
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
const
CanonizedFilterMeta
&
grad_meta
,
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
positive_attr
,
const
AlgoAttribute
&
negative_attr
)
{
return
get_algorithm_heuristic
(
src
,
diff
,
grad
,
grad_meta
,
workspace_limit_in_bytes
,
positive_attr
,
negative_attr
)
->
info
();
}
AlgorithmInfo
get_algorithm_info_heuristic
(
const
TensorLayout
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
...
...
@@ -195,14 +165,6 @@ protected:
const
AlgoAttribute
&
negative_attr
)
override
;
private:
Algorithm
*
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
const
CanonizedFilterMeta
&
grad_meta
,
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
positive_attr
,
const
AlgoAttribute
&
negative_attr
);
static
AlgoPack
sm_algo_pack
;
};
...
...
dnn/test/cuda/conv_bias.cpp
浏览文件 @
6ce212d2
...
...
@@ -1034,10 +1034,11 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_GROUP) {
// float case
Checker
<
ConvBiasForward
>
checker
(
handle_cuda
());
checker
.
set_before_exec_callback
(
conv_bias
::
ConvBiasAlgoChecker
<
ConvBias
>
(
ConvBias
>
(
ExecutionPolicyAlgoName
{
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
DirectParam
>
(
"CUDA:GROUP_CONV"
,
{})
.
c_str
()));
.
c_str
(),
{{
"CUDNN"
,
{}}}}));
ConvBias
::
Param
param
;
param
.
sparse
=
ConvBias
::
Param
::
Sparse
::
GROUP
;
param
.
nonlineMode
=
mode
;
...
...
dnn/test/cuda/group_conv.cpp
浏览文件 @
6ce212d2
...
...
@@ -108,39 +108,33 @@ TEST_F(CUDA, GROUP_CONV_FORWARD)
}
TEST_F
(
CUDA
,
GROUP_CONV_FORWARD_1x1
)
{
auto
run
=
[
&
](
size_t
N
,
size_t
IC
,
size_t
IH
,
size_t
IW
,
size_t
FH
,
size_t
FW
,
size_t
OC
,
size_t
group
)
{
auto
run
=
[
&
](
size_t
N
,
size_t
IC
,
size_t
IH
,
size_t
IW
,
size_t
FH
,
size_t
FW
,
size_t
OC
,
size_t
group
)
{
Checker
<
Convolution
>
checker
(
handle_cuda
());
#if CUDNN_MAJOR <= 6
std
::
string
conv1x1_name
=
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
MatmulParam
>
(
"BATCHEDMATMUL"
,
{});
checker
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionForward
>
(
ExecutionPolicyAlgoName
{
"DEFAULT"
,
{{
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
DirectParam
>
(
ssprintf
(
"%s:%s"
,
"CUDA:GROUP_CONV"
,
conv1x1_name
.
c_str
())
.
c_str
(),
{})
.
c_str
(),
{}}}}));
#endif
ConvBiasForward
::
algo_name
<
ConvBias
::
MatmulParam
>
(
"INPLACE_MATMUL"
,
{});
checker
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionForward
>
(
ExecutionPolicyAlgoName
{
"DEFAULT"
,
{{
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
DirectParam
>
(
"CUDA:GROUP_CONV"
,
{})
.
c_str
(),
{{
conv1x1_name
.
c_str
(),
{}}}}}}));
Convolution
::
Param
param
;
param
.
sparse
=
Convolution
::
Param
::
Sparse
::
GROUP
;
auto
ICg
=
IC
/
group
;
auto
OCg
=
OC
/
group
;
checker
.
set_param
(
param
).
exec
(
{{
N
,
IC
,
IH
,
IW
},
{
group
,
OCg
,
ICg
,
FH
,
FW
},
{}});
checker
.
set_param
(
param
).
exec
(
{
{
N
,
IC
,
IH
,
IW
},
{
group
,
OCg
,
ICg
,
FH
,
FW
},
{}});
};
size_t
ic
=
192
;
for
(
size_t
g
=
2
;
g
<=
3
;
g
+=
1
)
{
for
(
size_t
ih
=
8
;
ih
<=
128
;
ih
*=
4
)
{
size_t
iw
=
ih
;
run
(
2
,
ic
,
ih
,
iw
,
1
,
1
,
ic
/
g
,
g
);
run
(
2
,
ic
,
ih
+
1
,
iw
+
1
,
1
,
1
,
ic
/
g
,
g
);
run
(
2
,
ic
,
ih
+
1
,
iw
+
1
,
1
,
1
,
ic
/
g
,
g
);
}
}
}
...
...
@@ -189,6 +183,54 @@ TEST_F(CUDA, GROUP_CONV_BACKWARD_DATA)
8
);
}
TEST_F
(
CUDA
,
GROUP_CONV_BACKWARD_DATA_CUDNN
)
{
auto
run
=
[
&
](
size_t
N
,
size_t
IC
,
size_t
IH
,
size_t
IW
,
size_t
FH
,
size_t
FW
,
size_t
OC
,
size_t
OH
,
size_t
OW
,
size_t
PH
,
size_t
PW
,
size_t
SH
,
size_t
SW
,
size_t
group
)
{
Checker
<
ConvolutionBackwardData
>
checker
(
handle_cuda
());
checker
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionBackwardData
>
(
ExecutionPolicyAlgoName
{
"CUDA:GROUP_CONV_BACKWARD_DATA"
,
{{
"CUDNN"
,
{}}}}));
ConvolutionBackwardData
::
Param
param
;
param
.
sparse
=
Convolution
::
Param
::
Sparse
::
GROUP
;
param
.
pad_h
=
PH
;
param
.
pad_w
=
PW
;
param
.
stride_h
=
SH
;
param
.
stride_w
=
SW
;
auto
ICg
=
IC
/
group
;
auto
OCg
=
OC
/
group
;
checker
.
set_param
(
param
).
exec
({{
group
,
OCg
,
ICg
,
FH
,
FW
},
{
N
,
OC
,
OH
,
OW
},
{
N
,
IC
,
IH
,
IW
}});
};
// normal case
run
(
2
,
64
,
7
,
7
,
3
,
3
,
32
,
5
,
5
,
0
,
0
,
1
,
1
,
2
);
// padded case
run
(
2
,
32
,
7
,
7
,
3
,
3
,
64
,
7
,
7
,
1
,
1
,
1
,
1
,
4
);
// strided case
run
(
2
,
32
,
7
,
7
,
3
,
3
,
64
,
3
,
3
,
0
,
0
,
2
,
2
,
8
);
}
TEST_F
(
CUDA
,
GROUP_CONV_BACKWARD_FILTER
)
{
auto
run
=
[
&
](
size_t
N
,
size_t
IC
,
size_t
IH
,
size_t
IW
,
...
...
@@ -233,6 +275,52 @@ TEST_F(CUDA, GROUP_CONV_BACKWARD_FILTER)
8
);
}
TEST_F
(
CUDA
,
GROUP_CONV_BACKWARD_FILTER_CUDNN
)
{
auto
run
=
[
&
](
size_t
N
,
size_t
IC
,
size_t
IH
,
size_t
IW
,
size_t
FH
,
size_t
FW
,
size_t
OC
,
size_t
OH
,
size_t
OW
,
size_t
PH
,
size_t
PW
,
size_t
SH
,
size_t
SW
,
size_t
group
)
{
Checker
<
ConvolutionBackwardFilter
>
checker
(
handle_cuda
());
checker
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionBackwardFilter
>
(
ExecutionPolicyAlgoName
{
"CUDA:GROUP_CONV_BACKWARD_FILTER"
,
{{
"CUDNN"
,
{}}}}));
ConvolutionBackwardFilter
::
Param
param
;
param
.
sparse
=
Convolution
::
Param
::
Sparse
::
GROUP
;
param
.
pad_h
=
PH
;
param
.
pad_w
=
PW
;
param
.
stride_h
=
SH
;
param
.
stride_w
=
SW
;
auto
ICg
=
IC
/
group
;
auto
OCg
=
OC
/
group
;
checker
.
set_param
(
param
).
exec
({{
N
,
IC
,
IH
,
IW
},
{
N
,
OC
,
OH
,
OW
},
{
group
,
OCg
,
ICg
,
FH
,
FW
}});
};
// normal case
run
(
2
,
64
,
7
,
7
,
3
,
3
,
32
,
5
,
5
,
0
,
0
,
1
,
1
,
2
);
// padded case
run
(
2
,
32
,
7
,
7
,
3
,
3
,
64
,
7
,
7
,
1
,
1
,
1
,
1
,
4
);
// strided case
run
(
2
,
32
,
7
,
7
,
3
,
3
,
64
,
3
,
3
,
0
,
0
,
2
,
2
,
8
);
}
}
// namespace test
}
// namespace megdnn
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录