Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
4a92346b
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
403
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
4a92346b
编写于
7月 07, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor(mgb): refactor group conv3d
GitOrigin-RevId: 15360a3a417b34da99967525de174def66945060
上级
6ce212d2
变更
13
显示空白变更内容
内联
并排
Showing
13 changed file
with
509 addition
and
439 deletion
+509
-439
dnn/src/cuda/convolution3d/backward_data/algo.cpp
dnn/src/cuda/convolution3d/backward_data/algo.cpp
+10
-25
dnn/src/cuda/convolution3d/backward_data/algo.h
dnn/src/cuda/convolution3d/backward_data/algo.h
+23
-26
dnn/src/cuda/convolution3d/backward_data/group_conv.cpp
dnn/src/cuda/convolution3d/backward_data/group_conv.cpp
+101
-48
dnn/src/cuda/convolution3d/backward_filter/algo.cpp
dnn/src/cuda/convolution3d/backward_filter/algo.cpp
+13
-28
dnn/src/cuda/convolution3d/backward_filter/algo.h
dnn/src/cuda/convolution3d/backward_filter/algo.h
+24
-27
dnn/src/cuda/convolution3d/backward_filter/group_conv.cpp
dnn/src/cuda/convolution3d/backward_filter/group_conv.cpp
+104
-50
dnn/src/cuda/convolution3d/forward/algo.cpp
dnn/src/cuda/convolution3d/forward/algo.cpp
+16
-34
dnn/src/cuda/convolution3d/forward/algo.h
dnn/src/cuda/convolution3d/forward/algo.h
+15
-27
dnn/src/cuda/convolution3d/forward/group_conv.cpp
dnn/src/cuda/convolution3d/forward/group_conv.cpp
+105
-53
dnn/src/cuda/convolution3d/helper.h
dnn/src/cuda/convolution3d/helper.h
+1
-0
dnn/src/cuda/convolution3d/opr_impl.cpp
dnn/src/cuda/convolution3d/opr_impl.cpp
+21
-62
dnn/src/cuda/convolution3d/opr_impl.h
dnn/src/cuda/convolution3d/opr_impl.h
+0
-53
dnn/test/cuda/group_conv3d.cpp
dnn/test/cuda/group_conv3d.cpp
+76
-6
未找到文件。
dnn/src/cuda/convolution3d/backward_data/algo.cpp
浏览文件 @
4a92346b
...
...
@@ -24,21 +24,7 @@ Convolution3DBackwardDataImpl::AlgoPack::AlgoPack() {
for
(
auto
&&
i
:
cudnn
)
{
all_algos
.
push_back
(
&
i
);
}
all_algos
.
reserve
(
all_algos
.
size
()
*
2
);
// add gconv algos by AlgoGroupConvGeneral
auto
all_algos_data
=
all_algos
.
data
();
for
(
size_t
i
=
1
;
i
<
all_algos
.
size
();
++
i
)
{
gconv
.
push_back
({
all_algos
[
i
]});
}
for
(
size_t
i
=
1
;
i
<
all_algos
.
size
();
++
i
)
{
algo2gconv
[
all_algos
[
i
]]
=
&
gconv
[
i
-
1
];
}
for
(
auto
&&
i
:
gconv
)
{
all_algos
.
push_back
(
&
i
);
}
megdnn_assert
(
all_algos_data
==
all_algos
.
data
());
all_algos
.
push_back
(
&
group
);
for
(
auto
&&
algo
:
all_algos
)
{
m_all_algos_map
.
emplace
(
algo
->
info
().
desc
,
algo
);
...
...
@@ -61,27 +47,26 @@ Convolution3DBackwardDataImpl::AlgoPack::cudnn_from_enum(
Convolution3DBackwardDataImpl
::
AlgoPack
Convolution3DBackwardDataImpl
::
sm_algo_pack
;
Convolution3DBackwardDataImpl
::
AlgoBase
::
SizeArgs
::
SizeArgs
(
Convolution3DBackwardDataImpl
*
o
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
)
:
SizeArgs
(
o
,
o
->
make_canonized_filter_meta
(
grad
.
ndim
,
filter
),
diff
,
grad
)
{
}
const
Convolution3DBackwardDataImpl
*
o
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
)
:
SizeArgs
(
o
,
filter
,
o
->
make_canonized_filter_meta
(
grad
.
ndim
,
filter
),
diff
,
grad
)
{}
Convolution3DBackwardDataImpl
::
AlgoBase
::
SizeArgs
::
SizeArgs
(
Convolution3DBackwardDataImpl
*
o
,
const
CanonizedFilterMeta
&
filter
,
const
TensorLayout
&
diff
,
const
Convolution3DBackwardDataImpl
*
o
,
const
TensorLayout
&
filter
,
const
CanonizedFilterMeta
&
filter
_meta
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
)
:
handle
{
concrete_handle
(
o
->
handle
())},
filter_meta
{
filter
},
filter_meta
{
filter
_meta
},
diff_layout
{
&
diff
},
grad_layout
{
&
grad
},
filter_layout
{
&
filter
},
opr
{
o
}
{
}
Convolution3DBackwardDataImpl
::
AlgoBase
::
ExecArgs
::
ExecArgs
(
Convolution3DBackwardDataImpl
*
opr
,
const
Convolution3DBackwardDataImpl
*
opr
,
_megdnn_tensor_in
filter
,
_megdnn_tensor_in
diff
,
_megdnn_tensor_out
grad
,
...
...
dnn/src/cuda/convolution3d/backward_data/algo.h
浏览文件 @
4a92346b
...
...
@@ -42,31 +42,33 @@ public:
struct
SizeArgs
{
HandleImpl
*
handle
;
CanonizedFilterMeta
filter_meta
;
const
TensorLayout
*
diff_layout
,
*
grad_layout
;
Convolution3DBackwardDataImpl
*
opr
;
const
TensorLayout
*
diff_layout
,
*
grad_layout
,
*
filter_layout
;
const
Convolution3DBackwardDataImpl
*
opr
;
std
::
string
to_string
()
const
;
void
init_desc
(
convolution3d
::
CUDNNBwdDataDescs
&
desc
)
const
{
desc
.
set
(
filter_meta
,
*
diff_layout
,
*
grad_layout
,
opr
->
param
());
}
SizeArgs
(
Convolution3DBackwardDataImpl
*
opr
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
);
SizeArgs
(
Convolution3DBackwardDataImpl
*
opr
,
const
CanonizedFilterMeta
&
filter
,
const
TensorLayout
&
diff
,
SizeArgs
(
const
Convolution3DBackwardDataImpl
*
opr
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
);
SizeArgs
(
const
Convolution3DBackwardDataImpl
*
opr
,
const
TensorLayout
&
filter
,
const
CanonizedFilterMeta
&
filter_meta
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
);
convolution3d
::
ForwardSizeArgs
as_fwd_args
()
const
{
return
{
handle
,
grad_layout
,
filter_meta
,
diff
_layout
,
opr
->
param
().
data_type
};
return
{
handle
,
grad_layout
,
filter
_layout
,
filter_meta
,
diff_layout
,
opr
->
param
().
data_type
};
}
};
struct
ExecArgs
:
public
SizeArgs
{
const
TensorND
*
filter_tensor
,
*
diff_tensor
,
*
grad_tensor
;
Workspace
workspace
;
ExecArgs
(
Convolution3DBackwardDataImpl
*
opr
,
_megdnn_tensor_in
filte
r
,
_megdnn_tensor_in
diff
,
_megdnn_tensor_out
grad
,
_megdnn_workspace
workspace
);
ExecArgs
(
const
Convolution3DBackwardDataImpl
*
op
r
,
_megdnn_tensor_in
filter
,
_megdnn_tensor_in
diff
,
_megdnn_
tensor_out
grad
,
_megdnn_
workspace
workspace
);
};
virtual
bool
is_available
(
const
SizeArgs
&
args
)
const
=
0
;
virtual
size_t
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
=
0
;
...
...
@@ -154,29 +156,25 @@ public:
//! implement group conv by another algo
class
Convolution3DBackwardDataImpl
::
AlgoGroupConvGeneral
final
:
public
AlgoBase
{
AlgoBase
*
m_impl
;
std
::
string
m_name
;
public:
AlgoGroupConvGeneral
(
AlgoBase
*
impl
);
bool
is_available
(
const
SizeArgs
&
args
)
const
override
;
size_t
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
override
;
void
exec
(
const
ExecArgs
&
args
)
const
override
;
std
::
vector
<
SearchItem
>
get_subopr_list
(
const
TensorLayoutArray
&
layouts
,
const
OperatorBase
*
opr
)
const
override
;
const
char
*
name
()
const
override
{
return
m_name
.
c_str
();
}
const
char
*
name
()
const
override
{
return
"CUDA:GROUP_CONV3D_BACKWARD_DATA"
;
}
static
void
modify_size_args
(
SizeArgs
&
args
,
TensorLayout
&
diff_pg
,
TensorLayout
&
grad_pg
);
AlgoAttribute
attribute
()
const
override
{
auto
ret
=
static_cast
<
AlgoAttribute
>
(
0
);
if
(
m_impl
->
contain_attribute_all
(
AlgoAttribute
::
REPRODUCIBLE
))
{
ret
|=
AlgoAttribute
::
REPRODUCIBLE
;
}
return
ret
;
return
AlgoAttribute
::
REPRODUCIBLE
;
}
MEGDNN_DECL_ALGO_TYPE
(
CUDA_GROUP_CONV_GENERAL
)
private:
WorkspaceBundle
get_workspace_bundle
(
void
*
ptr
,
const
SizeArgs
&
args
)
const
;
};
class
Convolution3DBackwardDataImpl
::
AlgoPack
:
NonCopyableObj
{
...
...
@@ -190,8 +188,7 @@ public:
std
::
vector
<
AlgoCUDNN
>
cudnn
;
AlgoChanwise
chanwise
;
std
::
vector
<
AlgoGroupConvGeneral
>
gconv
;
std
::
unordered_map
<
AlgoBase
*
,
AlgoGroupConvGeneral
*>
algo2gconv
;
AlgoGroupConvGeneral
group
;
std
::
vector
<
AlgoBase
*>
//! all algorithms
...
...
dnn/src/cuda/convolution3d/backward_data/group_conv.cpp
浏览文件 @
4a92346b
...
...
@@ -15,69 +15,122 @@ using namespace megdnn;
using
namespace
cuda
;
using
namespace
convolution3d
;
void
Convolution3DBackwardDataImpl
::
AlgoGroupConvGeneral
::
modify_size_args
(
Convolution3DBackwardDataImpl
::
AlgoBase
::
SizeArgs
&
args
,
TensorLayout
&
diff_pg
,
TensorLayout
&
grad_pg
)
{
diff_pg
=
*
args
.
diff_layout
;
grad_pg
=
*
args
.
grad_layout
;
namespace
{
std
::
pair
<
TensorLayoutArray
,
Convolution3DBackwardDataImpl
::
Param
>
sub_opr_config
(
const
Convolution3DBackwardDataImpl
::
AlgoBase
::
SizeArgs
&
args
)
{
SmallVector
<
size_t
>
flt_shape
(
0
);
std
::
vector
<
ptrdiff_t
>
flt_stride
(
0
);
size_t
idx
=
0
;
// check if the first dim is group
if
(
args
.
filter_layout
->
ndim
>
args
.
grad_layout
->
ndim
)
++
idx
;
for
(;
idx
<
args
.
filter_layout
->
ndim
;
++
idx
)
{
flt_shape
.
push_back
(
args
.
filter_layout
->
shape
[
idx
]);
flt_stride
.
push_back
(
args
.
filter_layout
->
stride
[
idx
]);
}
TensorLayout
filter_pg
(
flt_shape
,
flt_stride
,
args
.
filter_layout
->
dtype
,
args
.
filter_layout
->
format
);
TensorLayout
diff_pg
=
*
args
.
diff_layout
;
TensorLayout
grad_pg
=
*
args
.
grad_layout
;
auto
nr_grp
=
args
.
filter_meta
.
group
;
args
.
filter_meta
.
group
=
1
;
diff_pg
.
shape
[
1
]
/=
nr_grp
;
grad_pg
.
shape
[
1
]
/=
nr_grp
;
args
.
diff_layout
=
&
diff_pg
;
args
.
grad_layout
=
&
grad_pg
;
size_t
c_pos
=
1
;
diff_pg
.
shape
[
c_pos
]
/=
nr_grp
;
grad_pg
.
shape
[
c_pos
]
/=
nr_grp
;
megdnn
::
param
::
Convolution3D
param
=
args
.
opr
->
param
();
param
.
sparse
=
megdnn
::
param
::
Convolution3D
::
Sparse
::
DENSE
;
std
::
pair
<
TensorLayoutArray
,
Convolution3DBackwardDataImpl
::
Param
>
ret
;
ret
.
first
=
{
filter_pg
,
diff_pg
,
grad_pg
};
ret
.
second
=
param
;
return
ret
;
}
Convolution3DBackwardDataImpl
::
AlgoGroupConvGeneral
::
AlgoGroupConvGeneral
(
AlgoBase
*
impl
)
:
m_impl
{
impl
}
{
m_name
=
"group_conv3d:"
;
m_name
+=
impl
->
name
();
std
::
pair
<
TensorLayoutArray
,
std
::
unique_ptr
<
Convolution3DBackwardData
>>
prepare_sub_opr
(
const
Convolution3DBackwardDataImpl
::
AlgoBase
::
SizeArgs
&
args
)
{
auto
conv3d_backdata_opr
=
args
.
handle
->
create_operator
<
Convolution3DBackwardData
>
();
set_execution_policy
<
Convolution3DBackwardData
,
Convolution3DBackwardData
*>
(
args
.
opr
,
conv3d_backdata_opr
.
get
());
auto
&&
config
=
sub_opr_config
(
args
);
conv3d_backdata_opr
->
param
()
=
config
.
second
;
return
{
config
.
first
,
std
::
move
(
conv3d_backdata_opr
)};
}
}
// namespace
std
::
vector
<
Algorithm
::
SearchItem
>
Convolution3DBackwardDataImpl
::
AlgoGroupConvGeneral
::
get_subopr_list
(
const
TensorLayoutArray
&
layouts
,
const
OperatorBase
*
opr
)
const
{
AlgoBase
::
SizeArgs
args
{
static_cast
<
const
Convolution3DBackwardDataImpl
*>
(
opr
),
layouts
[
0
],
layouts
[
1
],
layouts
[
2
]};
auto
&&
config
=
sub_opr_config
(
args
);
std
::
string
param_str
;
Algorithm
::
serialize_write_pod
(
config
.
second
,
param_str
);
return
{{
Algorithm
::
OprType
::
CONVOLUTION3D_BACKWARD_DATA
,
param_str
,
config
.
first
}};
}
bool
Convolution3DBackwardDataImpl
::
AlgoGroupConvGeneral
::
is_available
(
const
SizeArgs
&
args
)
const
{
if
(
args
.
filter_meta
.
group
<=
1
)
return
false
;
auto
sub_args
=
args
;
TensorLayout
diff_pg
,
grad_pg
;
modify_size_args
(
sub_args
,
diff_pg
,
grad_pg
);
return
m_impl
->
is_available
(
sub_args
);
if
(
args
.
filter_meta
.
format
!=
Param
::
Format
::
NCDHW
)
{
return
false
;
}
auto
config
=
prepare_sub_opr
(
args
);
return
get_algorithm
(
static_cast
<
Convolution3DBackwardDataImpl
*>
(
config
.
second
.
get
()),
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
]);
}
WorkspaceBundle
Convolution3DBackwardDataImpl
::
AlgoGroupConvGeneral
::
get_workspace_bundle
(
void
*
ptr
,
const
SizeArgs
&
args
)
const
{
auto
config
=
prepare_sub_opr
(
args
);
size_t
sizes
=
config
.
second
->
get_workspace_in_bytes
(
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
]);
return
{
ptr
,
{
sizes
}};
}
size_t
Convolution3DBackwardDataImpl
::
AlgoGroupConvGeneral
::
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
{
auto
sub_args
=
args
;
TensorLayout
diff_pg
,
grad_pg
;
modify_size_args
(
sub_args
,
diff_pg
,
grad_pg
);
return
m_impl
->
get_workspace_in_bytes
(
sub_args
);
size_t
Convolution3DBackwardDataImpl
::
AlgoGroupConvGeneral
::
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
{
return
get_workspace_bundle
(
nullptr
,
args
).
total_size_in_bytes
();
}
void
Convolution3DBackwardDataImpl
::
AlgoGroupConvGeneral
::
exec
(
const
ExecArgs
&
args
)
const
{
auto
sub_args
=
args
;
TensorND
tflt
{
*
args
.
filter_tensor
},
tdiff
{
*
args
.
diff_tensor
},
tgrad
{
*
args
.
grad_tensor
};
modify_size_args
(
sub_args
,
tdiff
.
layout
,
tgrad
.
layout
);
sub_args
.
filter_tensor
=
&
tflt
;
sub_args
.
diff_tensor
=
&
tdiff
;
sub_args
.
grad_tensor
=
&
tgrad
;
const
ExecArgs
&
args
)
const
{
auto
bundle
=
get_workspace_bundle
(
args
.
workspace
.
raw_ptr
,
args
);
{
auto
config
=
prepare_sub_opr
(
args
);
TensorND
tfilter
{
args
.
filter_tensor
->
raw_ptr
,
config
.
first
[
0
]};
TensorND
tdiff
{
args
.
diff_tensor
->
raw_ptr
,
config
.
first
[
1
]};
TensorND
tgrad
{
args
.
grad_tensor
->
raw_ptr
,
config
.
first
[
2
]};
size_t
c_pos
=
1
;
auto
grp
=
args
.
filter_meta
.
group
;
auto
&&
fm
=
args
.
filter_meta
;
auto
strd_flt
=
(
fm
.
icpg
*
fm
.
ocpg
*
fm
.
spatial
[
0
]
*
fm
.
spatial
[
1
]
*
fm
.
spatial
[
2
]
*
tflt
.
layout
.
dtype
.
size
()),
strd_diff
=
(
tdiff
.
layout
.
stride
[
1
]
*
fm
.
ocpg
*
tdiff
.
layout
.
dtype
.
size
()),
strd_grad
=
(
tgrad
.
layout
.
stride
[
1
]
*
fm
.
icpg
*
tgrad
.
layout
.
dtype
.
size
());
for
(
uint32_t
g
=
0
;
g
<
grp
;
++
g
)
{
m_impl
->
exec
(
sub_args
);
incr_voidp
(
tflt
.
raw_ptr
,
strd_flt
);
auto
&&
fm
=
args
.
filter_meta
;
auto
strd_flt
=
(
fm
.
icpg
*
fm
.
ocpg
*
fm
.
spatial
[
0
]
*
fm
.
spatial
[
1
]
*
fm
.
spatial
[
2
]
*
tfilter
.
layout
.
dtype
.
size
()),
strd_diff
=
(
tdiff
.
layout
.
stride
[
c_pos
]
*
fm
.
ocpg
*
tdiff
.
layout
.
dtype
.
size
()),
strd_grad
=
(
tgrad
.
layout
.
stride
[
c_pos
]
*
fm
.
icpg
*
tgrad
.
layout
.
dtype
.
size
());
for
(
uint32_t
g
=
0
;
g
<
grp
;
++
g
)
{
config
.
second
->
exec
(
tfilter
,
tdiff
,
tgrad
,
bundle
.
get_workspace
(
0
));
incr_voidp
(
tfilter
.
raw_ptr
,
strd_flt
);
incr_voidp
(
tdiff
.
raw_ptr
,
strd_diff
);
incr_voidp
(
tgrad
.
raw_ptr
,
strd_grad
);
}
}
}
// vim: syntax=cpp.doxygen
...
...
dnn/src/cuda/convolution3d/backward_filter/algo.cpp
浏览文件 @
4a92346b
...
...
@@ -26,21 +26,7 @@ Convolution3DBackwardFilterImpl::AlgoPack::AlgoPack() {
}
all_algos
.
push_back
(
&
inplace_matmul
);
all_algos
.
reserve
(
all_algos
.
size
()
*
2
);
// add gconv algos by AlgoGroupConvGeneral
auto
all_algos_data
=
all_algos
.
data
();
for
(
size_t
i
=
1
;
i
<
all_algos
.
size
();
++
i
)
{
gconv
.
push_back
({
all_algos
[
i
]});
}
for
(
size_t
i
=
1
;
i
<
all_algos
.
size
();
++
i
)
{
algo2gconv
[
all_algos
[
i
]]
=
&
gconv
[
i
-
1
];
}
for
(
auto
&&
i
:
gconv
)
{
all_algos
.
push_back
(
&
i
);
}
megdnn_assert
(
all_algos_data
==
all_algos
.
data
());
non_cudnn_algos
.
push_back
(
all_algos
.
rbegin
()[
0
]);
//group inplace_matmul
all_algos
.
push_back
(
&
group
);
for
(
auto
&&
algo
:
all_algos
)
{
m_all_algos_map
.
emplace
(
algo
->
info
().
desc
,
algo
);
...
...
@@ -64,27 +50,26 @@ Convolution3DBackwardFilterImpl::AlgoPack
Convolution3DBackwardFilterImpl
::
sm_algo_pack
;
Convolution3DBackwardFilterImpl
::
AlgoBase
::
SizeArgs
::
SizeArgs
(
Convolution3DBackwardFilterImpl
*
o
,
const
Convolution3DBackwardFilterImpl
*
o
,
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
)
:
SizeArgs
(
o
,
src
,
diff
,
o
->
make_canonized_filter_meta
(
src
.
ndim
,
grad
))
SizeArgs
(
o
,
src
,
diff
,
grad
,
o
->
make_canonized_filter_meta
(
src
.
ndim
,
grad
))
{
}
Convolution3DBackwardFilterImpl
::
AlgoBase
::
SizeArgs
::
SizeArgs
(
Convolution3DBackwardFilterImpl
*
o
,
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
CanonizedFilterMeta
&
grad
)
:
handle
{
concrete_handle
(
o
->
handle
())},
const
Convolution3DBackwardFilterImpl
*
o
,
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
const
CanonizedFilterMeta
&
grad_meta
)
:
handle
{
concrete_handle
(
o
->
handle
())},
src_layout
{
&
src
},
diff_layout
{
&
diff
},
grad_filter_meta
{
grad
},
opr
{
o
}
{
}
grad_layout
{
&
grad
},
grad_filter_meta
{
grad_meta
},
opr
{
o
}
{}
Convolution3DBackwardFilterImpl
::
AlgoBase
::
ExecArgs
::
ExecArgs
(
Convolution3DBackwardFilterImpl
*
opr
,
const
Convolution3DBackwardFilterImpl
*
opr
,
_megdnn_tensor_in
src
,
_megdnn_tensor_in
diff
,
_megdnn_tensor_out
grad
,
...
...
dnn/src/cuda/convolution3d/backward_filter/algo.h
浏览文件 @
4a92346b
...
...
@@ -36,31 +36,34 @@ public:
struct
SizeArgs
{
HandleImpl
*
handle
;
const
TensorLayout
*
src_layout
,
*
diff_layout
;
const
TensorLayout
*
src_layout
,
*
diff_layout
,
*
grad_layout
;
CanonizedFilterMeta
grad_filter_meta
;
Convolution3DBackwardFilterImpl
*
opr
;
const
Convolution3DBackwardFilterImpl
*
opr
;
std
::
string
to_string
()
const
;
void
init_desc
(
convolution3d
::
CUDNNBwdFilterDescs
&
desc
)
const
{
desc
.
set
(
*
src_layout
,
*
diff_layout
,
grad_filter_meta
,
opr
->
param
());
}
SizeArgs
(
Convolution3DBackwardFilterImpl
*
opr
,
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
);
SizeArgs
(
Convolution3DBackwardFilterImpl
*
opr
,
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
CanonizedFilterMeta
&
grad
);
SizeArgs
(
const
Convolution3DBackwardFilterImpl
*
opr
,
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
);
SizeArgs
(
const
Convolution3DBackwardFilterImpl
*
opr
,
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
const
CanonizedFilterMeta
&
grad_meta
);
convolution3d
::
ForwardSizeArgs
as_fwd_args
()
const
{
return
{
handle
,
src_layout
,
grad_filter_meta
,
diff
_layout
,
opr
->
param
().
data_type
};
return
{
handle
,
src_layout
,
grad
_layout
,
grad_filter_meta
,
diff_layout
,
opr
->
param
().
data_type
};
}
};
struct
ExecArgs
:
public
SizeArgs
{
const
TensorND
*
src_tensor
,
*
diff_tensor
,
*
grad_tensor
;
Workspace
workspace
;
ExecArgs
(
Convolution3DBackwardFilterImpl
*
opr
,
_megdnn_tensor_in
src
,
_megdnn_tensor_in
diff
,
_megdnn_tensor_out
grad
,
_megdnn_workspace
workspace
);
ExecArgs
(
const
Convolution3DBackwardFilterImpl
*
opr
,
_megdnn_tensor_in
src
,
_megdnn_tensor_in
diff
,
_megdnn_
tensor_out
grad
,
_megdnn_
workspace
workspace
);
};
virtual
bool
is_available
(
const
SizeArgs
&
args
)
const
=
0
;
virtual
size_t
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
=
0
;
...
...
@@ -162,30 +165,25 @@ public:
//! implement group conv by another algo
class
Convolution3DBackwardFilterImpl
::
AlgoGroupConvGeneral
final
:
public
AlgoBase
{
AlgoBase
*
m_impl
;
std
::
string
m_name
;
public:
AlgoGroupConvGeneral
(
AlgoBase
*
impl
);
bool
is_available
(
const
SizeArgs
&
args
)
const
override
;
size_t
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
override
;
void
exec
(
const
ExecArgs
&
args
)
const
override
;
std
::
vector
<
SearchItem
>
get_subopr_list
(
const
TensorLayoutArray
&
layouts
,
const
OperatorBase
*
opr
)
const
override
;
const
char
*
name
()
const
override
{
return
m_name
.
c_str
();
}
const
char
*
name
()
const
override
{
return
"CUDA:GROUP_CONV3D_BACKWARD_FILTER"
;
}
AlgoAttribute
attribute
()
const
override
{
auto
ret
=
static_cast
<
AlgoAttribute
>
(
0
);
if
(
m_impl
->
contain_attribute_all
(
AlgoAttribute
::
REPRODUCIBLE
))
{
ret
|=
AlgoAttribute
::
REPRODUCIBLE
;
}
return
ret
;
return
AlgoAttribute
::
REPRODUCIBLE
;
}
static
void
modify_size_args
(
SizeArgs
&
args
,
TensorLayout
&
src_pg
,
TensorLayout
&
diff_pg
);
MEGDNN_DECL_ALGO_TYPE
(
CUDA_GROUP_CONV_GENERAL
)
private:
WorkspaceBundle
get_workspace_bundle
(
void
*
ptr
,
const
SizeArgs
&
args
)
const
;
};
class
Convolution3DBackwardFilterImpl
::
AlgoPack
:
NonCopyableObj
{
...
...
@@ -200,8 +198,7 @@ public:
std
::
vector
<
AlgoCUDNN
>
cudnn
;
AlgoInplaceMatmul
inplace_matmul
;
AlgoChanwise
chanwise
;
std
::
vector
<
AlgoGroupConvGeneral
>
gconv
;
std
::
unordered_map
<
AlgoBase
*
,
AlgoGroupConvGeneral
*>
algo2gconv
;
AlgoGroupConvGeneral
group
;
std
::
vector
<
AlgoBase
*>
//! all algorithms
...
...
dnn/src/cuda/convolution3d/backward_filter/group_conv.cpp
浏览文件 @
4a92346b
...
...
@@ -15,70 +15,124 @@ using namespace megdnn;
using
namespace
cuda
;
using
namespace
convolution3d
;
void
Convolution3DBackwardFilterImpl
::
AlgoGroupConvGeneral
::
modify_size_args
(
Convolution3DBackwardFilterImpl
::
AlgoBase
::
SizeArgs
&
args
,
TensorLayout
&
src_pg
,
TensorLayout
&
diff_pg
)
{
src_pg
=
*
args
.
src_layout
;
diff_pg
=
*
args
.
diff_layout
;
namespace
{
std
::
pair
<
TensorLayoutArray
,
Convolution3DBackwardFilterImpl
::
Param
>
sub_opr_config
(
const
Convolution3DBackwardFilterImpl
::
AlgoBase
::
SizeArgs
&
args
)
{
SmallVector
<
size_t
>
flt_shape
(
0
);
std
::
vector
<
ptrdiff_t
>
flt_stride
(
0
);
size_t
idx
=
0
;
// check if the first dim is group
if
(
args
.
grad_layout
->
ndim
>
args
.
src_layout
->
ndim
)
++
idx
;
for
(;
idx
<
args
.
grad_layout
->
ndim
;
++
idx
)
{
flt_shape
.
push_back
(
args
.
grad_layout
->
shape
[
idx
]);
flt_stride
.
push_back
(
args
.
grad_layout
->
stride
[
idx
]);
}
TensorLayout
grad_pg
(
flt_shape
,
flt_stride
,
args
.
grad_layout
->
dtype
,
args
.
grad_layout
->
format
);
TensorLayout
src_pg
=
*
args
.
src_layout
;
TensorLayout
diff_pg
=
*
args
.
diff_layout
;
auto
nr_grp
=
args
.
grad_filter_meta
.
group
;
args
.
grad_filter_meta
.
group
=
1
;
src_pg
.
shape
[
1
]
/=
nr_grp
;
diff_pg
.
shape
[
1
]
/=
nr_grp
;
args
.
src_layout
=
&
src_pg
;
args
.
diff_layout
=
&
diff_pg
;
size_t
c_pos
=
1
;
src_pg
.
shape
[
c_pos
]
/=
nr_grp
;
diff_pg
.
shape
[
c_pos
]
/=
nr_grp
;
megdnn
::
param
::
Convolution3D
param
=
args
.
opr
->
param
();
param
.
sparse
=
megdnn
::
param
::
Convolution3D
::
Sparse
::
DENSE
;
std
::
pair
<
TensorLayoutArray
,
Convolution3DBackwardFilterImpl
::
Param
>
ret
;
ret
.
first
=
{
src_pg
,
diff_pg
,
grad_pg
};
ret
.
second
=
param
;
return
ret
;
}
Convolution3DBackwardFilterImpl
::
AlgoGroupConvGeneral
::
AlgoGroupConvGeneral
(
AlgoBase
*
impl
)
:
m_impl
{
impl
}
{
m_name
=
"group_conv3d:"
;
m_name
+=
impl
->
name
();
std
::
pair
<
TensorLayoutArray
,
std
::
unique_ptr
<
Convolution3DBackwardFilter
>>
prepare_sub_opr
(
const
Convolution3DBackwardFilterImpl
::
AlgoBase
::
SizeArgs
&
args
)
{
auto
conv3d_backfilter_opr
=
args
.
handle
->
create_operator
<
Convolution3DBackwardFilter
>
();
set_execution_policy
<
Convolution3DBackwardFilter
,
Convolution3DBackwardFilter
*>
(
args
.
opr
,
conv3d_backfilter_opr
.
get
());
auto
&&
config
=
sub_opr_config
(
args
);
conv3d_backfilter_opr
->
param
()
=
config
.
second
;
return
{
config
.
first
,
std
::
move
(
conv3d_backfilter_opr
)};
}
}
// namespace
std
::
vector
<
Algorithm
::
SearchItem
>
Convolution3DBackwardFilterImpl
::
AlgoGroupConvGeneral
::
get_subopr_list
(
const
TensorLayoutArray
&
layouts
,
const
OperatorBase
*
opr
)
const
{
AlgoBase
::
SizeArgs
args
{
static_cast
<
const
Convolution3DBackwardFilterImpl
*>
(
opr
),
layouts
[
0
],
layouts
[
1
],
layouts
[
2
]};
auto
&&
config
=
sub_opr_config
(
args
);
std
::
string
param_str
;
Algorithm
::
serialize_write_pod
(
config
.
second
,
param_str
);
return
{{
Algorithm
::
OprType
::
CONVOLUTION3D_BACKWARD_FILTER
,
param_str
,
config
.
first
}};
}
bool
Convolution3DBackwardFilterImpl
::
AlgoGroupConvGeneral
::
is_available
(
const
SizeArgs
&
args
)
const
{
const
SizeArgs
&
args
)
const
{
if
(
args
.
grad_filter_meta
.
group
<=
1
)
return
false
;
auto
sub_args
=
args
;
TensorLayout
src_pg
,
diff_pg
;
modify_size_args
(
sub_args
,
src_pg
,
diff_pg
);
return
m_impl
->
is_available
(
sub_args
);
if
(
args
.
grad_filter_meta
.
format
!=
Param
::
Format
::
NCDHW
)
{
return
false
;
}
auto
config
=
prepare_sub_opr
(
args
);
return
get_algorithm
(
static_cast
<
Convolution3DBackwardFilterImpl
*>
(
config
.
second
.
get
()),
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
]);
}
WorkspaceBundle
Convolution3DBackwardFilterImpl
::
AlgoGroupConvGeneral
::
get_workspace_bundle
(
void
*
ptr
,
const
SizeArgs
&
args
)
const
{
auto
config
=
prepare_sub_opr
(
args
);
size_t
sizes
=
config
.
second
->
get_workspace_in_bytes
(
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
]);
return
{
ptr
,
{
sizes
}};
}
size_t
Convolution3DBackwardFilterImpl
::
AlgoGroupConvGeneral
::
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
{
auto
sub_args
=
args
;
TensorLayout
src_pg
,
diff_pg
;
modify_size_args
(
sub_args
,
src_pg
,
diff_pg
);
return
m_impl
->
get_workspace_in_bytes
(
sub_args
);
size_t
Convolution3DBackwardFilterImpl
::
AlgoGroupConvGeneral
::
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
{
return
get_workspace_bundle
(
nullptr
,
args
).
total_size_in_bytes
();
}
void
Convolution3DBackwardFilterImpl
::
AlgoGroupConvGeneral
::
exec
(
const
ExecArgs
&
args
)
const
{
auto
sub_args
=
args
;
TensorND
tsrc
{
*
args
.
src_tensor
},
tdiff
{
*
args
.
diff_tensor
},
tgrad
{
*
args
.
grad_tensor
};
modify_size_args
(
sub_args
,
tsrc
.
layout
,
tdiff
.
layout
);
sub_args
.
src_tensor
=
&
tsrc
;
sub_args
.
diff_tensor
=
&
tdiff
;
sub_args
.
grad_tensor
=
&
tgrad
;
auto
&&
fm
=
args
.
grad_filter_meta
;
auto
grp
=
fm
.
group
;
auto
strd_src
=
(
tsrc
.
layout
.
stride
[
1
]
*
fm
.
icpg
*
tsrc
.
layout
.
dtype
.
size
()),
strd_diff
=
(
tdiff
.
layout
.
stride
[
1
]
*
fm
.
ocpg
*
tdiff
.
layout
.
dtype
.
size
()),
strd_grad
=
(
fm
.
icpg
*
fm
.
ocpg
*
fm
.
spatial
[
0
]
*
fm
.
spatial
[
1
]
*
fm
.
spatial
[
2
]
*
tgrad
.
layout
.
dtype
.
size
());
for
(
uint32_t
g
=
0
;
g
<
grp
;
++
g
)
{
m_impl
->
exec
(
sub_args
);
const
ExecArgs
&
args
)
const
{
auto
bundle
=
get_workspace_bundle
(
args
.
workspace
.
raw_ptr
,
args
);
{
auto
config
=
prepare_sub_opr
(
args
);
TensorND
tsrc
{
args
.
src_tensor
->
raw_ptr
,
config
.
first
[
0
]};
TensorND
tdiff
{
args
.
diff_tensor
->
raw_ptr
,
config
.
first
[
1
]};
TensorND
tgrad
{
args
.
grad_tensor
->
raw_ptr
,
config
.
first
[
2
]};
size_t
c_pos
=
1
;
auto
grp
=
args
.
grad_filter_meta
.
group
;
auto
&&
fm
=
args
.
grad_filter_meta
;
auto
strd_src
=
(
tsrc
.
layout
.
stride
[
c_pos
]
*
fm
.
icpg
*
tsrc
.
layout
.
dtype
.
size
()),
strd_diff
=
(
tdiff
.
layout
.
stride
[
c_pos
]
*
fm
.
ocpg
*
tdiff
.
layout
.
dtype
.
size
()),
strd_grad
=
(
fm
.
icpg
*
fm
.
ocpg
*
fm
.
spatial
[
0
]
*
fm
.
spatial
[
1
]
*
fm
.
spatial
[
2
]
*
tgrad
.
layout
.
dtype
.
size
());
for
(
uint32_t
g
=
0
;
g
<
grp
;
++
g
)
{
config
.
second
->
exec
(
tsrc
,
tdiff
,
tgrad
,
bundle
.
get_workspace
(
0
));
incr_voidp
(
tsrc
.
raw_ptr
,
strd_src
);
incr_voidp
(
tdiff
.
raw_ptr
,
strd_diff
);
incr_voidp
(
tgrad
.
raw_ptr
,
strd_grad
);
}
}
}
// vim: syntax=cpp.doxygen
...
...
dnn/src/cuda/convolution3d/forward/algo.cpp
浏览文件 @
4a92346b
...
...
@@ -28,22 +28,7 @@ Convolution3DForwardImpl::AlgoPack::AlgoPack() {
}
all_algos
.
push_back
(
&
inplace_matmul
);
all_algos
.
push_back
(
&
a1x1x1
);
all_algos
.
reserve
(
all_algos
.
size
()
*
2
);
// add gconv algos by AlgoGroupConvGeneral
auto
all_algos_data
=
all_algos
.
data
();
for
(
size_t
i
=
1
;
i
<
all_algos
.
size
();
++
i
)
{
gconv
.
push_back
({
all_algos
[
i
]});
}
for
(
size_t
i
=
1
;
i
<
all_algos
.
size
();
++
i
)
{
algo2gconv
[
all_algos
[
i
]]
=
&
gconv
[
i
-
1
];
}
for
(
auto
&&
i
:
gconv
)
{
all_algos
.
push_back
(
&
i
);
}
megdnn_assert
(
all_algos_data
==
all_algos
.
data
());
non_cudnn_algos
.
push_back
(
all_algos
.
rbegin
()[
1
]);
// group inplace_matmul
non_cudnn_algos
.
push_back
(
all_algos
.
rbegin
()[
0
]);
// group 1x1x1
all_algos
.
push_back
(
&
group
);
for
(
auto
&&
algo
:
all_algos
)
{
m_all_algos_map
.
emplace
(
algo
->
info
().
desc
,
algo
);
...
...
@@ -66,28 +51,25 @@ Convolution3DForwardImpl::AlgoPack::cudnn_from_enum(
Convolution3DForwardImpl
::
AlgoPack
Convolution3DForwardImpl
::
sm_algo_pack
;
Convolution3DForwardImpl
::
AlgoBase
::
SizeArgs
::
SizeArgs
(
Convolution3DForwardImpl
*
o
,
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
)
:
SizeArgs
(
o
,
src
,
o
->
make_canonized_filter_meta
(
src
.
ndim
,
filter
),
dst
)
{
}
const
Convolution3DForwardImpl
*
o
,
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
)
:
SizeArgs
(
o
,
src
,
filter
,
o
->
make_canonized_filter_meta
(
src
.
ndim
,
filter
),
dst
)
{}
Convolution3DForwardImpl
::
AlgoBase
::
SizeArgs
::
SizeArgs
(
Convolution3DForwardImpl
*
o
,
const
TensorLayout
&
src
,
const
CanonizedFilterMeta
&
filter
,
const
TensorLayout
&
dst
)
:
ForwardSizeArgs
{
concrete_handle
(
o
->
handle
()),
&
src
,
filter
,
&
dst
,
o
->
param
().
data_type
},
opr
{
o
}
{
}
const
Convolution3DForwardImpl
*
o
,
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
CanonizedFilterMeta
&
filter_meta
,
const
TensorLayout
&
dst
)
:
ForwardSizeArgs
{
concrete_handle
(
o
->
handle
()),
&
src
,
&
filter
,
filter_meta
,
&
dst
,
o
->
param
().
data_type
},
opr
{
o
}
{}
Convolution3DForwardImpl
::
AlgoBase
::
ExecArgs
::
ExecArgs
(
Convolution3DForwardImpl
*
opr
,
const
Convolution3DForwardImpl
*
opr
,
_megdnn_tensor_in
src
,
_megdnn_tensor_in
filter
,
_megdnn_tensor_out
dst
,
...
...
dnn/src/cuda/convolution3d/forward/algo.h
浏览文件 @
4a92346b
...
...
@@ -48,22 +48,24 @@ public:
AlgoBase
()
:
Algorithm
()
{
m_handle_type
=
Handle
::
HandleType
::
CUDA
;
}
struct
SizeArgs
:
public
convolution3d
::
ForwardSizeArgs
{
Convolution3DForwardImpl
*
opr
;
const
Convolution3DForwardImpl
*
opr
;
std
::
string
to_string
()
const
;
void
init_desc
(
convolution3d
::
CUDNNForwardDescs
&
desc
)
const
{
desc
.
set
(
*
src_layout
,
filter_meta
,
*
dst_layout
,
opr
->
param
());
}
SizeArgs
(
Convolution3DForwardImpl
*
opr
,
const
TensorLayout
&
src
,
SizeArgs
(
const
Convolution3DForwardImpl
*
opr
,
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
);
SizeArgs
(
Convolution3DForwardImpl
*
opr
,
const
TensorLayout
&
src
,
const
CanonizedFilterMeta
&
filter
,
const
TensorLayout
&
dst
);
SizeArgs
(
const
Convolution3DForwardImpl
*
opr
,
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
CanonizedFilterMeta
&
filter_meta
,
const
TensorLayout
&
dst
);
};
struct
ExecArgs
:
public
SizeArgs
{
const
TensorND
*
src_tensor
,
*
filter_tensor
,
*
dst_tensor
;
Workspace
workspace
;
ExecArgs
(
Convolution3DForwardImpl
*
opr
,
_megdnn_tensor_in
src
,
ExecArgs
(
const
Convolution3DForwardImpl
*
opr
,
_megdnn_tensor_in
src
,
_megdnn_tensor_in
filter
,
_megdnn_tensor_out
dst
,
_megdnn_workspace
workspace
);
};
...
...
@@ -114,35 +116,22 @@ public:
//! implement group conv by another algo
class
Convolution3DForwardImpl
::
AlgoGroupConvGeneral
final
:
public
AlgoBase
{
AlgoBase
*
m_impl
;
std
::
string
m_name
;
public:
AlgoGroupConvGeneral
(
AlgoBase
*
impl
);
bool
is_available
(
const
SizeArgs
&
args
)
const
override
;
size_t
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
override
;
void
exec
(
const
ExecArgs
&
args
)
const
override
;
std
::
vector
<
SearchItem
>
get_subopr_list
(
const
TensorLayoutArray
&
layouts
,
const
OperatorBase
*
opr
)
const
override
;
const
char
*
name
()
const
override
{
return
m_name
.
c_str
()
;
}
const
char
*
name
()
const
override
{
return
"CUDA:GROUP_CONV3D_FORWARD"
;
}
AlgoAttribute
attribute
()
const
override
{
auto
ret
=
AlgoAttribute
::
DEFAULT
;
if
(
m_impl
->
contain_attribute_all
(
AlgoAttribute
::
REPRODUCIBLE
))
{
ret
|=
AlgoAttribute
::
REPRODUCIBLE
;
}
#define cb(attr) \
if (m_impl->contain_attribute_all(attr)) { \
ret |= attr; \
}
MEGDNN_FOREACH_ALGO_ATTRIBUTE_INHERITABLE
(
cb
)
#undef cb
return
ret
;
return
AlgoAttribute
::
REPRODUCIBLE
;
}
static
void
modify_size_args
(
SizeArgs
&
args
,
TensorLayout
&
src_pg
,
TensorLayout
&
dst_pg
);
MEGDNN_DECL_ALGO_TYPE
(
CUDA_GROUP_CONV_GENERAL
)
private:
WorkspaceBundle
get_workspace_bundle
(
void
*
ptr
,
const
SizeArgs
&
args
)
const
;
};
class
Convolution3DForwardImpl
::
AlgoCUDNN
final
:
public
AlgoBase
{
...
...
@@ -226,8 +215,7 @@ public:
Algo1x1x1
a1x1x1
;
AlgoInplaceMatmul
inplace_matmul
;
AlgoChanwise
chanwise
;
std
::
vector
<
AlgoGroupConvGeneral
>
gconv
;
std
::
unordered_map
<
AlgoBase
*
,
AlgoGroupConvGeneral
*>
algo2gconv
;
AlgoGroupConvGeneral
group
;
std
::
vector
<
AlgoBase
*>
//! all algorithms
...
...
dnn/src/cuda/convolution3d/forward/group_conv.cpp
浏览文件 @
4a92346b
...
...
@@ -15,61 +15,110 @@ using namespace megdnn;
using
namespace
cuda
;
using
namespace
convolution3d
;
void
Convolution3DForwardImpl
::
AlgoGroupConvGeneral
::
modify_size_args
(
Convolution3DForwardImpl
::
AlgoBase
::
SizeArgs
&
args
,
TensorLayout
&
src_pg
,
TensorLayout
&
dst_pg
)
{
src_pg
=
*
args
.
src_layout
;
dst_pg
=
*
args
.
dst_layout
;
namespace
{
std
::
pair
<
TensorLayoutArray
,
Convolution3DForwardImpl
::
Param
>
sub_opr_config
(
const
Convolution3DForwardImpl
::
AlgoBase
::
SizeArgs
&
args
)
{
TensorLayout
src_pg
=
*
args
.
src_layout
;
SmallVector
<
size_t
>
flt_shape
(
0
);
std
::
vector
<
ptrdiff_t
>
flt_stride
(
0
);
size_t
idx
=
0
;
// check if the first dim is group
if
(
args
.
filter_layout
->
ndim
>
args
.
src_layout
->
ndim
)
++
idx
;
for
(;
idx
<
args
.
filter_layout
->
ndim
;
++
idx
)
{
flt_shape
.
push_back
(
args
.
filter_layout
->
shape
[
idx
]);
flt_stride
.
push_back
(
args
.
filter_layout
->
stride
[
idx
]);
}
TensorLayout
filter_pg
(
flt_shape
,
flt_stride
,
args
.
filter_layout
->
dtype
,
args
.
filter_layout
->
format
);
TensorLayout
dst_pg
=
*
args
.
dst_layout
;
auto
nr_grp
=
args
.
filter_meta
.
group
;
args
.
filter_meta
.
group
=
1
;
size_t
c_pos
;
if
(
args
.
filter_meta
.
format
==
Param
::
Format
::
NCDHW
)
{
if
(
args
.
filter_meta
.
format
==
param
::
Convolution3D
::
Format
::
NCDHW
)
{
c_pos
=
1
;
}
else
{
megdnn_assert
(
args
.
filter_meta
.
format
==
Param
::
Format
::
NDHWC
,
megdnn_assert
(
args
.
filter_meta
.
format
==
param
::
Convolution3D
::
Format
::
NDHWC
,
"invalid conv format"
);
c_pos
=
4
;
}
src_pg
.
shape
[
c_pos
]
/=
nr_grp
;
dst_pg
.
shape
[
c_pos
]
/=
nr_grp
;
args
.
src_layout
=
&
src_pg
;
args
.
dst_layout
=
&
dst_pg
;
megdnn
::
param
::
Convolution3D
param
=
args
.
opr
->
param
();
param
.
sparse
=
megdnn
::
param
::
Convolution3D
::
Sparse
::
DENSE
;
std
::
pair
<
TensorLayoutArray
,
Convolution3DForwardImpl
::
Param
>
ret
;
ret
.
first
=
{
src_pg
,
filter_pg
,
dst_pg
};
ret
.
second
=
param
;
return
ret
;
}
Convolution3DForwardImpl
::
AlgoGroupConvGeneral
::
AlgoGroupConvGeneral
(
AlgoBase
*
impl
)
:
m_impl
{
impl
}
{
m_name
=
"group_conv3d:"
;
m_name
+=
impl
->
name
();
//! Build the operator that the group-conv algorithm hands its work to.
//!
//! A Convolution3D operator is created on the handle carried by \p args, the
//! execution policy of args.opr is forwarded to it through
//! set_execution_policy, and its param is replaced by the one computed by
//! sub_opr_config().
//!
//! \return the layouts computed by sub_opr_config() together with the
//!     configured operator (ownership is transferred to the caller)
std::pair<TensorLayoutArray, std::unique_ptr<Convolution3DForward>>
prepare_sub_opr(const Convolution3DForwardImpl::AlgoBase::SizeArgs& args) {
    auto sub_opr = args.handle->create_operator<Convolution3D>();
    auto* sub_opr_raw = sub_opr.get();
    set_execution_policy<Convolution3DForward, Convolution3DForward*>(
            args.opr, sub_opr_raw);

    // layouts in .first, operator param in .second
    auto&& cfg = sub_opr_config(args);
    sub_opr->param() = cfg.second;

    return {cfg.first, std::move(sub_opr)};
}
}
// namespace
std
::
vector
<
Algorithm
::
SearchItem
>
Convolution3DForwardImpl
::
AlgoGroupConvGeneral
::
get_subopr_list
(
const
TensorLayoutArray
&
layouts
,
const
OperatorBase
*
opr
)
const
{
AlgoBase
::
SizeArgs
args
{
static_cast
<
const
Convolution3DForwardImpl
*>
(
opr
),
layouts
[
0
],
layouts
[
1
],
layouts
[
2
]};
auto
&&
config
=
sub_opr_config
(
args
);
std
::
string
param_str
;
Algorithm
::
serialize_write_pod
(
config
.
second
,
param_str
);
return
{{
Algorithm
::
OprType
::
CONVOLUTION3D_FORWARD
,
param_str
,
config
.
first
}};
}
bool
Convolution3DForwardImpl
::
AlgoGroupConvGeneral
::
is_available
(
const
SizeArgs
&
args
)
const
{
if
(
args
.
filter_meta
.
group
<=
1
)
return
false
;
auto
sub_args
=
args
;
TensorLayout
src_pg
,
dst_pg
;
modify_size_args
(
sub_args
,
src_pg
,
dst_pg
);
return
m_impl
->
is_available
(
sub_args
);
if
(
args
.
filter_meta
.
format
!=
Param
::
Format
::
NCDHW
&&
args
.
filter_meta
.
format
!=
Param
::
Format
::
NDHWC
)
{
return
false
;
}
auto
config
=
prepare_sub_opr
(
args
);
return
get_algorithm
(
static_cast
<
Convolution3DForwardImpl
*>
(
config
.
second
.
get
()),
config
.
first
[
0
],
config
.
first
[
1
],
config
.
first
[
2
]);
}
//! Lay out the workspace of this algorithm starting at \p ptr.
//!
//! The bundle has a single entry whose size is whatever the operator built
//! by prepare_sub_opr() reports for the layouts returned alongside it.
WorkspaceBundle
Convolution3DForwardImpl::AlgoGroupConvGeneral::get_workspace_bundle(
        void* ptr, const SizeArgs& args) const {
    auto sub = prepare_sub_opr(args);
    const auto& sub_layouts = sub.first;

    // indices 0, 1, 2 are passed in the (src, filter, dst) argument order
    size_t sub_workspace = sub.second->get_workspace_in_bytes(
            sub_layouts[0], sub_layouts[1], sub_layouts[2]);

    return {ptr, {sub_workspace}};
}
size_t
Convolution3DForwardImpl
::
AlgoGroupConvGeneral
::
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
{
auto
sub_args
=
args
;
TensorLayout
src_pg
,
dst_pg
;
modify_size_args
(
sub_args
,
src_pg
,
dst_pg
);
return
m_impl
->
get_workspace_in_bytes
(
sub_args
);
const
SizeArgs
&
args
)
const
{
return
get_workspace_bundle
(
nullptr
,
args
).
total_size_in_bytes
();
}
void
Convolution3DForwardImpl
::
AlgoGroupConvGeneral
::
exec
(
const
ExecArgs
&
args
)
const
{
auto
sub_args
=
args
;
TensorND
tsrc
{
*
args
.
src_tensor
},
tdst
{
*
args
.
dst_tensor
},
tflt
{
*
args
.
filter_tensor
};
modify_size_args
(
sub_args
,
tsrc
.
layout
,
tdst
.
layout
);
sub_args
.
src_tensor
=
&
tsrc
;
sub_args
.
dst_tensor
=
&
tdst
;
sub_args
.
filter_tensor
=
&
tflt
;
const
ExecArgs
&
args
)
const
{
auto
bundle
=
get_workspace_bundle
(
args
.
workspace
.
raw_ptr
,
args
);
{
auto
config
=
prepare_sub_opr
(
args
);
TensorND
tsrc
{
args
.
src_tensor
->
raw_ptr
,
config
.
first
[
0
]};
TensorND
tfilter
{
args
.
filter_tensor
->
raw_ptr
,
config
.
first
[
1
]};
TensorND
tdst
{
args
.
dst_tensor
->
raw_ptr
,
config
.
first
[
2
]};
size_t
c_pos
;
if
(
args
.
filter_meta
.
format
==
Param
::
Format
::
NCDHW
)
{
...
...
@@ -82,17 +131,20 @@ void Convolution3DForwardImpl::AlgoGroupConvGeneral::exec(
auto
grp
=
args
.
filter_meta
.
group
;
auto
&&
fm
=
args
.
filter_meta
;
auto
strd_src
=
tsrc
.
layout
.
stride
[
c_pos
]
*
fm
.
icpg
*
tsrc
.
layout
.
dtype
.
size
(),
strd_dst
=
tdst
.
layout
.
stride
[
c_pos
]
*
fm
.
ocpg
*
tdst
.
layout
.
dtype
.
size
(),
strd_flt
=
fm
.
icpg
*
fm
.
ocpg
*
fm
.
spatial
[
0
]
*
fm
.
spatial
[
1
]
*
fm
.
spatial
[
2
]
*
tflt
.
layout
.
dtype
.
size
();
for
(
uint32_t
g
=
0
;
g
<
grp
;
++
g
)
{
m_impl
->
exec
(
sub_args
);
auto
&&
fm
=
args
.
filter_meta
;
auto
strd_src
=
tsrc
.
layout
.
stride
[
c_pos
]
*
fm
.
icpg
*
tsrc
.
layout
.
dtype
.
size
(),
strd_dst
=
tdst
.
layout
.
stride
[
c_pos
]
*
fm
.
ocpg
*
tdst
.
layout
.
dtype
.
size
(),
strd_flt
=
fm
.
icpg
*
fm
.
ocpg
*
fm
.
spatial
[
0
]
*
fm
.
spatial
[
1
]
*
fm
.
spatial
[
2
]
*
tfilter
.
layout
.
dtype
.
size
();
for
(
uint32_t
g
=
0
;
g
<
grp
;
++
g
)
{
config
.
second
->
exec
(
tsrc
,
tfilter
,
tdst
,
bundle
.
get_workspace
(
0
));
incr_voidp
(
tsrc
.
raw_ptr
,
strd_src
);
incr_voidp
(
tdst
.
raw_ptr
,
strd_dst
);
incr_voidp
(
tflt
.
raw_ptr
,
strd_flt
);
incr_voidp
(
tfilter
.
raw_ptr
,
strd_flt
);
}
}
}
...
...
dnn/src/cuda/convolution3d/helper.h
浏览文件 @
4a92346b
...
...
@@ -26,6 +26,7 @@ namespace convolution3d {
struct
ForwardSizeArgs
{
HandleImpl
*
handle
;
const
TensorLayout
*
src_layout
;
const
TensorLayout
*
filter_layout
;
CanonizedFilterMeta
filter_meta
;
const
TensorLayout
*
dst_layout
;
param
::
Convolution3D
::
DataType
data_type
;
...
...
dnn/src/cuda/convolution3d/opr_impl.cpp
浏览文件 @
4a92346b
...
...
@@ -35,16 +35,6 @@ Convolution3DForwardImpl::get_algorithm_heuristic(
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
positive_attr
,
const
AlgoAttribute
&
negative_attr
)
{
auto
fm
=
check_layout_fwd
(
src
,
filter
,
dst
);
return
get_algorithm_heuristic
(
src
,
fm
,
dst
,
workspace_limit_in_bytes
,
positive_attr
,
negative_attr
);
}
Convolution3DForwardImpl
::
Algorithm
*
Convolution3DForwardImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
CanonizedFilterMeta
&
filter
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
positive_attr
,
const
AlgoAttribute
&
negative_attr
)
{
AlgoBase
::
SizeArgs
args
(
this
,
src
,
filter
,
dst
);
#if CUDNN_MAJOR < 7 || (CUDNN_MAJOR == 7 && CUDNN_MINOR < 5)
...
...
@@ -98,18 +88,14 @@ Convolution3DForwardImpl::get_algorithm_heuristic(
if
(
auto
algo
=
get_cudnn_algo
())
return
algo
;
}
if
(
args
.
filter_meta
.
group
>
1
)
{
auto
orig_args
=
args
;
TensorLayout
a
,
b
;
AlgoGroupConvGeneral
::
modify_size_args
(
args
,
a
,
b
);
if
(
prefer_1x1x1
())
{
return
sm_algo_pack
.
algo2gconv
.
at
(
&
sm_algo_pack
.
a1x1x1
);
}
if
(
is_cudnn_supported
(
args
))
{
if
(
auto
algo
=
get_cudnn_algo
())
return
sm_algo_pack
.
algo2gconv
.
at
(
algo
);
if
(
auto
algo
=
megdnn
::
get_algo_match_attribute
<
Convolution3DForwardImpl
>
(
&
sm_algo_pack
.
group
,
positive_attr
,
negative_attr
))
{
return
algo
;
}
args
=
orig_args
;
}
return
megdnn
::
get_algo_match_attribute
<
Convolution3DForwardImpl
>
(
...
...
@@ -129,7 +115,7 @@ size_t Convolution3DForwardImpl::get_workspace_in_bytes(
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
)
{
AlgoBase
::
SizeArgs
args
(
this
,
src
,
filter
,
dst
);
return
get_algorithm
(
this
,
src
,
args
.
filter_meta
,
dst
)
return
get_algorithm
(
this
,
src
,
filter
,
dst
)
->
get_workspace_in_bytes
(
args
);
}
...
...
@@ -138,7 +124,7 @@ void Convolution3DForwardImpl::exec(_megdnn_tensor_in src,
_megdnn_tensor_out
dst
,
_megdnn_workspace
workspace
)
{
AlgoBase
::
ExecArgs
args
(
this
,
src
,
filter
,
dst
,
workspace
);
auto
algo
=
get_algorithm
(
this
,
src
.
layout
,
args
.
filter_meta
,
dst
.
layout
);
auto
algo
=
get_algorithm
(
this
,
src
.
layout
,
filter
.
layout
,
dst
.
layout
);
algo
->
check_workspace
(
args
,
workspace
).
exec
(
args
);
}
...
...
@@ -151,7 +137,7 @@ void Convolution3DBackwardDataImpl::exec(_megdnn_tensor_in filter,
_megdnn_tensor_out
grad
,
_megdnn_workspace
workspace
)
{
AlgoBase
::
ExecArgs
args
(
this
,
filter
,
diff
,
grad
,
workspace
);
auto
algo
=
get_algorithm
(
this
,
args
.
filter_meta
,
diff
.
layout
,
grad
.
layout
);
auto
algo
=
get_algorithm
(
this
,
filter
.
layout
,
diff
.
layout
,
grad
.
layout
);
algo
->
check_workspace
(
args
,
workspace
).
exec
(
args
);
}
...
...
@@ -169,17 +155,6 @@ Convolution3DBackwardDataImpl::get_algorithm_heuristic(
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
positive_attr
,
const
AlgoAttribute
&
negative_attr
)
{
auto
fm
=
check_layout_fwd
(
grad
,
filter
,
diff
);
return
get_algorithm_heuristic
(
fm
,
diff
,
grad
,
workspace_limit_in_bytes
,
positive_attr
,
negative_attr
);
}
Convolution3DBackwardDataImpl
::
Algorithm
*
Convolution3DBackwardDataImpl
::
get_algorithm_heuristic
(
const
CanonizedFilterMeta
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
positive_attr
,
const
AlgoAttribute
&
negative_attr
)
{
AlgoBase
::
SizeArgs
args
(
this
,
filter
,
diff
,
grad
);
if
(
args
.
filter_meta
.
group
>
1
&&
...
...
@@ -215,14 +190,11 @@ Convolution3DBackwardDataImpl::get_algorithm_heuristic(
}
if
(
args
.
filter_meta
.
group
>
1
)
{
auto
orig_args
=
args
;
TensorLayout
a
,
b
;
AlgoGroupConvGeneral
::
modify_size_args
(
args
,
a
,
b
);
if
(
is_cudnn_supported
(
args
.
as_fwd_args
()))
{
if
(
auto
algo
=
get_cudnn_algo
())
return
sm_algo_pack
.
algo2gconv
.
at
(
algo
);
if
(
auto
algo
=
megdnn
::
get_algo_match_attribute
<
Convolution3DBackwardDataImpl
>
(
&
sm_algo_pack
.
group
,
positive_attr
,
negative_attr
))
{
return
algo
;
}
args
=
orig_args
;
}
return
megdnn
::
get_algo_match_attribute
<
Convolution3DBackwardDataImpl
>
(
...
...
@@ -234,7 +206,7 @@ size_t Convolution3DBackwardDataImpl::get_workspace_in_bytes(
const
TensorLayout
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
)
{
AlgoBase
::
SizeArgs
args
(
this
,
filter
,
diff
,
grad
);
return
get_algorithm
(
this
,
args
.
filter_meta
,
diff
,
grad
)
return
get_algorithm
(
this
,
filter
,
diff
,
grad
)
->
get_workspace_in_bytes
(
args
);
}
...
...
@@ -248,7 +220,7 @@ void Convolution3DBackwardFilterImpl::exec(_megdnn_tensor_in src,
_megdnn_workspace
workspace
)
{
AlgoBase
::
ExecArgs
args
(
this
,
src
,
diff
,
grad
,
workspace
);
auto
algo
=
get_algorithm
(
this
,
src
.
layout
,
diff
.
layout
,
args
.
grad_filter_meta
);
get_algorithm
(
this
,
src
.
layout
,
diff
.
layout
,
grad
.
layout
);
algo
->
check_workspace
(
args
,
workspace
).
exec
(
args
);
}
...
...
@@ -266,17 +238,6 @@ Convolution3DBackwardFilterImpl::get_algorithm_heuristic(
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
positive_attr
,
const
AlgoAttribute
&
negative_attr
)
{
auto
fm
=
check_layout_fwd
(
src
,
grad
,
diff
);
return
get_algorithm_heuristic
(
src
,
diff
,
fm
,
workspace_limit_in_bytes
,
positive_attr
,
negative_attr
);
}
Convolution3DBackwardFilterImpl
::
Algorithm
*
Convolution3DBackwardFilterImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
CanonizedFilterMeta
&
grad
,
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
positive_attr
,
const
AlgoAttribute
&
negative_attr
)
{
AlgoBase
::
SizeArgs
args
(
this
,
src
,
diff
,
grad
);
if
(
args
.
grad_filter_meta
.
group
>
1
&&
...
...
@@ -310,15 +271,13 @@ Convolution3DBackwardFilterImpl::get_algorithm_heuristic(
if
(
auto
algo
=
get_cudnn_algo
())
return
algo
;
}
if
(
args
.
grad_filter_meta
.
group
>
1
)
{
auto
orig_args
=
args
;
TensorLayout
a
,
b
;
AlgoGroupConvGeneral
::
modify_size_args
(
args
,
a
,
b
);
if
(
is_cudnn_supported
(
args
.
as_fwd_args
()))
{
if
(
auto
algo
=
get_cudnn_algo
())
return
sm_algo_pack
.
algo2gconv
.
at
(
algo
);
if
(
auto
algo
=
megdnn
::
get_algo_match_attribute
<
Convolution3DBackwardFilterImpl
>
(
&
sm_algo_pack
.
group
,
positive_attr
,
negative_attr
))
{
return
algo
;
}
args
=
orig_args
;
}
return
megdnn
::
get_algo_match_attribute
<
Convolution3DBackwardFilterImpl
>
(
...
...
@@ -330,7 +289,7 @@ size_t Convolution3DBackwardFilterImpl::get_workspace_in_bytes(
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
)
{
AlgoBase
::
SizeArgs
args
(
this
,
src
,
diff
,
grad
);
return
get_algorithm
(
this
,
src
,
diff
,
args
.
grad_filter_meta
)
return
get_algorithm
(
this
,
src
,
diff
,
grad
)
->
get_workspace_in_bytes
(
args
);
}
...
...
dnn/src/cuda/convolution3d/opr_impl.h
浏览文件 @
4a92346b
...
...
@@ -21,17 +21,6 @@ public:
using
Convolution3DForward
::
Convolution3DForward
;
void
exec
(
_megdnn_tensor_in
src
,
_megdnn_tensor_in
filter
,
_megdnn_tensor_out
dst
,
_megdnn_workspace
workspace
)
override
;
//! Convenience wrapper: pick an algorithm with get_algorithm_heuristic()
//! using an already canonized filter meta and return its info().
AlgorithmInfo get_algorithm_info_heuristic(
        const TensorLayout& src, const CanonizedFilterMeta& filter,
        const TensorLayout& dst, size_t workspace_limit_in_bytes,
        const AlgoAttribute& positive_attr,
        const AlgoAttribute& negative_attr) {
    auto chosen = get_algorithm_heuristic(src, filter, dst,
                                          workspace_limit_in_bytes,
                                          positive_attr, negative_attr);
    return chosen->info();
}
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
)
override
;
...
...
@@ -57,13 +46,6 @@ protected:
const
AlgoAttribute
&
negative_attr
)
override
;
private:
Algorithm
*
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
CanonizedFilterMeta
&
filter
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
positive_attr
,
const
AlgoAttribute
&
negative_attr
);
static
AlgoPack
sm_algo_pack
;
};
...
...
@@ -72,16 +54,6 @@ public:
using
Convolution3DBackwardData
::
Convolution3DBackwardData
;
void
exec
(
_megdnn_tensor_in
filter
,
_megdnn_tensor_in
diff
,
_megdnn_tensor_out
grad
,
_megdnn_workspace
workspace
)
override
;
//! Convenience wrapper: pick an algorithm with get_algorithm_heuristic()
//! using an already canonized filter meta and return its info().
AlgorithmInfo get_algorithm_info_heuristic(
        const CanonizedFilterMeta& filter, const TensorLayout& diff,
        const TensorLayout& grad, size_t workspace_limit_in_bytes,
        const AlgoAttribute& positive_attr,
        const AlgoAttribute& negative_attr) {
    auto chosen = get_algorithm_heuristic(filter, diff, grad,
                                          workspace_limit_in_bytes,
                                          positive_attr, negative_attr);
    return chosen->info();
}
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
)
override
;
...
...
@@ -109,13 +81,6 @@ protected:
const
AlgoAttribute
&
negative_attr
)
override
;
private:
Algorithm
*
get_algorithm_heuristic
(
const
CanonizedFilterMeta
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
positive_attr
,
const
AlgoAttribute
&
negative_attr
);
static
AlgoPack
sm_algo_pack
;
};
...
...
@@ -127,17 +92,6 @@ public:
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
)
override
;
//! Convenience wrapper: pick an algorithm with get_algorithm_heuristic()
//! using an already canonized grad filter meta and return its info().
AlgorithmInfo get_algorithm_info_heuristic(
        const TensorLayout& src, const TensorLayout& diff,
        const CanonizedFilterMeta& grad, size_t workspace_limit_in_bytes,
        const AlgoAttribute& positive_attr,
        const AlgoAttribute& negative_attr) {
    auto chosen = get_algorithm_heuristic(src, diff, grad,
                                          workspace_limit_in_bytes,
                                          positive_attr, negative_attr);
    return chosen->info();
}
const
char
*
get_algorithm_set_name
()
const
override
;
class
AlgoBase
;
...
...
@@ -162,13 +116,6 @@ protected:
const
AlgoAttribute
&
negative_attr
)
override
;
private:
Algorithm
*
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
CanonizedFilterMeta
&
grad
,
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
positive_attr
,
const
AlgoAttribute
&
negative_attr
);
static
AlgoPack
sm_algo_pack
;
};
}
// namespace cuda
...
...
dnn/test/cuda/group_conv3d.cpp
浏览文件 @
4a92346b
...
...
@@ -66,12 +66,10 @@ TEST_F(CUDA, GROUP_CONVOLUTION3D_FORWARD_1x1x1) {
auto
run
=
[
&
](
size_t
N
,
size_t
IC
,
size_t
ID
,
size_t
IH
,
size_t
IW
,
size_t
FD
,
size_t
FH
,
size_t
FW
,
size_t
OC
,
size_t
group
)
{
Checker
<
Convolution3D
>
checker
(
handle_cuda
());
#if CUDNN_MAJOR <= 6
bool
require_algo
=
true
;
checker
.
set_before_exec_callback
(
AlgoChecker
<
Convolution3DForward
>
{
"group_conv3d:1x1x1"
,
&
require_algo
});
#endif
checker
.
set_before_exec_callback
(
AlgoChecker
<
Convolution3DForward
>
(
ExecutionPolicyAlgoName
{
"CUDA:GROUP_CONV3D_FORWARD"
,
{{
"1x1x1"
,
{}}}}));
Convolution3D
::
Param
param
;
param
.
sparse
=
Convolution3D
::
Param
::
Sparse
::
GROUP
;
auto
ICg
=
IC
/
group
;
...
...
@@ -125,6 +123,45 @@ TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_DATA) {
run
(
2
,
32
,
64
,
64
,
64
,
3
,
3
,
3
,
32
,
62
,
62
,
62
,
0
,
0
,
0
,
1
,
1
,
1
,
4
);
}
//! Grouped 3D convolution backward-data, checked with the execution policy
//! "CUDA:GROUP_CONV3D_BACKWARD_DATA" whose nested entry is "CUDNN".
TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_DATA_CUDNN) {
    // Argument order: batch, input channels and D/H/W, filter D/H/W, output
    // channels and D/H/W, padding D/H/W, stride D/H/W, group count.
    auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW,
                   size_t FD, size_t FH, size_t FW, size_t OC, size_t OD,
                   size_t OH, size_t OW, size_t PD, size_t PH, size_t PW,
                   size_t SD, size_t SH, size_t SW, size_t group) {
        Checker<Convolution3DBackwardData> checker(handle_cuda());
        checker.set_before_exec_callback(
                AlgoChecker<Convolution3DBackwardData>(ExecutionPolicyAlgoName{
                        "CUDA:GROUP_CONV3D_BACKWARD_DATA", {{"CUDNN", {}}}}));

        Convolution3DBackwardData::Param param;
        param.sparse = Convolution3D::Param::Sparse::GROUP;
        param.pad_d = PD;
        param.pad_h = PH;
        param.pad_w = PW;
        param.stride_d = SD;
        param.stride_h = SH;
        param.stride_w = SW;

        // channels handled by each group
        auto in_ch_per_group = IC / group;
        auto out_ch_per_group = OC / group;

        // operands: grouped filter, diff (output-shaped), grad (input-shaped)
        checker.set_param(param);
        checker.exec(
                {{group, out_ch_per_group, in_ch_per_group, FD, FH, FW},
                 {N, OC, OD, OH, OW},
                 {N, IC, ID, IH, IW}});
    };

    // cases that exposed a bug in an earlier version
    run(1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 3, 0, 0, 1, 1, 1, 1, 2);
    run(1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 0, 0, 1, 1, 1, 2, 2);
    run(1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 0, 1, 0, 1, 2, 1, 2);
    run(1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 0, 0, 2, 1, 1, 2);
    // plain case: no padding, unit stride
    run(2, 64, 7, 7, 7, 3, 3, 3, 32, 5, 5, 5, 0, 0, 0, 1, 1, 1, 2);
    // with padding
    run(2, 32, 7, 7, 7, 3, 3, 3, 64, 7, 7, 7, 1, 1, 1, 1, 1, 1, 4);
    // with stride 2
    run(2, 32, 7, 7, 7, 3, 3, 3, 64, 3, 3, 3, 0, 0, 0, 2, 2, 2, 8);
    // larger spatial size
    run(2, 32, 64, 64, 64, 3, 3, 3, 32, 62, 62, 62, 0, 0, 0, 1, 1, 1, 4);
}
TEST_F
(
CUDA
,
GROUP_CONVOLUTION3D_BACKWARD_FILTER
)
{
auto
run
=
[
&
](
size_t
N
,
size_t
IC
,
size_t
ID
,
size_t
IH
,
size_t
IW
,
size_t
FD
,
size_t
FH
,
size_t
FW
,
size_t
OC
,
size_t
OD
,
...
...
@@ -153,6 +190,39 @@ TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_FILTER) {
run
(
2
,
32
,
7
,
7
,
7
,
3
,
3
,
3
,
64
,
3
,
3
,
3
,
0
,
0
,
0
,
2
,
2
,
2
,
8
);
}
//! Grouped 3D convolution backward-filter, checked with the execution policy
//! "CUDA:GROUP_CONV3D_BACKWARD_FILTER" whose nested entry is "CUDNN".
TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_FILTER_CUDNN) {
    // Argument order: batch, input channels and D/H/W, filter D/H/W, output
    // channels and D/H/W, padding D/H/W, stride D/H/W, group count.
    auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW,
                   size_t FD, size_t FH, size_t FW, size_t OC, size_t OD,
                   size_t OH, size_t OW, size_t PD, size_t PH, size_t PW,
                   size_t SD, size_t SH, size_t SW, size_t group) {
        Checker<Convolution3DBackwardFilter> checker(handle_cuda());
        checker.set_before_exec_callback(
                AlgoChecker<Convolution3DBackwardFilter>(
                        ExecutionPolicyAlgoName{
                                "CUDA:GROUP_CONV3D_BACKWARD_FILTER",
                                {{"CUDNN", {}}}}));

        Convolution3DBackwardFilter::Param param;
        param.sparse = Convolution3D::Param::Sparse::GROUP;
        param.pad_d = PD;
        param.pad_h = PH;
        param.pad_w = PW;
        param.stride_d = SD;
        param.stride_h = SH;
        param.stride_w = SW;

        // channels handled by each group
        auto in_ch_per_group = IC / group;
        auto out_ch_per_group = OC / group;

        // operands: src (input-shaped), diff (output-shaped), grouped filter
        checker.set_param(param);
        checker.exec(
                {{N, IC, ID, IH, IW},
                 {N, OC, OD, OH, OW},
                 {group, out_ch_per_group, in_ch_per_group, FD, FH, FW}});
    };

    // plain case: no padding, unit stride
    run(2, 64, 7, 7, 7, 3, 3, 3, 32, 5, 5, 5, 0, 0, 0, 1, 1, 1, 2);
    // with padding
    run(2, 32, 7, 7, 7, 3, 3, 3, 64, 7, 7, 7, 1, 1, 1, 1, 1, 1, 4);
    // with stride 2
    run(2, 32, 7, 7, 7, 3, 3, 3, 64, 3, 3, 3, 0, 0, 0, 2, 2, 2, 8);
}
}
// namespace test
}
// namespace megdnn
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录