MegEngine 天元 / MegEngine

Commit d915c5a3

Authored on Jun 24, 2021 by Megvii Engine Team
Committed by huangxinda on Jul 19, 2021

refactor(mgb): make convolution3D handle noncontiguous tensors
GitOrigin-RevId: 3d3c31b02161532637948ba9aec42d161ec05e92
Parent: d04cd67f

Showing 17 changed files with 175 additions and 21 deletions (+175 −21):
dnn/src/common/convolution3d.cpp                                +20  −5
dnn/src/cuda/convolution/backward_data/group_conv.cpp            +2  −0
dnn/src/cuda/convolution/backward_filter/chanwise.cpp            +1  −1
dnn/src/cuda/convolution/backward_filter/group_conv.cpp          +2  −0
dnn/src/cuda/convolution3d/backward_data/algo.cpp                +1  −1
dnn/src/cuda/convolution3d/backward_data/chanwise.cpp            +4  −0
dnn/src/cuda/convolution3d/backward_data/group_conv.cpp          +2  −0
dnn/src/cuda/convolution3d/backward_filter/algo.cpp              +1  −1
dnn/src/cuda/convolution3d/backward_filter/chanwise.cpp          +4  −0
dnn/src/cuda/convolution3d/backward_filter/group_conv.cpp        +2  −0
dnn/src/cuda/convolution3d/backward_filter/inplace_matmul.cpp    +4  −0
dnn/src/cuda/convolution3d/forward/algo.cpp                      +1  −1
dnn/src/cuda/convolution3d/forward/chanwise.cpp                  +4  −0
dnn/src/cuda/convolution3d/forward/group_conv.cpp                +2  −0
dnn/src/naive/convolution3d/helper.h                             +0  −2
dnn/test/cuda/convolution.cpp                                    +0  −10
dnn/test/cuda/convolution3d.cpp                                 +125  −0
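
A note on the terminology the whole commit revolves around: a dense layout is contiguous when every dimension's stride equals the number of elements spanned by the dimensions inner to it. The sketch below illustrates that predicate; it is not MegDNN's actual TensorLayout::is_contiguous(), which may be more permissive (e.g. around size-1 dimensions).

    #include <cstdio>
    #include <vector>

    // Illustration only: a layout is contiguous iff stride[i] equals the
    // product of all sizes inner to dimension i.
    bool is_contiguous(const std::vector<long>& shape,
                       const std::vector<long>& stride) {
        long expected = 1;
        for (size_t i = shape.size(); i-- > 0;) {
            if (stride[i] != expected)
                return false;
            expected *= shape[i];
        }
        return true;
    }

    int main() {
        // Contiguous strides for {4, 5, 16, 16, 16} are {20480, 4096, 256, 16, 1}.
        printf("%d\n", is_contiguous({4, 5, 16, 16, 16},
                                     {20480, 4096, 256, 16, 1}));  // 1
        // The new tests double the batch stride to 40960: a view over every
        // other batch of a larger buffer, hence noncontiguous.
        printf("%d\n", is_contiguous({4, 5, 16, 16, 16},
                                     {40960, 4096, 256, 16, 1}));  // 0
    }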

dnn/src/common/convolution3d.cpp

@@ -122,8 +122,6 @@ Convolution3DBase::CanonizedFilterMeta Convolution3DBase::deduce_layout_fwd(
         TensorLayout& dst) const {
     auto errmsg = [&]() { return get_errmsg(src, filter, dst, param()); };
     MEGDNN_MARK_USED_VAR(errmsg);
-    megdnn_assert_contiguous(src);
-    megdnn_assert_contiguous(filter);
     megdnn_assert(src.ndim >= 5_z, "%s", errmsg().c_str());
     megdnn_assert(src.dtype == filter.dtype, "%s", errmsg().c_str());
     if (param().data_type == Param::DataType::FLOAT) {

@@ -170,6 +168,8 @@ Convolution3DBase::CanonizedFilterMeta Convolution3DBase::deduce_layout_fwd(
 Convolution3DBase::CanonizedFilterMeta Convolution3DBase::check_layout_fwd(
         const TensorLayout& src, const TensorLayout& filter,
         const TensorLayout& dst) const {
+    megdnn_assert_contiguous(src);
+    megdnn_assert_contiguous(filter);
     TensorLayout dst_expected;
     auto ret = deduce_layout_fwd(src, filter, dst_expected);
     megdnn_assert_eq_layout(dst_expected, dst);

@@ -185,7 +185,12 @@ void Convolution3DForward::deduce_layout(const TensorLayout& src,
 Convolution3DBase::CanonizedFilterMeta Convolution3DForward::check_exec(
         const TensorLayout& src, const TensorLayout& filter,
         const TensorLayout& dst, size_t workspace_in_bytes) {
-    auto ret = check_layout_fwd(src, filter, dst);
+    auto src_fwd = src;
+    auto dst_fwd = dst;
+    src_fwd.init_contiguous_stride();
+    dst_fwd.init_contiguous_stride();
+    auto ret = check_layout_fwd(src_fwd, filter, dst_fwd);
     auto required_workspace_in_bytes = get_workspace_in_bytes(src, filter, dst);
     megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes);
     return ret;

@@ -196,7 +201,12 @@ Convolution3DBase::CanonizedFilterMeta Convolution3DBackwardData::check_exec(
         const TensorLayout& grad, size_t workspace_in_bytes) {
     megdnn_assert(param().data_type == Param::DataType::FLOAT,
                   "only float type is supported for conv backward");
-    auto ret = check_layout_fwd(grad, filter, diff);
+    auto diff_fwd = diff;
+    auto grad_fwd = grad;
+    diff_fwd.init_contiguous_stride();
+    grad_fwd.init_contiguous_stride();
+    auto ret = check_layout_fwd(grad_fwd, filter, diff_fwd);
     auto required_workspace_in_bytes = get_workspace_in_bytes(filter, diff, grad);
     megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes);

@@ -244,7 +254,12 @@ Convolution3DBase::CanonizedFilterMeta Convolution3DBackwardFilter::check_exec(
         const TensorLayout& grad, size_t workspace_in_bytes) {
     megdnn_assert(param().data_type == Param::DataType::FLOAT,
                   "only float type is supported for conv backward");
-    auto ret = check_layout_fwd(src, grad, diff);
+    auto src_fwd = src;
+    auto diff_fwd = diff;
+    src_fwd.init_contiguous_stride();
+    diff_fwd.init_contiguous_stride();
+    auto ret = check_layout_fwd(src_fwd, grad, diff_fwd);
     auto required_workspace_in_bytes = get_workspace_in_bytes(src, diff, grad);
     megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes);
     return ret;
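
The pattern repeated across the three check_exec overloads above: strides on src/dst (or diff/grad) are no longer required to be dense, so the shape/dtype validation in check_layout_fwd, which now owns the contiguity assertions, runs on contiguous copies. A minimal sketch of the idea, with a hypothetical Layout type standing in for megdnn::TensorLayout:

    #include <cstddef>
    #include <vector>

    // Hypothetical stand-in for megdnn::TensorLayout.
    struct Layout {
        std::vector<size_t> shape;
        std::vector<long> stride;

        // Reset strides to dense row-major ones, in the spirit of
        // TensorLayout::init_contiguous_stride().
        void init_contiguous_stride() {
            stride.resize(shape.size());
            long s = 1;
            for (size_t i = shape.size(); i-- > 0;) {
                stride[i] = s;
                s *= static_cast<long>(shape[i]);
            }
        }
    };

    void check_layout_fwd_stub(const Layout&, const Layout&, const Layout&) {
        // shape/dtype/contiguity checks would live here
    }

    // The check_exec pattern from the diff: validate on canonized copies, so
    // the caller can still hand strided src/dst to the actual kernel.
    void check_exec_sketch(const Layout& src, const Layout& filter,
                           const Layout& dst) {
        Layout src_fwd = src, dst_fwd = dst;
        src_fwd.init_contiguous_stride();
        dst_fwd.init_contiguous_stride();
        check_layout_fwd_stub(src_fwd, filter, dst_fwd);
    }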

dnn/src/cuda/convolution/backward_data/group_conv.cpp

@@ -44,6 +44,8 @@ bool ConvolutionBackwardDataImpl::AlgoGroupConvGeneral::is_available(
         args.diff_layout->dtype == dtype::QuantizedS8())) {
         return false;
     }
+    if (args.filter_meta.group <= 1)
+        return false;
     auto sub_args = args;
     TensorLayout diff_pg, grad_pg;
     modify_size_args(sub_args, diff_pg, grad_pg);
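
This is the other recurring guard in the commit: AlgoGroupConvGeneral rewrites a grouped convolution as `group` independent convolutions over channel slices, which is only meaningful for group > 1, so it now declines plain convolutions outright. A worked illustration of the per-group split, with hypothetical names:

    #include <cassert>
    #include <cstdio>

    // Hypothetical illustration: a grouped convolution with G groups splits
    // the channel dimensions into G independent sub-convolutions.
    struct ConvShape {
        int batch, in_ch, out_ch, group;
    };

    ConvShape per_group_subproblem(const ConvShape& s) {
        assert(s.group > 1);  // mirrors the added early-out
        assert(s.in_ch % s.group == 0 && s.out_ch % s.group == 0);
        return {s.batch, s.in_ch / s.group, s.out_ch / s.group, 1};
    }

    int main() {
        ConvShape s{8, 64, 128, 4};
        ConvShape sub = per_group_subproblem(s);
        printf("per-group: in_ch=%d out_ch=%d\n", sub.in_ch, sub.out_ch);  // 16, 32
    }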

dnn/src/cuda/convolution/backward_filter/chanwise.cpp

@@ -19,7 +19,7 @@ using namespace convolution;
 bool ConvolutionBackwardFilterImpl::AlgoChanwise::is_available(
         const SizeArgs& args) const {
-    if (!args.grad_layout->is_contiguous() ||
+    if (!args.src_layout->is_contiguous() ||
         !args.diff_layout->is_contiguous()) {
         return false;
     }

dnn/src/cuda/convolution/backward_filter/group_conv.cpp

@@ -42,6 +42,8 @@ bool ConvolutionBackwardFilterImpl::AlgoGroupConvGeneral::is_available(
         args.diff_layout->dtype == dtype::BFloat16()) {
         return false;
     }
+    if (args.grad_filter_meta.group <= 1)
+        return false;
     auto sub_args = args;
     TensorLayout src_pg, diff_pg;
     modify_size_args(sub_args, src_pg, diff_pg);

dnn/src/cuda/convolution3d/backward_data/algo.cpp

@@ -64,7 +64,7 @@ Convolution3DBackwardDataImpl::AlgoBase::SizeArgs::SizeArgs(
 Convolution3DBackwardDataImpl::AlgoBase::SizeArgs::SizeArgs(
         Convolution3DBackwardDataImpl* o, const TensorLayout& filter,
         const TensorLayout& diff, const TensorLayout& grad)
-        : SizeArgs(o, o->check_layout_fwd(grad, filter, diff), diff, grad) {}
+        : SizeArgs(o, o->make_canonized_filter_meta(grad.ndim, filter), diff, grad) {}
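
Before this change the delegating SizeArgs constructor went through o->check_layout_fwd(...), which with the assertions added above would reject strided layouts already at construction time. Routing through make_canonized_filter_meta() computes only the filter description and defers layout policing to check_exec and to each algorithm's is_available(). A hypothetical miniature of that split, with names of my own choosing:

    #include <cstdio>

    // Hypothetical miniature: constructing the argument bundle no longer
    // validates strides; availability checks do.
    struct FilterMeta { int group = 1; };

    FilterMeta make_filter_meta_stub(int /*ndim*/) {
        return FilterMeta{};  // unlike check_layout_fwd(), asserts nothing
    }

    struct SizeArgs {
        FilterMeta filter_meta;
        bool grad_contig;
        // Delegating constructor, mirroring the shape of the diff.
        SizeArgs(int ndim, bool grad_contig)
                : SizeArgs(make_filter_meta_stub(ndim), grad_contig) {}
        SizeArgs(FilterMeta fm, bool c) : filter_meta(fm), grad_contig(c) {}
    };

    int main() {
        SizeArgs args(5, /*grad_contig=*/false);  // fine: nothing fires here
        printf("group = %d\n", args.filter_meta.group);
    }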

dnn/src/cuda/convolution3d/backward_data/chanwise.cpp

@@ -19,6 +19,10 @@ using namespace convolution3d;
 bool Convolution3DBackwardDataImpl::AlgoChanwise::is_available(
         const SizeArgs& args) const {
+    if (!args.grad_layout->is_contiguous() ||
+        !args.diff_layout->is_contiguous()) {
+        return false;
+    }
     auto&& fm = args.filter_meta;
     return args.filter_meta.format == Param::Format::NCDHW &&
            args.diff_layout->dtype.category() == DTypeCategory::FLOAT &&
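
These is_available() guards are what makes the relaxation safe: an algorithm that assumes dense layouts (chanwise here) now declares itself unavailable for strided operands, and dispatch moves on to one that copes, such as the cuDNN-backed path the new tests pin. A hedged sketch of that selection loop, not MegDNN's actual dispatcher:

    #include <cstdio>
    #include <vector>

    struct Args { bool grad_contig, diff_contig; };

    struct Algo {
        const char* name;
        bool (*is_available)(const Args&);
    };

    // Mirrors the added guard: chanwise bails out on noncontiguous operands.
    bool chanwise_available(const Args& a) {
        if (!a.grad_contig || !a.diff_contig)
            return false;
        return true;
    }

    // Assumed for the sketch: the cuDNN-backed algorithm handles strides.
    bool cudnn_available(const Args&) { return true; }

    int main() {
        std::vector<Algo> algos = {{"CHANWISE", chanwise_available},
                                   {"CUDNN", cudnn_available}};
        Args args{/*grad_contig=*/false, /*diff_contig=*/true};
        for (const Algo& a : algos) {
            if (a.is_available(args)) {
                printf("picked %s\n", a.name);  // picked CUDNN
                break;
            }
        }
    }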

dnn/src/cuda/convolution3d/backward_data/group_conv.cpp

@@ -38,6 +38,8 @@ Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral(
 bool Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::is_available(
         const SizeArgs& args) const {
+    if (args.filter_meta.group <= 1)
+        return false;
     auto sub_args = args;
     TensorLayout diff_pg, grad_pg;
     modify_size_args(sub_args, diff_pg, grad_pg);

dnn/src/cuda/convolution3d/backward_filter/algo.cpp

@@ -67,7 +67,7 @@ Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs(
 Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs(
         Convolution3DBackwardFilterImpl* o, const TensorLayout& src,
         const TensorLayout& diff, const TensorLayout& grad)
-        : SizeArgs(o, src, diff, o->check_layout_fwd(src, grad, diff)) {}
+        : SizeArgs(o, src, diff, o->make_canonized_filter_meta(src.ndim, grad)) {}

dnn/src/cuda/convolution3d/backward_filter/chanwise.cpp

@@ -19,6 +19,10 @@ using namespace convolution3d;
 bool Convolution3DBackwardFilterImpl::AlgoChanwise::is_available(
         const SizeArgs& args) const {
+    if (!args.src_layout->is_contiguous() ||
+        !args.diff_layout->is_contiguous()) {
+        return false;
+    }
     auto&& fm = args.grad_filter_meta;
     return fm.format == Param::Format::NCDHW &&
            args.diff_layout->dtype.category() == DTypeCategory::FLOAT &&

dnn/src/cuda/convolution3d/backward_filter/group_conv.cpp

@@ -38,6 +38,8 @@ Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral(
 bool Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::is_available(
         const SizeArgs& args) const {
+    if (args.grad_filter_meta.group <= 1)
+        return false;
     auto sub_args = args;
     TensorLayout src_pg, diff_pg;
     modify_size_args(sub_args, src_pg, diff_pg);

dnn/src/cuda/convolution3d/backward_filter/inplace_matmul.cpp

@@ -17,6 +17,10 @@ using namespace cuda;
 bool Convolution3DBackwardFilterImpl::AlgoInplaceMatmul::is_available(
         const SizeArgs& args) const {
+    if (!args.src_layout->is_contiguous() ||
+        !args.diff_layout->is_contiguous()) {
+        return false;
+    }
     auto&& fm = args.grad_filter_meta;
     return args.grad_filter_meta.format == Param::Format::NCDHW &&
            args.src_layout->dtype == dtype::Float32() &&

dnn/src/cuda/convolution3d/forward/algo.cpp

@@ -69,7 +69,7 @@ Convolution3DForwardImpl::AlgoBase::SizeArgs::SizeArgs(
 Convolution3DForwardImpl::AlgoBase::SizeArgs::SizeArgs(
         Convolution3DForwardImpl* o, const TensorLayout& src,
         const TensorLayout& filter, const TensorLayout& dst)
-        : SizeArgs(o, src, o->check_layout_fwd(src, filter, dst), dst) {}
+        : SizeArgs(o, src, o->make_canonized_filter_meta(src.ndim, filter), dst) {}

dnn/src/cuda/convolution3d/forward/chanwise.cpp

@@ -19,6 +19,10 @@ using namespace convolution3d;
 bool Convolution3DForwardImpl::AlgoChanwise::is_available(
         const SizeArgs& args) const {
+    if (!args.src_layout->is_contiguous() ||
+        !args.dst_layout->is_contiguous()) {
+        return false;
+    }
     auto&& fm = args.filter_meta;
     return args.filter_meta.format == Param::Format::NCDHW &&
            args.src_layout->dtype.category() == DTypeCategory::FLOAT &&

dnn/src/cuda/convolution3d/forward/group_conv.cpp

@@ -45,6 +45,8 @@ Convolution3DForwardImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral(
 bool Convolution3DForwardImpl::AlgoGroupConvGeneral::is_available(
         const SizeArgs& args) const {
+    if (args.filter_meta.group <= 1)
+        return false;
     auto sub_args = args;
     TensorLayout src_pg, dst_pg;
     modify_size_args(sub_args, src_pg, dst_pg);

dnn/src/naive/convolution3d/helper.h

@@ -215,7 +215,6 @@ void backward_data(_megdnn_tensor_in filter,
                    _megdnn_tensor_in diff,
                    _megdnn_tensor_out grad,
                    const Convolution3D::CanonizedFilterMeta& filter_meta) {
-    megdnn_assert(grad.layout.is_contiguous());
     memset(grad.raw_ptr, 0, grad.layout.span().dist_byte());
     megdnn_assert(filter_meta.spatial_ndim == 3);
     compute3d<gtype, ftype, dtype, StrategyBwdData>(

@@ -227,7 +226,6 @@ void backward_filter(_megdnn_tensor_in src,
                      _megdnn_tensor_in diff,
                      _megdnn_tensor_out grad,
                      const Convolution3D::CanonizedFilterMeta& filter_meta) {
-    megdnn_assert(grad.layout.is_contiguous());
     memset(grad.raw_ptr, 0, grad.layout.span().dist_byte());
     megdnn_assert(filter_meta.spatial_ndim == 3);
     compute3d<stype, gtype, dtype, StrategyBwdFlt>(
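
Dropping megdnn_assert(grad.layout.is_contiguous()) from the naive reference kernels is consistent with element access going through the layout's strides rather than assuming dense storage. A self-contained sketch of stride-based addressing for a 5-d NCDHW tensor (an illustration, not MegDNN's compute3d):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    struct Layout5 {
        size_t shape[5];
        long stride[5];  // in elements
    };

    // Address element (n, c, d, h, w) through explicit strides; works for
    // both contiguous and strided views.
    template <typename T>
    T& at(T* base, const Layout5& ly, size_t n, size_t c, size_t d, size_t h,
          size_t w) {
        long off = (long)n * ly.stride[0] + (long)c * ly.stride[1] +
                   (long)d * ly.stride[2] + (long)h * ly.stride[3] +
                   (long)w * ly.stride[4];
        return base[off];
    }

    int main() {
        // A {2, 1, 2, 2, 2} view with doubled batch stride inside a 32-float
        // buffer; contiguous strides would be {8, 8, 4, 2, 1}.
        std::vector<float> buf(32, 1.f);
        Layout5 ly{{2, 1, 2, 2, 2}, {16, 8, 4, 2, 1}};
        for (size_t n = 0; n < 2; ++n)
            for (size_t d = 0; d < 2; ++d)
                for (size_t h = 0; h < 2; ++h)
                    for (size_t w = 0; w < 2; ++w)
                        at(buf.data(), ly, n, 0, d, h, w) = 0.f;
        // Only the 16 view elements were zeroed; gap elements keep their value.
        printf("buf[0]=%g buf[8]=%g buf[16]=%g\n", buf[0], buf[8], buf[16]);
    }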

dnn/test/cuda/convolution.cpp

@@ -384,16 +384,6 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_INT8_NCHW_DP4A) {
         }
         checker.set_rng(0, &rng).set_rng(1, &rng).set_param(arg.param).exec(
                 TensorLayoutArray{filter, dst, src});
     }
-    //! noncontiguous case
-    {
-        param::Convolution param;
-        param.pad_h = param.pad_w = 1;
-        checker.set_param(param).execl(TensorLayoutArray{
-                {{16, 16, 3, 3}, {144, 9, 3, 1}, dtype::QuantizedS8{1.3f}},
-                {{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::QuantizedS8{1.2f}},
-                {{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::QuantizedS8{1.2f}}});
-    }
 }

dnn/test/cuda/convolution3d.cpp

@@ -150,6 +150,77 @@ TEST_F(CUDA, CONVOLUTION3D_MATMUL_FORWARD) {
     }
 }
+TEST_F(CUDA, CONVOLUTION3D_FORWARD_NONCONTIG_CUDNN) {
+    using namespace convolution3d;
+    Checker<Convolution3DForward> checker(handle_cuda());
+    checker.set_before_exec_callback(
+            AlgoChecker<Convolution3DForward>("CUDNN"));
+    param::Convolution3D param;
+    param.pad_d = param.pad_h = param.pad_w = 1;
+    checker.set_dtype(0, dtype::Float32())
+            .set_dtype(1, dtype::Float32())
+            .set_epsilon(1e-3);
+    //! noncontiguous case
+    {
+        checker.set_param(param).execl(TensorLayoutArray{
+                {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()},
+                {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()},
+                {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()}});
+    }
+}
+TEST_F(CUDA, CONVOLUTION3D_FORWARD_NONCONTIG_INPLACE_MATMUL) {
+    using namespace convolution3d;
+    Checker<Convolution3DForward> checker(handle_cuda());
+    checker.set_before_exec_callback(
+            AlgoChecker<Convolution3DForward>("INPLACE_MATMUL"));
+    param::Convolution3D param;
+    param.pad_d = param.pad_h = param.pad_w = 1;
+    checker.set_dtype(0, dtype::Float32())
+            .set_dtype(1, dtype::Float32())
+            .set_epsilon(1e-3);
+    //! noncontiguous case
+    {
+        checker.set_param(param).execl(TensorLayoutArray{
+                {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()},
+                {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()},
+                {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()}});
+    }
+}
+TEST_F(CUDA, CONVOLUTION3D_FORWARD_NONCONTIG_1x1x1) {
+    using namespace convolution3d;
+    Checker<Convolution3DForward> checker(handle_cuda());
+    checker.set_before_exec_callback(
+            AlgoChecker<Convolution3DForward>("1x1x1"));
+    param::Convolution3D param;
+    checker.set_dtype(0, dtype::Float32())
+            .set_dtype(1, dtype::Float32())
+            .set_epsilon(1e-3);
+    //! noncontiguous case
+    {
+        checker.set_param(param).execl(TensorLayoutArray{
+                {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()},
+                {{5, 5, 1, 1, 1}, {5, 1, 1, 1, 1}, dtype::Float32()},
+                {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()}});
+    }
+}
 #if MEGDNN_WITH_BENCHMARK
 TEST_F(CUDA, BENCHMARK_CONVOLUTION3D_MATMUL_BACKWARD_FILTER) {
     using namespace convolution3d;

@@ -343,6 +414,60 @@ TEST_F(CUDA, CONVOLUTION3D_MATMUL_BACKWARD_FILTER) {
     }
 }
+TEST_F(CUDA, CONVOLUTION3D_BACKWARD_DATA_NONCONTIG_CUDNN) {
+    using namespace convolution3d;
+    Checker<Convolution3DBackwardData> checker(handle_cuda());
+    checker.set_before_exec_callback(
+            AlgoChecker<Convolution3DBackwardData>("CUDNN"));
+    Convolution3DBackwardData::Param param;
+    param.pad_d = param.pad_h = param.pad_w = 1;
+    NormalRNG default_rng;
+    checker.set_dtype(0, dtype::Float32())
+            .set_dtype(1, dtype::Float32())
+            .set_rng(0, &default_rng)
+            .set_rng(1, &default_rng)
+            .set_epsilon(1e-3)
+            .set_param(param);
+    //! noncontiguous case
+    {
+        checker.execl(TensorLayoutArray{
+                {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()},
+                {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()},
+                {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()}});
+    }
+}
+TEST_F(CUDA, CONVOLUTION3D_BACKWARD_FILTER_NONCONTIG_CUDNN) {
+    using namespace convolution3d;
+    Checker<Convolution3DBackwardFilter> checker(handle_cuda());
+    checker.set_before_exec_callback(
+            AlgoChecker<Convolution3DBackwardFilter>("CUDNN"));
+    Convolution3DBackwardFilter::Param param;
+    param.pad_d = param.pad_h = param.pad_w = 1;
+    NormalRNG default_rng;
+    checker.set_dtype(0, dtype::Float32())
+            .set_dtype(1, dtype::Float32())
+            .set_rng(0, &default_rng)
+            .set_rng(1, &default_rng)
+            .set_epsilon(1e-3)
+            .set_param(param);
+    //! noncontiguous case
+    {
+        checker.execl(TensorLayoutArray{
+                {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()},
+                {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()},
+                {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()}});
+    }
+}
 /*
 TEST_F(CUDA, CONV_CONFIG_COMBINATIONS) {
     auto eps_getter = [](bool f16, int stage, const char *name) -> float {
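
For reference, the stride arithmetic behind the noncontiguous layouts used throughout these tests: a contiguous {4, 5, 16, 16, 16} tensor has strides {20480, 4096, 256, 16, 1}, and the tests double the outermost stride to 40960, which describes a view over every other batch of a twice-as-large allocation. A quick check of that arithmetic:

    #include <cassert>
    #include <cstdio>
    #include <vector>

    // Dense row-major strides for a shape.
    std::vector<long> contiguous_strides(const std::vector<long>& shape) {
        std::vector<long> s(shape.size());
        long acc = 1;
        for (size_t i = shape.size(); i-- > 0;) {
            s[i] = acc;
            acc *= shape[i];
        }
        return s;
    }

    int main() {
        std::vector<long> s = contiguous_strides({4, 5, 16, 16, 16});
        assert((s == std::vector<long>{20480, 4096, 256, 16, 1}));
        // The test layouts use {40960, 4096, 256, 16, 1}: only the batch
        // stride differs (40960 == 2 * 20480), i.e. every other batch of a
        // larger buffer.
        printf("contiguous batch stride %ld, test batch stride %ld\n",
               s[0], 2 * s[0]);
    }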