Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
2c4ff543
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
404
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
2c4ff543
编写于
7月 27, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix(mgb): fix cudnn ConvolutionBackwardData
GitOrigin-RevId: 1fffc06eaa6fe66435715ec5a93c86dd37de985e
上级
7138e4fd
变更
2
显示空白变更内容
内联
并排
Showing
2 changed files
with
103 additions
and
15 deletions
+103
-15
dnn/src/cuda/convolution/opr_impl.cpp
dnn/src/cuda/convolution/opr_impl.cpp
+19
-15
src/opr/test/dnn/convolution.cpp
src/opr/test/dnn/convolution.cpp
+84
-0
未找到文件。
dnn/src/cuda/convolution/opr_impl.cpp
浏览文件 @
2c4ff543
...
...
@@ -142,14 +142,16 @@ ConvolutionBackwardDataImpl::get_algorithm_heuristic(
for
(
int
i
=
0
;
i
<
ret_count
;
++
i
)
{
if
(
algo_perf
[
i
].
memory
>
workspace_limit_in_bytes
)
continue
;
if
((
positive_attr
&
AlgoAttribute
::
REPRODUCIBLE
))
{
if
(
algo_perf
[
i
].
determinism
==
CUDNN_DETERMINISTIC
)
{
return
reinterpret_cast
<
AlgoBase
*>
(
sm_algo_pack
.
cudnn_from_enum
(
algo_perf
[
i
].
algo
));
if
((
positive_attr
&
AlgoAttribute
::
REPRODUCIBLE
)
&&
(
algo_perf
[
i
].
determinism
!=
CUDNN_DETERMINISTIC
))
{
continue
;
}
}
else
{
return
reinterpret_cast
<
AlgoBase
*>
(
AlgoBase
*
conv_bd_data_algo
=
reinterpret_cast
<
AlgoBase
*>
(
sm_algo_pack
.
cudnn_from_enum
(
algo_perf
[
i
].
algo
));
if
(
conv_bd_data_algo
->
is_available_attribute
(
args
,
positive_attr
,
negative_attr
,
workspace_limit_in_bytes
))
{
return
conv_bd_data_algo
;
}
}
return
nullptr
;
...
...
@@ -269,14 +271,16 @@ ConvolutionBackwardFilterImpl::get_algorithm_heuristic(
for
(
int
i
=
0
;
i
<
ret_count
;
++
i
)
{
if
(
algo_perf
[
i
].
memory
>
workspace_limit_in_bytes
)
continue
;
if
((
positive_attr
&
AlgoAttribute
::
REPRODUCIBLE
))
{
if
(
algo_perf
[
i
].
determinism
==
CUDNN_DETERMINISTIC
)
{
return
reinterpret_cast
<
AlgoBase
*>
(
sm_algo_pack
.
cudnn_from_enum
(
algo_perf
[
i
].
algo
));
if
((
positive_attr
&
AlgoAttribute
::
REPRODUCIBLE
)
&&
(
algo_perf
[
i
].
determinism
!=
CUDNN_DETERMINISTIC
))
{
continue
;
}
}
else
{
return
reinterpret_cast
<
AlgoBase
*>
(
AlgoBase
*
conv_bd_filter_algo
=
reinterpret_cast
<
AlgoBase
*>
(
sm_algo_pack
.
cudnn_from_enum
(
algo_perf
[
i
].
algo
));
if
(
conv_bd_filter_algo
->
is_available_attribute
(
args
,
positive_attr
,
negative_attr
,
workspace_limit_in_bytes
))
{
return
conv_bd_filter_algo
;
}
}
return
nullptr
;
...
...
src/opr/test/dnn/convolution.cpp
浏览文件 @
2c4ff543
...
...
@@ -582,6 +582,90 @@ TEST(TestOprDNN, ConvolutionBackwardDataBfloat16ExePolicy) {
}
}
#if MGB_ENABLE_FASTRUN
// Runs ConvolutionBackwardData::make_deconv on float16 inputs (compute mode
// FLOAT32) with a PROFILE | REPRODUCIBLE execution policy, and compares the
// result with conv_bwd_data_brute evaluated on float32 copies of the inputs.
// Also checks how often the callback given to PersistentCacheHook was invoked.
TEST(TestOprDNN, ConvolutionBackwardDataFloat16ExePolicy) {
    REQUIRE_GPU(1);

    using Policy = opr::Convolution::ExecutionPolicy;
    using S = Policy::Strategy;
    using Checker = AutoOprChecker<2, 1>;
    using HostTensorPtr = std::shared_ptr<HostTensorND>;

    Param param{Mode::CROSS_CORRELATION, 1, 1, 1, 1};
    param.compute_mode = Param::ComputeMode::FLOAT32;

    // Input generator: overwrites every element of the tensor with a float
    // drawn from uniform_real_distribution(-1, 1), converted to dt_float16.
    auto fill_uniform_fp16 = [](HostTensorND& tensor) {
        RNGxorshf rng{next_rand_seed()};
        auto data = tensor.ptr<dt_float16>();
        const size_t count = tensor.shape().total_nr_elems();
        for (size_t idx = 0; idx < count; ++idx) {
            std::uniform_real_distribution<float> unit_range(-1, 1);
            data[idx] = dt_float16(unit_range(rng));
        }
    };

    // Element-wise copy of a float32 host tensor into a new float16 tensor
    // created with the same comp node and shape.
    auto to_half = [](const HostTensorPtr& src) -> HostTensorPtr {
        auto converted = std::make_shared<HostTensorND>(
                src->comp_node(), src->shape(), dtype::Float16{});
        const size_t count = src->layout().total_nr_elems();
        for (size_t idx = 0; idx < count; ++idx) {
            const dt_float32 value = src->ptr<dt_float32>()[idx];
            converted->ptr<dt_float16>()[idx] = value;
        }
        return converted;
    };

    // Element-wise copy of a float16 host tensor into a new float32 tensor
    // created with the same comp node and shape.
    auto to_float = [](const HostTensorPtr& src) -> HostTensorPtr {
        auto converted = std::make_shared<HostTensorND>(
                src->comp_node(), src->shape(), dtype::Float32{});
        const size_t count = src->layout().total_nr_elems();
        for (size_t idx = 0; idx < count; ++idx) {
            const dt_float16 value = src->ptr<dt_float16>()[idx];
            converted->ptr<dt_float32>()[idx] = value;
        }
        return converted;
    };

    // Counts invocations of the callback handed to PersistentCacheHook.
    int hook_calls = 0;
    auto count_hook_call = [&hook_calls](
                                   const std::string&, const void*, size_t,
                                   const void*, size_t) { hook_calls += 1; };
    PersistentCacheHook cache_hook{count_hook_call};

    const auto strategy = S(S::PROFILE | S::REPRODUCIBLE);

    // Graph under test: a single deconv operator using the policy above.
    auto build_graph =
            [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        Policy policy;
        policy.strategy = strategy;
        auto deconv = opr::ConvolutionBackwardData::make_deconv(
                inputs[0], inputs[1], param, policy);
        return {deconv};
    };

    // Reference: widen both inputs to float32, run conv_bwd_data_brute, then
    // narrow its output back to float16.
    auto reference = [&](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
        const HostTensorPtr first = to_float(inp[0]);
        const HostTensorPtr second = to_float(inp[1]);
        HostTensorPtr out;
        conv_bwd_data_brute({first, second}, out, param);
        const HostTensorPtr half_out = to_half(out);
        dest[0] = *half_out;
    };

    Checker::RunOptions opt;
    opt.outputs_max_err = 1e-2;

    hook_calls = 0;
    Checker(build_graph, reference)
            .disable_grad_check()
            .set_input_dtype(0, dtype::Float16{})
            .set_input_dtype(1, dtype::Float16{})
            .set_input_generator(0, fill_uniform_fp16)
            .set_input_generator(1, fill_uniform_fp16)
            .run({TensorShape{3, 4, 10, 6}, {4, 2, 3, 3}}, opt)
            .run({TensorShape{2, 2, 4, 3}, {2, 2, 3, 3}}, opt)
            .run({TensorShape{1, 3, 10, 6}, {3, 2, 3, 3}}, opt);

    // NOTE(review): strategy is fixed to PROFILE | REPRODUCIBLE above, so
    // presumably only the else branch is ever taken -- confirm before pruning.
    if (strategy == S::HEURISTIC) {
        ASSERT_EQ(0, hook_calls);
    } else {
        ASSERT_LT(0, hook_calls);
    }
}
#endif
TEST
(
TestOprDNN
,
Deconvolution
)
{
// dilated grouped deconv
using
Checker
=
AutoOprChecker
<
2
,
1
>
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录