Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
863f9e55
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
863f9e55
编写于
6月 01, 2020
作者:
W
wangchaochaohu
提交者:
GitHub
6月 01, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix conv_transpose Op fp16 error test=develop (#24695) (#24784)
上级
627d5567
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
158 addition
and
29 deletion
+158
-29
paddle/fluid/operators/conv_cudnn_helper.h
paddle/fluid/operators/conv_cudnn_helper.h
+3
-1
paddle/fluid/operators/conv_transpose_cudnn_op.cu
paddle/fluid/operators/conv_transpose_cudnn_op.cu
+4
-2
python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py
.../paddle/fluid/tests/unittests/test_conv2d_transpose_op.py
+149
-25
python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py
...luid/tests/unittests/white_list/op_accuracy_white_list.py
+2
-1
未找到文件。
paddle/fluid/operators/conv_cudnn_helper.h
浏览文件 @
863f9e55
...
@@ -148,7 +148,7 @@ struct SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t> {
...
@@ -148,7 +148,7 @@ struct SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t> {
}
}
#endif
#endif
if
(
!
exhaustive
)
{
if
(
!
exhaustive
&&
!
deterministic
)
{
#if CUDNN_VERSION >= 7001
#if CUDNN_VERSION >= 7001
int
perf_count
;
int
perf_count
;
int
best_algo_idx
=
0
;
int
best_algo_idx
=
0
;
...
@@ -185,6 +185,8 @@ struct SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t> {
...
@@ -185,6 +185,8 @@ struct SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t> {
workspace_size_limit
,
&
algo
));
workspace_size_limit
,
&
algo
));
#endif
#endif
VLOG
(
3
)
<<
"choose algo "
<<
algo
;
VLOG
(
3
)
<<
"choose algo "
<<
algo
;
}
else
if
(
deterministic
)
{
algo
=
static_cast
<
cudnnConvolutionFwdAlgo_t
>
(
1
);
}
else
{
}
else
{
auto
&
dev_ctx
=
auto
&
dev_ctx
=
ctx
.
template
device_context
<
platform
::
CUDADeviceContext
>();
ctx
.
template
device_context
<
platform
::
CUDADeviceContext
>();
...
...
paddle/fluid/operators/conv_transpose_cudnn_op.cu
浏览文件 @
863f9e55
...
@@ -245,7 +245,8 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
...
@@ -245,7 +245,8 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
int
output_offset
=
int
output_offset
=
transformed_output
.
numel
()
/
transformed_output
.
dims
()[
0
]
/
groups
;
transformed_output
.
numel
()
/
transformed_output
.
dims
()[
0
]
/
groups
;
int
filter_offset
=
filter
->
numel
()
/
groups
;
int
filter_offset
=
filter
->
numel
()
/
groups
;
T
alpha
=
static_cast
<
T
>
(
1.0
),
beta
=
static_cast
<
T
>
(
0.0
);
ScalingParamType
<
T
>
alpha
=
1.0
f
;
ScalingParamType
<
T
>
beta
=
0.0
f
;
auto
workspace_handle
=
dev_ctx
.
cudnn_workspace_handle
();
auto
workspace_handle
=
dev_ctx
.
cudnn_workspace_handle
();
for
(
int
g
=
0
;
g
<
groups
;
g
++
)
{
for
(
int
g
=
0
;
g
<
groups
;
g
++
)
{
auto
cudnn_func
=
[
&
](
void
*
cudnn_workspace
)
{
auto
cudnn_func
=
[
&
](
void
*
cudnn_workspace
)
{
...
@@ -493,7 +494,8 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
...
@@ -493,7 +494,8 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
int
output_grad_offset
=
transformed_output_grad
.
numel
()
/
int
output_grad_offset
=
transformed_output_grad
.
numel
()
/
transformed_output_grad
.
dims
()[
0
]
/
groups
;
transformed_output_grad
.
dims
()[
0
]
/
groups
;
int
filter_offset
=
filter
->
numel
()
/
groups
;
int
filter_offset
=
filter
->
numel
()
/
groups
;
T
alpha
=
static_cast
<
T
>
(
1.0
),
beta
=
static_cast
<
T
>
(
0.0
);
ScalingParamType
<
T
>
alpha
=
1.0
f
;
ScalingParamType
<
T
>
beta
=
0.0
f
;
auto
workspace_handle
=
dev_ctx
.
cudnn_workspace_handle
();
auto
workspace_handle
=
dev_ctx
.
cudnn_workspace_handle
();
if
(
input_grad
)
{
if
(
input_grad
)
{
// Because beta is zero, it is unnecessary to reset input_grad.
// Because beta is zero, it is unnecessary to reset input_grad.
...
...
python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py
浏览文件 @
863f9e55
...
@@ -109,6 +109,7 @@ class TestConv2dTransposeOp(OpTest):
...
@@ -109,6 +109,7 @@ class TestConv2dTransposeOp(OpTest):
def
setUp
(
self
):
def
setUp
(
self
):
# init as conv transpose
# init as conv transpose
self
.
dtype
=
np
.
float64
self
.
dtype
=
np
.
float64
self
.
need_check_grad
=
True
self
.
is_test
=
False
self
.
is_test
=
False
self
.
use_cudnn
=
False
self
.
use_cudnn
=
False
self
.
use_mkldnn
=
False
self
.
use_mkldnn
=
False
...
@@ -152,6 +153,7 @@ class TestConv2dTransposeOp(OpTest):
...
@@ -152,6 +153,7 @@ class TestConv2dTransposeOp(OpTest):
self
.
check_output
(
check_dygraph
=
(
self
.
use_mkldnn
==
False
))
self
.
check_output
(
check_dygraph
=
(
self
.
use_mkldnn
==
False
))
def
test_check_grad_no_input
(
self
):
def
test_check_grad_no_input
(
self
):
if
self
.
need_check_grad
:
if
self
.
use_cudnn
:
if
self
.
use_cudnn
:
place
=
core
.
CUDAPlace
(
0
)
place
=
core
.
CUDAPlace
(
0
)
self
.
check_grad_with_place
(
self
.
check_grad_with_place
(
...
@@ -160,17 +162,21 @@ class TestConv2dTransposeOp(OpTest):
...
@@ -160,17 +162,21 @@ class TestConv2dTransposeOp(OpTest):
max_relative_error
=
0.02
,
max_relative_error
=
0.02
,
no_grad_set
=
set
([
'Input'
]))
no_grad_set
=
set
([
'Input'
]))
else
:
else
:
self
.
check_grad
([
'Filter'
],
'Output'
,
no_grad_set
=
set
([
'Input'
]))
self
.
check_grad
(
[
'Filter'
],
'Output'
,
no_grad_set
=
set
([
'Input'
]))
def
test_check_grad_no_filter
(
self
):
def
test_check_grad_no_filter
(
self
):
if
self
.
need_check_grad
:
if
self
.
use_cudnn
:
if
self
.
use_cudnn
:
place
=
core
.
CUDAPlace
(
0
)
place
=
core
.
CUDAPlace
(
0
)
self
.
check_grad_with_place
(
self
.
check_grad_with_place
(
place
,
[
'Input'
],
'Output'
,
no_grad_set
=
set
([
'Filter'
]))
place
,
[
'Input'
],
'Output'
,
no_grad_set
=
set
([
'Filter'
]))
else
:
else
:
self
.
check_grad
([
'Input'
],
'Output'
,
no_grad_set
=
set
([
'Filter'
]))
self
.
check_grad
(
[
'Input'
],
'Output'
,
no_grad_set
=
set
([
'Filter'
]))
def
test_check_grad
(
self
):
def
test_check_grad
(
self
):
if
self
.
need_check_grad
:
if
self
.
use_cudnn
:
if
self
.
use_cudnn
:
place
=
core
.
CUDAPlace
(
0
)
place
=
core
.
CUDAPlace
(
0
)
self
.
check_grad_with_place
(
self
.
check_grad_with_place
(
...
@@ -708,6 +714,124 @@ class TestDepthwiseConvTransposeAsymmetricPad_NHWC(TestConv2dTransposeOp):
...
@@ -708,6 +714,124 @@ class TestDepthwiseConvTransposeAsymmetricPad_NHWC(TestConv2dTransposeOp):
self
.
data_format
=
'NHWC'
self
.
data_format
=
'NHWC'
@
unittest
.
skipIf
(
not
core
.
is_compiled_with_cuda
(),
"core is not compiled with CUDA"
)
class
TestCUDNN_FP16
(
TestConv2dTransposeOp
):
def
init_test_case
(
self
):
self
.
dtype
=
np
.
float16
self
.
pad
=
[
1
,
1
]
self
.
stride
=
[
1
,
1
]
self
.
groups
=
1
self
.
dilations
=
[
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
]
# NCHW
f_c
=
self
.
input_size
[
1
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
]
def
init_op_type
(
self
):
self
.
need_check_grad
=
False
self
.
use_cudnn
=
True
self
.
op_type
=
"conv2d_transpose"
def
test_check_output
(
self
):
if
self
.
use_cudnn
:
place
=
core
.
CUDAPlace
(
0
)
self
.
check_output_with_place
(
place
,
atol
=
0.02
,
check_dygraph
=
(
self
.
use_mkldnn
==
False
))
else
:
self
.
check_output
(
check_dygraph
=
(
self
.
use_mkldnn
==
False
))
@
unittest
.
skipIf
(
not
core
.
is_compiled_with_cuda
(),
"core is not compiled with CUDA"
)
class
TestCUDNN_NHWC_FP16
(
TestCUDNN_FP16
):
def
init_test_case
(
self
):
self
.
dtype
=
np
.
float16
self
.
pad
=
[
0
,
0
]
self
.
stride
=
[
1
,
1
]
self
.
dilations
=
[
1
,
1
]
self
.
groups
=
1
self
.
input_size
=
[
2
,
5
,
5
,
3
]
# NHWC
f_c
=
self
.
input_size
[
-
1
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
]
self
.
data_format
=
'NHWC'
@
unittest
.
skipIf
(
not
core
.
is_compiled_with_cuda
(),
"core is not compiled with CUDA"
)
class
TestCUDNNWithSymmetricPad_NHWC_FP16
(
TestCUDNN_FP16
):
def
init_test_case
(
self
):
self
.
dtype
=
np
.
float16
self
.
pad
=
[
1
,
1
]
self
.
stride
=
[
1
,
1
]
self
.
groups
=
1
self
.
dilations
=
[
1
,
1
]
self
.
input_size
=
[
2
,
5
,
5
,
3
]
# NHWC
f_c
=
self
.
input_size
[
-
1
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
]
self
.
data_format
=
'NHWC'
@
unittest
.
skipIf
(
not
core
.
is_compiled_with_cuda
(),
"core is not compiled with CUDA"
)
class
TestCUDNNWithAsymmetricPad_NHWC_FP16
(
TestCUDNN_FP16
):
def
init_test_case
(
self
):
self
.
dtype
=
np
.
float16
self
.
pad
=
[
1
,
0
,
2
,
3
]
self
.
stride
=
[
2
,
2
]
self
.
groups
=
1
self
.
dilations
=
[
1
,
1
]
self
.
input_size
=
[
2
,
5
,
5
,
3
]
# NHWC
f_c
=
self
.
input_size
[
-
1
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
]
self
.
data_format
=
'NHWC'
@
unittest
.
skipIf
(
not
core
.
is_compiled_with_cuda
(),
"core is not compiled with CUDA"
)
class
TestCUDNNWithStride_NHWC_FP16
(
TestCUDNN_FP16
):
def
init_test_case
(
self
):
self
.
dtype
=
np
.
float16
self
.
pad
=
[
1
,
1
]
self
.
stride
=
[
2
,
2
]
self
.
groups
=
1
self
.
dilations
=
[
1
,
1
]
self
.
input_size
=
[
2
,
5
,
5
,
3
]
# NHWC
f_c
=
self
.
input_size
[
-
1
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
]
self
.
data_format
=
'NHWC'
@
unittest
.
skipIf
(
not
core
.
is_compiled_with_cuda
(),
"core is not compiled with CUDA"
)
class
TestCUDNNWithGroups_NHWC_FP16
(
TestCUDNN_FP16
):
def
init_test_case
(
self
):
self
.
dtype
=
np
.
float16
self
.
pad
=
[
1
,
1
]
self
.
stride
=
[
1
,
1
]
self
.
dilations
=
[
1
,
1
]
self
.
groups
=
2
self
.
input_size
=
[
2
,
5
,
5
,
4
]
# NCHW
f_c
=
self
.
input_size
[
-
1
]
self
.
filter_size
=
[
f_c
,
3
,
3
,
3
]
self
.
data_format
=
'NHWC'
@
unittest
.
skipIf
(
not
core
.
is_compiled_with_cuda
(),
"core is not compiled with CUDA"
)
class
TestCUDNNWithEvenUpsample_NHWC_FP16
(
TestCUDNN_FP16
):
def
init_test_case
(
self
):
self
.
dtype
=
np
.
float16
self
.
pad
=
[
2
,
2
]
self
.
stride
=
[
2
,
2
]
self
.
groups
=
1
self
.
dilations
=
[
1
,
1
]
self
.
output_size
=
[
14
,
14
]
self
.
input_size
=
[
2
,
7
,
7
,
3
]
# NHWC
f_c
=
self
.
input_size
[
-
1
]
self
.
filter_size
=
[
f_c
,
6
,
5
,
5
]
self
.
data_format
=
'NHWC'
class
TestConv2dTransposeAPI
(
unittest
.
TestCase
):
class
TestConv2dTransposeAPI
(
unittest
.
TestCase
):
def
test_case1
(
self
):
def
test_case1
(
self
):
data1
=
fluid
.
layers
.
data
(
data1
=
fluid
.
layers
.
data
(
...
...
python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py
浏览文件 @
863f9e55
...
@@ -80,5 +80,6 @@ NO_FP16_CHECK_GRAD_OP_LIST = [
...
@@ -80,5 +80,6 @@ NO_FP16_CHECK_GRAD_OP_LIST = [
'fused_elemwise_activation'
,
\
'fused_elemwise_activation'
,
\
'pool2d'
,
\
'pool2d'
,
\
'pool3d'
,
\
'pool3d'
,
\
'softmax'
'softmax'
,
\
'conv2d_transpose'
]
]
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录