Commit e5af650b (unverified)
Authored Dec 21, 2020 by LielinJiang; committed by GitHub on Dec 21, 2020
Parent: 224f3bcb

Add double grad for conv_transpose (#29706)

* add double grad for conv_transpose
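The commit message is terse, so here is a minimal sketch of the use case it enables: differentiating through the gradient of a transposed convolution, as in gradient-penalty losses. This is illustrative only and assumes the Paddle 2.0 dynamic-graph API (`paddle.grad`, `paddle.nn.functional.conv2d_transpose`); it is not part of the commit.

```python
# Illustrative sketch (not part of the commit). Assumes Paddle 2.0
# dynamic-graph APIs.
import paddle
import paddle.nn.functional as F

x = paddle.randn([2, 4, 3, 3])  # NCHW input
w = paddle.randn([4, 2, 1, 1])  # [in_channels, out_channels, kH, kW]
x.stop_gradient = False
w.stop_gradient = False

y = F.conv2d_transpose(x, w)
# First-order gradient, kept differentiable via create_graph=True.
(dx,) = paddle.grad(y, x, create_graph=True)
loss = (dx * dx).sum()
loss.backward()  # second-order pass; exercises conv2d_transpose_grad_grad
```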
Showing 6 changed files with 743 additions and 1 deletion (+743 −1):

- paddle/fluid/operators/conv_transpose_cudnn_op.cu (+486 −0)
- paddle/fluid/operators/conv_transpose_op.cc (+84 −1)
- paddle/fluid/operators/conv_transpose_op.cu (+3 −0)
- paddle/fluid/operators/conv_transpose_op.h (+10 −0)
- python/paddle/fluid/tests/unittests/test_conv_transpose_nn_grad.py (+159 −0)
- tools/static_mode_white_list.py (+1 −0)
paddle/fluid/operators/conv_transpose_cudnn_op.cu
@@ -551,6 +551,487 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
```cpp
  }
};

/*
 * Inputs:  I, W, dO, ddI, ddW
 * Outputs: ddO, dW, dI
 * ddo = conv_bp_data(W, ddI) + conv_bp_data(ddW, I)
 * dW = conv_bp_filter(dO, ddI)
 * dI = conv(dO, ddW)
 */
template <typename T>
class CUDNNConvTransposeDoubleGradOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
    PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true,
                      paddle::platform::errors::PreconditionNotMet(
                          "It must use CUDAPlace."));
    auto X = ctx.Input<Tensor>("Input");
    auto W = ctx.Input<Tensor>("Filter");
    auto dO = ctx.Input<Tensor>("DOutput");
    auto ddX = ctx.Input<Tensor>("DDInput");
    auto ddW = ctx.Input<Tensor>("DDFilter");

    auto ddO = ctx.Output<Tensor>("DDOutput");
    auto dW = ctx.Output<Tensor>("DFilter");
    auto dX = ctx.Output<Tensor>("DInput");

    if (ddO) {
      ddO->mutable_data<T>(ctx.GetPlace());
      math::SetConstant<platform::CUDADeviceContext, T> set_zero;
      set_zero(dev_ctx, ddO, static_cast<T>(0));
    }
    if (dW) {
      dW->mutable_data<T>(ctx.GetPlace());
    }
    if (dX) {
      dX->mutable_data<T>(ctx.GetPlace());
    }

    const T* dy = dO->data<T>();
    const T* w = W->data<T>();

    const T* ddx = nullptr;
    const T* ddw = nullptr;
    T *dw, *dx, *ddy;
    dw = dx = ddy = nullptr;
    T* transformed_dx = nullptr;
    const std::vector<int>& strides = ctx.Attr<std::vector<int>>("strides");
    std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
    int groups = ctx.Attr<int>("groups");
    bool deterministic = FLAGS_cudnn_deterministic;

    std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");

    std::string padding_algorithm = ctx.Attr<std::string>("padding_algorithm");
    const std::string data_format = ctx.Attr<std::string>("data_format");
    const bool channel_last =
        (data_format == "NHWC" || data_format == "NDHWC");

    // transform Tensors to channel first-----------
    Tensor transformed_X_channel(X->type());
    Tensor transformed_dO_channel(dO->type());
    Tensor transformed_ddX_channel(X->type());

    Tensor transformed_ddO_channel(dO->type());
    Tensor transformed_dX_channel(X->type());

    if (channel_last) {
      ResizeToChannelFirst<platform::CUDADeviceContext, T>(
          ctx, X, &transformed_X_channel);
      TransToChannelFirst<platform::CUDADeviceContext, T>(
          ctx, X, &transformed_X_channel);

      ResizeToChannelFirst<platform::CUDADeviceContext, T>(
          ctx, dO, &transformed_dO_channel);
      TransToChannelFirst<platform::CUDADeviceContext, T>(
          ctx, dO, &transformed_dO_channel);

      if (ddX) {
        ResizeToChannelFirst<platform::CUDADeviceContext, T>(
            ctx, ddX, &transformed_ddX_channel);
        TransToChannelFirst<platform::CUDADeviceContext, T>(
            ctx, ddX, &transformed_ddX_channel);
      }

      if (ddO) {
        ResizeToChannelFirst<platform::CUDADeviceContext, T>(
            ctx, ddO, &transformed_ddO_channel);
      }
      if (dX) {
        ResizeToChannelFirst<platform::CUDADeviceContext, T>(
            ctx, dX, &transformed_dX_channel);
        transformed_dX_channel.mutable_data<T>(ctx.GetPlace());
      }
    } else {
      transformed_X_channel = *X;
      transformed_dO_channel = *dO;
      if (ddX) {
        transformed_ddX_channel = *ddX;
      }
      if (dX) {
        transformed_dX_channel = *dX;
      }
    }

    std::vector<int> output_vec =
        framework::vectorize<int>(transformed_dO_channel.dims());

    auto in_dims = transformed_X_channel.dims();
    auto filter_dims = W->dims();
    framework::DDim in_data_dims =
        framework::slice_ddim(in_dims, 2, in_dims.size());
    framework::DDim filter_data_dims =
        framework::slice_ddim(filter_dims, 2, filter_dims.size());
    std::vector<int> ksize = framework::vectorize<int>(filter_data_dims);
    UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
                             in_data_dims, strides, ksize);

    int data_dim = strides.size();  // 2d or 3d
    bool is_sys_pad = math::IsSymmetricPadding(paddings, data_dim);
    Tensor transformed_X(X->type());
    Tensor transformed_ddX(X->type());

    Tensor transformed_dO(dO->type());

    std::vector<int> padding_common(data_dim, 0);
    std::vector<int> input_pad(X->dims().size() * 2, 0);

    if (!is_sys_pad) {
      // get pad
      std::vector<int> padding_diff(data_dim);
      std::vector<int> new_input_shape_vec(data_dim + 2);
      std::vector<int> new_output_grad_shape_vec(data_dim + 2);

      new_input_shape_vec[0] = transformed_X_channel.dims()[0];
      new_input_shape_vec[1] = transformed_X_channel.dims()[1];
      new_output_grad_shape_vec[0] = transformed_dO_channel.dims()[0];
      new_output_grad_shape_vec[1] = transformed_dO_channel.dims()[1];

      for (size_t i = 0; i < data_dim; ++i) {
        padding_diff[i] = std::abs(paddings[2 * i] - paddings[2 * i + 1]);
        padding_common[i] = std::min(paddings[2 * i], paddings[2 * i + 1]);
        new_input_shape_vec[i + 2] =
            transformed_X_channel.dims()[i + 2] + padding_diff[i];

        new_output_grad_shape_vec[i + 2] =
            transformed_dO_channel.dims()[i + 2] + padding_diff[i];

        input_pad[2 * i + 4] = paddings[2 * i] - padding_common[i];
        input_pad[2 * i + 4 + 1] = paddings[2 * i + 1] - padding_common[i];
      }
      framework::DDim new_input_shape(
          framework::make_ddim(new_input_shape_vec));
      transformed_X.Resize(new_input_shape);
      transformed_ddX.Resize(new_input_shape);

      framework::DDim new_output_grad_shape(
          framework::make_ddim(new_output_grad_shape_vec));
      transformed_dO.Resize(new_output_grad_shape);

      transformed_dO =
          ctx.AllocateTmpTensor<T, paddle::platform::CUDADeviceContext>(
              new_output_grad_shape, dev_ctx);

      transformed_X =
          ctx.AllocateTmpTensor<T, paddle::platform::CUDADeviceContext>(
              new_input_shape, dev_ctx);
      if (ddX) {
        transformed_ddX =
            ctx.AllocateTmpTensor<T, paddle::platform::CUDADeviceContext>(
                new_input_shape, dev_ctx);
      }

      // pad for input
      const int rank = X->dims().size();
      T pad_value(0.0);
      switch (rank) {
        case 4: {
          math::PadFunction<paddle::platform::CUDADeviceContext, T, 4>(
              ctx, input_pad, transformed_X_channel, pad_value,
              &transformed_X);
          if (dO) {
            math::PadFunction<paddle::platform::CUDADeviceContext, T, 4>(
                ctx, input_pad, transformed_dO_channel, pad_value,
                &transformed_dO);
          }

          if (ddX) {
            math::PadFunction<paddle::platform::CUDADeviceContext, T, 4>(
                ctx, input_pad, transformed_ddX_channel, pad_value,
                &transformed_ddX);
          }
        } break;
        case 5: {
          math::PadFunction<paddle::platform::CUDADeviceContext, T, 5>(
              ctx, input_pad, transformed_X_channel, pad_value,
              &transformed_X);
          if (ddX) {
            math::PadFunction<paddle::platform::CUDADeviceContext, T, 5>(
                ctx, input_pad, transformed_ddX_channel, pad_value,
                &transformed_ddX);
          }
        } break;
        default:
          PADDLE_THROW(platform::errors::InvalidArgument(
              "ConvOp only support tensors with 4 or 5 dimensions."));
      }
    } else {
      transformed_X = transformed_X_channel;
      transformed_dO = transformed_dO_channel;
      if (ddX) {
        transformed_ddX = transformed_ddX_channel;
      }

      if (paddings.size() == data_dim) {
        for (size_t i = 0; i < data_dim; ++i) {
          padding_common[i] = paddings[i];
        }
      } else {
        for (size_t i = 0; i < data_dim; ++i) {
          padding_common[i] = paddings[2 * i];
        }
      }
    }

    std::vector<int64_t> starts(data_dim, 0);
    std::vector<int64_t> ends(data_dim, 0);
    std::vector<int64_t> axes(data_dim, 0);
    for (size_t i = 0; i < data_dim; ++i) {
      starts[i] = input_pad[2 * i + 4] * (strides[i] + 1);
      ends[i] = starts[i] + output_vec[i + 2];
      axes[i] = i + 2;
    }

    std::vector<int> transformed_output_vec = output_vec;
    for (size_t i = 0; i < data_dim; ++i) {
      transformed_output_vec[i + 2] =
          output_vec[i + 2] +
          (input_pad[2 * i + 4] + input_pad[2 * i + 5]) * strides[i] -
          2 * padding_common[i] + paddings[2 * i] + paddings[2 * i + 1];
    }

    if (!is_sys_pad) {
      DDim transformed_output_shape(
          framework::make_ddim(transformed_output_vec));
      transformed_ddO_channel.mutable_data<T>(transformed_output_shape,
                                              ctx.GetPlace());
    } else {
      ddO->mutable_data<T>(ctx.GetPlace());
      transformed_ddO_channel = *ddO;
      transformed_ddO_channel.Resize(
          framework::make_ddim(transformed_output_vec));
    }

    const T* x = transformed_X.data<T>();

    int iwo_group = groups;
    int c_group = 1;
#if CUDNN_VERSION_MIN(7, 0, 1)
    iwo_group = 1;
    c_group = groups;
    groups = 1;
#endif
    auto dtype = platform::CudnnDataType<T>::type;

    auto handle = dev_ctx.cudnn_handle();

    ConvArgs args1{&transformed_ddO_channel, W,         &transformed_ddX,
                   strides,                  padding_common,
                   dilations,                dtype};
    ConvArgs args2{&transformed_ddO_channel, ddW,       &transformed_X,
                   strides,                  padding_common,
                   dilations,                dtype};

    ConvArgs args3{&transformed_dO,          dW,        &transformed_ddX_channel,
                   strides,                  padding_common,
                   dilations,                dtype};
    ConvArgs args4{&transformed_dO,          ddW,       &transformed_dX_channel,
                   strides,                  padding_common,
                   dilations,                dtype};

    cudnnConvolutionBwdDataAlgo_t bwd_algo1 =
        static_cast<cudnnConvolutionBwdDataAlgo_t>(0);
    cudnnConvolutionBwdDataAlgo_t bwd_algo2 =
        static_cast<cudnnConvolutionBwdDataAlgo_t>(0);
    cudnnConvolutionFwdAlgo_t data_algo =
        static_cast<cudnnConvolutionFwdAlgo_t>(0);
    cudnnConvolutionBwdFilterAlgo_t filter_algo =
        static_cast<cudnnConvolutionBwdFilterAlgo_t>(0);

    auto layout = GetCudnnTensorFormat(DataLayout::kNCHW);

    // ddo = conv(ddI, W) + conv(I, ddW)
    size_t workspace_size = 0;

    T* transformed_ddy_channel = nullptr;

    if (ddO) {
      ddy = ddO->data<T>();
      transformed_ddy_channel = transformed_ddO_channel.data<T>();
      if (ddX) {
        args1.handle = handle;
        args1.idesc.set(transformed_ddO_channel, iwo_group);
        args1.wdesc.set(*W, layout, iwo_group);
        args1.odesc.set(transformed_ddX, iwo_group);
        args1.cdesc.set(dtype, padding_common, strides, dilations, c_group);
        using search1 = SearchAlgorithm<cudnnConvolutionBwdDataAlgoPerf_t>;
        bwd_algo1 = search1::Find<T>(args1, false, deterministic, ctx);
        workspace_size = search1::GetWorkspaceSize(args1, bwd_algo1);
      }

      if (ddW) {
        ddw = ddW->data<T>();
        args2.handle = handle;
        args2.idesc.set(transformed_ddO_channel, iwo_group);
        args2.wdesc.set(*ddW, layout, iwo_group);
        args2.odesc.set(transformed_X, iwo_group);
        args2.cdesc.set(dtype, padding_common, strides, dilations, c_group);
        using search2 = SearchAlgorithm<cudnnConvolutionBwdDataAlgoPerf_t>;
        bwd_algo2 = search2::Find<T>(args2, false, deterministic, ctx);
        workspace_size = std::max(workspace_size,
                                  search2::GetWorkspaceSize(args2, bwd_algo2));
      }
    }

    if (dW && ddX) {
      dw = dW->data<T>();
      args3.handle = handle;
      args3.idesc.set(transformed_dO, iwo_group);
      args3.wdesc.set(*dW, layout, iwo_group);
      args3.odesc.set(transformed_ddX_channel, iwo_group);
      args3.cdesc.set(dtype, padding_common, strides, dilations, c_group);
      using search3 = SearchAlgorithm<cudnnConvolutionBwdFilterAlgoPerf_t>;
      filter_algo = search3::Find<T>(args3, false, deterministic, ctx);
      workspace_size = std::max(workspace_size,
                                search3::GetWorkspaceSize(args3, filter_algo));
    }

    if (ddW && dX) {
      transformed_dx = transformed_dX_channel.data<T>();
      args4.handle = handle;
      args4.idesc.set(transformed_dO, iwo_group);
      args4.wdesc.set(*ddW, layout, iwo_group);
      args4.odesc.set(transformed_dX_channel, iwo_group);
      args4.cdesc.set(dtype, padding_common, strides, dilations, c_group);
      using search4 = SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t>;
      data_algo = search4::Find<T>(args4, false, deterministic, ctx);
      workspace_size = std::max(workspace_size,
                                search4::GetWorkspaceSize(args4, data_algo));
    }

    int i_n, i_c, i_d, i_h, i_w;
    GetNCDHW(transformed_X.dims(), DataLayout::kNCHW, &i_n, &i_c, &i_d, &i_h,
             &i_w);

    int o_n, o_c, o_d, o_h, o_w;
    GetNCDHW(transformed_dO.dims(), DataLayout::kNCHW, &o_n, &o_c, &o_d, &o_h,
             &o_w);

    int group_offset_in =
        transformed_X.numel() / transformed_X.dims()[0] / groups;
    int group_offset_out =
        transformed_dO.numel() / transformed_dO.dims()[0] / groups;
    int group_offset_filter = W->numel() / groups;

    ScalingParamType<T> alpha = 1.0f;
    ScalingParamType<T> beta = 0.0f;

    auto wkspace_handle = dev_ctx.cudnn_workspace_handle();

    if (ddO) {
      if (ddX) {
        ddx = transformed_ddX.data<T>();
        for (int i = 0; i < groups; i++) {
          wkspace_handle.RunFunc(
              [&](void* workspace_ptr) {
                PADDLE_ENFORCE_CUDA_SUCCESS(
                    platform::dynload::cudnnConvolutionBackwardData(
                        handle, &alpha, args1.wdesc.desc(),
                        w + i * group_offset_filter, args1.odesc.desc(),
                        ddx + i * group_offset_in, args1.cdesc.desc(),
                        bwd_algo1, workspace_ptr, workspace_size, &beta,
                        args1.idesc.desc(),
                        transformed_ddy_channel + i * group_offset_out));
              },
              workspace_size);
        }
      }
      if (ddW) {
        for (int i = 0; i < groups; i++) {
          wkspace_handle.RunFunc(
              [&](void* workspace_ptr) {
                PADDLE_ENFORCE_CUDA_SUCCESS(
                    platform::dynload::cudnnConvolutionBackwardData(
                        handle, &alpha, args2.wdesc.desc(),
                        ddw + i * group_offset_filter, args2.odesc.desc(),
                        x + i * group_offset_in, args2.cdesc.desc(), bwd_algo2,
                        workspace_ptr, workspace_size, &alpha,
                        args2.idesc.desc(),
                        transformed_ddy_channel + i * group_offset_out));
              },
              workspace_size);
        }
      }
      if ((!is_sys_pad) && (!channel_last)) {
        if (strides.size() == 2U) {
          Slice<paddle::platform::CUDADeviceContext, T, 4>(
              ctx, &transformed_ddO_channel, ddO, starts, ends, axes);
        } else if (!is_sys_pad && strides.size() == 3U) {
          Slice<paddle::platform::CUDADeviceContext, T, 5>(
              ctx, &transformed_ddO_channel, ddO, starts, ends, axes);
        }
      } else if ((!is_sys_pad) && (channel_last)) {
        if (strides.size() == 2U) {
          Slice<paddle::platform::CUDADeviceContext, T, 4>(
              ctx, &transformed_ddO_channel, &transformed_ddO_channel, starts,
              ends, axes);
        } else if (!is_sys_pad && strides.size() == 3U) {
          Slice<paddle::platform::CUDADeviceContext, T, 5>(
              ctx, &transformed_ddO_channel, &transformed_ddO_channel, starts,
              ends, axes);
        }

        TransToChannelLast<paddle::platform::CUDADeviceContext, T>(
            ctx, &transformed_ddO_channel, ddO);
      }
    }

    T* transformed_dy_channel = transformed_dO.data<T>();
    if (dW && ddX) {
      ddx = transformed_ddX_channel.data<T>();
      for (int i = 0; i < groups; i++) {
        wkspace_handle.RunFunc(
            [&](void* workspace_ptr) {
              PADDLE_ENFORCE_CUDA_SUCCESS(
                  platform::dynload::cudnnConvolutionBackwardFilter(
                      handle, &alpha, args3.idesc.desc(),
                      transformed_dy_channel + i * group_offset_out,
                      args3.odesc.desc(), ddx + i * group_offset_in,
                      args3.cdesc.desc(), filter_algo, workspace_ptr,
                      workspace_size, &beta, args3.wdesc.desc(),
                      dw + i * group_offset_filter));
            },
            workspace_size);
      }
    }

    if (dX && ddW) {
      ddw = ddW->data<T>();
      for (int i = 0; i < groups; i++) {
        wkspace_handle.RunFunc(
            [&](void* workspace_ptr) {
              PADDLE_ENFORCE_CUDA_SUCCESS(
                  platform::dynload::cudnnConvolutionForward(
                      handle, &alpha, args4.idesc.desc(),
                      transformed_dy_channel + i * group_offset_out,
                      args4.wdesc.desc(), ddw + i * group_offset_filter,
                      args4.cdesc.desc(), data_algo, workspace_ptr,
                      workspace_size, &beta, args4.odesc.desc(),
                      transformed_dx + i * group_offset_in));
            },
            workspace_size);
      }
      if (channel_last) {
        TransToChannelLast<paddle::platform::CUDADeviceContext, T>(
            ctx, &transformed_dX_channel, dX);
      }
    }
  }
};

}  // namespace operators
}  // namespace paddle
```
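The formulas in the comment at the top of the new kernel follow from the fact that a transposed convolution is bilinear in its input and filter. A tiny NumPy sketch (illustrative only, not Paddle code) of the decomposition behind `ddo = conv_bp_data(W, ddI) + conv_bp_data(ddW, I)`:

```python
# Illustrative NumPy check (not Paddle code): a 1-D "transposed convolution"
# y[j] = sum_i x[i] * k[j - i] is bilinear in (x, k), so a joint perturbation
# (ddx, ddk) splits exactly into two first-order terms plus a cross term;
# the forward-mode double grad keeps only the two first-order terms.
import numpy as np

def conv_transpose_1d(x, k):
    return np.convolve(x, k)  # full convolution: y[j] = sum_i x[i] * k[j - i]

rng = np.random.default_rng(0)
x, ddx = rng.standard_normal(5), rng.standard_normal(5)
k, ddk = rng.standard_normal(3), rng.standard_normal(3)

lhs = conv_transpose_1d(x + ddx, k + ddk) - conv_transpose_1d(x, k)
rhs = (conv_transpose_1d(ddx, k) + conv_transpose_1d(x, ddk)
       + conv_transpose_1d(ddx, ddk))  # exact; the cross term is second order
assert np.allclose(lhs, rhs)
```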
@@ -565,6 +1046,11 @@ REGISTER_OP_KERNEL(conv2d_transpose_grad, CUDNN, ::paddle::platform::CUDAPlace,

```cpp
                   ops::CUDNNConvTransposeGradOpKernel<plat::float16>,
                   ops::CUDNNConvTransposeGradOpKernel<float>,
                   ops::CUDNNConvTransposeGradOpKernel<double>);
REGISTER_OP_KERNEL(
    conv2d_transpose_grad_grad, CUDNN, plat::CUDAPlace,
    paddle::operators::CUDNNConvTransposeDoubleGradOpKernel<float>,
    paddle::operators::CUDNNConvTransposeDoubleGradOpKernel<double>,
    paddle::operators::CUDNNConvTransposeDoubleGradOpKernel<plat::float16>);

REGISTER_OP_KERNEL(conv3d_transpose, CUDNN, ::paddle::platform::CUDAPlace,
                   ops::CUDNNConvTransposeOpKernel<plat::float16>,
```
paddle/fluid/operators/conv_transpose_op.cc
@@ -513,6 +513,85 @@ class ConvTransposeGradOpMaker : public framework::SingleGradOpMaker<T> {
```cpp
  }
};

/*
 * Inputs:  I, W, dO, ddI, ddW
 * Outputs: ddO, dW, dI
 */
template <typename T>
class ConvTransposeDoubleGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    // I, W, dO, ddI, ddW
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput("DOutput", this->Input(framework::GradVarName("Output")));
    op->SetInput("DDInput", this->OutputGrad(framework::GradVarName("Input")));
    op->SetInput("DDFilter",
                 this->OutputGrad(framework::GradVarName("Filter")));

    // ddO, dI, dW
    // Unlike grad op, double grad op does not use name@GRAD@GRAD
    // as key of ops' inputs and outputs.
    auto ddx = this->OutputGrad(framework::GradVarName("Input"));
    auto ddw = this->OutputGrad(framework::GradVarName("Filter"));

    op->SetOutput("DDOutput",
                  ddx.empty()
                      ? this->EmptyInputGrad()
                      : this->InputGrad(framework::GradVarName("Output")));
    op->SetOutput("DFilter", ddx.empty() ? this->EmptyInputGrad()
                                         : this->InputGrad("Filter"));
    op->SetOutput("DInput", ddw.empty() ? this->EmptyInputGrad()
                                        : this->InputGrad("Input"));

    op->SetAttrMap(this->Attrs());
  }
};

void ConvTransposeOpDoubleGrad::InferShape(
    framework::InferShapeContext* ctx) const {
  auto x_dims = ctx->GetInputDim("Input");
  auto w_dims = ctx->GetInputDim("Filter");
  auto do_dims = ctx->GetInputDim("DOutput");

  if (ctx->HasOutput("DDOutput") &&
      (ctx->HasInput("DDInput") || (ctx->HasInput("DDFilter")))) {
    ctx->SetOutputDim("DDOutput", do_dims);
  }
  if (ctx->HasOutput("DFilter") && ctx->HasInput("DDInput")) {
    ctx->SetOutputDim("DFilter", w_dims);
  }
  if (ctx->HasOutput("DInput") && ctx->HasInput("DDFilter")) {
    ctx->SetOutputDim("DInput", x_dims);
  }
}

framework::OpKernelType ConvTransposeOpDoubleGrad::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  bool use_cudnn = ctx.Attr<bool>("use_cudnn");
  use_cudnn &= platform::is_gpu_place(ctx.GetPlace());
#ifdef PADDLE_WITH_CUDA
  if (platform::is_gpu_place(ctx.GetPlace())) {
    auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
    use_cudnn &= dev_ctx.cudnn_handle() != nullptr;
  }
#endif
  framework::LibraryType library_;
  if (use_cudnn) {
    library_ = framework::LibraryType::kCUDNN;
  } else {
    library_ = framework::LibraryType::kPlain;
  }
  framework::DataLayout layout_ = framework::DataLayout::kAnyLayout;
  return framework::OpKernelType(
      OperatorWithKernel::IndicateVarDataType(ctx, "Input"), ctx.GetPlace(),
      layout_, library_);
}

}  // namespace operators
}  // namespace paddle
```
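As the comment in the maker notes, the double-grad op uses plain slot names rather than `name@GRAD@GRAD` keys. The wiring above implies the following correspondence (a reading aid inferred from the `SetInput`/`SetOutput` calls, not an official Paddle naming table):

```python
# Reading aid inferred from the SetInput/SetOutput calls above; not an
# official Paddle naming table.
double_grad_slots = {
    # inputs of conv2d_transpose_grad_grad
    "Input":    "I   - forward input",
    "Filter":   "W   - forward filter",
    "DOutput":  "dO  - incoming grad of the forward Output",
    "DDInput":  "ddI - grad of the grad op's Input@GRAD output",
    "DDFilter": "ddW - grad of the grad op's Filter@GRAD output",
    # outputs of conv2d_transpose_grad_grad
    "DDOutput": "ddO - forward-mode grad of Output",
    "DFilter":  "dW  - new grad w.r.t. Filter",
    "DInput":   "dI  - new grad w.r.t. Input",
}
```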
@@ -523,7 +602,11 @@ REGISTER_OPERATOR(conv2d_transpose, ops::ConvTransposeOp,

```diff
                   ops::Conv2DTransposeOpMaker,
                   ops::ConvTransposeGradOpMaker<paddle::framework::OpDesc>,
                   ops::ConvTransposeGradOpMaker<paddle::imperative::OpBase>);
-REGISTER_OPERATOR(conv2d_transpose_grad, ops::ConvTransposeOpGrad);
+REGISTER_OPERATOR(conv2d_transpose_grad, ops::ConvTransposeOpGrad,
+                  ops::ConvTransposeDoubleGradMaker<paddle::framework::OpDesc>,
+                  ops::ConvTransposeDoubleGradMaker<paddle::imperative::OpBase>);
+REGISTER_OPERATOR(conv2d_transpose_grad_grad, ops::ConvTransposeOpDoubleGrad);
 REGISTER_OP_CPU_KERNEL(
     conv2d_transpose,
```
...
paddle/fluid/operators/conv_transpose_op.cu
@@ -24,6 +24,9 @@ REGISTER_OP_CUDA_KERNEL(conv2d_transpose,
```cpp
REGISTER_OP_CUDA_KERNEL(conv2d_transpose_grad,
                        ops::GemmConvTransposeGradKernel<CUDA, float>,
                        ops::GemmConvTransposeGradKernel<CUDA, double>);
REGISTER_OP_CUDA_KERNEL(conv2d_transpose_grad_grad,
                        ops::GemmConvTransposeGradKernel<CUDA, float>,
                        ops::GemmConvTransposeGradKernel<CUDA, double>);

// conv3d
REGISTER_OP_CUDA_KERNEL(conv3d_transpose,
```
paddle/fluid/operators/conv_transpose_op.h
@@ -114,6 +114,16 @@ class ConvTransposeOpGrad : public framework::OperatorWithKernel {
```cpp
      const framework::ExecutionContext& ctx) const override;
};

class ConvTransposeOpDoubleGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override;

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override;
};

template <typename DeviceContext, typename T>
class GemmConvTransposeKernel : public framework::OpKernel<T> {
 public:
```
python/paddle/fluid/tests/unittests/test_conv_transpose_nn_grad.py
(new file, mode 100644)
```python
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import numpy as np

import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import paddle.fluid.core as core
import gradient_checker

from decorator_helper import prog_scope


class TestConvTransposeDoubleGradCheck(unittest.TestCase):
    @prog_scope()
    def func(self, place):
        shape = [2, 4, 3, 3]
        eps = 0.005
        dtype = np.float64
        x = layers.data('x', shape, False, dtype)
        y = layers.conv2d_transpose(
            x, 2, filter_size=1, groups=1, bias_attr=False)
        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)

        w = fluid.default_main_program().global_block().all_parameters()
        w_arr = []
        for p in w:
            w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype))
        gradient_checker.double_grad_check(
            [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps)

    def test_grad(self):
        places = []

        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))
        for p in places:
            self.func(p)


class TestConvTranspose2DoubleGradCheck_AsyPadding(
        TestConvTransposeDoubleGradCheck):
    @prog_scope()
    def func(self, place):
        shape = [2, 2, 3, 3]
        eps = 0.005
        dtype = np.float64
        x = layers.data('x', shape, False, dtype)
        y = layers.conv2d_transpose(
            input=x,
            num_filters=2,
            filter_size=1,
            padding=[1, 0, 0, 1],
            bias_attr=False,
            use_cudnn=True)
        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)

        w = fluid.default_main_program().global_block().all_parameters()
        w_arr = []
        for p in w:
            w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype))
        gradient_checker.double_grad_check(
            [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps)


class TestConvTranspose2DoubleGradCheck_PaddingSAME(
        TestConvTransposeDoubleGradCheck):
    @prog_scope()
    def func(self, place):
        shape = [2, 2, 3, 3]
        eps = 0.005
        dtype = np.float64
        x = layers.data('x', shape, False, dtype)
        y = layers.conv2d_transpose(
            input=x,
            num_filters=2,
            filter_size=1,
            padding="SAME",
            bias_attr=False,
            use_cudnn=True)
        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)

        w = fluid.default_main_program().global_block().all_parameters()
        w_arr = []
        for p in w:
            w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype))
        gradient_checker.double_grad_check(
            [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps)


class TestConvTranspose2DoubleGradCheck_PaddingVALID(
        TestConvTransposeDoubleGradCheck):
    @prog_scope()
    def func(self, place):
        shape = [2, 2, 3, 3]
        eps = 0.005
        dtype = np.float64
        x = layers.data('x', shape, False, dtype)
        y = layers.conv2d_transpose(
            input=x,
            num_filters=2,
            filter_size=1,
            padding="VALID",
            bias_attr=False,
            use_cudnn=True)
        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)

        w = fluid.default_main_program().global_block().all_parameters()
        w_arr = []
        for p in w:
            w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype))
        gradient_checker.double_grad_check(
            [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps)


class TestConvTranspose2DoubleGradCheck_ChannelLast(
        TestConvTransposeDoubleGradCheck):
    @prog_scope()
    def func(self, place):
        shape = [2, 3, 3, 2]
        eps = 0.005
        dtype = np.float64
        x = layers.data('x', shape, False, dtype)
        y = layers.conv2d_transpose(
            input=x,
            num_filters=2,
            filter_size=1,
            padding=[1, 1],
            bias_attr=False,
            use_cudnn=True,
            groups=1,
            data_format="NHWC")
        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)

        w = fluid.default_main_program().global_block().all_parameters()
        w_arr = []
        for p in w:
            w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype))
        gradient_checker.double_grad_check(
            [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps)


if __name__ == "__main__":
    unittest.main()
```
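These tests rely on `gradient_checker.double_grad_check`, which (conceptually) compares the analytic second-order gradients against finite differences. A scalar sketch of the same idea, with a hypothetical helper name, not the `gradient_checker` implementation:

```python
# Scalar sketch of the idea behind double_grad_check (hypothetical helper):
# compare an analytic second derivative against a central second difference.
import numpy as np

def numeric_second_derivative(f, x, eps=1e-4):
    # central second difference: f''(x) ~ (f(x+eps) - 2 f(x) + f(x-eps)) / eps^2
    return (f(x + eps) - 2.0 * f(x) + f(x - eps)) / eps**2

f = lambda x: x**3  # analytic second derivative: 6 * x
x0 = 0.7
assert np.isclose(numeric_second_derivative(f, x0), 6.0 * x0, atol=1e-3)
```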
tools/static_mode_white_list.py
@@ -108,6 +108,7 @@ STATIC_MODE_TESTING_LIST = [
```python
    'test_conv3d_transpose_layer',
    'test_conv3d_transpose_part2_op',
    'test_conv_nn_grad',
    'test_conv_transpose_nn_grad',
    'test_conv_shift_op',
    'test_cos_sim_op',
    'test_create_global_var',
```