BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)
Unverified commit e5af650b, authored Dec 21, 2020 by LielinJiang, committed via GitHub on Dec 21, 2020.

Add double grad for conv_transpose (#29706)

* add double grad for conv_transpose

Parent: 224f3bcb
Showing 6 changed files with 743 additions and 1 deletion (+743, -1).
paddle/fluid/operators/conv_transpose_cudnn_op.cu                    +486 -0
paddle/fluid/operators/conv_transpose_op.cc                           +84 -1
paddle/fluid/operators/conv_transpose_op.cu                            +3 -0
paddle/fluid/operators/conv_transpose_op.h                            +10 -0
python/paddle/fluid/tests/unittests/test_conv_transpose_nn_grad.py   +159 -0
tools/static_mode_white_list.py                                        +1 -0
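In user terms, the kernels below enable second-order automatic differentiation through conv2d_transpose. A minimal dygraph sketch of what this unlocks (hypothetical usage, not part of the commit; assumes Paddle 2.x, where paddle.grad and paddle.nn.functional.conv2d_transpose are available):

import paddle
import paddle.nn.functional as F

x = paddle.randn([2, 4, 8, 8])
x.stop_gradient = False
w = paddle.randn([4, 2, 3, 3])  # [in_channels, out_channels, kH, kW]
w.stop_gradient = False

y = F.conv2d_transpose(x, w)
# First-order gradient; create_graph=True keeps the grad graph differentiable.
(dx,) = paddle.grad(y.sum(), x, create_graph=True)
# Differentiating the first-order gradient again routes through the
# conv2d_transpose_grad_grad kernels registered by this commit.
(dw,) = paddle.grad(dx.sum(), w)
print(dw.shape)  # [4, 2, 3, 3]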
paddle/fluid/operators/conv_transpose_cudnn_op.cu
@@ -551,6 +551,487 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
}
};
/*
* Inputs: I, W, dO, ddI, ddW
* Outputs: ddO, dW, dI
* ddo = conv_bp_data(W, ddI) + conv_bp_data(ddW, I)
* dW = conv_bp_filter(dO, ddI)
* dI = conv(dO, ddW)
*/
template <typename T>
class CUDNNConvTransposeDoubleGradOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
    PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true,
                      paddle::platform::errors::PreconditionNotMet(
                          "It must use CUDAPlace."));
    auto X = ctx.Input<Tensor>("Input");
    auto W = ctx.Input<Tensor>("Filter");
    auto dO = ctx.Input<Tensor>("DOutput");
    auto ddX = ctx.Input<Tensor>("DDInput");
    auto ddW = ctx.Input<Tensor>("DDFilter");

    auto ddO = ctx.Output<Tensor>("DDOutput");
    auto dW = ctx.Output<Tensor>("DFilter");
    auto dX = ctx.Output<Tensor>("DInput");

    if (ddO) {
      ddO->mutable_data<T>(ctx.GetPlace());
      math::SetConstant<platform::CUDADeviceContext, T> set_zero;
      set_zero(dev_ctx, ddO, static_cast<T>(0));
    }
    if (dW) {
      dW->mutable_data<T>(ctx.GetPlace());
    }
    if (dX) {
      dX->mutable_data<T>(ctx.GetPlace());
    }

    const T* dy = dO->data<T>();
    const T* w = W->data<T>();

    const T* ddx = nullptr;
    const T* ddw = nullptr;
    T *dw, *dx, *ddy;
    dw = dx = ddy = nullptr;
    T* transformed_dx = nullptr;
    const std::vector<int>& strides = ctx.Attr<std::vector<int>>("strides");
    std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
    int groups = ctx.Attr<int>("groups");
    bool deterministic = FLAGS_cudnn_deterministic;

    std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");

    std::string padding_algorithm = ctx.Attr<std::string>("padding_algorithm");
    const std::string data_format = ctx.Attr<std::string>("data_format");
    const bool channel_last =
        (data_format == "NHWC" || data_format == "NDHWC");

    // transform Tensors to channel first-----------
    Tensor transformed_X_channel(X->type());
    Tensor transformed_dO_channel(dO->type());
    Tensor transformed_ddX_channel(X->type());

    Tensor transformed_ddO_channel(dO->type());
    Tensor transformed_dX_channel(X->type());

    if (channel_last) {
      ResizeToChannelFirst<platform::CUDADeviceContext, T>(
          ctx, X, &transformed_X_channel);
      TransToChannelFirst<platform::CUDADeviceContext, T>(
          ctx, X, &transformed_X_channel);

      ResizeToChannelFirst<platform::CUDADeviceContext, T>(
          ctx, dO, &transformed_dO_channel);
      TransToChannelFirst<platform::CUDADeviceContext, T>(
          ctx, dO, &transformed_dO_channel);

      if (ddX) {
        ResizeToChannelFirst<platform::CUDADeviceContext, T>(
            ctx, ddX, &transformed_ddX_channel);
        TransToChannelFirst<platform::CUDADeviceContext, T>(
            ctx, ddX, &transformed_ddX_channel);
      }

      if (ddO) {
        ResizeToChannelFirst<platform::CUDADeviceContext, T>(
            ctx, ddO, &transformed_ddO_channel);
      }
      if (dX) {
        ResizeToChannelFirst<platform::CUDADeviceContext, T>(
            ctx, dX, &transformed_dX_channel);
        transformed_dX_channel.mutable_data<T>(ctx.GetPlace());
      }
    } else {
      transformed_X_channel = *X;
      transformed_dO_channel = *dO;
      if (ddX) {
        transformed_ddX_channel = *ddX;
      }
      if (dX) {
        transformed_dX_channel = *dX;
      }
    }
    std::vector<int> output_vec =
        framework::vectorize<int>(transformed_dO_channel.dims());

    auto in_dims = transformed_X_channel.dims();
    auto filter_dims = W->dims();
    framework::DDim in_data_dims =
        framework::slice_ddim(in_dims, 2, in_dims.size());
    framework::DDim filter_data_dims =
        framework::slice_ddim(filter_dims, 2, filter_dims.size());
    std::vector<int> ksize = framework::vectorize<int>(filter_data_dims);
    UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
                             in_data_dims, strides, ksize);

    int data_dim = strides.size();  // 2d or 3d
    bool is_sys_pad = math::IsSymmetricPadding(paddings, data_dim);
    Tensor transformed_X(X->type());
    Tensor transformed_ddX(X->type());

    Tensor transformed_dO(dO->type());

    std::vector<int> padding_common(data_dim, 0);
    std::vector<int> input_pad(X->dims().size() * 2, 0);

    if (!is_sys_pad) {
      // get pad
      std::vector<int> padding_diff(data_dim);
      std::vector<int> new_input_shape_vec(data_dim + 2);
      std::vector<int> new_output_grad_shape_vec(data_dim + 2);

      new_input_shape_vec[0] = transformed_X_channel.dims()[0];
      new_input_shape_vec[1] = transformed_X_channel.dims()[1];
      new_output_grad_shape_vec[0] = transformed_dO_channel.dims()[0];
      new_output_grad_shape_vec[1] = transformed_dO_channel.dims()[1];

      for (size_t i = 0; i < data_dim; ++i) {
        padding_diff[i] = std::abs(paddings[2 * i] - paddings[2 * i + 1]);
        padding_common[i] = std::min(paddings[2 * i], paddings[2 * i + 1]);
        new_input_shape_vec[i + 2] =
            transformed_X_channel.dims()[i + 2] + padding_diff[i];
        new_output_grad_shape_vec[i + 2] =
            transformed_dO_channel.dims()[i + 2] + padding_diff[i];
        input_pad[2 * i + 4] = paddings[2 * i] - padding_common[i];
        input_pad[2 * i + 4 + 1] = paddings[2 * i + 1] - padding_common[i];
      }
      framework::DDim new_input_shape(
          framework::make_ddim(new_input_shape_vec));
      transformed_X.Resize(new_input_shape);
      transformed_ddX.Resize(new_input_shape);

      framework::DDim new_output_grad_shape(
          framework::make_ddim(new_output_grad_shape_vec));
      transformed_dO.Resize(new_output_grad_shape);

      transformed_dO =
          ctx.AllocateTmpTensor<T, paddle::platform::CUDADeviceContext>(
              new_output_grad_shape, dev_ctx);

      transformed_X =
          ctx.AllocateTmpTensor<T, paddle::platform::CUDADeviceContext>(
              new_input_shape, dev_ctx);
      if (ddX) {
        transformed_ddX =
            ctx.AllocateTmpTensor<T, paddle::platform::CUDADeviceContext>(
                new_input_shape, dev_ctx);
      }

      // pad for input
      const int rank = X->dims().size();
      T pad_value(0.0);
      switch (rank) {
        case 4: {
          math::PadFunction<paddle::platform::CUDADeviceContext, T, 4>(
              ctx, input_pad, transformed_X_channel, pad_value,
              &transformed_X);
          if (dO) {
            math::PadFunction<paddle::platform::CUDADeviceContext, T, 4>(
                ctx, input_pad, transformed_dO_channel, pad_value,
                &transformed_dO);
          }

          if (ddX) {
            math::PadFunction<paddle::platform::CUDADeviceContext, T, 4>(
                ctx, input_pad, transformed_ddX_channel, pad_value,
                &transformed_ddX);
          }
        } break;
        case 5: {
          math::PadFunction<paddle::platform::CUDADeviceContext, T, 5>(
              ctx, input_pad, transformed_X_channel, pad_value,
              &transformed_X);
          if (ddX) {
            math::PadFunction<paddle::platform::CUDADeviceContext, T, 5>(
                ctx, input_pad, transformed_ddX_channel, pad_value,
                &transformed_ddX);
          }
        } break;
        default:
          PADDLE_THROW(platform::errors::InvalidArgument(
              "ConvOp only support tensors with 4 or 5 dimensions."));
      }
    } else {
      transformed_X = transformed_X_channel;
      transformed_dO = transformed_dO_channel;
      if (ddX) {
        transformed_ddX = transformed_ddX_channel;
      }

      if (paddings.size() == data_dim) {
        for (size_t i = 0; i < data_dim; ++i) {
          padding_common[i] = paddings[i];
        }
      } else {
        for (size_t i = 0; i < data_dim; ++i) {
          padding_common[i] = paddings[2 * i];
        }
      }
    }

    std::vector<int64_t> starts(data_dim, 0);
    std::vector<int64_t> ends(data_dim, 0);
    std::vector<int64_t> axes(data_dim, 0);
    for (size_t i = 0; i < data_dim; ++i) {
      starts[i] = input_pad[2 * i + 4] * (strides[i] + 1);
      ends[i] = starts[i] + output_vec[i + 2];
      axes[i] = i + 2;
    }

    std::vector<int> transformed_output_vec = output_vec;
    for (size_t i = 0; i < data_dim; ++i) {
      transformed_output_vec[i + 2] =
          output_vec[i + 2] +
          (input_pad[2 * i + 4] + input_pad[2 * i + 5]) * strides[i] -
          2 * padding_common[i] + paddings[2 * i] + paddings[2 * i + 1];
    }

    if (!is_sys_pad) {
      DDim transformed_output_shape(
          framework::make_ddim(transformed_output_vec));
      transformed_ddO_channel.mutable_data<T>(transformed_output_shape,
                                              ctx.GetPlace());
    } else {
      ddO->mutable_data<T>(ctx.GetPlace());
      transformed_ddO_channel = *ddO;
      transformed_ddO_channel.Resize(
          framework::make_ddim(transformed_output_vec));
    }

    const T* x = transformed_X.data<T>();

    int iwo_group = groups;
    int c_group = 1;
#if CUDNN_VERSION_MIN(7, 0, 1)
    iwo_group = 1;
    c_group = groups;
    groups = 1;
#endif
    auto dtype = platform::CudnnDataType<T>::type;

    auto handle = dev_ctx.cudnn_handle();

    ConvArgs args1{&transformed_ddO_channel, W, &transformed_ddX,
                   strides, padding_common, dilations, dtype};
    ConvArgs args2{&transformed_ddO_channel, ddW, &transformed_X,
                   strides, padding_common, dilations, dtype};
    ConvArgs args3{&transformed_dO, dW, &transformed_ddX_channel,
                   strides, padding_common, dilations, dtype};
    ConvArgs args4{&transformed_dO, ddW, &transformed_dX_channel,
                   strides, padding_common, dilations, dtype};

    cudnnConvolutionBwdDataAlgo_t bwd_algo1 =
        static_cast<cudnnConvolutionBwdDataAlgo_t>(0);
    cudnnConvolutionBwdDataAlgo_t bwd_algo2 =
        static_cast<cudnnConvolutionBwdDataAlgo_t>(0);
    cudnnConvolutionFwdAlgo_t data_algo =
        static_cast<cudnnConvolutionFwdAlgo_t>(0);
    cudnnConvolutionBwdFilterAlgo_t filter_algo =
        static_cast<cudnnConvolutionBwdFilterAlgo_t>(0);

    auto layout = GetCudnnTensorFormat(DataLayout::kNCHW);

    // ddo = conv(ddI, W) + conv(I, ddW)
    size_t workspace_size = 0;

    T* transformed_ddy_channel = nullptr;

    if (ddO) {
      ddy = ddO->data<T>();
      transformed_ddy_channel = transformed_ddO_channel.data<T>();
      if (ddX) {
        args1.handle = handle;
        args1.idesc.set(transformed_ddO_channel, iwo_group);
        args1.wdesc.set(*W, layout, iwo_group);
        args1.odesc.set(transformed_ddX, iwo_group);
        args1.cdesc.set(dtype, padding_common, strides, dilations, c_group);
        using search1 = SearchAlgorithm<cudnnConvolutionBwdDataAlgoPerf_t>;
        bwd_algo1 = search1::Find<T>(args1, false, deterministic, ctx);
        workspace_size = search1::GetWorkspaceSize(args1, bwd_algo1);
      }

      if (ddW) {
        ddw = ddW->data<T>();
        args2.handle = handle;
        args2.idesc.set(transformed_ddO_channel, iwo_group);
        args2.wdesc.set(*ddW, layout, iwo_group);
        args2.odesc.set(transformed_X, iwo_group);
        args2.cdesc.set(dtype, padding_common, strides, dilations, c_group);
        using search2 = SearchAlgorithm<cudnnConvolutionBwdDataAlgoPerf_t>;
        bwd_algo2 = search2::Find<T>(args2, false, deterministic, ctx);
        workspace_size = std::max(workspace_size,
                                  search2::GetWorkspaceSize(args2, bwd_algo2));
      }
    }

    if (dW && ddX) {
      dw = dW->data<T>();
      args3.handle = handle;
      args3.idesc.set(transformed_dO, iwo_group);
      args3.wdesc.set(*dW, layout, iwo_group);
      args3.odesc.set(transformed_ddX_channel, iwo_group);
      args3.cdesc.set(dtype, padding_common, strides, dilations, c_group);
      using search3 = SearchAlgorithm<cudnnConvolutionBwdFilterAlgoPerf_t>;
      filter_algo = search3::Find<T>(args3, false, deterministic, ctx);
      workspace_size = std::max(workspace_size,
                                search3::GetWorkspaceSize(args3, filter_algo));
    }

    if (ddW && dX) {
      transformed_dx = transformed_dX_channel.data<T>();
      args4.handle = handle;
      args4.idesc.set(transformed_dO, iwo_group);
      args4.wdesc.set(*ddW, layout, iwo_group);
      args4.odesc.set(transformed_dX_channel, iwo_group);
      args4.cdesc.set(dtype, padding_common, strides, dilations, c_group);
      using search4 = SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t>;
      data_algo = search4::Find<T>(args4, false, deterministic, ctx);
      workspace_size = std::max(workspace_size,
                                search4::GetWorkspaceSize(args4, data_algo));
    }

    int i_n, i_c, i_d, i_h, i_w;
    GetNCDHW(transformed_X.dims(), DataLayout::kNCHW, &i_n, &i_c, &i_d, &i_h,
             &i_w);
    int o_n, o_c, o_d, o_h, o_w;
    GetNCDHW(transformed_dO.dims(), DataLayout::kNCHW, &o_n, &o_c, &o_d, &o_h,
             &o_w);

    int group_offset_in =
        transformed_X.numel() / transformed_X.dims()[0] / groups;
    int group_offset_out =
        transformed_dO.numel() / transformed_dO.dims()[0] / groups;
    int group_offset_filter = W->numel() / groups;

    ScalingParamType<T> alpha = 1.0f;
    ScalingParamType<T> beta = 0.0f;

    auto wkspace_handle = dev_ctx.cudnn_workspace_handle();

    if (ddO) {
      if (ddX) {
        ddx = transformed_ddX.data<T>();
        for (int i = 0; i < groups; i++) {
          wkspace_handle.RunFunc(
              [&](void* workspace_ptr) {
                PADDLE_ENFORCE_CUDA_SUCCESS(
                    platform::dynload::cudnnConvolutionBackwardData(
                        handle, &alpha, args1.wdesc.desc(),
                        w + i * group_offset_filter, args1.odesc.desc(),
                        ddx + i * group_offset_in, args1.cdesc.desc(),
                        bwd_algo1, workspace_ptr, workspace_size, &beta,
                        args1.idesc.desc(),
                        transformed_ddy_channel + i * group_offset_out));
              },
              workspace_size);
        }
      }
      if (ddW) {
        for (int i = 0; i < groups; i++) {
          wkspace_handle.RunFunc(
              [&](void* workspace_ptr) {
                PADDLE_ENFORCE_CUDA_SUCCESS(
                    platform::dynload::cudnnConvolutionBackwardData(
                        handle, &alpha, args2.wdesc.desc(),
                        ddw + i * group_offset_filter, args2.odesc.desc(),
                        x + i * group_offset_in, args2.cdesc.desc(),
                        bwd_algo2, workspace_ptr, workspace_size, &alpha,
                        args2.idesc.desc(),
                        transformed_ddy_channel + i * group_offset_out));
              },
              workspace_size);
        }
      }
      if ((!is_sys_pad) && (!channel_last)) {
        if (strides.size() == 2U) {
          Slice<paddle::platform::CUDADeviceContext, T, 4>(
              ctx, &transformed_ddO_channel, ddO, starts, ends, axes);
        } else if (!is_sys_pad && strides.size() == 3U) {
          Slice<paddle::platform::CUDADeviceContext, T, 5>(
              ctx, &transformed_ddO_channel, ddO, starts, ends, axes);
        }
      } else if ((!is_sys_pad) && (channel_last)) {
        if (strides.size() == 2U) {
          Slice<paddle::platform::CUDADeviceContext, T, 4>(
              ctx, &transformed_ddO_channel, &transformed_ddO_channel, starts,
              ends, axes);
        } else if (!is_sys_pad && strides.size() == 3U) {
          Slice<paddle::platform::CUDADeviceContext, T, 5>(
              ctx, &transformed_ddO_channel, &transformed_ddO_channel, starts,
              ends, axes);
        }

        TransToChannelLast<paddle::platform::CUDADeviceContext, T>(
            ctx, &transformed_ddO_channel, ddO);
      }
    }

    T* transformed_dy_channel = transformed_dO.data<T>();
    if (dW && ddX) {
      ddx = transformed_ddX_channel.data<T>();
      for (int i = 0; i < groups; i++) {
        wkspace_handle.RunFunc(
            [&](void* workspace_ptr) {
              PADDLE_ENFORCE_CUDA_SUCCESS(
                  platform::dynload::cudnnConvolutionBackwardFilter(
                      handle, &alpha, args3.idesc.desc(),
                      transformed_dy_channel + i * group_offset_out,
                      args3.odesc.desc(), ddx + i * group_offset_in,
                      args3.cdesc.desc(), filter_algo, workspace_ptr,
                      workspace_size, &beta, args3.wdesc.desc(),
                      dw + i * group_offset_filter));
            },
            workspace_size);
      }
    }

    if (dX && ddW) {
      ddw = ddW->data<T>();
      for (int i = 0; i < groups; i++) {
        wkspace_handle.RunFunc(
            [&](void* workspace_ptr) {
              PADDLE_ENFORCE_CUDA_SUCCESS(
                  platform::dynload::cudnnConvolutionForward(
                      handle, &alpha, args4.idesc.desc(),
                      transformed_dy_channel + i * group_offset_out,
                      args4.wdesc.desc(), ddw + i * group_offset_filter,
                      args4.cdesc.desc(), data_algo, workspace_ptr,
                      workspace_size, &beta, args4.odesc.desc(),
                      transformed_dx + i * group_offset_in));
            },
            workspace_size);
      }
      if (channel_last) {
        TransToChannelLast<paddle::platform::CUDADeviceContext, T>(
            ctx, &transformed_dX_channel, dX);
      }
    }
  }
};

}  // namespace operators
}  // namespace paddle
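The formulas in the comment block at the top of this kernel follow from conv_transpose being bilinear in its input and filter. A quick numeric sanity check of the ddO identity (a hypothetical sketch, not part of the commit; assumes Paddle 2.x dygraph, with tensor names mirroring the comment's I, W, ddI, ddW):

import paddle
import paddle.nn.functional as F

paddle.seed(0)
I = paddle.randn([1, 3, 5, 5])
W = paddle.randn([3, 2, 3, 3])
ddI = paddle.randn(I.shape)   # perturbation of the input
ddW = paddle.randn(W.shape)   # perturbation of the filter

# ddO = conv_transpose(ddI, W) + conv_transpose(I, ddW)
ddO = F.conv2d_transpose(ddI, W) + F.conv2d_transpose(I, ddW)

# Directional finite difference of the bilinear map; the residual is the
# second-order term eps * conv_transpose(ddI, ddW), so it shrinks with eps.
eps = 1e-4
fd = (F.conv2d_transpose(I + eps * ddI, W + eps * ddW) -
      F.conv2d_transpose(I, W)) / eps
print(float((fd - ddO).abs().max()))  # O(eps)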
@@ -565,6 +1046,11 @@ REGISTER_OP_KERNEL(conv2d_transpose_grad, CUDNN, ::paddle::platform::CUDAPlace,
                    ops::CUDNNConvTransposeGradOpKernel<plat::float16>,
                    ops::CUDNNConvTransposeGradOpKernel<float>,
                    ops::CUDNNConvTransposeGradOpKernel<double>);
REGISTER_OP_KERNEL(
    conv2d_transpose_grad_grad, CUDNN, plat::CUDAPlace,
    paddle::operators::CUDNNConvTransposeDoubleGradOpKernel<float>,
    paddle::operators::CUDNNConvTransposeDoubleGradOpKernel<double>,
    paddle::operators::CUDNNConvTransposeDoubleGradOpKernel<plat::float16>);

REGISTER_OP_KERNEL(conv3d_transpose, CUDNN, ::paddle::platform::CUDAPlace,
                   ops::CUDNNConvTransposeOpKernel<plat::float16>,
...
paddle/fluid/operators/conv_transpose_op.cc
@@ -513,6 +513,85 @@ class ConvTransposeGradOpMaker : public framework::SingleGradOpMaker<T> {
}
};
/*
* Inputs: I, W, dO, ddI, ddW
* Outputs: ddO, dW, dI
*/
template <typename T>
class ConvTransposeDoubleGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    // I, W, dO, ddI, ddW
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput("DOutput", this->Input(framework::GradVarName("Output")));
    op->SetInput("DDInput", this->OutputGrad(framework::GradVarName("Input")));
    op->SetInput("DDFilter",
                 this->OutputGrad(framework::GradVarName("Filter")));

    // ddO, dI, dW
    // Unlike grad op, double grad op does not use name@GRAD@GRAD
    // as key of ops' inputs and outputs.
    auto ddx = this->OutputGrad(framework::GradVarName("Input"));
    auto ddw = this->OutputGrad(framework::GradVarName("Filter"));

    op->SetOutput(
        "DDOutput",
        ddx.empty() ? this->EmptyInputGrad()
                    : this->InputGrad(framework::GradVarName("Output")));
    op->SetOutput("DFilter", ddx.empty() ? this->EmptyInputGrad()
                                         : this->InputGrad("Filter"));
    op->SetOutput("DInput", ddw.empty() ? this->EmptyInputGrad()
                                        : this->InputGrad("Input"));

    op->SetAttrMap(this->Attrs());
  }
};

void ConvTransposeOpDoubleGrad::InferShape(
    framework::InferShapeContext* ctx) const {
  auto x_dims = ctx->GetInputDim("Input");
  auto w_dims = ctx->GetInputDim("Filter");
  auto do_dims = ctx->GetInputDim("DOutput");

  if (ctx->HasOutput("DDOutput") &&
      (ctx->HasInput("DDInput") || (ctx->HasInput("DDFilter")))) {
    ctx->SetOutputDim("DDOutput", do_dims);
  }
  if (ctx->HasOutput("DFilter") && ctx->HasInput("DDInput")) {
    ctx->SetOutputDim("DFilter", w_dims);
  }
  if (ctx->HasOutput("DInput") && ctx->HasInput("DDFilter")) {
    ctx->SetOutputDim("DInput", x_dims);
  }
}

framework::OpKernelType ConvTransposeOpDoubleGrad::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  bool use_cudnn = ctx.Attr<bool>("use_cudnn");
  use_cudnn &= platform::is_gpu_place(ctx.GetPlace());
#ifdef PADDLE_WITH_CUDA
  if (platform::is_gpu_place(ctx.GetPlace())) {
    auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
    use_cudnn &= dev_ctx.cudnn_handle() != nullptr;
  }
#endif
  framework::LibraryType library_;
  if (use_cudnn) {
    library_ = framework::LibraryType::kCUDNN;
  } else {
    library_ = framework::LibraryType::kPlain;
  }
  framework::DataLayout layout_ = framework::DataLayout::kAnyLayout;
  return framework::OpKernelType(
      OperatorWithKernel::IndicateVarDataType(ctx, "Input"), ctx.GetPlace(),
      layout_, library_);
}

}  // namespace operators
}  // namespace paddle
@@ -523,7 +602,11 @@ REGISTER_OPERATOR(conv2d_transpose, ops::ConvTransposeOp,
                  ops::Conv2DTransposeOpMaker,
                  ops::ConvTransposeGradOpMaker<paddle::framework::OpDesc>,
                  ops::ConvTransposeGradOpMaker<paddle::imperative::OpBase>);
-REGISTER_OPERATOR(conv2d_transpose_grad, ops::ConvTransposeOpGrad);
+REGISTER_OPERATOR(
+    conv2d_transpose_grad, ops::ConvTransposeOpGrad,
+    ops::ConvTransposeDoubleGradMaker<paddle::framework::OpDesc>,
+    ops::ConvTransposeDoubleGradMaker<paddle::imperative::OpBase>);
+REGISTER_OPERATOR(conv2d_transpose_grad_grad, ops::ConvTransposeOpDoubleGrad);

REGISTER_OP_CPU_KERNEL(
    conv2d_transpose,
...
paddle/fluid/operators/conv_transpose_op.cu
@@ -24,6 +24,9 @@ REGISTER_OP_CUDA_KERNEL(conv2d_transpose,
REGISTER_OP_CUDA_KERNEL(conv2d_transpose_grad,
                        ops::GemmConvTransposeGradKernel<CUDA, float>,
                        ops::GemmConvTransposeGradKernel<CUDA, double>);
REGISTER_OP_CUDA_KERNEL(conv2d_transpose_grad_grad,
                        ops::GemmConvTransposeGradKernel<CUDA, float>,
                        ops::GemmConvTransposeGradKernel<CUDA, double>);

// conv3d
REGISTER_OP_CUDA_KERNEL(conv3d_transpose,
...
paddle/fluid/operators/conv_transpose_op.h
@@ -114,6 +114,16 @@ class ConvTransposeOpGrad : public framework::OperatorWithKernel {
      const framework::ExecutionContext& ctx) const override;
};

class ConvTransposeOpDoubleGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override;

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override;
};

template <typename DeviceContext, typename T>
class GemmConvTransposeKernel : public framework::OpKernel<T> {
 public:
...
python/paddle/fluid/tests/unittests/test_conv_transpose_nn_grad.py
0 → 100644
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import unittest
import numpy as np

import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import paddle.fluid.core as core
import gradient_checker

from decorator_helper import prog_scope


class TestConvTransposeDoubleGradCheck(unittest.TestCase):
    @prog_scope()
    def func(self, place):
        shape = [2, 4, 3, 3]
        eps = 0.005
        dtype = np.float64
        x = layers.data('x', shape, False, dtype)
        y = layers.conv2d_transpose(
            x, 2, filter_size=1, groups=1, bias_attr=False)
        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)

        w = fluid.default_main_program().global_block().all_parameters()
        w_arr = []
        for p in w:
            w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype))
        gradient_checker.double_grad_check(
            [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps)

    def test_grad(self):
        places = []
        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))
        for p in places:
            self.func(p)


class TestConvTranspose2DoubleGradCheck_AsyPadding(
        TestConvTransposeDoubleGradCheck):
    @prog_scope()
    def func(self, place):
        shape = [2, 2, 3, 3]
        eps = 0.005
        dtype = np.float64
        x = layers.data('x', shape, False, dtype)
        y = layers.conv2d_transpose(
            input=x,
            num_filters=2,
            filter_size=1,
            padding=[1, 0, 0, 1],
            bias_attr=False,
            use_cudnn=True)
        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)

        w = fluid.default_main_program().global_block().all_parameters()
        w_arr = []
        for p in w:
            w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype))
        gradient_checker.double_grad_check(
            [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps)


class TestConvTranspose2DoubleGradCheck_PaddingSAME(
        TestConvTransposeDoubleGradCheck):
    @prog_scope()
    def func(self, place):
        shape = [2, 2, 3, 3]
        eps = 0.005
        dtype = np.float64
        x = layers.data('x', shape, False, dtype)
        y = layers.conv2d_transpose(
            input=x,
            num_filters=2,
            filter_size=1,
            padding="SAME",
            bias_attr=False,
            use_cudnn=True)
        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)

        w = fluid.default_main_program().global_block().all_parameters()
        w_arr = []
        for p in w:
            w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype))
        gradient_checker.double_grad_check(
            [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps)


class TestConvTranspose2DoubleGradCheck_PaddingVALID(
        TestConvTransposeDoubleGradCheck):
    @prog_scope()
    def func(self, place):
        shape = [2, 2, 3, 3]
        eps = 0.005
        dtype = np.float64
        x = layers.data('x', shape, False, dtype)
        y = layers.conv2d_transpose(
            input=x,
            num_filters=2,
            filter_size=1,
            padding="VALID",
            bias_attr=False,
            use_cudnn=True)
        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)

        w = fluid.default_main_program().global_block().all_parameters()
        w_arr = []
        for p in w:
            w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype))
        gradient_checker.double_grad_check(
            [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps)


class TestConvTranspose2DoubleGradCheck_ChannelLast(
        TestConvTransposeDoubleGradCheck):
    @prog_scope()
    def func(self, place):
        shape = [2, 3, 3, 2]
        eps = 0.005
        dtype = np.float64
        x = layers.data('x', shape, False, dtype)
        y = layers.conv2d_transpose(
            input=x,
            num_filters=2,
            filter_size=1,
            padding=[1, 1],
            bias_attr=False,
            use_cudnn=True,
            groups=1,
            data_format="NHWC")
        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)

        w = fluid.default_main_program().global_block().all_parameters()
        w_arr = []
        for p in w:
            w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype))
        gradient_checker.double_grad_check(
            [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps)


if __name__ == "__main__":
    unittest.main()
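All five cases funnel into gradient_checker.double_grad_check, which (per its name and usage here) checks the constructed second-order gradient program against numerical finite differences with step eps. Since test_grad only collects CUDAPlace, the suite effectively targets the new cuDNN double-grad kernel and is skipped on CPU-only builds; the unittest.main() guard also lets the module run standalone via python test_conv_transpose_nn_grad.py.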
tools/static_mode_white_list.py
@@ -108,6 +108,7 @@ STATIC_MODE_TESTING_LIST = [
    'test_conv3d_transpose_layer',
    'test_conv3d_transpose_part2_op',
    'test_conv_nn_grad',
    'test_conv_transpose_nn_grad',
    'test_conv_shift_op',
    'test_cos_sim_op',
    'test_create_global_var',
...