Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
206f32c1
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
206f32c1
编写于
11月 06, 2017
作者:
C
chengduoZH
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
deconv2d kernel and deconv3d kernel write together
上级
0f1b30ef
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
111 addition
and
257 deletion
+111
-257
paddle/operators/conv2d_transpose_cudnn_op.cc
paddle/operators/conv2d_transpose_cudnn_op.cc
+2
-2
paddle/operators/conv_transpose_op.cc
paddle/operators/conv_transpose_op.cc
+4
-4
paddle/operators/conv_transpose_op.cu
paddle/operators/conv_transpose_op.cu
+4
-4
paddle/operators/conv_transpose_op.h
paddle/operators/conv_transpose_op.h
+101
-247
未找到文件。
paddle/operators/conv2d_transpose_cudnn_op.cc
浏览文件 @
206f32c1
...
...
@@ -44,7 +44,7 @@ REGISTER_OP(conv2d_transpose_cudnn, ops::ConvTransposeOp,
REGISTER_OP_CPU_KERNEL
(
conv2d_transpose_cudnn
,
ops
::
GemmConv
2D
TransposeKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
REGISTER_OP_CPU_KERNEL
(
conv2d_transpose_cudnn_grad
,
ops
::
GemmConv
2D
TransposeGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
paddle/operators/conv_transpose_op.cc
浏览文件 @
206f32c1
...
...
@@ -187,17 +187,17 @@ REGISTER_OP(conv2d_transpose, ops::ConvTransposeOp, ops::Conv2DTransposeOpMaker,
REGISTER_OP_CPU_KERNEL
(
conv2d_transpose
,
ops
::
GemmConv
2D
TransposeKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
REGISTER_OP_CPU_KERNEL
(
conv2d_transpose_grad
,
ops
::
GemmConv
2D
TransposeGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
REGISTER_OP
(
conv3d_transpose
,
ops
::
ConvTransposeOp
,
ops
::
Conv3DTransposeOpMaker
,
conv3d_transpose_grad
,
ops
::
ConvTransposeOpGrad
);
REGISTER_OP_CPU_KERNEL
(
conv3d_transpose
,
ops
::
GemmConv
3D
TransposeKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
REGISTER_OP_CPU_KERNEL
(
conv3d_transpose_grad
,
ops
::
GemmConv
3D
TransposeGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
paddle/operators/conv_transpose_op.cu
浏览文件 @
206f32c1
...
...
@@ -18,14 +18,14 @@ namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL
(
conv2d_transpose
,
ops
::
GemmConv
2D
TransposeKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
REGISTER_OP_GPU_KERNEL
(
conv2d_transpose_grad
,
ops
::
GemmConv
2D
TransposeGradKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
REGISTER_OP_GPU_KERNEL
(
conv3d_transpose
,
ops
::
GemmConv
3D
TransposeKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
REGISTER_OP_GPU_KERNEL
(
conv3d_transpose_grad
,
ops
::
GemmConv
3D
TransposeGradKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
paddle/operators/conv_transpose_op.h
浏览文件 @
206f32c1
...
...
@@ -57,7 +57,7 @@ class ConvTransposeOpGrad : public framework::OperatorWithKernel {
};
template
<
typename
Place
,
typename
T
>
class
GemmConv
2D
TransposeKernel
:
public
framework
::
OpKernel
<
T
>
{
class
GemmConvTransposeKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
const
Tensor
*
input
=
context
.
Input
<
Tensor
>
(
"Input"
);
...
...
@@ -70,24 +70,31 @@ class GemmConv2DTransposeKernel : public framework::OpKernel<T> {
// groups will alway be disabled in conv2dtranspose.
const
int
batch_size
=
static_cast
<
int
>
(
input
->
dims
()[
0
]);
const
int64_t
m
=
input
->
dims
()[
1
];
const
int64_t
h
=
input
->
dims
()[
2
];
const
int64_t
w
=
input
->
dims
()[
3
];
const
int64_t
k_h
=
filter
.
dims
()[
2
];
const
int64_t
k_w
=
filter
.
dims
()[
3
];
const
int64_t
c
=
output
->
dims
()[
1
];
// output channels
const
int64_t
o_h
=
output
->
dims
()[
2
];
const
int64_t
o_w
=
output
->
dims
()[
3
];
math
::
Col2ImFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
col2im
;
// use col_shape in the im2col and col2im calculation
DDim
col_shape
=
{
c
,
k_h
,
k_w
,
h
,
w
};
// input_shape_vec: {h, w} or {d, h, w}
std
::
vector
<
int64_t
>
input_shape_vec
=
framework
::
vectorize
(
input
->
dims
());
input_shape_vec
.
erase
(
input_shape_vec
.
begin
(),
input_shape_vec
.
begin
()
+
2
);
// filter_shape_vec: {k_h, k_w} or {k_d, k_h, k_w}
std
::
vector
<
int64_t
>
filter_shape_vec
=
framework
::
vectorize
(
filter
.
dims
());
filter_shape_vec
.
erase
(
filter_shape_vec
.
begin
(),
filter_shape_vec
.
begin
()
+
2
);
// use col_shape in the im2col and col2im (or vol2col and col2vol)
// calculation
// col_shape_vec: {c, k_h, k_w, h, w} or {c, k_d, k_h, k_w, d, h, w}
std
::
vector
<
int64_t
>
col_shape_vec
;
col_shape_vec
.
push_back
(
output
->
dims
()[
1
]);
col_shape_vec
.
insert
(
col_shape_vec
.
end
(),
filter_shape_vec
.
begin
(),
filter_shape_vec
.
end
());
col_shape_vec
.
insert
(
col_shape_vec
.
end
(),
input_shape_vec
.
begin
(),
input_shape_vec
.
end
());
DDim
col_shape
(
framework
::
make_ddim
(
col_shape_vec
));
// use col_matrix_shape in the gemm calculation
DDim
col_matrix_shape
=
{
c
*
k_h
*
k_w
,
h
*
w
};
// size: (c * k_h * k_w, h * w) or (c * k_d * k_h * k_w, d * h * w)
DDim
col_matrix_shape
=
framework
::
flatten_to_2d
(
col_shape
,
filter_shape_vec
.
size
()
+
1
);
Tensor
col
;
col
.
mutable_data
<
T
>
(
col_shape
,
context
.
GetPlace
());
...
...
@@ -98,47 +105,61 @@ class GemmConv2DTransposeKernel : public framework::OpKernel<T> {
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
Resize
(
col_matrix_shape
);
DDim
output_shape
=
{
c
,
o_h
,
o_w
};
DDim
input_matrix_shape
=
{
m
,
h
*
w
};
// output size: (c, o_h, o_w) or (c, o_d, o_h, o_w)
DDim
output_shape
=
framework
::
slice_ddim
(
output
->
dims
(),
1
,
output
->
dims
().
size
());
// input matrix size: (m, h * w) or (m, d * h * w)
DDim
input_matrix_shape
=
{
input
->
dims
()[
1
],
col_matrix_shape
[
1
]};
// filter size: (m, c * k_h * k_w)
DDim
filter_matrix_shape
=
{
m
,
c
*
k_h
*
k_w
};
// filter size: (m, c * k_h * k_w)
or (m, c * k_d * k_h * k_w)
DDim
filter_matrix_shape
=
{
input
->
dims
()[
1
],
col_matrix_shape
[
0
]
};
filter
.
Resize
(
filter_matrix_shape
);
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
math
::
SetConstant
<
Place
,
T
>
set_zero
;
set_zero
(
context
.
device_context
(),
output
,
static_cast
<
T
>
(
0
));
// convolution transpose: gemm + col2im (similar to conv-backward on input)
// convolution transpose: gemm + col2im or col2vol (similar to conv-backward
// on input)
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
// batch with size (m, h * w)
// batch with size (m, h * w)
or (m, d * h * w)
Tensor
input_batch
=
input
->
Slice
(
i
,
i
+
1
).
Resize
(
input_matrix_shape
);
// output size: (c, o_h, o_w)
// output size: (c, o_h, o_w)
or (c, o_d, o_h, o_w)
Tensor
output_batch
=
output
->
Slice
(
i
,
i
+
1
).
Resize
(
output_shape
);
// col_matrix = filter * input_batch
// of shape (c * k_h * k_w, h * w)
// of shape (c * k_h * k_w, h * w)
or (c * k_d * k_h * k_w, d * h * w)
math
::
matmul
<
Place
,
T
>
(
context
.
device_context
(),
filter
,
true
,
input_batch
,
false
,
static_cast
<
T
>
(
1.0
),
&
col_matrix
,
static_cast
<
T
>
(
0.0
));
if
(
filter_shape_vec
.
size
()
==
2
)
{
// col2im: col_matrix -> dy
// from (c * k_h * k_w, h * w) to (c, o_h, o_w)
math
::
Col2ImFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
col2im
;
col2im
(
context
.
device_context
(),
output_batch
,
col
,
strides
[
0
],
strides
[
1
],
0
,
0
,
0
,
0
);
}
else
if
(
filter_shape_vec
.
size
()
==
3
)
{
// col2vol: col_matrix -> dy
// from (c * k_d * k_h * k_w, d * h * w) to (c, o_d, o_h, o_w)
math
::
Col2VolFunctor
<
Place
,
T
>
col2vol
;
col2vol
(
context
.
device_context
(),
output_batch
,
col
,
strides
[
0
],
strides
[
1
],
strides
[
2
],
0
,
0
,
0
);
}
}
}
};
template
<
typename
Place
,
typename
T
>
class
GemmConv
2D
TransposeGradKernel
:
public
framework
::
OpKernel
<
T
>
{
class
GemmConvTransposeGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
const
Tensor
*
input
=
context
.
Input
<
Tensor
>
(
"Input"
);
const
Tensor
*
output_grad
=
context
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Output"
));
// For filter, we do not use const pointer b/c we will do reshape,
// but we should avoid modifying its value.
Tensor
filter
=
*
context
.
Input
<
Tensor
>
(
"Filter"
);
...
...
@@ -147,38 +168,50 @@ class GemmConv2DTransposeGradKernel : public framework::OpKernel<T> {
Tensor
*
filter_grad
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Filter"
));
if
((
!
input_grad
)
&&
(
!
filter_grad
))
return
;
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
// Actually, no paddings and groups allowed in conv transpose.
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
const
int
batch_size
=
static_cast
<
int
>
(
input
->
dims
()[
0
]);
const
int64_t
m
=
input
->
dims
()[
1
];
const
int64_t
h
=
input
->
dims
()[
2
];
const
int64_t
w
=
input
->
dims
()[
3
];
const
int64_t
k_h
=
filter
.
dims
()[
2
];
const
int64_t
k_w
=
filter
.
dims
()[
3
];
const
int64_t
c
=
output_grad
->
dims
()[
1
];
// output channels
const
int64_t
o_h
=
output_grad
->
dims
()[
2
];
const
int64_t
o_w
=
output_grad
->
dims
()[
3
];
// input_shape_vec: {h, w} or {d, h, w}
std
::
vector
<
int64_t
>
input_shape_vec
=
framework
::
vectorize
(
input
->
dims
());
input_shape_vec
.
erase
(
input_shape_vec
.
begin
(),
input_shape_vec
.
begin
()
+
2
);
// filter_shape_vec: {k_h, k_w} or {k_d, k_h, k_w}
std
::
vector
<
int64_t
>
filter_shape_vec
=
framework
::
vectorize
(
filter
.
dims
());
filter_shape_vec
.
erase
(
filter_shape_vec
.
begin
(),
filter_shape_vec
.
begin
()
+
2
);
// use col_shape in the im2col and col2im (or vol2col and col2vol)
// calculation
// col_shape_vec: {c, k_h, k_w, h, w} or {c, k_d, k_h, k_w, d, h, w}
std
::
vector
<
int64_t
>
col_shape_vec
;
col_shape_vec
.
push_back
(
output_grad
->
dims
()[
1
]);
col_shape_vec
.
insert
(
col_shape_vec
.
end
(),
filter_shape_vec
.
begin
(),
filter_shape_vec
.
end
());
col_shape_vec
.
insert
(
col_shape_vec
.
end
(),
input_shape_vec
.
begin
(),
input_shape_vec
.
end
());
DDim
col_shape
(
framework
::
make_ddim
(
col_shape_vec
));
// Only im2col functor required for bp to get to the right shape
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
im2col
;
// use col_matrix_shape in the gemm calculation
// size: (c * k_h * k_w, h * w) or (c * k_d * k_h * k_w, d * h * w)
DDim
col_matrix_shape
=
framework
::
flatten_to_2d
(
col_shape
,
filter_shape_vec
.
size
()
+
1
);
// use col_shape in the im2col and col2im calculation
DDim
col_shape
=
{
c
,
k_h
,
k_w
,
h
,
w
};
// output size: (c, o_h, o_w) or (c, o_d, o_h, o_w)
DDim
output_shape
=
framework
::
slice_ddim
(
output_grad
->
dims
(),
1
,
output_grad
->
dims
().
size
());
DDim
output_shape
=
{
c
,
o_h
,
o_w
};
DDim
input_matrix_shape
=
{
m
,
h
*
w
};
// input matrix size: (m, h * w) or (m, d * h * w)
DDim
input_matrix_shape
=
{
input
->
dims
()[
1
],
col_matrix_shape
[
1
]
};
DDim
filter_matrix_shape
=
{
m
,
c
*
k_h
*
k_w
};
// filter size: (m, c * k_h * k_w) or (m, c * k_d * k_h * k_w)
DDim
filter_matrix_shape
=
{
input
->
dims
()[
1
],
col_matrix_shape
[
0
]};
filter
.
Resize
(
filter_matrix_shape
);
if
((
!
input_grad
)
&&
(
!
filter_grad
))
{
return
;
}
// convolution transpose grad on input:
// im2col + gemm (similar to conv-forward)
// input need to compute gradient
...
...
@@ -190,7 +223,6 @@ class GemmConv2DTransposeGradKernel : public framework::OpKernel<T> {
// to call the matrix multiplication interface.
Tensor
col_matrix
;
col_matrix
.
ShareDataWith
(
col
);
DDim
col_matrix_shape
=
{
c
*
k_h
*
k_w
,
h
*
w
};
col_matrix
.
Resize
(
col_matrix_shape
);
Tensor
filter_grad_
;
...
...
@@ -212,208 +244,29 @@ class GemmConv2DTransposeGradKernel : public framework::OpKernel<T> {
Tensor
output_grad_batch
=
output_grad
->
Slice
(
i
,
i
+
1
).
Resize
(
output_shape
);
if
(
filter_shape_vec
.
size
()
==
2
)
{
// im2col: dy -> col matrix
// from (c, o_h, o_w) to (c * k_h * k_w, h * w)
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
im2col
;
im2col
(
context
.
device_context
(),
output_grad_batch
,
col
,
strides
[
0
],
strides
[
1
],
paddings
[
0
],
paddings
[
0
],
paddings
[
1
],
paddings
[
1
]);
if
(
input_grad
)
{
// batch with size (m, h, w)
Tensor
input_grad_batch
=
input_grad
->
Slice
(
i
,
i
+
1
).
Resize
(
input_matrix_shape
);
// gemm: dx = filter * dy
// (m, c * k_h * k_w) * (c * k_h * k_w, h * w) -> (m, h * w)
math
::
matmul
<
Place
,
T
>
(
context
.
device_context
(),
filter
,
false
,
col_matrix
,
false
,
static_cast
<
T
>
(
1.0
),
&
input_grad_batch
,
static_cast
<
T
>
(
0.0
));
}
if
(
filter_grad
)
{
// input batch
Tensor
in_batch
=
input
->
Slice
(
i
,
i
+
1
).
Resize
(
input_matrix_shape
);
// gemm: d_filter = x * dy^T
// (m, c * h * w) * (k_h * k_w, c * h * w) -> (m, k_h * k_w)
math
::
matmul
<
Place
,
T
>
(
context
.
device_context
(),
in_batch
,
false
,
col_matrix
,
true
,
static_cast
<
T
>
(
1.0
),
&
filter_grad_
,
static_cast
<
T
>
(
1.0
));
}
}
}
}
};
template
<
typename
Place
,
typename
T
>
class
GemmConv3DTransposeKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
const
Tensor
*
input
=
context
.
Input
<
Tensor
>
(
"Input"
);
// The filter will be reshaped, so it should not be constant pointer
Tensor
filter
=
*
context
.
Input
<
Tensor
>
(
"Filter"
);
Tensor
*
output
=
context
.
Output
<
Tensor
>
(
"Output"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
// TODO(chengduo): Paddings can be added in future.
// groups will alway be disabled in conv3dtranspose.
const
int
batch_size
=
static_cast
<
int
>
(
input
->
dims
()[
0
]);
const
int64_t
m
=
input
->
dims
()[
1
];
const
int64_t
d
=
input
->
dims
()[
2
];
const
int64_t
h
=
input
->
dims
()[
3
];
const
int64_t
w
=
input
->
dims
()[
4
];
const
int64_t
k_d
=
filter
.
dims
()[
2
];
const
int64_t
k_h
=
filter
.
dims
()[
3
];
const
int64_t
k_w
=
filter
.
dims
()[
4
];
const
int64_t
c
=
output
->
dims
()[
1
];
// output channels
const
int64_t
o_d
=
output
->
dims
()[
2
];
const
int64_t
o_h
=
output
->
dims
()[
3
];
const
int64_t
o_w
=
output
->
dims
()[
4
];
math
::
Col2VolFunctor
<
Place
,
T
>
col2vol
;
// use col_shape in the vol2col and col2vol calculation
DDim
col_shape
=
{
c
,
k_d
,
k_h
,
k_w
,
d
,
h
,
w
};
// use col_matrix_shape in the gemm calculation
DDim
col_matrix_shape
=
{
c
*
k_d
*
k_h
*
k_w
,
d
*
h
*
w
};
Tensor
col
;
col
.
mutable_data
<
T
>
(
col_shape
,
context
.
GetPlace
());
// col_matrix shares the same piece of data with col,
// but will be reshaped into a two-dimensional matrix shape
// to call the matrix multiplication interface.
Tensor
col_matrix
;
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
Resize
(
col_matrix_shape
);
DDim
output_shape
=
{
c
,
o_d
,
o_h
,
o_w
};
DDim
input_matrix_shape
=
{
m
,
d
*
h
*
w
};
// filter size: (m, c * k_d * k_h * k_w)
DDim
filter_matrix_shape
=
{
m
,
c
*
k_d
*
k_h
*
k_w
};
filter
.
Resize
(
filter_matrix_shape
);
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
math
::
SetConstant
<
Place
,
T
>
set_zero
;
set_zero
(
context
.
device_context
(),
output
,
static_cast
<
T
>
(
0
));
// convolution transpose: gemm + col2vol (similar to conv-backward on input)
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
// batch with size (m, d * h * w)
Tensor
input_batch
=
input
->
Slice
(
i
,
i
+
1
).
Resize
(
input_matrix_shape
);
// output size: (c, o_d, o_h, o_w)
Tensor
output_batch
=
output
->
Slice
(
i
,
i
+
1
).
Resize
(
output_shape
);
// col_matrix = filter * input_batch
// of shape (c * k_d * k_h * k_w, d * h * w)
math
::
matmul
<
Place
,
T
>
(
context
.
device_context
(),
filter
,
true
,
input_batch
,
false
,
static_cast
<
T
>
(
1.0
),
&
col_matrix
,
static_cast
<
T
>
(
0.0
));
// col2vol: col_matrix -> dy
// from (c * k_d * k_h * k_w, d * h * w) to (c, o_d, o_h, o_w)
col2vol
(
context
.
device_context
(),
output_batch
,
col
,
strides
[
0
],
strides
[
1
],
strides
[
2
],
0
,
0
,
0
);
}
}
};
template
<
typename
Place
,
typename
T
>
class
GemmConv3DTransposeGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
const
Tensor
*
input
=
context
.
Input
<
Tensor
>
(
"Input"
);
const
Tensor
*
output_grad
=
context
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Output"
));
// For filter, we do not use const pointer b/c we will do reshape,
// but we should avoid modifying its value.
Tensor
filter
=
*
context
.
Input
<
Tensor
>
(
"Filter"
);
Tensor
*
input_grad
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Input"
));
Tensor
*
filter_grad
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Filter"
));
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
// Actually, no paddings and groups allowed in conv transpose.
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
const
int
batch_size
=
static_cast
<
int
>
(
input
->
dims
()[
0
]);
const
int64_t
m
=
input
->
dims
()[
1
];
const
int64_t
d
=
input
->
dims
()[
2
];
const
int64_t
h
=
input
->
dims
()[
3
];
const
int64_t
w
=
input
->
dims
()[
4
];
const
int64_t
k_d
=
filter
.
dims
()[
2
];
const
int64_t
k_h
=
filter
.
dims
()[
3
];
const
int64_t
k_w
=
filter
.
dims
()[
4
];
const
int64_t
c
=
output_grad
->
dims
()[
1
];
// output channels
const
int64_t
o_d
=
output_grad
->
dims
()[
2
];
const
int64_t
o_h
=
output_grad
->
dims
()[
3
];
const
int64_t
o_w
=
output_grad
->
dims
()[
4
];
// Only vol2col functor required for bp to get to the right shape
math
::
Vol2ColFunctor
<
Place
,
T
>
vol2col
;
// use col_shape in the vol2col and col2vol calculation
DDim
col_shape
=
{
c
,
k_d
,
k_h
,
k_w
,
d
,
h
,
w
};
// use col_matrix_shape in the gemm calculation
DDim
col_matrix_shape_f
=
{
c
*
d
*
h
*
w
,
k_d
*
k_h
*
k_w
};
DDim
output_shape
=
{
c
,
o_d
,
o_h
,
o_w
};
DDim
input_matrix_shape
=
{
m
,
d
*
h
*
w
};
DDim
filter_matrix_shape
=
{
m
,
c
*
k_d
*
k_h
*
k_w
};
filter
.
Resize
(
filter_matrix_shape
);
if
((
!
input_grad
)
&&
(
!
filter_grad
))
{
return
;
}
// convolution transpose grad on input:
// vol2col + gemm (similar to conv-forward)
// input need to compute gradient
if
(
input_grad
||
filter_grad
)
{
Tensor
col
;
col
.
mutable_data
<
T
>
(
col_shape
,
context
.
GetPlace
());
// col_matrix shares the same piece of data with col,
// but will be reshaped into a two-dimensional matrix shape
// to call the matrix multiplication interface.
Tensor
col_matrix
;
col_matrix
.
ShareDataWith
(
col
);
DDim
col_matrix_shape
=
{
c
*
k_d
*
k_h
*
k_w
,
d
*
h
*
w
};
col_matrix
.
Resize
(
col_matrix_shape
);
Tensor
filter_grad_
;
math
::
SetConstant
<
Place
,
T
>
set_zero
;
if
(
input_grad
)
{
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
set_zero
(
context
.
device_context
(),
input_grad
,
static_cast
<
T
>
(
0
));
}
if
(
filter_grad
)
{
// filter size (m, c * k_d * k_h * k_w)
filter_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
set_zero
(
context
.
device_context
(),
filter_grad
,
static_cast
<
T
>
(
0
));
filter_grad_
=
*
filter_grad
;
filter_grad_
.
Resize
(
filter_matrix_shape
);
}
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
// batch with size (c, o_d * o_h * o_w)
Tensor
output_grad_batch
=
output_grad
->
Slice
(
i
,
i
+
1
).
Resize
(
output_shape
);
strides
[
1
],
paddings
[
0
],
paddings
[
0
],
paddings
[
1
],
paddings
[
1
]);
}
else
if
(
filter_shape_vec
.
size
()
==
3
)
{
// vol2col: dy -> col_matrix
// from (c, o_d, o_h, o_w) to (c * k_d * k_h * k_w, d * h * w)
math
::
Vol2ColFunctor
<
Place
,
T
>
vol2col
;
vol2col
(
context
.
device_context
(),
output_grad_batch
,
col
,
strides
[
0
],
strides
[
1
],
strides
[
2
],
paddings
[
0
],
paddings
[
1
],
paddings
[
2
]);
strides
[
1
],
strides
[
2
],
paddings
[
0
],
paddings
[
1
],
paddings
[
2
]);
}
if
(
input_grad
)
{
// batch with size (m,
d,
h, w)
// batch with size (m, h, w)
Tensor
input_grad_batch
=
input_grad
->
Slice
(
i
,
i
+
1
).
Resize
(
input_matrix_shape
);
// gemm: dx = filter * dy
// (m, c * k_h * k_w) * (c * k_h * k_w, h * w) -> (m, h * w)
// or
// (m, c * k_d * k_h * k_w) * (c * k_d * k_h * k_w, d * h * w) -> (m,
// d, h, w)
math
::
matmul
<
Place
,
T
>
(
context
.
device_context
(),
filter
,
false
,
...
...
@@ -424,6 +277,8 @@ class GemmConv3DTransposeGradKernel : public framework::OpKernel<T> {
// input batch
Tensor
in_batch
=
input
->
Slice
(
i
,
i
+
1
).
Resize
(
input_matrix_shape
);
// gemm: d_filter = x * dy^T
// (m, c * h * w) * (k_h * k_w, c * h * w) -> (m, k_h * k_w)
// or
// (m, d * h * w) * (d * h * w, c * k_d * k_h * k_w) -> (m, c * k_d *
// k_h * k_w)
math
::
matmul
<
Place
,
T
>
(
context
.
device_context
(),
in_batch
,
false
,
...
...
@@ -434,6 +289,5 @@ class GemmConv3DTransposeGradKernel : public framework::OpKernel<T> {
}
}
};
}
// namespace operators
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录