Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
4fc9f55e
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
4fc9f55e
编写于
11月 15, 2017
作者:
C
chengduo
提交者:
GitHub
11月 15, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #5472 from chengduoZH/refine_im2col
Add dilations for conv2d and optimize conv2d code
上级
09866fb7
00e0881b
变更
17
显示空白变更内容
内联
并排
Showing
17 changed file
with
944 addition
and
635 deletion
+944
-635
paddle/operators/conv_cudnn_op.cc
paddle/operators/conv_cudnn_op.cc
+0
-2
paddle/operators/conv_op.cc
paddle/operators/conv_op.cc
+40
-14
paddle/operators/conv_op.h
paddle/operators/conv_op.h
+79
-46
paddle/operators/conv_transpose_op.cc
paddle/operators/conv_transpose_op.cc
+12
-9
paddle/operators/conv_transpose_op.h
paddle/operators/conv_transpose_op.h
+24
-19
paddle/operators/math/context_project.h
paddle/operators/math/context_project.h
+29
-24
paddle/operators/math/im2col.cc
paddle/operators/math/im2col.cc
+142
-151
paddle/operators/math/im2col.cu
paddle/operators/math/im2col.cu
+210
-193
paddle/operators/math/im2col.h
paddle/operators/math/im2col.h
+17
-7
paddle/operators/math/im2col_test.cc
paddle/operators/math/im2col_test.cc
+12
-12
paddle/operators/math/vol2col.cc
paddle/operators/math/vol2col.cc
+83
-38
paddle/operators/math/vol2col.cu
paddle/operators/math/vol2col.cu
+120
-58
paddle/operators/math/vol2col.h
paddle/operators/math/vol2col.h
+19
-8
paddle/operators/math/vol2col_test.cc
paddle/operators/math/vol2col_test.cc
+11
-9
paddle/operators/sequence_conv_op.h
paddle/operators/sequence_conv_op.h
+11
-11
python/paddle/v2/fluid/tests/test_conv2d_op.py
python/paddle/v2/fluid/tests/test_conv2d_op.py
+72
-13
python/paddle/v2/fluid/tests/test_conv3d_op.py
python/paddle/v2/fluid/tests/test_conv3d_op.py
+63
-21
未找到文件。
paddle/operators/conv_cudnn_op.cc
浏览文件 @
4fc9f55e
...
...
@@ -22,8 +22,6 @@ class CudnnConvOpMaker : public Conv2DOpMaker {
CudnnConvOpMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
:
Conv2DOpMaker
(
proto
,
op_checker
)
{
AddAttr
<
std
::
vector
<
int
>>
(
"dilations"
,
"dilations of convolution operator."
)
.
SetDefault
(
std
::
vector
<
int
>
{
1
,
1
});
AddAttr
<
int
>
(
"workspace_size_MB"
,
"workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be "
...
...
paddle/operators/conv_op.cc
浏览文件 @
4fc9f55e
...
...
@@ -30,6 +30,7 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
std
::
vector
<
int
>
strides
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"paddings"
);
int
groups
=
ctx
->
Attrs
().
Get
<
int
>
(
"groups"
);
std
::
vector
<
int
>
dilations
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"dilations"
);
int
input_channels
=
in_dims
[
1
];
int
output_channels
=
filter_dims
[
0
];
...
...
@@ -52,9 +53,15 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
"The number of output channels should be divided by groups."
);
std
::
vector
<
int64_t
>
output_shape
({
in_dims
[
0
],
filter_dims
[
0
]});
for
(
size_t
i
=
0
;
i
<
paddings
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
strides
.
size
();
++
i
)
{
PADDLE_ENFORCE
(
in_dims
[
i
+
2
]
+
2
*
paddings
[
i
]
-
(
dilations
[
i
]
*
(
filter_dims
[
i
+
2
]
-
1
)
+
1
)
>
0
,
"Due to the settings of paddings, filter_dims and "
"dilations, the output size is less than 0, please check "
"again."
);
output_shape
.
push_back
(
OutputSize
(
in_dims
[
i
+
2
],
filter_dims
[
i
+
2
],
paddings
[
i
],
strides
[
i
]));
dilations
[
i
],
paddings
[
i
],
strides
[
i
]));
}
ctx
->
SetOutputDim
(
"Output"
,
framework
::
make_ddim
(
output_shape
));
}
...
...
@@ -78,9 +85,15 @@ Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto,
AddOutput
(
"Output"
,
"(Tensor) The output tensor of convolution operator. "
"The format of output tensor is also NCHW."
);
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
"strides of convolution operator."
)
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
"(vector<int> default:{1, 1}), the "
"strides(h_stride, w_stride) of "
"convolution operator."
)
.
SetDefault
({
1
,
1
});
AddAttr
<
std
::
vector
<
int
>>
(
"paddings"
,
"paddings of convolution operator."
)
AddAttr
<
std
::
vector
<
int
>>
(
"paddings"
,
"(vector<int> default:{0, 0}), the "
"paddings(h_pad, w_pad) of "
"convolution operator."
)
.
SetDefault
({
0
,
0
});
AddAttr
<
int
>
(
"groups"
,
...
...
@@ -90,15 +103,20 @@ Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto,
"first half of the input channels, while the second half of the filters "
"is only connected to the second half of the input channels."
)
.
SetDefault
(
1
);
AddAttr
<
std
::
vector
<
int
>>
(
"dilations"
,
"(vector<int> default:{1, 1}), the "
"dilations(h_dilation, w_dilation) of "
"convolution operator."
)
.
SetDefault
({
1
,
1
});
AddComment
(
R"DOC(
Convolution Operator.
The convolution operation calculates the output based on the input, filter
and strides, paddings, groups parameters. The size of each dimension of the
and strides, paddings, groups
, dilations
parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Input(Input, Filter) and output(Output) are in NCHW format. Where N is batch
size, C is the number of channels, H is the height of the feature, and W is
the width of the feature. Parameters(ksize, strides, paddings) are two elements.
the width of the feature. Parameters(ksize, strides, paddings
, dilations
) are two elements.
These two elements represent height and width, respectively.
The input(X) size and output(Out) size may be different.
...
...
@@ -109,8 +127,8 @@ Example:
Output:
Output shape: (N, C_out, H_out, W_out)
where
H_out = (H_in
- filter_size[0] + 2 * paddings[0]
) / strides[0] + 1;
W_out = (W_in
- filter_size[1] + 2 * paddings[1]
) / strides[1] + 1;
H_out = (H_in
+ 2 * paddings[0] - (dilations[0]*(filter_size[0] - 1) + 1)
) / strides[0] + 1;
W_out = (W_in
+ 2 * paddings[1] - (dilations[1]*(filter_size[1] - 1) + 1)
) / strides[1] + 1;
)DOC"
);
}
...
...
@@ -135,13 +153,15 @@ Conv3DOpMaker::Conv3DOpMaker(framework::OpProto* proto,
AddOutput
(
"Output"
,
"(Tensor) The output tensor of convolution operator."
"The format of output tensor is also NCDHW."
);
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
"(vector, default:{0, 0, 0}), the strides of convolution operator."
)
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
"(vector<int>, default:{1, 1, 1}), the "
"strides(d_stride, h_stride, w_stride) of "
"convolution operator."
)
.
SetDefault
({
1
,
1
,
1
});
AddAttr
<
std
::
vector
<
int
>>
(
"paddings"
,
"(vector, default:{0, 0, 0}), the paddings of convolution operator."
)
AddAttr
<
std
::
vector
<
int
>>
(
"paddings"
,
"(vector<int>, default:{0, 0, 0}), the "
"paddings(d_pad, h_pad, w_pad) of convolution "
"operator."
)
.
SetDefault
({
0
,
0
,
0
});
AddAttr
<
int
>
(
"groups"
,
...
...
@@ -151,6 +171,12 @@ Conv3DOpMaker::Conv3DOpMaker(framework::OpProto* proto,
"first half of the input channels, while the second half of the filters "
"is only connected to the second half of the input channels."
)
.
SetDefault
(
1
);
AddAttr
<
std
::
vector
<
int
>>
(
"dilations"
,
"(vector<int> default:{1, 1, 1}), the "
"dilations(d_dilation, h_dilation, w_dilation) of "
"convolution operator. Currently, conv3d doesn't "
"support dilation."
)
.
SetDefault
({
1
,
1
,
1
});
AddComment
(
R"DOC(
Convolution3D Operator.
...
...
paddle/operators/conv_op.h
浏览文件 @
4fc9f55e
...
...
@@ -27,11 +27,24 @@ using Tensor = framework::Tensor;
// Base convolution operator definations for other conv
// like operators to reuse the implementation.
inline
int
OutputSize
(
int
input_size
,
int
filter_size
,
int
padding
,
int
stride
)
{
int
output_size
=
(
input_size
-
filter_size
+
2
*
padding
)
/
stride
+
1
;
inline
int
OutputSize
(
int
input_size
,
int
filter_size
,
int
dilation
,
int
padding
,
int
stride
)
{
const
int
dkernel
=
dilation
*
(
filter_size
-
1
)
+
1
;
const
int
output_size
=
(
input_size
+
2
*
padding
-
dkernel
)
/
stride
+
1
;
return
output_size
;
}
inline
bool
IsExpand
(
std
::
vector
<
int64_t
>&
filter_dim
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
std
::
vector
<
int
>&
dilations
)
{
bool
filter_1
=
true
,
strides_1
=
true
,
padding_0
=
true
,
dilation_1
=
true
;
for
(
size_t
j
=
0
;
j
<
strides
.
size
();
++
j
)
{
filter_1
=
filter_1
&&
(
static_cast
<
int
>
(
filter_dim
[
j
])
==
1
);
strides_1
=
strides_1
&&
(
strides
[
j
]
==
1
);
padding_0
=
padding_0
&&
(
paddings
[
j
]
==
0
);
dilation_1
=
dilation_1
&&
(
dilations
[
j
]
==
1
);
}
return
!
(
filter_1
&&
strides_1
&&
padding_0
&&
dilation_1
);
}
// Define Op classes in .h file so that other conv
// operator implementations can reuse the code.
...
...
@@ -50,14 +63,12 @@ class Conv3DOpMaker : public framework::OpProtoAndCheckerMaker {
class
ConvOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
};
class
ConvOpGrad
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
};
...
...
@@ -73,9 +84,10 @@ class GemmConvKernel : public framework::OpKernel<T> {
Tensor
*
output
=
context
.
Output
<
Tensor
>
(
"Output"
);
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int
groups
=
context
.
Attr
<
int
>
(
"groups"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
int
groups
=
context
.
Attr
<
int
>
(
"group
s"
);
std
::
vector
<
int
>
dilations
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"dilation
s"
);
const
int
batch_size
=
static_cast
<
int
>
(
input
->
dims
()[
0
]);
...
...
@@ -106,14 +118,17 @@ class GemmConvKernel : public framework::OpKernel<T> {
framework
::
DDim
col_matrix_shape
=
framework
::
flatten_to_2d
(
col_shape
,
filter_shape_vec
.
size
()
+
1
);
bool
is_expand
=
IsExpand
(
filter_shape_vec
,
strides
,
paddings
,
dilations
);
Tensor
col
;
col
.
mutable_data
<
T
>
(
col_shape
,
context
.
GetPlace
());
// col_matrix shares the same piece of data with col,
// but will be reshaped into a two-dimensional matrix shape
// to call the matrix multiplication interface.
Tensor
col_matrix
;
if
(
is_expand
)
{
col
.
mutable_data
<
T
>
(
col_shape
,
context
.
GetPlace
());
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
Resize
(
col_matrix_shape
);
}
framework
::
DDim
input_shape
=
framework
::
slice_ddim
(
input
->
dims
(),
1
,
static_cast
<
int
>
(
input
->
dims
().
size
()));
...
...
@@ -130,24 +145,30 @@ class GemmConvKernel : public framework::OpKernel<T> {
int
in_step
=
static_cast
<
int
>
(
input
->
dims
()[
1
])
/
groups
;
int
out_step
=
static_cast
<
int
>
(
output
->
dims
()[
1
])
/
groups
;
math
::
Vol2ColFunctor
<
Place
,
T
>
vol2col
;
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
im2col
;
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
Tensor
in_batch
=
input
->
Slice
(
i
,
i
+
1
).
Resize
(
input_shape
);
Tensor
out_batch
=
output
->
Slice
(
i
,
i
+
1
).
Resize
(
output_matrix_shape
);
for
(
int
g
=
0
;
g
<
groups
;
g
++
)
{
Tensor
in_slice
=
in_batch
.
Slice
(
g
*
in_step
,
(
g
+
1
)
*
in_step
);
if
(
filter_shape_vec
.
size
()
==
2
)
{
if
(
!
is_expand
)
{
col
.
ShareDataWith
(
in_slice
);
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
Resize
(
col_matrix_shape
);
}
else
if
(
filter_shape_vec
.
size
()
==
2
)
{
// im2col
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
im2col
;
im2col
(
context
.
device_context
(),
in_slice
,
col
,
stride
s
[
0
],
strides
[
1
],
paddings
[
0
],
paddings
[
0
],
paddings
[
1
]
,
paddings
[
1
]
);
im2col
(
context
.
device_context
(),
in_slice
,
dilations
,
strides
,
std
::
vector
<
int
>
{
paddings
[
0
],
paddings
[
1
],
padding
s
[
0
],
paddings
[
1
]}
,
&
col
);
}
else
if
(
filter_shape_vec
.
size
()
==
3
)
{
// vol2col
math
::
Vol2ColFunctor
<
Place
,
T
>
vol2col
;
vol2col
(
context
.
device_context
(),
in_slice
,
col
,
strides
[
0
],
strides
[
1
],
strides
[
2
],
paddings
[
0
],
paddings
[
1
],
paddings
[
2
]);
vol2col
(
context
.
device_context
(),
in_slice
,
dilations
,
strides
,
paddings
,
&
col
);
}
// gemm
...
...
@@ -178,9 +199,10 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
if
(
!
input_grad
&&
!
filter_grad
)
return
;
int
groups
=
context
.
Attr
<
int
>
(
"groups"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
int
groups
=
context
.
Attr
<
int
>
(
"group
s"
);
std
::
vector
<
int
>
dilations
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"dilation
s"
);
const
int
batch_size
=
static_cast
<
int
>
(
input
->
dims
()[
0
]);
...
...
@@ -230,14 +252,17 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
int
in_step
=
static_cast
<
int
>
(
input
->
dims
()[
1
])
/
groups
;
int
out_step
=
static_cast
<
int
>
(
output_grad
->
dims
()[
1
])
/
groups
;
bool
is_expand
=
IsExpand
(
filter_shape_vec
,
strides
,
paddings
,
dilations
);
Tensor
col
;
// col_matrix shares the same piece of data with col,
// but will be reshaped into a two-dimensional matrix shape
// to call the matrix multiplication interface.
Tensor
col_matrix
;
if
(
is_expand
)
{
col
.
mutable_data
<
T
>
(
col_shape
,
context
.
GetPlace
());
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
Resize
(
col_matrix_shape
);
}
math
::
SetConstant
<
Place
,
T
>
set_zero
;
...
...
@@ -245,6 +270,9 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
set_zero
(
context
.
device_context
(),
input_grad
,
static_cast
<
T
>
(
0
));
math
::
Col2VolFunctor
<
Place
,
T
>
col2vol
;
math
::
Col2ImFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
col2im
;
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
Tensor
out_grad_batch
=
output_grad
->
Slice
(
i
,
i
+
1
).
Resize
(
output_matrix_shape
);
...
...
@@ -254,24 +282,26 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
Tensor
out_grad_slice
=
out_grad_batch
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
Tensor
filter_slice
=
filter
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
math
::
matmul
<
Place
,
T
>
(
context
.
device_context
(),
filter_slice
,
true
,
out_grad_slice
,
false
,
T
(
1.0
),
&
col_matrix
,
T
(
0.0
));
// col2im
Tensor
in_grad_slice
=
in_grad_batch
.
Slice
(
g
*
in_step
,
(
g
+
1
)
*
in_step
);
if
(
filter_shape_vec
.
size
()
==
2
)
{
math
::
Col2ImFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
col2im
;
col2im
(
context
.
device_context
(),
in_grad_slice
,
col
,
strides
[
0
],
strides
[
1
],
paddings
[
0
],
paddings
[
0
],
paddings
[
1
],
paddings
[
1
]);
if
(
!
is_expand
)
{
col_matrix
.
ShareDataWith
(
in_grad_slice
);
col_matrix
.
Resize
(
col_matrix_shape
);
}
math
::
matmul
<
Place
,
T
>
(
context
.
device_context
(),
filter_slice
,
true
,
out_grad_slice
,
false
,
T
(
1.0
),
&
col_matrix
,
T
(
0.0
));
}
else
if
(
filter_shape_vec
.
size
()
==
3
)
{
math
::
Col2VolFunctor
<
Place
,
T
>
col2vol
;
col2vol
(
context
.
device_context
(),
in_grad_slice
,
col
,
strides
[
0
],
strides
[
1
],
strides
[
2
],
paddings
[
0
],
paddings
[
1
],
paddings
[
2
]);
if
(
is_expand
&&
filter_shape_vec
.
size
()
==
2
)
{
col2im
(
context
.
device_context
(),
col
,
dilations
,
strides
,
std
::
vector
<
int
>
{
paddings
[
0
],
paddings
[
1
],
paddings
[
0
],
paddings
[
1
]},
&
in_grad_slice
);
}
else
if
(
is_expand
&&
filter_shape_vec
.
size
()
==
3
)
{
col2vol
(
context
.
device_context
(),
col
,
dilations
,
strides
,
paddings
,
&
in_grad_slice
);
}
}
}
...
...
@@ -282,7 +312,8 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
Tensor
filter_grad_
=
*
filter_grad
;
filter_grad_
.
Resize
(
filter_matrix_shape
);
set_zero
(
context
.
device_context
(),
filter_grad
,
static_cast
<
T
>
(
0
));
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
im2col
;
math
::
Vol2ColFunctor
<
Place
,
T
>
vol2col
;
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
Tensor
out_grad_batch
=
output_grad
->
Slice
(
i
,
i
+
1
).
Resize
(
output_matrix_shape
);
...
...
@@ -293,16 +324,18 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
out_grad_batch
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
Tensor
in_slice
=
in_batch
.
Slice
(
g
*
in_step
,
(
g
+
1
)
*
in_step
);
if
(
filter_shape_vec
.
size
()
==
2
)
{
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
im2col
;
im2col
(
context
.
device_context
(),
in_slice
,
col
,
strides
[
0
],
strides
[
1
],
paddings
[
0
],
paddings
[
0
],
paddings
[
1
],
paddings
[
1
]);
if
(
!
is_expand
)
{
col
.
ShareDataWith
(
in_slice
);
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
Resize
(
col_matrix_shape
);
}
else
if
(
filter_shape_vec
.
size
()
==
2
)
{
im2col
(
context
.
device_context
(),
in_slice
,
dilations
,
strides
,
std
::
vector
<
int
>
{
paddings
[
0
],
paddings
[
1
],
paddings
[
0
],
paddings
[
1
]},
&
col
);
}
else
if
(
filter_shape_vec
.
size
()
==
3
)
{
math
::
Vol2ColFunctor
<
Place
,
T
>
vol2col
;
vol2col
(
context
.
device_context
(),
in_slice
,
col
,
strides
[
0
],
strides
[
1
],
strides
[
2
],
paddings
[
0
],
paddings
[
1
],
paddings
[
2
]);
vol2col
(
context
.
device_context
(),
in_slice
,
dilations
,
strides
,
paddings
,
&
col
);
}
// gemm
...
...
paddle/operators/conv_transpose_op.cc
浏览文件 @
4fc9f55e
...
...
@@ -51,7 +51,7 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
"as the number of filters."
);
std
::
vector
<
int64_t
>
output_shape
({
in_dims
[
0
],
filter_dims
[
1
]});
for
(
size_t
i
=
0
;
i
<
padding
s
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
stride
s
.
size
();
++
i
)
{
output_shape
.
push_back
((
in_dims
[
i
+
2
]
-
1
)
*
strides
[
i
]
+
filter_dims
[
i
+
2
]);
}
...
...
@@ -79,11 +79,13 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(
"The format of output tensor is also NCHW."
);
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
"(vector defalut:{1, 1}), strides of convolution transpose operator."
)
"(vector<int> defalut:{1, 1}), the strides(h_stride, w_stride) of "
"convolution transpose operator."
)
.
SetDefault
({
1
,
1
});
AddAttr
<
std
::
vector
<
int
>>
(
"paddings"
,
"(vector defalut:{0, 0}), paddings of convolution transpose operator."
)
"(vector<int> defalut:{0, 0}), the paddings(h_pad, w_pad) of convolution "
"transpose operator."
)
.
SetDefault
({
0
,
0
});
AddComment
(
R"DOC(
Convolution2D Transpose Operator.
...
...
@@ -132,13 +134,14 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker(
"Where N is batch size, C is "
"the number of channels, D is the depth of the feature, H is the "
"height of the feature, and W is the width of the feature."
);
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
"(vector defalut:{1, 1, 1}), strides of convolution transpose operator."
)
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
"(vector<int> defalut:{1, 1, 1}), the "
"strides{d_stride, h_stride, w_stride} of "
"convolution transpose operator."
)
.
SetDefault
({
1
,
1
,
1
});
AddAttr
<
std
::
vector
<
int
>>
(
"paddings"
,
"(vector defalut:{0, 0, 0}), paddings
of convolution transpose operator."
)
AddAttr
<
std
::
vector
<
int
>>
(
"paddings"
,
"(vector<int> defalut:{0, 0, 0}), paddings(d_pad, "
"h_pad, w_pad)
of convolution transpose operator."
)
.
SetDefault
({
0
,
0
,
0
});
AddComment
(
R"DOC(
Convolution3D Transpose Operator.
...
...
paddle/operators/conv_transpose_op.h
浏览文件 @
4fc9f55e
...
...
@@ -43,16 +43,12 @@ class Conv3DTransposeOpMaker : public framework::OpProtoAndCheckerMaker {
class
ConvTransposeOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
protected:
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
};
class
ConvTransposeOpGrad
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
protected:
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
};
...
...
@@ -66,6 +62,8 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
Tensor
*
output
=
context
.
Output
<
Tensor
>
(
"Output"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
// Actually, no paddings and groups allowed in conv transpose.
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
// TODO(Zhuoyuan): Paddings can be added in future.
// groups will alway be disabled in conv2dtranspose.
...
...
@@ -120,6 +118,10 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
math
::
SetConstant
<
Place
,
T
>
set_zero
;
set_zero
(
context
.
device_context
(),
output
,
static_cast
<
T
>
(
0
));
math
::
Col2ImFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
col2im
;
math
::
Col2VolFunctor
<
Place
,
T
>
col2vol
;
std
::
vector
<
int
>
dilations
({
1
,
1
,
1
});
// convolution transpose: gemm + col2im or col2vol (similar to conv-backward
// on input)
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
...
...
@@ -138,16 +140,16 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
if
(
filter_shape_vec
.
size
()
==
2
)
{
// col2im: col_matrix -> dy
// from (c * k_h * k_w, h * w) to (c, o_h, o_w)
math
::
Col2ImFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
col2im
;
col2im
(
context
.
device_context
(),
output_batch
,
col
,
strides
[
0
],
strides
[
1
],
0
,
0
,
0
,
0
);
col2im
(
context
.
device_context
(),
col
,
std
::
vector
<
int
>
{
dilations
[
0
],
dilations
[
1
]},
strides
,
std
::
vector
<
int
>
{
paddings
[
0
],
paddings
[
1
],
paddings
[
0
],
paddings
[
1
]},
&
output_batch
);
}
else
if
(
filter_shape_vec
.
size
()
==
3
)
{
// col2vol: col_matrix -> dy
// from (c * k_d * k_h * k_w, d * h * w) to (c, o_d, o_h, o_w)
math
::
Col2VolFunctor
<
Place
,
T
>
col2vol
;
col2vol
(
context
.
device_context
(),
output_batch
,
col
,
strides
[
0
],
strides
[
1
],
strides
[
2
],
0
,
0
,
0
);
col2vol
(
context
.
device_context
(),
col
,
dilations
,
strides
,
std
::
vector
<
int
>
{
0
,
0
,
0
},
&
output_batch
);
}
}
}
...
...
@@ -228,6 +230,10 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
Tensor
filter_grad_
;
math
::
SetConstant
<
Place
,
T
>
set_zero
;
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
im2col
;
math
::
Vol2ColFunctor
<
Place
,
T
>
vol2col
;
std
::
vector
<
int
>
dilations
({
1
,
1
,
1
});
if
(
input_grad
)
{
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
set_zero
(
context
.
device_context
(),
input_grad
,
static_cast
<
T
>
(
0
));
...
...
@@ -247,17 +253,16 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
if
(
filter_shape_vec
.
size
()
==
2
)
{
// im2col: dy -> col matrix
// from (c, o_h, o_w) to (c * k_h * k_w, h * w)
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
im2col
;
im2col
(
context
.
device_context
(),
output_grad_batch
,
col
,
strides
[
0
],
strides
[
1
],
paddings
[
0
],
paddings
[
0
],
paddings
[
1
],
paddings
[
1
]);
im2col
(
context
.
device_context
(),
output_grad_batch
,
std
::
vector
<
int
>
{
dilations
[
0
],
dilations
[
1
]},
strides
,
std
::
vector
<
int
>
{
paddings
[
0
],
paddings
[
1
],
paddings
[
0
],
paddings
[
1
]},
&
col
);
}
else
if
(
filter_shape_vec
.
size
()
==
3
)
{
// vol2col: dy -> col_matrix
// from (c, o_d, o_h, o_w) to (c * k_d * k_h * k_w, d * h * w)
math
::
Vol2ColFunctor
<
Place
,
T
>
vol2col
;
vol2col
(
context
.
device_context
(),
output_grad_batch
,
col
,
strides
[
0
],
strides
[
1
],
strides
[
2
],
paddings
[
0
],
paddings
[
1
],
paddings
[
2
]);
vol2col
(
context
.
device_context
(),
output_grad_batch
,
dilations
,
strides
,
paddings
,
&
col
);
}
if
(
input_grad
)
{
...
...
paddle/operators/math/context_project.h
浏览文件 @
4fc9f55e
...
...
@@ -88,13 +88,18 @@ template <typename Place, typename T>
class
ContextProjectFunctor
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
LoDTensor
&
in
,
const
Tensor
&
padding_data
,
Tensor
&
col
,
bool
padding_trainable
,
int
context_start
,
int
context_length
,
int
context_stride
,
int
up_pad
,
int
down_pad
)
{
const
Tensor
&
padding_data
,
bool
padding_trainable
,
const
int
context_start
,
const
int
context_length
,
const
int
context_stride
,
const
int
up_pad
,
const
int
down_pad
,
Tensor
*
col
)
{
auto
lod_level_0
=
in
.
lod
()[
0
];
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kOCF
,
Place
,
float
>
im2col_ocf
;
std
::
vector
<
int
>
dilation
({
1
,
1
});
std
::
vector
<
int
>
padding
({
up_pad
,
0
,
down_pad
,
0
});
std
::
vector
<
int
>
stride
({
context_stride
,
1
});
int
input_row_begin
,
input_row_end
;
int
sequence_height
,
sequence_width
;
sequence_width
=
in
.
dims
()[
1
];
...
...
@@ -105,7 +110,7 @@ class ContextProjectFunctor {
:
static_cast
<
int
>
(
lod_level_0
[
i
]);
input_row_end
=
static_cast
<
int
>
(
lod_level_0
[
i
+
1
]);
Tensor
out_t
=
col
.
Slice
(
static_cast
<
int
>
(
lod_level_0
[
i
]),
Tensor
out_t
=
col
->
Slice
(
static_cast
<
int
>
(
lod_level_0
[
i
]),
static_cast
<
int
>
(
lod_level_0
[
i
+
1
]));
sequence_height
=
static_cast
<
int
>
(
out_t
.
dims
()[
0
]);
...
...
@@ -123,16 +128,13 @@ class ContextProjectFunctor {
{
1
,
input_row_end
-
input_row_begin
,
sequence_width
});
// input_channels, input_height, input_width
in_t
.
Resize
(
framework
::
make_ddim
(
input_shape
));
im2col_ocf
(
context
,
in_t
,
out_t
,
/*stride_height*/
context_stride
,
/*stride_width*/
1
,
up_pad
,
down_pad
,
0
,
0
);
im2col_ocf
(
context
,
in_t
,
dilation
,
stride
,
padding
,
&
out_t
);
out_t
.
Resize
({
sequence_height
,
context_length
*
sequence_width
});
}
}
if
(
padding_trainable
)
{
for
(
int
i
=
0
;
i
<
static_cast
<
int
>
(
lod_level_0
.
size
())
-
1
;
++
i
)
{
Tensor
out_t
=
col
.
Slice
(
static_cast
<
int
>
(
lod_level_0
[
i
]),
Tensor
out_t
=
col
->
Slice
(
static_cast
<
int
>
(
lod_level_0
[
i
]),
static_cast
<
int
>
(
lod_level_0
[
i
+
1
]));
sequence_height
=
static_cast
<
int
>
(
out_t
.
dims
()[
0
]);
...
...
@@ -196,14 +198,19 @@ class ContextProjectFunctor {
template
<
typename
Place
,
typename
T
>
class
ContextProjectGradFunctor
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
LoDTensor
&
in
,
Tensor
&
padding_data
,
Tensor
&
col
,
bool
padding_trainable
,
int
context_start
,
int
context_length
,
int
context_stride
,
int
up_pad
,
int
down_pad
,
bool
input_grad
,
bool
pad_grad
)
{
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
LoDTensor
&
in
,
bool
padding_trainable
,
const
int
context_start
,
const
int
context_length
,
const
int
context_stride
,
const
int
up_pad
,
const
int
down_pad
,
bool
pad_grad
,
bool
input_grad
,
Tensor
*
padding_data
,
Tensor
*
col
)
{
auto
lod_level_0
=
in
.
lod
()[
0
];
math
::
Col2ImFunctor
<
math
::
ColFormat
::
kOCF
,
Place
,
float
>
col2im_ocf
;
std
::
vector
<
int
>
dilation
({
1
,
1
});
std
::
vector
<
int
>
padding
({
up_pad
,
0
,
down_pad
,
0
});
std
::
vector
<
int
>
stride
({
context_stride
,
1
});
int
input_row_begin
,
input_row_end
;
int
sequence_height
,
sequence_width
;
sequence_width
=
in
.
dims
()[
1
];
...
...
@@ -215,7 +222,7 @@ class ContextProjectGradFunctor {
:
static_cast
<
int
>
(
lod_level_0
[
i
]);
input_row_end
=
static_cast
<
int
>
(
lod_level_0
[
i
+
1
]);
Tensor
out_t
=
col
.
Slice
(
static_cast
<
int
>
(
lod_level_0
[
i
]),
Tensor
out_t
=
col
->
Slice
(
static_cast
<
int
>
(
lod_level_0
[
i
]),
static_cast
<
int
>
(
lod_level_0
[
i
+
1
]));
sequence_height
=
static_cast
<
int
>
(
out_t
.
dims
()[
0
]);
...
...
@@ -234,9 +241,7 @@ class ContextProjectGradFunctor {
sequence_width
});
// input_channels, input_height, input_width
in_t
.
Resize
(
framework
::
make_ddim
(
input_shape
));
col2im_ocf
(
context
,
in_t
,
out_t
,
/*stride_height*/
context_stride
,
/*stride_width*/
1
,
up_pad
,
down_pad
,
0
,
0
);
col2im_ocf
(
context
,
out_t
,
dilation
,
stride
,
padding
,
&
in_t
);
out_t
.
Resize
({
sequence_height
,
context_length
*
sequence_width
});
}
}
...
...
@@ -244,7 +249,7 @@ class ContextProjectGradFunctor {
if
(
pad_grad
)
{
if
(
padding_trainable
)
{
for
(
int
i
=
0
;
i
<
static_cast
<
int
>
(
lod_level_0
.
size
())
-
1
;
++
i
)
{
Tensor
out_t
=
col
.
Slice
(
static_cast
<
int
>
(
lod_level_0
[
i
]),
Tensor
out_t
=
col
->
Slice
(
static_cast
<
int
>
(
lod_level_0
[
i
]),
static_cast
<
int
>
(
lod_level_0
[
i
+
1
]));
sequence_height
=
static_cast
<
int
>
(
out_t
.
dims
()[
0
]);
...
...
@@ -259,7 +264,7 @@ class ContextProjectGradFunctor {
k
+
context_length
<
up_pad
?
context_length
:
up_pad
-
k
;
Tensor
out_t_sub
=
out_t
.
Slice
(
k
*
context_length
,
k
*
context_length
+
padding_size
);
Tensor
w_sub
=
padding_data
.
Slice
(
k
,
k
+
padding_size
);
Tensor
w_sub
=
padding_data
->
Slice
(
k
,
k
+
padding_size
);
auto
out_t_sub_e
=
EigenMatrix
<
T
>::
From
(
out_t_sub
);
auto
w_sub_e
=
EigenMatrix
<
T
>::
From
(
w_sub
);
w_sub_e
.
device
(
*
context
.
GetEigenDevice
<
Place
>
())
=
...
...
@@ -292,7 +297,7 @@ class ContextProjectGradFunctor {
Tensor
out_t_sub
=
out_t
.
Slice
(
(
down_pad_begin_row
+
t
)
*
context_length
-
padding_size
,
(
down_pad_begin_row
+
t
)
*
context_length
);
Tensor
w_sub
=
padding_data
.
Slice
(
Tensor
w_sub
=
padding_data
->
Slice
(
up_pad
+
padding_idx
,
up_pad
+
padding_idx
+
padding_size
);
auto
out_t_sub_e
=
EigenMatrix
<
T
>::
From
(
out_t_sub
);
auto
w_sub_e
=
EigenMatrix
<
T
>::
From
(
w_sub
);
...
...
paddle/operators/math/im2col.cc
浏览文件 @
4fc9f55e
...
...
@@ -28,57 +28,55 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
platform
::
CPUPlace
,
T
>
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
im
,
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_up
,
int
padding_down
,
int
padding_left
,
int
padding_right
)
{
const
framework
::
Tensor
&
im
,
const
std
::
vector
<
int
>&
dilation
,
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
col
)
{
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
PADDLE_ENFORCE
(
col
->
dims
().
size
()
==
5
);
int
i
nput
_channels
=
im
.
dims
()[
0
];
int
i
nput
_height
=
im
.
dims
()[
1
];
int
i
nput
_width
=
im
.
dims
()[
2
];
int
filter_height
=
col
.
dims
()[
1
];
int
filter_width
=
col
.
dims
()[
2
];
int
output_height
=
col
.
dims
()[
3
];
int
output_width
=
col
.
dims
()[
4
];
int
i
m
_channels
=
im
.
dims
()[
0
];
int
i
m
_height
=
im
.
dims
()[
1
];
int
i
m
_width
=
im
.
dims
()[
2
];
int
filter_height
=
col
->
dims
()[
1
];
int
filter_width
=
col
->
dims
()[
2
];
int
col_height
=
col
->
dims
()[
3
];
int
col_width
=
col
->
dims
()[
4
];
PADDLE_ENFORCE_EQ
(
(
input_height
+
padding_up
+
padding_down
-
filter_height
)
/
stride_height
+
PADDLE_ENFORCE_EQ
(
(
im_height
+
padding
[
0
]
+
padding
[
2
]
-
((
dilation
[
0
]
*
(
filter_height
-
1
)
+
1
))
)
/
stride
[
0
]
+
1
,
output
_height
,
col
_height
,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent."
);
PADDLE_ENFORCE_EQ
(
(
input_width
+
padding_left
+
padding_right
-
filter_width
)
/
stride_width
+
PADDLE_ENFORCE_EQ
(
(
im_width
+
padding
[
1
]
+
padding
[
3
]
-
((
dilation
[
1
]
*
(
filter_width
-
1
)
+
1
))
)
/
stride
[
1
]
+
1
,
output
_width
,
"output_width and padding(padding_left, padding_right
) are "
col
_width
,
"Output_height and padding(padding_up, padding_down
) are "
"inconsistent."
);
int
channels_col
=
i
nput
_channels
*
filter_height
*
filter_width
;
int
channels_col
=
i
m
_channels
*
filter_height
*
filter_width
;
const
T
*
im_data
=
im
.
data
<
T
>
();
T
*
col_data
=
col
.
data
<
T
>
();
T
*
col_data
=
col
->
data
<
T
>
();
for
(
int
c
=
0
;
c
<
channels_col
;
++
c
)
{
int
w_offset
=
c
%
filter_width
;
int
h_offset
=
(
c
/
filter_width
)
%
filter_height
;
int
c_im
=
c
/
filter_width
/
filter_height
;
for
(
int
h
=
0
;
h
<
output_height
;
++
h
)
{
for
(
int
w
=
0
;
w
<
output_width
;
++
w
)
{
int
im_row_idx
=
h
*
stride_height
+
h_offset
-
padding_up
;
int
im_col_idx
=
w
*
stride_width
+
w_offset
-
padding_left
;
for
(
int
h
=
0
;
h
<
col_height
;
++
h
)
{
for
(
int
w
=
0
;
w
<
col_width
;
++
w
)
{
int
im_row_idx
=
h
*
stride
[
0
]
-
padding
[
0
]
+
h_offset
*
dilation
[
0
];
int
im_col_idx
=
w
*
stride
[
1
]
-
padding
[
1
]
+
w_offset
*
dilation
[
1
];
int
col_idx
=
(
c
*
col_height
+
h
)
*
col_width
+
w
;
int
im_idx
=
(
im_row_idx
+
c_im
*
im_height
)
*
im_width
+
im_col_idx
;
if
(
im_row_idx
<
0
||
im_row_idx
>=
input_height
||
im_col_idx
<
0
||
im_col_idx
>=
input_width
)
{
col_data
[(
c
*
output_height
+
h
)
*
output_width
+
w
]
=
T
(
0
);
}
else
{
im_row_idx
+=
c_im
*
input_height
;
col_data
[(
c
*
output_height
+
h
)
*
output_width
+
w
]
=
im_data
[
im_row_idx
*
input_width
+
im_col_idx
];
}
col_data
[
col_idx
]
=
(
im_row_idx
<
0
||
im_row_idx
>=
im_height
||
im_col_idx
<
0
||
im_col_idx
>=
im_width
)
?
static_cast
<
T
>
(
0
)
:
im_data
[
im_idx
];
}
}
}
...
...
@@ -94,54 +92,55 @@ template <class T>
class
Col2ImFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kCFO
,
platform
::
CPUPlace
,
T
>
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
im
,
const
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_up
,
int
padding_down
,
int
padding_left
,
int
padding_right
)
{
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
col
,
const
std
::
vector
<
int
>&
dilation
,
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
im
)
{
PADDLE_ENFORCE
(
im
->
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
int
i
nput_channels
=
im
.
dims
()[
0
];
int
i
nput_height
=
im
.
dims
()[
1
];
int
i
nput_width
=
im
.
dims
()[
2
];
int
i
m_channels
=
im
->
dims
()[
0
];
int
i
m_height
=
im
->
dims
()[
1
];
int
i
m_width
=
im
->
dims
()[
2
];
int
filter_height
=
col
.
dims
()[
1
];
int
filter_width
=
col
.
dims
()[
2
];
int
output
_height
=
col
.
dims
()[
3
];
int
output
_width
=
col
.
dims
()[
4
];
int
col
_height
=
col
.
dims
()[
3
];
int
col
_width
=
col
.
dims
()[
4
];
PADDLE_ENFORCE_EQ
(
(
input_height
+
padding_up
+
padding_down
-
filter_height
)
/
stride_height
+
PADDLE_ENFORCE_EQ
(
(
im_height
+
padding
[
0
]
+
padding
[
2
]
-
((
dilation
[
0
]
*
(
filter_height
-
1
)
+
1
))
)
/
stride
[
0
]
+
1
,
output
_height
,
col
_height
,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent."
);
PADDLE_ENFORCE_EQ
(
(
input_width
+
padding_left
+
padding_right
-
filter_width
)
/
stride_width
+
PADDLE_ENFORCE_EQ
(
(
im_width
+
padding
[
1
]
+
padding
[
3
]
-
((
dilation
[
1
]
*
(
filter_width
-
1
)
+
1
))
)
/
stride
[
1
]
+
1
,
output
_width
,
"output_width and padding(padding_left, padding_right
) are "
col
_width
,
"Output_height and padding(padding_up, padding_down
) are "
"inconsistent."
);
int
channels_col
=
i
nput
_channels
*
filter_height
*
filter_width
;
int
channels_col
=
i
m
_channels
*
filter_height
*
filter_width
;
T
*
im_data
=
im
.
data
<
T
>
();
T
*
im_data
=
im
->
data
<
T
>
();
const
T
*
col_data
=
col
.
data
<
T
>
();
for
(
int
c
=
0
;
c
<
channels_col
;
++
c
)
{
int
w_offset
=
c
%
filter_width
;
int
h_offset
=
(
c
/
filter_width
)
%
filter_height
;
int
c_im
=
c
/
filter_width
/
filter_height
;
for
(
int
h
=
0
;
h
<
output
_height
;
++
h
)
{
for
(
int
w
=
0
;
w
<
output
_width
;
++
w
)
{
int
im_row_idx
=
h
*
stride
_height
+
h_offset
-
padding_up
;
int
im_col_idx
=
w
*
stride
_width
+
w_offset
-
padding_left
;
for
(
int
h
=
0
;
h
<
col
_height
;
++
h
)
{
for
(
int
w
=
0
;
w
<
col
_width
;
++
w
)
{
int
im_row_idx
=
h
*
stride
[
0
]
-
padding
[
0
]
+
h_offset
*
dilation
[
0
]
;
int
im_col_idx
=
w
*
stride
[
1
]
-
padding
[
1
]
+
w_offset
*
dilation
[
1
]
;
if
((
im_row_idx
)
>=
0
&&
(
im_row_idx
)
<
i
nput
_height
&&
(
im_col_idx
)
>=
0
&&
(
im_col_idx
)
<
i
nput
_width
)
{
im_row_idx
+=
c_im
*
i
nput
_height
;
im_data
[
im_row_idx
*
i
nput
_width
+
im_col_idx
]
+=
col_data
[(
c
*
output_height
+
h
)
*
output
_width
+
w
];
if
((
im_row_idx
)
>=
0
&&
(
im_row_idx
)
<
i
m
_height
&&
(
im_col_idx
)
>=
0
&&
(
im_col_idx
)
<
i
m
_width
)
{
im_row_idx
+=
c_im
*
i
m
_height
;
im_data
[
im_row_idx
*
i
m
_width
+
im_col_idx
]
+=
col_data
[(
c
*
col_height
+
h
)
*
col
_width
+
w
];
}
}
}
...
...
@@ -168,64 +167,59 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
platform
::
CPUPlace
,
T
>
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
im
,
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_up
,
int
padding_down
,
int
padding_left
,
int
padding_right
)
{
const
framework
::
Tensor
&
im
,
const
std
::
vector
<
int
>&
dilation
,
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
col
)
{
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
int
i
nput
_channels
=
im
.
dims
()[
0
];
int
i
nput
_height
=
im
.
dims
()[
1
];
int
i
nput
_width
=
im
.
dims
()[
2
];
int
filter_height
=
col
.
dims
()[
3
];
int
filter_width
=
col
.
dims
()[
4
];
int
output_height
=
col
.
dims
()[
0
];
int
output_width
=
col
.
dims
()[
1
];
PADDLE_ENFORCE
(
col
->
dims
().
size
()
==
5
);
int
i
m
_channels
=
im
.
dims
()[
0
];
int
i
m
_height
=
im
.
dims
()[
1
];
int
i
m
_width
=
im
.
dims
()[
2
];
int
filter_height
=
col
->
dims
()[
3
];
int
filter_width
=
col
->
dims
()[
4
];
int
col_height
=
col
->
dims
()[
0
];
int
col_width
=
col
->
dims
()[
1
];
PADDLE_ENFORCE_EQ
(
(
input_height
+
padding_up
+
padding_down
-
filter_height
)
/
stride_height
+
1
,
output_height
,
(
im_height
+
padding
[
0
]
+
padding
[
2
]
-
filter_height
)
/
stride
[
0
]
+
1
,
col_height
,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent."
);
PADDLE_ENFORCE_EQ
(
(
input_width
+
padding_left
+
padding_right
-
filter_width
)
/
stride_width
+
1
,
output_width
,
"output_width and padding(padding_left, padding_right) are "
(
im_width
+
padding
[
1
]
+
padding
[
3
]
-
filter_width
)
/
stride
[
1
]
+
1
,
col_width
,
"col_width and padding(padding_left, padding_right) are "
"inconsistent."
);
const
T
*
im_data
=
im
.
data
<
T
>
();
T
*
col_data
=
col
.
data
<
T
>
();
T
*
col_data
=
col
->
data
<
T
>
();
for
(
int
col_row_idx
=
0
;
col_row_idx
<
output
_height
;
++
col_row_idx
)
{
for
(
int
col_col_idx
=
0
;
col_col_idx
<
output
_width
;
++
col_col_idx
)
{
for
(
int
channel
=
0
;
channel
<
i
nput
_channels
;
++
channel
)
{
for
(
int
col_row_idx
=
0
;
col_row_idx
<
col
_height
;
++
col_row_idx
)
{
for
(
int
col_col_idx
=
0
;
col_col_idx
<
col
_width
;
++
col_col_idx
)
{
for
(
int
channel
=
0
;
channel
<
i
m
_channels
;
++
channel
)
{
for
(
int
filter_row_idx
=
0
;
filter_row_idx
<
filter_height
;
++
filter_row_idx
)
{
for
(
int
filter_col_idx
=
0
;
filter_col_idx
<
filter_width
;
++
filter_col_idx
)
{
int
im_row_offset
=
col_row_idx
*
stride
_height
+
filter_row_idx
-
padding_up
;
col_row_idx
*
stride
[
0
]
+
filter_row_idx
-
padding
[
0
]
;
int
im_col_offset
=
col_col_idx
*
stride
_width
+
filter_col_idx
-
padding_left
;
int
col_offset
=
((((
col_row_idx
)
*
output_width
+
col_col_idx
)
*
input
_channels
+
col_col_idx
*
stride
[
1
]
+
filter_col_idx
-
padding
[
1
]
;
int
col_offset
=
((((
col_row_idx
)
*
col_width
+
col_col_idx
)
*
im
_channels
+
channel
)
*
filter_height
+
filter_row_idx
)
*
filter_width
+
filter_col_idx
;
if
(
im_row_offset
<
0
||
im_row_offset
>=
input_height
||
im_col_offset
<
0
||
im_col_offset
>=
input_width
)
{
col_data
[
col_offset
]
=
T
(
0
);
}
else
{
int
im_offset
=
(
channel
*
input_height
+
im_row_offset
)
*
input_width
+
int
im_offset
=
(
channel
*
im_height
+
im_row_offset
)
*
im_width
+
im_col_offset
;
col_data
[
col_offset
]
=
im_data
[
im_offset
];
}
col_data
[
col_offset
]
=
(
im_row_offset
<
0
||
im_row_offset
>=
im_height
||
im_col_offset
<
0
||
im_col_offset
>=
im_width
)
?
static_cast
<
T
>
(
0
)
:
im_data
[
im_offset
];
}
}
}
...
...
@@ -243,60 +237,57 @@ template <class T>
class
Col2ImFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
platform
::
CPUPlace
,
T
>
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
im
,
const
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_up
,
int
padding_down
,
int
padding_left
,
int
padding_right
)
{
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
col
,
const
std
::
vector
<
int
>&
dilation
,
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
im
)
{
PADDLE_ENFORCE
(
im
->
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
int
i
nput_channels
=
im
.
dims
()[
0
];
int
i
nput_height
=
im
.
dims
()[
1
];
int
i
nput_width
=
im
.
dims
()[
2
];
int
i
m_channels
=
im
->
dims
()[
0
];
int
i
m_height
=
im
->
dims
()[
1
];
int
i
m_width
=
im
->
dims
()[
2
];
int
filter_height
=
col
.
dims
()[
3
];
int
filter_width
=
col
.
dims
()[
4
];
int
output
_height
=
col
.
dims
()[
0
];
int
output
_width
=
col
.
dims
()[
1
];
int
col
_height
=
col
.
dims
()[
0
];
int
col
_width
=
col
.
dims
()[
1
];
PADDLE_ENFORCE_EQ
(
(
input_height
+
padding_up
+
padding_down
-
filter_height
)
/
stride_height
+
1
,
output_height
,
(
im_height
+
padding
[
0
]
+
padding
[
2
]
-
filter_height
)
/
stride
[
0
]
+
1
,
col_height
,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent."
);
PADDLE_ENFORCE_EQ
(
(
input_width
+
padding_left
+
padding_right
-
filter_width
)
/
stride_width
+
1
,
output_width
,
"output_width and padding(padding_left, padding_right) are "
(
im_width
+
padding
[
1
]
+
padding
[
3
]
-
filter_width
)
/
stride
[
1
]
+
1
,
col_width
,
"col_width and padding(padding_left, padding_right) are "
"inconsistent."
);
T
*
im_data
=
im
.
data
<
T
>
();
T
*
im_data
=
im
->
data
<
T
>
();
const
T
*
col_data
=
col
.
data
<
T
>
();
for
(
int
col_row_idx
=
0
;
col_row_idx
<
output
_height
;
++
col_row_idx
)
{
for
(
int
col_col_idx
=
0
;
col_col_idx
<
output
_width
;
++
col_col_idx
)
{
for
(
int
channel
=
0
;
channel
<
i
nput
_channels
;
++
channel
)
{
for
(
int
col_row_idx
=
0
;
col_row_idx
<
col
_height
;
++
col_row_idx
)
{
for
(
int
col_col_idx
=
0
;
col_col_idx
<
col
_width
;
++
col_col_idx
)
{
for
(
int
channel
=
0
;
channel
<
i
m
_channels
;
++
channel
)
{
for
(
int
filter_row_idx
=
0
;
filter_row_idx
<
filter_height
;
++
filter_row_idx
)
{
for
(
int
filter_col_idx
=
0
;
filter_col_idx
<
filter_width
;
++
filter_col_idx
)
{
int
im_row_offset
=
col_row_idx
*
stride
_height
+
filter_row_idx
-
padding_up
;
col_row_idx
*
stride
[
0
]
+
filter_row_idx
-
padding
[
0
]
;
int
im_col_offset
=
col_col_idx
*
stride
_width
+
filter_col_idx
-
padding_left
;
int
col_offset
=
(((
col_row_idx
*
output_width
+
col_col_idx
)
*
input
_channels
+
col_col_idx
*
stride
[
1
]
+
filter_col_idx
-
padding
[
1
]
;
int
col_offset
=
(((
col_row_idx
*
col_width
+
col_col_idx
)
*
im
_channels
+
channel
)
*
filter_height
+
filter_row_idx
)
*
filter_width
+
filter_col_idx
;
if
(
im_row_offset
>=
0
&&
im_row_offset
<
i
nput
_height
&&
im_col_offset
>=
0
&&
im_col_offset
<
i
nput
_width
)
{
if
(
im_row_offset
>=
0
&&
im_row_offset
<
i
m
_height
&&
im_col_offset
>=
0
&&
im_col_offset
<
i
m
_width
)
{
int
im_offset
=
(
channel
*
i
nput_height
+
im_row_offset
)
*
input
_width
+
(
channel
*
i
m_height
+
im_row_offset
)
*
im
_width
+
im_col_offset
;
im_data
[
im_offset
]
+=
col_data
[
col_offset
];
}
...
...
paddle/operators/math/im2col.cu
浏览文件 @
4fc9f55e
...
...
@@ -20,36 +20,32 @@ namespace operators {
namespace
math
{
template
<
class
T
>
__global__
void
im2col
(
const
T
*
data_im
,
int
num_outs
,
int
height
,
int
width
,
__global__
void
im2col
(
const
T
*
data_im
,
int
num_outs
,
int
im_height
,
int
im_width
,
int
dilation_h
,
int
dilation_w
,
int
filter_height
,
int
filter_width
,
int
stride_height
,
int
stride_width
,
int
padding_height
,
int
padding_width
,
int
output_height
,
int
output_width
,
T
*
data_col
)
{
int
index
=
(
blockIdx
.
x
*
gridDim
.
y
+
blockIdx
.
y
)
*
blockDim
.
x
+
threadIdx
.
x
;
int
col_height
,
int
col_width
,
T
*
data_col
)
{
const
int
index
=
(
blockIdx
.
x
*
gridDim
.
y
+
blockIdx
.
y
)
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
index
<
num_outs
)
{
int
w_out
=
index
%
output_width
;
index
/=
output_width
;
int
h_out
=
index
%
output_height
;
int
channel_in
=
index
/
output_height
;
int
w_out
=
index
%
col_width
;
int
h_out
=
(
index
/
col_width
)
%
col_height
;
int
channel_in
=
index
/
col_width
/
col_height
;
int
channel_out
=
channel_in
*
filter_height
*
filter_width
;
int
h_in
=
h_out
*
stride_height
;
int
w_in
=
w_out
*
stride_width
;
int
h_in
=
h_out
*
stride_height
-
padding_height
;
int
w_in
=
w_out
*
stride_width
-
padding_width
;
data_col
+=
(
channel_out
*
output_height
+
h_out
)
*
output_width
+
w_out
;
data_col
+=
(
channel_out
*
col_height
+
h_out
)
*
col_width
+
w_out
;
data_im
+=
(
channel_in
*
im_height
+
h_in
)
*
im_width
+
w_in
;
for
(
int
i
=
0
;
i
<
filter_height
;
++
i
)
{
for
(
int
j
=
0
;
j
<
filter_width
;
++
j
)
{
int
rIdx
=
int
(
h_in
+
i
);
int
cIdx
=
int
(
w_in
+
j
);
if
((
rIdx
-
(
int
)
padding_height
)
>=
(
int
)
height
||
(
rIdx
-
(
int
)
padding_height
)
<
0
||
(
cIdx
-
(
int
)
padding_width
)
>=
(
int
)
width
||
(
cIdx
-
(
int
)
padding_width
)
<
0
)
{
*
data_col
=
0
;
}
else
{
rIdx
=
rIdx
+
channel_in
*
height
-
padding_height
;
cIdx
=
cIdx
-
padding_width
;
*
data_col
=
data_im
[
rIdx
*
width
+
cIdx
];
}
data_col
+=
output_height
*
output_width
;
int
rIdx
=
h_in
+
i
*
dilation_h
;
int
cIdx
=
w_in
+
j
*
dilation_w
;
*
data_col
=
(
rIdx
>=
im_height
||
rIdx
<
0
||
cIdx
>=
im_width
||
cIdx
<
0
)
?
0
:
data_im
[
i
*
dilation_h
*
im_width
+
j
*
dilation_w
];
data_col
+=
col_height
*
col_width
;
}
}
}
...
...
@@ -65,30 +61,36 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
platform
::
GPUPlace
,
T
>
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
im
,
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_up
,
int
padding_down
,
int
padding_left
,
int
padding_right
)
{
const
framework
::
Tensor
&
im
,
const
std
::
vector
<
int
>&
dilation
,
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
col
)
{
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
int
input_channels
=
im
.
dims
()[
0
];
int
input_height
=
im
.
dims
()[
1
];
int
input_width
=
im
.
dims
()[
2
];
int
filter_height
=
col
.
dims
()[
1
];
int
filter_width
=
col
.
dims
()[
2
];
int
output_height
=
col
.
dims
()[
3
];
int
output_width
=
col
.
dims
()[
4
];
PADDLE_ENFORCE
((
input_height
+
padding_up
+
padding_down
-
filter_height
)
/
stride_height
+
1
==
output_height
);
PADDLE_ENFORCE
((
input_width
+
padding_left
+
padding_right
-
filter_width
)
/
stride_width
+
1
==
output_width
);
int
num_outputs
=
input_channels
*
output_height
*
output_width
;
PADDLE_ENFORCE
(
col
->
dims
().
size
()
==
5
);
int
im_channels
=
im
.
dims
()[
0
];
int
im_height
=
im
.
dims
()[
1
];
int
im_width
=
im
.
dims
()[
2
];
int
filter_height
=
col
->
dims
()[
1
];
int
filter_width
=
col
->
dims
()[
2
];
int
col_height
=
col
->
dims
()[
3
];
int
col_width
=
col
->
dims
()[
4
];
PADDLE_ENFORCE_EQ
((
im_height
+
padding
[
0
]
+
padding
[
2
]
-
(
dilation
[
0
]
*
(
filter_height
-
1
)
+
1
))
/
stride
[
0
]
+
1
,
col_height
,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent."
);
PADDLE_ENFORCE_EQ
((
im_width
+
padding
[
1
]
+
padding
[
3
]
-
(
dilation
[
1
]
*
(
filter_width
-
1
)
+
1
))
/
stride
[
1
]
+
1
,
col_width
,
"col_width and padding(padding_left, padding_right) are "
"inconsistent."
);
int
num_outputs
=
im_channels
*
col_height
*
col_width
;
int
blocks
=
(
num_outputs
+
1024
-
1
)
/
1024
;
int
block_x
=
512
;
int
block_y
=
(
blocks
+
512
-
1
)
/
512
;
...
...
@@ -97,56 +99,57 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
im2col
<
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
im
.
data
<
T
>
(),
num_outputs
,
i
nput_height
,
input_width
,
filter_height
,
filter_width
,
stride_height
,
stride_width
,
padding_up
,
padding_left
,
output_height
,
output_width
,
col
.
data
<
T
>
());
im
.
data
<
T
>
(),
num_outputs
,
i
m_height
,
im_width
,
dilation
[
0
]
,
dilation
[
1
],
filter_height
,
filter_width
,
stride
[
0
],
stride
[
1
]
,
padding
[
0
],
padding
[
1
],
col_height
,
col_width
,
col
->
data
<
T
>
());
}
};
template
<
class
T
>
__global__
void
col2im
(
size_t
n
,
const
T
*
data_col
,
size_t
height
,
size_t
width
,
size_t
channels
,
size_t
filter_height
,
size_t
filter_width
,
size_t
stride_height
,
size_t
stride_width
,
size_t
padding_height
,
size_t
padding_width
,
size_t
output_height
,
size_t
output_width
,
T
*
data_im
)
{
size_t
index
=
__global__
void
col2im
(
int
n
,
const
T
*
data_col
,
int
im_height
,
int
im_width
,
int
dilation_h
,
int
dilation_w
,
int
filter_height
,
int
filter_width
,
int
stride_height
,
int
stride_width
,
int
padding_height
,
int
padding_width
,
int
col_height
,
int
col_width
,
T
*
data_im
)
{
const
int
index
=
(
blockIdx
.
x
*
gridDim
.
y
+
blockIdx
.
y
)
*
blockDim
.
x
+
threadIdx
.
x
;
const
int
d_filter_height
=
dilation_h
*
(
filter_height
-
1
)
+
1
;
const
int
d_filter_width
=
dilation_w
*
(
filter_width
-
1
)
+
1
;
if
(
index
<
n
)
{
T
val
=
0
;
int
w
=
int
(
index
%
width
);
int
h
=
int
((
index
/
width
)
%
height
);
int
c
=
int
(
index
/
(
width
*
height
));
if
((
w
-
(
int
)
padding_width
)
>=
0
&&
(
w
-
(
int
)
padding_width
)
<
(
width
-
2
*
padding_width
)
&&
(
h
-
(
int
)
padding_height
)
>=
0
&&
(
h
-
padding_height
)
<
(
height
-
2
*
padding_height
))
{
int
w
=
index
%
im_width
;
int
h
=
(
index
/
im_width
)
%
im_height
;
int
c
=
index
/
(
im_width
*
im_height
);
// compute the start and end of the output
int
w_col_start
=
(
w
<
(
int
)
filter_width
)
?
0
:
(
w
-
int
(
filter_width
))
/
(
int
)
stride_width
+
1
;
int
w_col_end
=
min
((
int
)(
w
/
(
int
)
stride_width
+
1
),
(
int
)(
output_width
));
int
h_col_start
=
(
h
<
(
int
)
filter_height
)
?
0
:
(
h
-
(
int
)
filter_height
)
/
(
int
)
stride_height
+
1
;
int
h_col_end
=
min
(
int
(
h
/
stride_height
+
1
),
int
(
output_height
));
int
w_col_start
=
(
w
<
d_filter_width
)
?
0
:
(
w
-
d_filter_width
)
/
stride_width
+
1
;
int
w_col_end
=
min
(
w
/
stride_width
+
1
,
col_width
);
int
h_col_start
=
(
h
<
d_filter_height
)
?
0
:
(
h
-
d_filter_height
)
/
stride_height
+
1
;
int
h_col_end
=
min
(
h
/
stride_height
+
1
,
col_height
);
for
(
int
h_col
=
h_col_start
;
h_col
<
h_col_end
;
++
h_col
)
{
for
(
int
w_col
=
w_col_start
;
w_col
<
w_col_end
;
++
w_col
)
{
// the col location: [c * width * height + h_out, w_out]
int
c_col
=
int
(
c
*
filter_height
*
filter_width
)
+
(
h
-
h_col
*
(
int
)
stride_height
)
*
(
int
)
filter_width
+
(
w
-
w_col
*
(
int
)
stride_width
);
val
+=
data_col
[(
c_col
*
output_height
+
h_col
)
*
output_width
+
w_col
];
int
h_off
=
(
h
-
h_col
*
stride_height
);
int
w_off
=
(
w
-
w_col
*
stride_width
);
if
(
h_off
%
dilation_h
==
0
&&
w_off
%
dilation_w
==
0
)
{
h_off
/=
dilation_h
;
w_off
/=
dilation_w
;
int
data_col_index
=
(((
c
*
filter_height
+
h_off
)
*
filter_width
+
w_off
)
*
col_height
+
h_col
)
*
col_width
+
w_col
;
val
+=
data_col
[
data_col_index
];
}
}
h
-=
padding_height
;
w
-=
padding_width
;
data_im
[
c
*
((
width
-
2
*
padding_width
)
*
(
height
-
2
*
padding_height
))
+
h
*
(
width
-
2
*
padding_width
)
+
w
]
+=
val
;
}
data_im
[
index
]
=
val
;
}
}
...
...
@@ -159,33 +162,38 @@ template <class T>
class
Col2ImFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kCFO
,
platform
::
GPUPlace
,
T
>
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
im
,
const
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_up
,
int
padding_down
,
int
padding_left
,
int
padding_right
)
{
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
col
,
const
std
::
vector
<
int
>&
dilation
,
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
im
)
{
PADDLE_ENFORCE
(
im
->
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
int
i
nput_channels
=
im
.
dims
()[
0
];
int
i
nput_height
=
im
.
dims
()[
1
];
int
i
nput_width
=
im
.
dims
()[
2
];
int
i
m_channels
=
im
->
dims
()[
0
];
int
i
m_height
=
im
->
dims
()[
1
];
int
i
m_width
=
im
->
dims
()[
2
];
int
filter_height
=
col
.
dims
()[
1
];
int
filter_width
=
col
.
dims
()[
2
];
int
output_height
=
col
.
dims
()[
3
];
int
output_width
=
col
.
dims
()[
4
];
PADDLE_ENFORCE
((
input_height
+
padding_up
+
padding_down
-
filter_height
)
/
stride_height
+
1
==
output_height
);
PADDLE_ENFORCE
((
input_width
+
padding_left
+
padding_right
-
filter_width
)
/
stride_width
+
1
==
output_width
);
size_t
num_kernels
=
input_channels
*
(
input_height
+
padding_up
+
padding_down
)
*
(
input_width
+
padding_left
+
padding_right
);
int
col_height
=
col
.
dims
()[
3
];
int
col_width
=
col
.
dims
()[
4
];
PADDLE_ENFORCE_EQ
((
im_height
+
padding
[
0
]
+
padding
[
2
]
-
(
dilation
[
0
]
*
(
filter_height
-
1
)
+
1
))
/
stride
[
0
]
+
1
,
col_height
,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent."
);
PADDLE_ENFORCE_EQ
((
im_width
+
padding
[
1
]
+
padding
[
3
]
-
(
dilation
[
1
]
*
(
filter_width
-
1
)
+
1
))
/
stride
[
1
]
+
1
,
col_width
,
"col_width and padding(padding_left, padding_right) are "
"inconsistent."
);
size_t
num_kernels
=
im_channels
*
im_height
*
im_width
;
size_t
blocks
=
(
num_kernels
+
1024
-
1
)
/
1024
;
size_t
block_x
=
512
;
...
...
@@ -198,10 +206,9 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
col2im
<
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
num_kernels
,
col
.
data
<
T
>
(),
input_height
+
padding_up
+
padding_down
,
input_width
+
padding_left
+
padding_left
,
input_channels
,
filter_height
,
filter_width
,
stride_height
,
stride_width
,
padding_up
,
padding_left
,
output_height
,
output_width
,
im
.
data
<
T
>
());
num_kernels
,
col
.
data
<
T
>
(),
im_height
,
im_width
,
dilation
[
0
],
dilation
[
1
],
filter_height
,
filter_width
,
stride
[
0
],
stride
[
1
],
padding
[
0
],
padding
[
2
],
col_height
,
col_width
,
im
->
data
<
T
>
());
}
};
...
...
@@ -215,33 +222,32 @@ template class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
platform
::
GPUPlace
,
double
>;
template
<
class
T
>
__global__
void
im2colOCF
(
const
T
*
im_data
,
T
*
col_data
,
int
input_channels
,
int
i
nput_height
,
int
input_width
,
int
filter_height
,
int
filter_width
,
int
stride_height
,
int
stride_width
,
int
padding_height
,
int
padding_width
,
int
output_height
,
int
output_width
)
{
__global__
void
im2colOCF
(
const
T
*
im_data
,
int
im_channels
,
int
im_height
,
int
i
m_width
,
int
filter_height
,
int
filter_width
,
int
stride_height
,
int
stride_width
,
int
padding_height
,
int
padding_width
,
int
col_height
,
int
col_width
,
T
*
col_data
)
{
int
swid
=
blockIdx
.
x
;
int
shid
=
blockIdx
.
y
;
for
(
int
channelid
=
threadIdx
.
z
;
channelid
<
i
nput
_channels
;
for
(
int
channelid
=
threadIdx
.
z
;
channelid
<
i
m
_channels
;
channelid
+=
blockDim
.
z
)
{
for
(
int
idy
=
threadIdx
.
y
;
idy
<
filter_height
;
idy
+=
blockDim
.
y
)
{
for
(
int
idx
=
threadIdx
.
x
;
idx
<
filter_width
;
idx
+=
blockDim
.
x
)
{
int
width_offset
=
idx
+
swid
*
stride_width
-
padding_width
;
int
height_offset
=
idy
+
shid
*
stride_height
-
padding_height
;
int
im_offset
=
width_offset
+
height_offset
*
i
nput
_width
+
channelid
*
i
nput_height
*
input
_width
;
int
im_offset
=
width_offset
+
height_offset
*
i
m
_width
+
channelid
*
i
m_height
*
im
_width
;
int
col_offset
=
idx
+
idy
*
filter_width
+
channelid
*
filter_height
*
filter_width
+
(
shid
*
output_width
+
swid
)
*
(
input_channels
*
filter_height
*
filter_width
);
if
(
height_offset
>=
input_height
||
height_offset
<
0
||
width_offset
>=
input_width
||
width_offset
<
0
)
{
col_data
[
col_offset
]
=
T
(
0
);
}
else
{
col_data
[
col_offset
]
=
im_data
[
im_offset
];
}
(
shid
*
col_width
+
swid
)
*
(
im_channels
*
filter_height
*
filter_width
);
col_data
[
col_offset
]
=
(
height_offset
>=
im_height
||
height_offset
<
0
||
width_offset
>=
im_width
||
width_offset
<
0
)
?
T
(
0
)
:
im_data
[
im_offset
];
}
}
}
...
...
@@ -257,27 +263,33 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
platform
::
GPUPlace
,
T
>
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
im
,
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_up
,
int
padding_down
,
int
padding_left
,
int
padding_right
)
{
const
framework
::
Tensor
&
im
,
const
std
::
vector
<
int
>&
dilation
,
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
col
)
{
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
int
input_channels
=
im
.
dims
()[
0
];
int
input_height
=
im
.
dims
()[
1
];
int
input_width
=
im
.
dims
()[
2
];
int
filter_height
=
col
.
dims
()[
3
];
int
filter_width
=
col
.
dims
()[
4
];
int
output_height
=
col
.
dims
()[
0
];
int
output_width
=
col
.
dims
()[
1
];
PADDLE_ENFORCE
((
input_height
+
padding_up
+
padding_down
-
filter_height
)
/
stride_height
+
1
==
output_height
);
PADDLE_ENFORCE
((
input_width
+
padding_left
+
padding_right
-
filter_width
)
/
stride_width
+
1
==
output_width
);
PADDLE_ENFORCE
(
col
->
dims
().
size
()
==
5
);
int
im_channels
=
im
.
dims
()[
0
];
int
im_height
=
im
.
dims
()[
1
];
int
im_width
=
im
.
dims
()[
2
];
int
filter_height
=
col
->
dims
()[
3
];
int
filter_width
=
col
->
dims
()[
4
];
int
col_height
=
col
->
dims
()[
0
];
int
col_width
=
col
->
dims
()[
1
];
PADDLE_ENFORCE_EQ
((
im_height
+
padding
[
0
]
+
padding
[
2
]
-
(
dilation
[
0
]
*
(
filter_height
-
1
)
+
1
))
/
stride
[
0
]
+
1
,
col_height
,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent."
);
PADDLE_ENFORCE_EQ
((
im_width
+
padding
[
1
]
+
padding
[
3
]
-
(
dilation
[
1
]
*
(
filter_width
-
1
)
+
1
))
/
stride
[
1
]
+
1
,
col_width
,
"col_width and padding(padding_left, padding_right) are "
"inconsistent."
);
int
block_dim_x
=
0
;
int
block_dim_y
=
0
;
...
...
@@ -296,42 +308,41 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
}
int
block_dim_z
=
1024
/
block_dim_x
/
block_dim_y
;
dim3
threads
(
block_dim_x
,
block_dim_y
,
std
::
min
(
block_dim_z
,
input_channels
));
dim3
grid
(
output_width
,
output_height
);
dim3
threads
(
block_dim_x
,
block_dim_y
,
std
::
min
(
block_dim_z
,
im_channels
));
dim3
grid
(
col_width
,
col_height
);
im2colOCF
<
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
im
.
data
<
T
>
(),
col
.
data
<
T
>
(),
input_channels
,
input_height
,
input_width
,
filter_
height
,
filter_width
,
stride_height
,
stride_width
,
padding_up
,
padding_left
,
output_height
,
output_width
);
im
.
data
<
T
>
(),
im_channels
,
im_height
,
im_width
,
filter_height
,
filter_
width
,
stride
[
0
],
stride
[
1
],
padding
[
0
],
padding
[
1
],
col_height
,
col_width
,
col
->
data
<
T
>
()
);
}
};
template
<
class
T
>
__global__
void
col2imOCF
(
T
*
im_data
,
const
T
*
col_data
,
int
input_channels
,
int
i
nput_height
,
int
input_width
,
int
filter_height
,
int
filter_width
,
int
stride_height
,
int
stride_width
,
int
padding_height
,
int
padding_width
,
int
output_height
,
int
output_width
)
{
__global__
void
col2imOCF
(
const
T
*
col_data
,
int
im_channels
,
int
im_height
,
int
i
m_width
,
int
filter_height
,
int
filter_width
,
int
stride_height
,
int
stride_width
,
int
padding_height
,
int
padding_width
,
int
col_height
,
int
col_width
,
T
*
im_data
)
{
int
swid
=
blockIdx
.
x
;
int
shid
=
blockIdx
.
y
;
for
(
int
channelid
=
threadIdx
.
z
;
channelid
<
i
nput
_channels
;
for
(
int
channelid
=
threadIdx
.
z
;
channelid
<
i
m
_channels
;
channelid
+=
blockDim
.
z
)
{
for
(
int
idy
=
threadIdx
.
y
;
idy
<
filter_height
;
idy
+=
blockDim
.
y
)
{
for
(
int
idx
=
threadIdx
.
x
;
idx
<
filter_width
;
idx
+=
blockDim
.
x
)
{
int
width_offset
=
idx
+
swid
*
stride_width
-
padding_width
;
int
height_offset
=
idy
+
shid
*
stride_height
-
padding_height
;
int
im_offset
=
width_offset
+
height_offset
*
i
nput
_width
+
channelid
*
i
nput_height
*
input
_width
;
int
im_offset
=
width_offset
+
height_offset
*
i
m
_width
+
channelid
*
i
m_height
*
im
_width
;
int
col_offset
=
idx
+
idy
*
filter_width
+
channelid
*
filter_height
*
filter_width
+
(
shid
*
output
_width
+
swid
)
*
(
i
nput
_channels
*
filter_height
*
filter_width
);
(
shid
*
col
_width
+
swid
)
*
(
i
m
_channels
*
filter_height
*
filter_width
);
if
(
height_offset
>=
0
&&
height_offset
<
i
nput
_height
&&
width_offset
>=
0
&&
width_offset
<
i
nput
_width
)
{
if
(
height_offset
>=
0
&&
height_offset
<
i
m
_height
&&
width_offset
>=
0
&&
width_offset
<
i
m
_width
)
{
paddle
::
platform
::
CudaAtomicAdd
(
im_data
+
im_offset
,
col_data
[
col_offset
]);
}
...
...
@@ -349,28 +360,35 @@ template <class T>
class
Col2ImFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
platform
::
GPUPlace
,
T
>
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
im
,
const
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_up
,
int
padding_down
,
int
padding_left
,
int
padding_right
)
{
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
col
,
const
std
::
vector
<
int
>&
dilation
,
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
im
)
{
PADDLE_ENFORCE
(
im
->
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
int
i
nput_channels
=
im
.
dims
()[
0
];
int
i
nput_height
=
im
.
dims
()[
1
];
int
i
nput_width
=
im
.
dims
()[
2
];
int
i
m_channels
=
im
->
dims
()[
0
];
int
i
m_height
=
im
->
dims
()[
1
];
int
i
m_width
=
im
->
dims
()[
2
];
int
filter_height
=
col
.
dims
()[
3
];
int
filter_width
=
col
.
dims
()[
4
];
int
output_height
=
col
.
dims
()[
0
];
int
output_width
=
col
.
dims
()[
1
];
PADDLE_ENFORCE
((
input_height
+
padding_up
+
padding_down
-
filter_height
)
/
stride_height
+
1
==
output_height
);
PADDLE_ENFORCE
((
input_width
+
padding_left
+
padding_right
-
filter_width
)
/
stride_width
+
1
==
output_width
);
int
col_height
=
col
.
dims
()[
0
];
int
col_width
=
col
.
dims
()[
1
];
PADDLE_ENFORCE_EQ
((
im_height
+
padding
[
0
]
+
padding
[
2
]
-
(
dilation
[
0
]
*
(
filter_height
-
1
)
+
1
))
/
stride
[
0
]
+
1
,
col_height
,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent."
);
PADDLE_ENFORCE_EQ
((
im_width
+
padding
[
1
]
+
padding
[
3
]
-
(
dilation
[
1
]
*
(
filter_width
-
1
)
+
1
))
/
stride
[
1
]
+
1
,
col_width
,
"col_width and padding(padding_left, padding_right) are "
"inconsistent."
);
int
block_dim_x
=
0
;
int
block_dim_y
=
0
;
...
...
@@ -389,15 +407,14 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
}
int
block_dim_z
=
1024
/
block_dim_x
/
block_dim_y
;
dim3
threads
(
block_dim_x
,
block_dim_y
,
std
::
min
(
block_dim_z
,
input_channels
));
dim3
grid
(
output_width
,
output_height
);
dim3
threads
(
block_dim_x
,
block_dim_y
,
std
::
min
(
block_dim_z
,
im_channels
));
dim3
grid
(
col_width
,
col_height
);
col2imOCF
<
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
im
.
data
<
T
>
(),
col
.
data
<
T
>
(),
input_channels
,
input_height
,
input_width
,
filter_
height
,
filter_width
,
stride_height
,
stride_width
,
padding_up
,
padding_left
,
output_height
,
output_width
);
col
.
data
<
T
>
(),
im_channels
,
im_height
,
im_width
,
filter_height
,
filter_
width
,
stride
[
0
],
stride
[
1
],
padding
[
0
],
padding
[
1
],
col_height
,
col_width
,
im
->
data
<
T
>
()
);
}
};
...
...
paddle/operators/math/im2col.h
浏览文件 @
4fc9f55e
...
...
@@ -35,6 +35,15 @@ enum class ColFormat { kCFO = 0, kOCF = 1 };
* \param colData Column data.
* \param colShape The shape of colData.
*
* \param dilations dilation data.
* \param 2-dimension [dilation_height, dilation_width].
*
* \param strides stride data.
* \param 2-dimension [stride_height, stride_width].
*
* \param paddings padding data.
* \param 4-dimension [up_pad, left_pad, down_pad, right_pad].
*
* If the template argument Format is kCFO, the shape of colData is:
* [input_channels, filter_height, filter_width, output_height, output_width]
* So, it is easy to reshape into a convolution matrix for convolution
...
...
@@ -73,18 +82,19 @@ template <ColFormat Format, typename Place, typename T>
class
Im2ColFunctor
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
im
,
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_up
,
int
padding_down
,
int
padding_left
,
int
padding_right
);
const
framework
::
Tensor
&
im
,
const
std
::
vector
<
int
>&
dilation
,
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
col
);
};
template
<
ColFormat
Format
,
typename
Place
,
typename
T
>
class
Col2ImFunctor
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
im
,
const
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_up
,
int
padding_down
,
int
padding_left
,
int
padding_right
);
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
col
,
const
std
::
vector
<
int
>&
dilation
,
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
im
);
};
}
// namespace math
...
...
paddle/operators/math/im2col_test.cc
浏览文件 @
4fc9f55e
...
...
@@ -45,10 +45,14 @@ void testIm2col() {
int
input_height
=
2
;
int
input_width
=
3
;
int
filter_size
=
2
;
int
stride
=
1
;
int
padding
=
0
;
int
output_height
=
(
input_height
-
filter_size
+
2
*
padding
)
/
stride
+
1
;
int
output_width
=
(
input_width
-
filter_size
+
2
*
padding
)
/
stride
+
1
;
std
::
vector
<
int
>
stride
({
1
,
1
});
// stride_y, stride_x
std
::
vector
<
int
>
padding
(
{
0
,
0
,
0
,
0
});
// up_pad, left_pad, down_pad, right_pad
std
::
vector
<
int
>
dilation
({
1
,
1
});
// dilation_y, dilation_x
int
output_height
=
(
input_height
-
filter_size
+
padding
[
0
]
+
padding
[
1
])
/
stride
[
0
]
+
1
;
int
output_width
=
(
input_width
-
filter_size
+
padding
[
2
]
+
padding
[
3
])
/
stride
[
1
]
+
1
;
float
*
input_ptr
=
input_tmp
.
mutable_data
<
float
>
(
{
1
,
input_height
,
input_width
},
paddle
::
platform
::
CPUPlace
());
float
arr
[
6
]
=
{
0
,
1
,
2
,
3
,
4
,
5
};
...
...
@@ -85,10 +89,8 @@ void testIm2col() {
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
Place
,
float
>
im2col_ocf
;
im2col
(
*
context
,
input
,
output_cfo
,
stride
,
stride
,
padding
,
padding
,
padding
,
padding
);
im2col_ocf
(
*
context
,
input
,
output_ocf
,
stride
,
stride
,
padding
,
padding
,
padding
,
padding
);
im2col
(
*
context
,
input
,
dilation
,
stride
,
padding
,
&
output_cfo
);
im2col_ocf
(
*
context
,
input
,
dilation
,
stride
,
padding
,
&
output_ocf
);
float
out_cfo_data
[]
=
{
0
,
1
,
1
,
2
,
3
,
4
,
4
,
5
};
float
out_ocf_data
[]
=
{
0
,
1
,
3
,
4
,
1
,
2
,
4
,
5
};
...
...
@@ -131,8 +133,7 @@ void testIm2col() {
input
.
CopyFrom
(
input_tmp
,
*
place
,
*
context
);
}
col2im
(
*
context
,
input
,
output_cfo
,
stride
,
stride
,
padding
,
padding
,
padding
,
padding
);
col2im
(
*
context
,
output_cfo
,
dilation
,
stride
,
padding
,
&
input
);
float
*
in_ptr
;
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
...
...
@@ -153,8 +154,7 @@ void testIm2col() {
input
.
CopyFrom
(
input_tmp
,
*
place
,
*
context
);
}
col2im_ocf
(
*
context
,
input
,
output_ocf
,
stride
,
stride
,
padding
,
padding
,
padding
,
padding
);
col2im_ocf
(
*
context
,
output_ocf
,
dilation
,
stride
,
padding
,
&
input
);
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
in_ptr
=
input
.
data
<
float
>
();
...
...
paddle/operators/math/vol2col.cc
浏览文件 @
4fc9f55e
...
...
@@ -28,28 +28,51 @@ template <class T>
class
Vol2ColFunctor
<
platform
::
CPUPlace
,
T
>
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
vol
,
framework
::
Tensor
&
col
,
int
stride_depth
,
int
stride_height
,
int
stride_width
,
int
padding_depth
,
int
padding_height
,
int
padding_width
)
const
{
const
framework
::
Tensor
&
vol
,
const
std
::
vector
<
int
>&
dilations
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
col
)
const
{
PADDLE_ENFORCE
(
vol
.
dims
().
size
()
==
4
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
7
);
PADDLE_ENFORCE
(
col
->
dims
().
size
()
==
7
);
int
input_channels
=
vol
.
dims
()[
0
];
int
input_depth
=
vol
.
dims
()[
1
];
int
input_height
=
vol
.
dims
()[
2
];
int
input_width
=
vol
.
dims
()[
3
];
int
filter_depth
=
col
.
dims
()[
1
];
int
filter_height
=
col
.
dims
()[
2
];
int
filter_width
=
col
.
dims
()[
3
];
int
output_depth
=
col
.
dims
()[
4
];
int
output_height
=
col
.
dims
()[
5
];
int
output_width
=
col
.
dims
()[
6
];
int
filter_depth
=
col
->
dims
()[
1
];
int
filter_height
=
col
->
dims
()[
2
];
int
filter_width
=
col
->
dims
()[
3
];
int
output_depth
=
col
->
dims
()[
4
];
int
output_height
=
col
->
dims
()[
5
];
int
output_width
=
col
->
dims
()[
6
];
int
channels_col
=
input_channels
*
filter_depth
*
filter_height
*
filter_width
;
PADDLE_ENFORCE_EQ
((
input_depth
+
2
*
paddings
[
0
]
-
((
dilations
[
0
]
*
(
filter_depth
-
1
)
+
1
)))
/
strides
[
0
]
+
1
,
output_depth
,
"input_depth and output_depth are "
"mismatching."
);
PADDLE_ENFORCE_EQ
((
input_height
+
2
*
paddings
[
1
]
-
((
dilations
[
1
]
*
(
filter_height
-
1
)
+
1
)))
/
strides
[
1
]
+
1
,
output_height
,
"input_height and output_height are "
"mismatching."
);
PADDLE_ENFORCE_EQ
((
input_width
+
2
*
paddings
[
2
]
-
((
dilations
[
2
]
*
(
filter_width
-
1
)
+
1
)))
/
strides
[
2
]
+
1
,
output_width
,
"input_width and output_width are "
"mismatching."
);
const
T
*
vol_data
=
vol
.
data
<
T
>
();
T
*
col_data
=
col
.
data
<
T
>
();
T
*
col_data
=
col
->
data
<
T
>
();
for
(
int
c
=
0
;
c
<
channels_col
;
++
c
)
{
int
w_offset
=
c
%
filter_width
;
...
...
@@ -57,24 +80,23 @@ class Vol2ColFunctor<platform::CPUPlace, T> {
int
d_offset
=
(
c
/
filter_width
/
filter_height
)
%
filter_depth
;
int
c_in
=
c
/
filter_width
/
filter_height
/
filter_depth
;
for
(
int
d
=
0
;
d
<
output_depth
;
++
d
)
{
int
d_pad
=
d
*
stride
_depth
-
padding_depth
+
d_offset
;
int
d_pad
=
d
*
stride
s
[
0
]
-
paddings
[
0
]
+
d_offset
*
dilations
[
0
]
;
for
(
int
h
=
0
;
h
<
output_height
;
++
h
)
{
int
h_pad
=
h
*
stride
_height
-
padding_height
+
h_offset
;
int
h_pad
=
h
*
stride
s
[
1
]
-
paddings
[
1
]
+
h_offset
*
dilations
[
1
]
;
for
(
int
w
=
0
;
w
<
output_width
;
++
w
)
{
int
w_pad
=
w
*
stride
_width
-
padding_width
+
w_offset
;
int
w_pad
=
w
*
stride
s
[
2
]
-
paddings
[
2
]
+
w_offset
*
dilations
[
2
]
;
int
col_idx
=
((
c
*
output_depth
+
d
)
*
output_height
+
h
)
*
output_width
+
w
;
if
(
h_pad
<
0
||
h_pad
>=
input_height
||
w_pad
<
0
||
w_pad
>=
input_width
||
d_pad
<
0
||
d_pad
>=
input_depth
)
{
col_data
[
col_idx
]
=
static_cast
<
T
>
(
0
);
}
else
{
int
vol_idx
=
((
c_in
*
input_depth
+
d_pad
)
*
input_height
+
h_pad
)
*
input_width
+
w_pad
;
col_data
[
col_idx
]
=
vol_data
[
vol_idx
];
}
col_data
[
col_idx
]
=
(
h_pad
<
0
||
h_pad
>=
input_height
||
w_pad
<
0
||
w_pad
>=
input_width
||
d_pad
<
0
||
d_pad
>=
input_depth
)
?
static_cast
<
T
>
(
0
)
:
vol_data
[
vol_idx
];
}
}
}
...
...
@@ -92,17 +114,18 @@ template <class T>
class
Col2VolFunctor
<
platform
::
CPUPlace
,
T
>
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
vol
,
const
framework
::
Tensor
&
col
,
int
stride_depth
,
int
stride_height
,
int
stride_width
,
int
padding_depth
,
int
padding_height
,
int
padding_width
)
const
{
PADDLE_ENFORCE
(
vol
.
dims
().
size
()
==
4
);
const
framework
::
Tensor
&
col
,
const
std
::
vector
<
int
>&
dilations
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
vol
)
const
{
PADDLE_ENFORCE
(
vol
->
dims
().
size
()
==
4
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
7
);
int
input_channels
=
vol
.
dims
()[
0
];
int
input_depth
=
vol
.
dims
()[
1
];
int
input_height
=
vol
.
dims
()[
2
];
int
input_width
=
vol
.
dims
()[
3
];
int
input_channels
=
vol
->
dims
()[
0
];
int
input_depth
=
vol
->
dims
()[
1
];
int
input_height
=
vol
->
dims
()[
2
];
int
input_width
=
vol
->
dims
()[
3
];
int
filter_depth
=
col
.
dims
()[
1
];
int
filter_height
=
col
.
dims
()[
2
];
int
filter_width
=
col
.
dims
()[
3
];
...
...
@@ -112,7 +135,28 @@ class Col2VolFunctor<platform::CPUPlace, T> {
int
channels_col
=
input_channels
*
filter_depth
*
filter_height
*
filter_width
;
T
*
vol_data
=
vol
.
data
<
T
>
();
PADDLE_ENFORCE_EQ
((
input_depth
+
2
*
paddings
[
0
]
-
((
dilations
[
0
]
*
(
filter_depth
-
1
)
+
1
)))
/
strides
[
0
]
+
1
,
output_depth
,
"input_depth and output_depth are "
"mismatching."
);
PADDLE_ENFORCE_EQ
((
input_height
+
2
*
paddings
[
1
]
-
((
dilations
[
1
]
*
(
filter_height
-
1
)
+
1
)))
/
strides
[
1
]
+
1
,
output_height
,
"input_height and output_height are "
"mismatching."
);
PADDLE_ENFORCE_EQ
((
input_width
+
2
*
paddings
[
2
]
-
((
dilations
[
2
]
*
(
filter_width
-
1
)
+
1
)))
/
strides
[
2
]
+
1
,
output_width
,
"input_width and output_width are "
"mismatching."
);
T
*
vol_data
=
vol
->
data
<
T
>
();
const
T
*
col_data
=
col
.
data
<
T
>
();
for
(
int
c
=
0
;
c
<
channels_col
;
++
c
)
{
...
...
@@ -121,11 +165,11 @@ class Col2VolFunctor<platform::CPUPlace, T> {
int
d_offset
=
(
c
/
filter_width
/
filter_height
)
%
filter_depth
;
int
cIm
=
c
/
filter_width
/
filter_height
/
filter_depth
;
for
(
int
d
=
0
;
d
<
output_depth
;
++
d
)
{
int
d_pad
=
d
*
stride
_depth
-
padding_depth
+
d_offset
;
int
d_pad
=
d
*
stride
s
[
0
]
-
paddings
[
0
]
+
d_offset
*
dilations
[
0
]
;
for
(
int
h
=
0
;
h
<
output_height
;
++
h
)
{
int
h_pad
=
h
*
stride
_height
-
padding_height
+
h_offset
;
int
h_pad
=
h
*
stride
s
[
1
]
-
paddings
[
1
]
+
h_offset
*
dilations
[
1
]
;
for
(
int
w
=
0
;
w
<
output_width
;
++
w
)
{
int
w_pad
=
w
*
stride
_width
-
padding_width
+
w_offset
;
int
w_pad
=
w
*
stride
s
[
2
]
-
paddings
[
2
]
+
w_offset
*
dilations
[
2
]
;
if
(
h_pad
>=
0
&&
h_pad
<
input_height
&&
w_pad
>=
0
&&
w_pad
<
input_width
&&
d_pad
>=
0
&&
d_pad
<
input_depth
)
{
...
...
@@ -133,6 +177,7 @@ class Col2VolFunctor<platform::CPUPlace, T> {
((
cIm
*
input_depth
+
d_pad
)
*
input_height
+
h_pad
)
*
input_width
+
w_pad
;
int
col_idx
=
((
c
*
output_depth
+
d
)
*
output_height
+
h
)
*
output_width
+
w
;
...
...
paddle/operators/math/vol2col.cu
浏览文件 @
4fc9f55e
...
...
@@ -21,11 +21,12 @@ namespace math {
template
<
class
T
>
__global__
void
vol2col
(
int
num_kernels
,
const
T
*
data_vol
,
int
depth
,
int
height
,
int
width
,
int
filter_depth
,
int
filter_height
,
int
filter_width
,
int
stride_depth
,
int
stride_height
,
int
stride_width
,
int
padding_depth
,
int
padding_height
,
int
padding_width
,
int
output_detph
,
int
output_height
,
int
output_width
,
T
*
data_col
)
{
int
height
,
int
width
,
int
dilation_d
,
int
dilation_h
,
int
dilation_w
,
int
filter_depth
,
int
filter_height
,
int
filter_width
,
int
stride_depth
,
int
stride_height
,
int
stride_width
,
int
padding_depth
,
int
padding_height
,
int
padding_width
,
int
output_detph
,
int
output_height
,
int
output_width
,
T
*
data_col
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
num_kernels
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
w_out
=
index
%
output_width
;
...
...
@@ -44,12 +45,14 @@ __global__ void vol2col(int num_kernels, const T* data_vol, int depth,
for
(
int
k
=
0
;
k
<
filter_depth
;
++
k
)
{
for
(
int
i
=
0
;
i
<
filter_height
;
++
i
)
{
for
(
int
j
=
0
;
j
<
filter_width
;
++
j
)
{
int
d
=
d_in
+
k
;
int
h
=
h_in
+
i
;
int
w
=
w_in
+
j
;
int
d
=
d_in
+
k
*
dilation_d
;
int
h
=
h_in
+
i
*
dilation_h
;
int
w
=
w_in
+
j
*
dilation_w
;
int
col_idx
=
(
k
*
dilation_d
*
height
+
i
*
dilation_h
)
*
width
+
j
*
dilation_w
;
*
data_col
=
(
d
>=
0
&&
d
<
depth
&&
h
>=
0
&&
h
<
height
&&
w
>=
0
&&
w
<
width
)
?
data_vol
[
(
k
*
height
+
i
)
*
width
+
j
]
?
data_vol
[
col_idx
]
:
0
;
data_col
+=
output_detph
*
output_height
*
output_width
;
}
...
...
@@ -68,23 +71,46 @@ template <class T>
class
Vol2ColFunctor
<
platform
::
GPUPlace
,
T
>
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
vol
,
framework
::
Tensor
&
col
,
int
stride_depth
,
int
stride_height
,
int
stride_width
,
int
padding_depth
,
int
padding_height
,
int
padding_width
)
const
{
const
framework
::
Tensor
&
vol
,
const
std
::
vector
<
int
>&
dilations
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
col
)
const
{
PADDLE_ENFORCE
(
vol
.
dims
().
size
()
==
4
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
7
);
PADDLE_ENFORCE
(
col
->
dims
().
size
()
==
7
);
int
input_channels
=
vol
.
dims
()[
0
];
int
input_depth
=
vol
.
dims
()[
1
];
int
input_height
=
vol
.
dims
()[
2
];
int
input_width
=
vol
.
dims
()[
3
];
int
filter_depth
=
col
.
dims
()[
1
];
int
filter_height
=
col
.
dims
()[
2
];
int
filter_width
=
col
.
dims
()[
3
];
int
output_depth
=
col
.
dims
()[
4
];
int
output_height
=
col
.
dims
()[
5
];
int
output_width
=
col
.
dims
()[
6
];
int
filter_depth
=
col
->
dims
()[
1
];
int
filter_height
=
col
->
dims
()[
2
];
int
filter_width
=
col
->
dims
()[
3
];
int
output_depth
=
col
->
dims
()[
4
];
int
output_height
=
col
->
dims
()[
5
];
int
output_width
=
col
->
dims
()[
6
];
PADDLE_ENFORCE_EQ
((
input_depth
+
2
*
paddings
[
0
]
-
((
dilations
[
0
]
*
(
filter_depth
-
1
)
+
1
)))
/
strides
[
0
]
+
1
,
output_depth
,
"input_depth and output_depth are "
"Mismatching."
);
PADDLE_ENFORCE_EQ
((
input_height
+
2
*
paddings
[
1
]
-
((
dilations
[
1
]
*
(
filter_height
-
1
)
+
1
)))
/
strides
[
1
]
+
1
,
output_height
,
"input_height and output_height are "
"Mismatching."
);
PADDLE_ENFORCE_EQ
((
input_width
+
2
*
paddings
[
2
]
-
((
dilations
[
2
]
*
(
filter_width
-
1
)
+
1
)))
/
strides
[
2
]
+
1
,
output_width
,
"input_width and output_width are "
"Mismatching."
);
int
num_outputs
=
input_channels
*
output_depth
*
output_height
*
output_width
;
...
...
@@ -95,19 +121,25 @@ class Vol2ColFunctor<platform::GPUPlace, T> {
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
num_outputs
,
vol
.
data
<
T
>
(),
input_depth
,
input_height
,
input_width
,
filter_depth
,
filter_height
,
filter_width
,
stride_depth
,
stride_height
,
stride_width
,
padding_depth
,
padding_height
,
padding_width
,
output_depth
,
output_height
,
output_width
,
col
.
data
<
T
>
());
dilations
[
0
],
dilations
[
1
],
dilations
[
2
],
filter_depth
,
filter_height
,
filter_width
,
strides
[
0
],
strides
[
1
],
strides
[
2
],
paddings
[
0
],
paddings
[
1
],
paddings
[
2
],
output_depth
,
output_height
,
output_width
,
col
->
data
<
T
>
());
}
};
template
<
class
T
>
__global__
void
col2vol
(
int
num_kernels
,
const
T
*
data_col
,
int
depth
,
int
height
,
int
width
,
int
filter_depth
,
int
filter_height
,
int
filter_width
,
int
stride_depth
,
int
stride_height
,
int
stride_width
,
int
padding_depth
,
int
padding_height
,
int
padding_width
,
int
output_detph
,
int
output_height
,
int
output_width
,
T
*
data_vol
)
{
int
height
,
int
width
,
int
dilation_d
,
int
dilation_h
,
int
dilation_w
,
int
filter_depth
,
int
filter_height
,
int
filter_width
,
int
stride_depth
,
int
stride_height
,
int
stride_width
,
int
padding_depth
,
int
padding_height
,
int
padding_width
,
int
output_detph
,
int
output_height
,
int
output_width
,
T
*
data_vol
)
{
const
int
d_filter_depth
=
dilation_d
*
(
filter_depth
-
1
)
+
1
;
const
int
d_filter_height
=
dilation_h
*
(
filter_height
-
1
)
+
1
;
const
int
d_filter_width
=
dilation_w
*
(
filter_width
-
1
)
+
1
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
num_kernels
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
T
src_val
=
0
;
...
...
@@ -115,35 +147,41 @@ __global__ void col2vol(int num_kernels, const T* data_col, int depth,
int
h
=
(
index
/
width
)
%
height
+
padding_height
;
int
d
=
(
index
/
width
/
height
)
%
depth
+
padding_depth
;
int
c
=
index
/
width
/
height
/
depth
;
// compute the start and end of the output
int
w_col_start
=
(
w
<
filter_width
)
?
0
:
(
w
-
filter_width
)
/
stride_width
+
1
;
(
w
<
d_filter_width
)
?
0
:
(
w
-
d_
filter_width
)
/
stride_width
+
1
;
int
w_col_end
=
min
(
w
/
stride_width
+
1
,
output_width
);
int
h_col_start
=
(
h
<
filter_height
)
?
0
:
(
h
-
filter_height
)
/
stride_height
+
1
;
(
h
<
d_filter_height
)
?
0
:
(
h
-
d_
filter_height
)
/
stride_height
+
1
;
int
h_col_end
=
min
(
h
/
stride_height
+
1
,
output_height
);
int
d_col_start
=
(
d
<
filter_depth
)
?
0
:
(
d
-
filter_depth
)
/
stride_depth
+
1
;
(
d
<
d_filter_depth
)
?
0
:
(
d
-
d_
filter_depth
)
/
stride_depth
+
1
;
int
d_col_end
=
min
(
d
/
stride_depth
+
1
,
output_detph
);
int
offset
=
(
c
*
filter_depth
*
filter_height
*
filter_width
+
d
*
filter_width
*
filter_height
+
h
*
filter_width
+
w
)
*
output_detph
*
output_height
*
output_width
;
int
coeff_d_col
=
(
1
-
stride_depth
*
filter_width
*
filter_height
*
output_detph
)
*
output_height
*
output_width
;
int
coeff_h_col
=
(
1
-
stride_height
*
filter_width
*
output_detph
*
output_height
)
*
output_width
;
int
coeff_w_col
=
(
1
-
stride_width
*
output_detph
*
output_height
*
output_width
);
for
(
int
d_col
=
d_col_start
;
d_col
<
d_col_end
;
++
d_col
)
{
for
(
int
h_col
=
h_col_start
;
h_col
<
h_col_end
;
++
h_col
)
{
for
(
int
w_col
=
w_col_start
;
w_col
<
w_col_end
;
++
w_col
)
{
src_val
+=
data_col
[
offset
+
d_col
*
coeff_d_col
+
h_col
*
coeff_h_col
+
w_col
*
coeff_w_col
];
int
d_off
=
(
d
-
d_col
*
stride_depth
);
int
h_off
=
(
h
-
h_col
*
stride_height
);
int
w_off
=
(
w
-
w_col
*
stride_width
);
if
(
d_off
%
dilation_d
==
0
&&
h_off
%
dilation_h
==
0
&&
w_off
%
dilation_w
==
0
)
{
d_off
/=
dilation_d
;
h_off
/=
dilation_h
;
w_off
/=
dilation_w
;
int
data_col_index
=
(((((
c
*
filter_depth
+
d_off
)
*
filter_height
+
h_off
)
*
filter_width
+
w_off
)));
data_col_index
=
((
data_col_index
*
output_detph
+
d_col
)
*
output_height
+
h_col
)
*
output_width
+
w_col
;
src_val
+=
data_col
[
data_col_index
];
}
}
}
}
...
...
@@ -161,17 +199,18 @@ template <class T>
class
Col2VolFunctor
<
platform
::
GPUPlace
,
T
>
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
vol
,
const
framework
::
Tensor
&
col
,
int
stride_depth
,
int
stride_height
,
int
stride_width
,
int
padding_depth
,
int
padding_height
,
int
padding_width
)
const
{
PADDLE_ENFORCE
(
vol
.
dims
().
size
()
==
4
);
const
framework
::
Tensor
&
col
,
const
std
::
vector
<
int
>&
dilations
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
vol
)
const
{
PADDLE_ENFORCE
(
vol
->
dims
().
size
()
==
4
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
7
);
int
input_channels
=
vol
.
dims
()[
0
];
int
input_depth
=
vol
.
dims
()[
1
];
int
input_height
=
vol
.
dims
()[
2
];
int
input_width
=
vol
.
dims
()[
3
];
int
input_channels
=
vol
->
dims
()[
0
];
int
input_depth
=
vol
->
dims
()[
1
];
int
input_height
=
vol
->
dims
()[
2
];
int
input_width
=
vol
->
dims
()[
3
];
int
filter_depth
=
col
.
dims
()[
1
];
int
filter_height
=
col
.
dims
()[
2
];
int
filter_width
=
col
.
dims
()[
3
];
...
...
@@ -179,6 +218,28 @@ class Col2VolFunctor<platform::GPUPlace, T> {
int
output_height
=
col
.
dims
()[
5
];
int
output_width
=
col
.
dims
()[
6
];
PADDLE_ENFORCE_EQ
((
input_depth
+
2
*
paddings
[
0
]
-
((
dilations
[
0
]
*
(
filter_depth
-
1
)
+
1
)))
/
strides
[
0
]
+
1
,
output_depth
,
"input_depth and output_depth are "
"Mismatching."
);
PADDLE_ENFORCE_EQ
((
input_height
+
2
*
paddings
[
1
]
-
((
dilations
[
1
]
*
(
filter_height
-
1
)
+
1
)))
/
strides
[
1
]
+
1
,
output_height
,
"input_height and output_height are "
"Mismatching."
);
PADDLE_ENFORCE_EQ
((
input_width
+
2
*
paddings
[
2
]
-
((
dilations
[
2
]
*
(
filter_width
-
1
)
+
1
)))
/
strides
[
2
]
+
1
,
output_width
,
"input_width and output_width are "
"Mismatching."
);
int
num_kernels
=
input_channels
*
input_depth
*
input_height
*
input_width
;
const
int
threads
=
1024
;
...
...
@@ -188,9 +249,10 @@ class Col2VolFunctor<platform::GPUPlace, T> {
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
num_kernels
,
col
.
data
<
T
>
(),
input_depth
,
input_height
,
input_width
,
filter_depth
,
filter_height
,
filter_width
,
stride_depth
,
stride_height
,
stride_width
,
padding_depth
,
padding_height
,
padding_width
,
output_depth
,
output_height
,
output_width
,
vol
.
data
<
T
>
());
dilations
[
0
],
dilations
[
1
],
dilations
[
2
],
filter_depth
,
filter_height
,
filter_width
,
strides
[
0
],
strides
[
1
],
strides
[
2
],
paddings
[
0
],
paddings
[
1
],
paddings
[
2
],
output_depth
,
output_height
,
output_width
,
vol
->
data
<
T
>
());
}
};
...
...
paddle/operators/math/vol2col.h
浏览文件 @
4fc9f55e
...
...
@@ -31,6 +31,15 @@ namespace math {
* \param colData Column data.
* \param colShape The shape of colData.
*
* \param dilations dilation data.
* \param 3-dimension [dilation_depth, dilation_height, dilation_width].
*
* \param strides stride data.
* \param 3-dimension [stride_depth, stride_height, stride_width].
*
* \param paddings padding data.
* \param 3-dimension [d_pad, h_pad, w_pad].
*
* The shape of colData is:
* [input_channels, filter_depth, filter_height, filter_width, output_depth,
* output_height, output_width]
...
...
@@ -57,20 +66,22 @@ template <typename Place, typename T>
class
Vol2ColFunctor
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
vol
,
framework
::
Tensor
&
col
,
int
stride_depth
,
int
stride_height
,
int
stride_width
,
int
padding_depth
,
int
padding_height
,
int
padding_width
)
const
;
const
framework
::
Tensor
&
vol
,
const
std
::
vector
<
int
>&
dilations
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
col
)
const
;
};
template
<
typename
Place
,
typename
T
>
class
Col2VolFunctor
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
vol
,
const
framework
::
Tensor
&
col
,
int
stride_depth
,
int
stride_height
,
int
stride_width
,
int
padding_depth
,
int
padding_height
,
int
padding_width
)
const
;
const
framework
::
Tensor
&
col
,
const
std
::
vector
<
int
>&
dilations
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
vol
)
const
;
};
}
// namespace math
...
...
paddle/operators/math/vol2col_test.cc
浏览文件 @
4fc9f55e
...
...
@@ -62,11 +62,15 @@ void testVol2col() {
int
input_height
=
2
;
int
input_width
=
3
;
int
filter_size
=
2
;
int
stride
=
1
;
int
padding
=
0
;
int
output_depth
=
(
input_depth
-
filter_size
+
2
*
padding
)
/
stride
+
1
;
int
output_height
=
(
input_height
-
filter_size
+
2
*
padding
)
/
stride
+
1
;
int
output_width
=
(
input_width
-
filter_size
+
2
*
padding
)
/
stride
+
1
;
std
::
vector
<
int
>
strides
({
1
,
1
,
1
});
std
::
vector
<
int
>
paddings
({
0
,
0
,
0
});
std
::
vector
<
int
>
dilations
({
1
,
1
,
1
});
int
output_depth
=
(
input_depth
-
filter_size
+
2
*
paddings
[
0
])
/
strides
[
0
]
+
1
;
int
output_height
=
(
input_height
-
filter_size
+
2
*
paddings
[
1
])
/
strides
[
1
]
+
1
;
int
output_width
=
(
input_width
-
filter_size
+
2
*
paddings
[
2
])
/
strides
[
2
]
+
1
;
// Vol2Col test
float
*
input_ptr
=
...
...
@@ -85,8 +89,7 @@ void testVol2col() {
*
place
);
paddle
::
operators
::
math
::
Vol2ColFunctor
<
Place
,
float
>
vol2col
;
vol2col
(
*
context
,
input
,
output
,
stride
,
stride
,
stride
,
padding
,
padding
,
padding
);
vol2col
(
*
context
,
input
,
dilations
,
strides
,
paddings
,
&
output
);
float
vol_2_col
[]
=
{
0
,
1
,
1
,
2
,
3
,
4
,
4
,
5
,
6
,
7
,
7
,
8
,
9
,
10
,
10
,
11
};
float
*
out_cfo_ptr
;
...
...
@@ -111,8 +114,7 @@ void testVol2col() {
}
paddle
::
operators
::
math
::
Col2VolFunctor
<
Place
,
float
>
col2vol
;
col2vol
(
*
context
,
input
,
output
,
stride
,
stride
,
stride
,
padding
,
padding
,
padding
);
col2vol
(
*
context
,
output
,
dilations
,
strides
,
paddings
,
&
input
);
float
*
in_ptr
;
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
...
...
paddle/operators/sequence_conv_op.h
浏览文件 @
4fc9f55e
...
...
@@ -62,9 +62,9 @@ class SequenceConvKernel : public framework::OpKernel<T> {
math
::
ContextProjectFunctor
<
Place
,
T
>
seq_project_functor
;
seq_project_functor
(
context
.
device_context
(),
*
in
,
*
padding_data
,
col
,
seq_project_functor
(
context
.
device_context
(),
*
in
,
*
padding_data
,
padding_trainable
,
context_start
,
context_length
,
context_stride
,
up_pad
,
down_pad
);
context_stride
,
up_pad
,
down_pad
,
&
col
);
math
::
matmul
<
Place
,
T
>
(
context
.
device_context
(),
col
,
false
,
filter
,
false
,
static_cast
<
T
>
(
1.0
),
out
,
static_cast
<
T
>
(
0.0
));
...
...
@@ -117,10 +117,10 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
in_g
->
set_lod
(
in
->
lod
());
set_zero
(
context
.
device_context
(),
in_g
,
static_cast
<
T
>
(
0
));
seq_project_grad_functor
(
context
.
device_context
(),
*
in_g
,
*
padding_data_g
,
col
,
padding_trainable
,
context_start
,
context_
length
,
context_stride
,
up_pad
,
down_pad
,
true
,
false
);
seq_project_grad_functor
(
context
.
device_context
(),
*
in_g
,
padding_trainable
,
context_start
,
context_length
,
context_
stride
,
up_pad
,
down_pad
,
false
,
true
,
padding_data_g
,
&
col
);
}
if
(
padding_trainable
&&
padding_data_g
)
{
...
...
@@ -129,9 +129,9 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
LoDTensor
*
input
=
const_cast
<
LoDTensor
*>
(
in
);
seq_project_grad_functor
(
context
.
device_context
(),
*
input
,
*
padding_data_g
,
col
,
padding_trainable
,
context_st
art
,
context_length
,
context_strid
e
,
up_pad
,
down_pad
,
false
,
true
);
padding_trainable
,
context_start
,
context_length
,
context_st
ride
,
up_pad
,
down_pad
,
true
,
fals
e
,
padding_data_g
,
&
col
);
}
if
(
filter_g
)
{
...
...
@@ -146,9 +146,9 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
padding_data
=
context
.
Input
<
Tensor
>
(
"PaddingData"
);
}
seq_project_functor
(
context
.
device_context
(),
*
in
,
*
padding_data
,
col
,
seq_project_functor
(
context
.
device_context
(),
*
in
,
*
padding_data
,
padding_trainable
,
context_start
,
context_length
,
context_stride
,
up_pad
,
down_pad
);
context_stride
,
up_pad
,
down_pad
,
&
col
);
math
::
matmul
<
Place
,
T
>
(
context
.
device_context
(),
col
,
true
,
out_grad
,
false
,
T
(
1.0
),
&
filter_grad
,
T
(
1.0
));
...
...
python/paddle/v2/fluid/tests/test_conv2d_op.py
浏览文件 @
4fc9f55e
...
...
@@ -10,23 +10,33 @@ def conv2d_forward_naive(input, filter, group, conv_param):
assert
np
.
mod
(
out_c
,
group
)
==
0
sub_out_c
=
out_c
/
group
stride
,
pad
=
conv_param
[
'stride'
],
conv_param
[
'pad'
]
out_h
=
1
+
(
in_h
+
2
*
pad
[
0
]
-
f_h
)
/
stride
[
0
]
out_w
=
1
+
(
in_w
+
2
*
pad
[
1
]
-
f_w
)
/
stride
[
1
]
stride
,
pad
,
dilation
=
conv_param
[
'stride'
],
conv_param
[
'pad'
],
conv_param
[
'dilation'
]
out_h
=
1
+
(
in_h
+
2
*
pad
[
0
]
-
(
dilation
[
0
]
*
(
f_h
-
1
)
+
1
))
/
stride
[
0
]
out_w
=
1
+
(
in_w
+
2
*
pad
[
1
]
-
(
dilation
[
1
]
*
(
f_w
-
1
)
+
1
))
/
stride
[
1
]
out
=
np
.
zeros
((
in_n
,
out_c
,
out_h
,
out_w
))
d_bolck_w
=
(
dilation
[
0
]
*
(
f_h
-
1
)
+
1
)
d_bolck_h
=
(
dilation
[
1
]
*
(
f_w
-
1
)
+
1
)
input_pad
=
np
.
pad
(
input
,
((
0
,
),
(
0
,
),
(
pad
[
0
],
),
(
pad
[
1
],
)),
mode
=
'constant'
,
constant_values
=
0
)
filter_dilation
=
np
.
zeros
((
out_c
,
f_c
,
d_bolck_h
,
d_bolck_w
))
filter_dilation
[:,
:,
0
:
d_bolck_h
:
dilation
[
0
],
0
:
d_bolck_w
:
dilation
[
1
]]
=
filter
for
i
in
range
(
out_h
):
for
j
in
range
(
out_w
):
for
g
in
range
(
group
):
input_pad_masked
=
\
input_pad
[:,
g
*
f_c
:(
g
+
1
)
*
f_c
,
i
*
stride
[
0
]:
i
*
stride
[
0
]
+
f
_h
,
j
*
stride
[
1
]:
j
*
stride
[
1
]
+
f
_w
]
i
*
stride
[
0
]:
i
*
stride
[
0
]
+
d_bolck
_h
,
j
*
stride
[
1
]:
j
*
stride
[
1
]
+
d_bolck
_w
]
f_sub
=
filter
[
g
*
sub_out_c
:(
g
+
1
)
*
sub_out_c
,
:,
:,
:]
f_sub
=
filter_dilation
[
g
*
sub_out_c
:(
g
+
1
)
*
sub_out_c
,
:,
:,
:]
for
k
in
range
(
sub_out_c
):
out
[:,
g
*
sub_out_c
+
k
,
i
,
j
]
=
\
np
.
sum
(
input_pad_masked
*
f_sub
[
k
,
:,
:,
:],
...
...
@@ -39,9 +49,14 @@ class TestConv2dOp(OpTest):
def
setUp
(
self
):
self
.
init_op_type
()
self
.
init_group
()
self
.
init_dilation
()
self
.
init_test_case
()
conv2d_param
=
{
'stride'
:
self
.
stride
,
'pad'
:
self
.
pad
}
conv2d_param
=
{
'stride'
:
self
.
stride
,
'pad'
:
self
.
pad
,
'dilation'
:
self
.
dilations
}
input
=
np
.
random
.
random
(
self
.
input_size
).
astype
(
"float32"
)
filter
=
np
.
random
.
random
(
self
.
filter_size
).
astype
(
"float32"
)
output
=
conv2d_forward_naive
(
input
,
filter
,
self
.
groups
,
...
...
@@ -80,12 +95,14 @@ class TestConv2dOp(OpTest):
def
init_test_case
(
self
):
self
.
pad
=
[
0
,
0
]
self
.
stride
=
[
1
,
1
]
self
.
dilations
=
[
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
]
# NCHW
assert
np
.
mod
(
self
.
input_size
[
1
],
self
.
groups
)
==
0
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
3
,
3
]
def
init_dilation
(
self
):
self
.
dilations
=
[
1
,
1
]
def
init_group
(
self
):
self
.
groups
=
1
...
...
@@ -101,24 +118,66 @@ class TestWithGroup(TestConv2dOp):
self
.
op_type
=
"conv2d"
#----------------Conv2dCudnn----------------
class
TestWith1x1
(
TestConv2dOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
0
,
0
]
self
.
stride
=
[
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
]
# NCHW
assert
np
.
mod
(
self
.
input_size
[
1
],
self
.
groups
)
==
0
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
1
,
1
]
def
init_dilation
(
self
):
self
.
dilations
=
[
1
,
1
]
class
TestCudnn
(
TestConv2dOp
):
def
init_group
(
self
):
self
.
groups
=
1
self
.
groups
=
3
def
init_op_type
(
self
):
self
.
op_type
=
"conv
_cudnn
"
self
.
op_type
=
"conv
2d
"
class
TestCudnnWithGroup
(
TestConv2dOp
):
class
TestWithDilation
(
TestConv2dOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
0
,
0
]
self
.
stride
=
[
1
,
1
]
self
.
input_size
=
[
2
,
3
,
10
,
10
]
# NCHW
assert
np
.
mod
(
self
.
input_size
[
1
],
self
.
groups
)
==
0
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
3
,
3
]
def
init_dilation
(
self
):
self
.
dilations
=
[
2
,
2
]
def
init_group
(
self
):
self
.
groups
=
3
def
init_op_type
(
self
):
self
.
op_type
=
"conv2d"
#----------------Conv2dCudnn----------------
class
TestCudnn
(
TestConv2dOp
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv_cudnn"
class
TestCudnnWithGroup
(
TestWithGroup
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv_cudnn"
class
TestCudnnWith1x1
(
TestWith1x1
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv_cudnn"
# cudnn v5 does not support dilation conv.
# class TestCudnnWithDilation(TestWithDilation):
# def init_op_type(self):
# self.op_type = "conv_cudnn"
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/v2/fluid/tests/test_conv3d_op.py
浏览文件 @
4fc9f55e
...
...
@@ -10,26 +10,39 @@ def conv3d_forward_naive(input, filter, group, conv_param):
assert
np
.
mod
(
out_c
,
group
)
==
0
sub_out_c
=
out_c
/
group
stride
,
pad
=
conv_param
[
'stride'
],
conv_param
[
'pad'
]
out_d
=
1
+
(
in_d
+
2
*
pad
[
0
]
-
f_h
)
/
stride
[
0
]
out_h
=
1
+
(
in_h
+
2
*
pad
[
1
]
-
f_h
)
/
stride
[
1
]
out_w
=
1
+
(
in_w
+
2
*
pad
[
2
]
-
f_w
)
/
stride
[
2
]
stride
,
pad
,
dilation
=
conv_param
[
'stride'
],
conv_param
[
'pad'
],
conv_param
[
'dilations'
]
out_d
=
1
+
(
in_d
+
2
*
pad
[
0
]
-
(
dilation
[
0
]
*
(
f_d
-
1
)
+
1
))
/
stride
[
0
]
out_h
=
1
+
(
in_h
+
2
*
pad
[
1
]
-
(
dilation
[
1
]
*
(
f_h
-
1
)
+
1
))
/
stride
[
1
]
out_w
=
1
+
(
in_w
+
2
*
pad
[
2
]
-
(
dilation
[
2
]
*
(
f_w
-
1
)
+
1
))
/
stride
[
2
]
out
=
np
.
zeros
((
in_n
,
out_c
,
out_d
,
out_h
,
out_w
))
d_bolck_d
=
(
dilation
[
0
]
*
(
f_d
-
1
)
+
1
)
d_bolck_h
=
(
dilation
[
1
]
*
(
f_h
-
1
)
+
1
)
d_bolck_w
=
(
dilation
[
2
]
*
(
f_w
-
1
)
+
1
)
input_pad
=
np
.
pad
(
input
,
((
0
,
),
(
0
,
),
(
pad
[
0
],
),
(
pad
[
1
],
),
(
pad
[
2
],
)),
mode
=
'constant'
,
constant_values
=
0
)
filter_dilation
=
np
.
zeros
((
out_c
,
f_c
,
d_bolck_d
,
d_bolck_h
,
d_bolck_w
))
filter_dilation
[:,
:,
0
:
d_bolck_d
:
dilation
[
0
],
0
:
d_bolck_h
:
dilation
[
1
],
0
:
d_bolck_w
:
dilation
[
2
]]
=
filter
for
d
in
range
(
out_d
):
for
i
in
range
(
out_h
):
for
j
in
range
(
out_w
):
for
g
in
range
(
group
):
input_pad_masked
=
\
input_pad
[:,
g
*
f_c
:(
g
+
1
)
*
f_c
,
d
*
stride
[
0
]:
d
*
stride
[
0
]
+
f_d
,
i
*
stride
[
1
]:
i
*
stride
[
1
]
+
f_h
,
j
*
stride
[
2
]:
j
*
stride
[
2
]
+
f_w
]
f_sub
=
filter
[
g
*
sub_out_c
:(
g
+
1
)
*
d
*
stride
[
0
]:
d
*
stride
[
0
]
+
d_bolck_d
,
i
*
stride
[
1
]:
i
*
stride
[
1
]
+
d_bolck_h
,
j
*
stride
[
2
]:
j
*
stride
[
2
]
+
d_bolck_w
]
f_sub
=
filter_dilation
[
g
*
sub_out_c
:(
g
+
1
)
*
sub_out_c
,
:,
:,
:,
:]
for
k
in
range
(
sub_out_c
):
out
[:,
g
*
sub_out_c
+
k
,
d
,
i
,
j
]
=
\
...
...
@@ -43,9 +56,14 @@ class TestConv3dOp(OpTest):
def
setUp
(
self
):
self
.
init_group
()
self
.
init_op_type
()
self
.
init_dilation
()
self
.
init_test_case
()
conv3d_param
=
{
'stride'
:
self
.
stride
,
'pad'
:
self
.
pad
}
conv3d_param
=
{
'stride'
:
self
.
stride
,
'pad'
:
self
.
pad
,
'dilations'
:
self
.
dilations
}
input
=
np
.
random
.
random
(
self
.
input_size
).
astype
(
"float32"
)
filter
=
np
.
random
.
random
(
self
.
filter_size
).
astype
(
"float32"
)
output
=
conv3d_forward_naive
(
input
,
filter
,
self
.
groups
,
...
...
@@ -55,7 +73,8 @@ class TestConv3dOp(OpTest):
self
.
attrs
=
{
'strides'
:
self
.
stride
,
'paddings'
:
self
.
pad
,
'groups'
:
self
.
groups
'groups'
:
self
.
groups
,
'dilations'
:
self
.
dilations
}
self
.
outputs
=
{
'Output'
:
output
}
...
...
@@ -88,6 +107,9 @@ class TestConv3dOp(OpTest):
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
3
,
3
,
3
]
def
init_dilation
(
self
):
self
.
dilations
=
[
1
,
1
,
1
]
def
init_group
(
self
):
self
.
groups
=
1
...
...
@@ -104,27 +126,47 @@ class TestCase1(TestConv3dOp):
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
3
,
3
,
3
]
def
init_group
(
self
):
self
.
groups
=
1
def
init_op_type
(
self
):
self
.
op_type
=
"conv3d"
class
TestWithGroup1
(
TestConv3dOp
):
def
init_group
(
self
):
self
.
groups
=
3
class
TestWithGroup
1
(
TestConv3dOp
):
class
TestWithGroup
2
(
TestCase1
):
def
init_group
(
self
):
self
.
groups
=
3
def
init_op_type
(
self
):
self
.
op_type
=
"conv3d"
class
TestWith1x1
(
TestConv3dOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
0
,
0
,
0
]
self
.
stride
=
[
1
,
1
,
1
]
self
.
input_size
=
[
2
,
3
,
4
,
4
,
4
]
# NCHW
assert
np
.
mod
(
self
.
input_size
[
1
],
self
.
groups
)
==
0
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
1
,
1
,
1
]
def
init_dilation
(
self
):
self
.
dilations
=
[
1
,
1
,
1
]
class
TestWithGroup2
(
TestCase1
):
def
init_group
(
self
):
self
.
groups
=
3
def
init_op_type
(
self
):
self
.
op_type
=
"conv3d"
class
TestWithDilation
(
TestConv3dOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
0
,
0
,
0
]
self
.
stride
=
[
1
,
1
,
1
]
self
.
input_size
=
[
2
,
3
,
6
,
6
,
6
]
# NCDHW
assert
np
.
mod
(
self
.
input_size
[
1
],
self
.
groups
)
==
0
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
2
,
2
,
2
]
def
init_dilation
(
self
):
self
.
dilations
=
[
2
,
2
,
2
]
def
init_group
(
self
):
self
.
groups
=
3
if
__name__
==
'__main__'
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录