Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
4fc9f55e
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
4fc9f55e
编写于
11月 15, 2017
作者:
C
chengduo
提交者:
GitHub
11月 15, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #5472 from chengduoZH/refine_im2col
Add dilations for conv2d and optimize conv2d code
上级
09866fb7
00e0881b
变更
17
隐藏空白更改
内联
并排
Showing
17 changed file
with
944 addition
and
635 deletion
+944
-635
paddle/operators/conv_cudnn_op.cc
paddle/operators/conv_cudnn_op.cc
+0
-2
paddle/operators/conv_op.cc
paddle/operators/conv_op.cc
+40
-14
paddle/operators/conv_op.h
paddle/operators/conv_op.h
+79
-46
paddle/operators/conv_transpose_op.cc
paddle/operators/conv_transpose_op.cc
+12
-9
paddle/operators/conv_transpose_op.h
paddle/operators/conv_transpose_op.h
+24
-19
paddle/operators/math/context_project.h
paddle/operators/math/context_project.h
+29
-24
paddle/operators/math/im2col.cc
paddle/operators/math/im2col.cc
+142
-151
paddle/operators/math/im2col.cu
paddle/operators/math/im2col.cu
+210
-193
paddle/operators/math/im2col.h
paddle/operators/math/im2col.h
+17
-7
paddle/operators/math/im2col_test.cc
paddle/operators/math/im2col_test.cc
+12
-12
paddle/operators/math/vol2col.cc
paddle/operators/math/vol2col.cc
+83
-38
paddle/operators/math/vol2col.cu
paddle/operators/math/vol2col.cu
+120
-58
paddle/operators/math/vol2col.h
paddle/operators/math/vol2col.h
+19
-8
paddle/operators/math/vol2col_test.cc
paddle/operators/math/vol2col_test.cc
+11
-9
paddle/operators/sequence_conv_op.h
paddle/operators/sequence_conv_op.h
+11
-11
python/paddle/v2/fluid/tests/test_conv2d_op.py
python/paddle/v2/fluid/tests/test_conv2d_op.py
+72
-13
python/paddle/v2/fluid/tests/test_conv3d_op.py
python/paddle/v2/fluid/tests/test_conv3d_op.py
+63
-21
未找到文件。
paddle/operators/conv_cudnn_op.cc
浏览文件 @
4fc9f55e
...
@@ -22,8 +22,6 @@ class CudnnConvOpMaker : public Conv2DOpMaker {
...
@@ -22,8 +22,6 @@ class CudnnConvOpMaker : public Conv2DOpMaker {
CudnnConvOpMaker
(
framework
::
OpProto
*
proto
,
CudnnConvOpMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
framework
::
OpAttrChecker
*
op_checker
)
:
Conv2DOpMaker
(
proto
,
op_checker
)
{
:
Conv2DOpMaker
(
proto
,
op_checker
)
{
AddAttr
<
std
::
vector
<
int
>>
(
"dilations"
,
"dilations of convolution operator."
)
.
SetDefault
(
std
::
vector
<
int
>
{
1
,
1
});
AddAttr
<
int
>
(
"workspace_size_MB"
,
AddAttr
<
int
>
(
"workspace_size_MB"
,
"workspace size for cudnn, in MB, "
"workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be "
"workspace is a section of GPU memory which will be "
...
...
paddle/operators/conv_op.cc
浏览文件 @
4fc9f55e
...
@@ -30,6 +30,7 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
...
@@ -30,6 +30,7 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
std
::
vector
<
int
>
strides
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
strides
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
paddings
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"paddings"
);
int
groups
=
ctx
->
Attrs
().
Get
<
int
>
(
"groups"
);
int
groups
=
ctx
->
Attrs
().
Get
<
int
>
(
"groups"
);
std
::
vector
<
int
>
dilations
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"dilations"
);
int
input_channels
=
in_dims
[
1
];
int
input_channels
=
in_dims
[
1
];
int
output_channels
=
filter_dims
[
0
];
int
output_channels
=
filter_dims
[
0
];
...
@@ -52,9 +53,15 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
...
@@ -52,9 +53,15 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
"The number of output channels should be divided by groups."
);
"The number of output channels should be divided by groups."
);
std
::
vector
<
int64_t
>
output_shape
({
in_dims
[
0
],
filter_dims
[
0
]});
std
::
vector
<
int64_t
>
output_shape
({
in_dims
[
0
],
filter_dims
[
0
]});
for
(
size_t
i
=
0
;
i
<
paddings
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
strides
.
size
();
++
i
)
{
PADDLE_ENFORCE
(
in_dims
[
i
+
2
]
+
2
*
paddings
[
i
]
-
(
dilations
[
i
]
*
(
filter_dims
[
i
+
2
]
-
1
)
+
1
)
>
0
,
"Due to the settings of paddings, filter_dims and "
"dilations, the output size is less than 0, please check "
"again."
);
output_shape
.
push_back
(
OutputSize
(
in_dims
[
i
+
2
],
filter_dims
[
i
+
2
],
output_shape
.
push_back
(
OutputSize
(
in_dims
[
i
+
2
],
filter_dims
[
i
+
2
],
paddings
[
i
],
strides
[
i
]));
dilations
[
i
],
paddings
[
i
],
strides
[
i
]));
}
}
ctx
->
SetOutputDim
(
"Output"
,
framework
::
make_ddim
(
output_shape
));
ctx
->
SetOutputDim
(
"Output"
,
framework
::
make_ddim
(
output_shape
));
}
}
...
@@ -78,9 +85,15 @@ Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto,
...
@@ -78,9 +85,15 @@ Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto,
AddOutput
(
"Output"
,
AddOutput
(
"Output"
,
"(Tensor) The output tensor of convolution operator. "
"(Tensor) The output tensor of convolution operator. "
"The format of output tensor is also NCHW."
);
"The format of output tensor is also NCHW."
);
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
"strides of convolution operator."
)
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
"(vector<int> default:{1, 1}), the "
"strides(h_stride, w_stride) of "
"convolution operator."
)
.
SetDefault
({
1
,
1
});
.
SetDefault
({
1
,
1
});
AddAttr
<
std
::
vector
<
int
>>
(
"paddings"
,
"paddings of convolution operator."
)
AddAttr
<
std
::
vector
<
int
>>
(
"paddings"
,
"(vector<int> default:{0, 0}), the "
"paddings(h_pad, w_pad) of "
"convolution operator."
)
.
SetDefault
({
0
,
0
});
.
SetDefault
({
0
,
0
});
AddAttr
<
int
>
(
AddAttr
<
int
>
(
"groups"
,
"groups"
,
...
@@ -90,15 +103,20 @@ Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto,
...
@@ -90,15 +103,20 @@ Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto,
"first half of the input channels, while the second half of the filters "
"first half of the input channels, while the second half of the filters "
"is only connected to the second half of the input channels."
)
"is only connected to the second half of the input channels."
)
.
SetDefault
(
1
);
.
SetDefault
(
1
);
AddAttr
<
std
::
vector
<
int
>>
(
"dilations"
,
"(vector<int> default:{1, 1}), the "
"dilations(h_dilation, w_dilation) of "
"convolution operator."
)
.
SetDefault
({
1
,
1
});
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Convolution Operator.
Convolution Operator.
The convolution operation calculates the output based on the input, filter
The convolution operation calculates the output based on the input, filter
and strides, paddings, groups parameters. The size of each dimension of the
and strides, paddings, groups
, dilations
parameters. The size of each dimension of the
parameters is checked in the infer-shape.
parameters is checked in the infer-shape.
Input(Input, Filter) and output(Output) are in NCHW format. Where N is batch
Input(Input, Filter) and output(Output) are in NCHW format. Where N is batch
size, C is the number of channels, H is the height of the feature, and W is
size, C is the number of channels, H is the height of the feature, and W is
the width of the feature. Parameters(ksize, strides, paddings) are two elements.
the width of the feature. Parameters(ksize, strides, paddings
, dilations
) are two elements.
These two elements represent height and width, respectively.
These two elements represent height and width, respectively.
The input(X) size and output(Out) size may be different.
The input(X) size and output(Out) size may be different.
...
@@ -109,8 +127,8 @@ Example:
...
@@ -109,8 +127,8 @@ Example:
Output:
Output:
Output shape: (N, C_out, H_out, W_out)
Output shape: (N, C_out, H_out, W_out)
where
where
H_out = (H_in
- filter_size[0] + 2 * paddings[0]
) / strides[0] + 1;
H_out = (H_in
+ 2 * paddings[0] - (dilations[0]*(filter_size[0] - 1) + 1)
) / strides[0] + 1;
W_out = (W_in
- filter_size[1] + 2 * paddings[1]
) / strides[1] + 1;
W_out = (W_in
+ 2 * paddings[1] - (dilations[1]*(filter_size[1] - 1) + 1)
) / strides[1] + 1;
)DOC"
);
)DOC"
);
}
}
...
@@ -135,13 +153,15 @@ Conv3DOpMaker::Conv3DOpMaker(framework::OpProto* proto,
...
@@ -135,13 +153,15 @@ Conv3DOpMaker::Conv3DOpMaker(framework::OpProto* proto,
AddOutput
(
"Output"
,
AddOutput
(
"Output"
,
"(Tensor) The output tensor of convolution operator."
"(Tensor) The output tensor of convolution operator."
"The format of output tensor is also NCDHW."
);
"The format of output tensor is also NCDHW."
);
AddAttr
<
std
::
vector
<
int
>>
(
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
"strides"
,
"(vector<int>, default:{1, 1, 1}), the "
"(vector, default:{0, 0, 0}), the strides of convolution operator."
)
"strides(d_stride, h_stride, w_stride) of "
"convolution operator."
)
.
SetDefault
({
1
,
1
,
1
});
.
SetDefault
({
1
,
1
,
1
});
AddAttr
<
std
::
vector
<
int
>>
(
AddAttr
<
std
::
vector
<
int
>>
(
"paddings"
,
"paddings"
,
"(vector<int>, default:{0, 0, 0}), the "
"(vector, default:{0, 0, 0}), the paddings of convolution operator."
)
"paddings(d_pad, h_pad, w_pad) of convolution "
"operator."
)
.
SetDefault
({
0
,
0
,
0
});
.
SetDefault
({
0
,
0
,
0
});
AddAttr
<
int
>
(
AddAttr
<
int
>
(
"groups"
,
"groups"
,
...
@@ -151,6 +171,12 @@ Conv3DOpMaker::Conv3DOpMaker(framework::OpProto* proto,
...
@@ -151,6 +171,12 @@ Conv3DOpMaker::Conv3DOpMaker(framework::OpProto* proto,
"first half of the input channels, while the second half of the filters "
"first half of the input channels, while the second half of the filters "
"is only connected to the second half of the input channels."
)
"is only connected to the second half of the input channels."
)
.
SetDefault
(
1
);
.
SetDefault
(
1
);
AddAttr
<
std
::
vector
<
int
>>
(
"dilations"
,
"(vector<int> default:{1, 1, 1}), the "
"dilations(d_dilation, h_dilation, w_dilation) of "
"convolution operator. Currently, conv3d doesn't "
"support dilation."
)
.
SetDefault
({
1
,
1
,
1
});
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Convolution3D Operator.
Convolution3D Operator.
...
...
paddle/operators/conv_op.h
浏览文件 @
4fc9f55e
...
@@ -27,11 +27,24 @@ using Tensor = framework::Tensor;
...
@@ -27,11 +27,24 @@ using Tensor = framework::Tensor;
// Base convolution operator definations for other conv
// Base convolution operator definations for other conv
// like operators to reuse the implementation.
// like operators to reuse the implementation.
inline
int
OutputSize
(
int
input_size
,
int
filter_size
,
int
padding
,
inline
int
OutputSize
(
int
input_size
,
int
filter_size
,
int
dilation
,
int
stride
)
{
int
padding
,
int
stride
)
{
int
output_size
=
(
input_size
-
filter_size
+
2
*
padding
)
/
stride
+
1
;
const
int
dkernel
=
dilation
*
(
filter_size
-
1
)
+
1
;
const
int
output_size
=
(
input_size
+
2
*
padding
-
dkernel
)
/
stride
+
1
;
return
output_size
;
return
output_size
;
}
}
inline
bool
IsExpand
(
std
::
vector
<
int64_t
>&
filter_dim
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
std
::
vector
<
int
>&
dilations
)
{
bool
filter_1
=
true
,
strides_1
=
true
,
padding_0
=
true
,
dilation_1
=
true
;
for
(
size_t
j
=
0
;
j
<
strides
.
size
();
++
j
)
{
filter_1
=
filter_1
&&
(
static_cast
<
int
>
(
filter_dim
[
j
])
==
1
);
strides_1
=
strides_1
&&
(
strides
[
j
]
==
1
);
padding_0
=
padding_0
&&
(
paddings
[
j
]
==
0
);
dilation_1
=
dilation_1
&&
(
dilations
[
j
]
==
1
);
}
return
!
(
filter_1
&&
strides_1
&&
padding_0
&&
dilation_1
);
}
// Define Op classes in .h file so that other conv
// Define Op classes in .h file so that other conv
// operator implementations can reuse the code.
// operator implementations can reuse the code.
...
@@ -50,14 +63,12 @@ class Conv3DOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -50,14 +63,12 @@ class Conv3DOpMaker : public framework::OpProtoAndCheckerMaker {
class
ConvOp
:
public
framework
::
OperatorWithKernel
{
class
ConvOp
:
public
framework
::
OperatorWithKernel
{
public:
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
};
};
class
ConvOpGrad
:
public
framework
::
OperatorWithKernel
{
class
ConvOpGrad
:
public
framework
::
OperatorWithKernel
{
public:
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
};
};
...
@@ -73,9 +84,10 @@ class GemmConvKernel : public framework::OpKernel<T> {
...
@@ -73,9 +84,10 @@ class GemmConvKernel : public framework::OpKernel<T> {
Tensor
*
output
=
context
.
Output
<
Tensor
>
(
"Output"
);
Tensor
*
output
=
context
.
Output
<
Tensor
>
(
"Output"
);
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int
groups
=
context
.
Attr
<
int
>
(
"groups"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
int
groups
=
context
.
Attr
<
int
>
(
"group
s"
);
std
::
vector
<
int
>
dilations
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"dilation
s"
);
const
int
batch_size
=
static_cast
<
int
>
(
input
->
dims
()[
0
]);
const
int
batch_size
=
static_cast
<
int
>
(
input
->
dims
()[
0
]);
...
@@ -106,14 +118,17 @@ class GemmConvKernel : public framework::OpKernel<T> {
...
@@ -106,14 +118,17 @@ class GemmConvKernel : public framework::OpKernel<T> {
framework
::
DDim
col_matrix_shape
=
framework
::
DDim
col_matrix_shape
=
framework
::
flatten_to_2d
(
col_shape
,
filter_shape_vec
.
size
()
+
1
);
framework
::
flatten_to_2d
(
col_shape
,
filter_shape_vec
.
size
()
+
1
);
bool
is_expand
=
IsExpand
(
filter_shape_vec
,
strides
,
paddings
,
dilations
);
Tensor
col
;
Tensor
col
;
col
.
mutable_data
<
T
>
(
col_shape
,
context
.
GetPlace
());
// col_matrix shares the same piece of data with col,
// col_matrix shares the same piece of data with col,
// but will be reshaped into a two-dimensional matrix shape
// but will be reshaped into a two-dimensional matrix shape
// to call the matrix multiplication interface.
// to call the matrix multiplication interface.
Tensor
col_matrix
;
Tensor
col_matrix
;
col_matrix
.
ShareDataWith
(
col
);
if
(
is_expand
)
{
col_matrix
.
Resize
(
col_matrix_shape
);
col
.
mutable_data
<
T
>
(
col_shape
,
context
.
GetPlace
());
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
Resize
(
col_matrix_shape
);
}
framework
::
DDim
input_shape
=
framework
::
slice_ddim
(
framework
::
DDim
input_shape
=
framework
::
slice_ddim
(
input
->
dims
(),
1
,
static_cast
<
int
>
(
input
->
dims
().
size
()));
input
->
dims
(),
1
,
static_cast
<
int
>
(
input
->
dims
().
size
()));
...
@@ -130,24 +145,30 @@ class GemmConvKernel : public framework::OpKernel<T> {
...
@@ -130,24 +145,30 @@ class GemmConvKernel : public framework::OpKernel<T> {
int
in_step
=
static_cast
<
int
>
(
input
->
dims
()[
1
])
/
groups
;
int
in_step
=
static_cast
<
int
>
(
input
->
dims
()[
1
])
/
groups
;
int
out_step
=
static_cast
<
int
>
(
output
->
dims
()[
1
])
/
groups
;
int
out_step
=
static_cast
<
int
>
(
output
->
dims
()[
1
])
/
groups
;
math
::
Vol2ColFunctor
<
Place
,
T
>
vol2col
;
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
im2col
;
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
Tensor
in_batch
=
input
->
Slice
(
i
,
i
+
1
).
Resize
(
input_shape
);
Tensor
in_batch
=
input
->
Slice
(
i
,
i
+
1
).
Resize
(
input_shape
);
Tensor
out_batch
=
output
->
Slice
(
i
,
i
+
1
).
Resize
(
output_matrix_shape
);
Tensor
out_batch
=
output
->
Slice
(
i
,
i
+
1
).
Resize
(
output_matrix_shape
);
for
(
int
g
=
0
;
g
<
groups
;
g
++
)
{
for
(
int
g
=
0
;
g
<
groups
;
g
++
)
{
Tensor
in_slice
=
in_batch
.
Slice
(
g
*
in_step
,
(
g
+
1
)
*
in_step
);
Tensor
in_slice
=
in_batch
.
Slice
(
g
*
in_step
,
(
g
+
1
)
*
in_step
);
if
(
filter_shape_vec
.
size
()
==
2
)
{
if
(
!
is_expand
)
{
col
.
ShareDataWith
(
in_slice
);
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
Resize
(
col_matrix_shape
);
}
else
if
(
filter_shape_vec
.
size
()
==
2
)
{
// im2col
// im2col
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
im2col
;
im2col
(
context
.
device_context
(),
in_slice
,
dilations
,
strides
,
im2col
(
context
.
device_context
(),
in_slice
,
col
,
stride
s
[
0
],
std
::
vector
<
int
>
{
paddings
[
0
],
paddings
[
1
],
padding
s
[
0
],
strides
[
1
],
paddings
[
0
],
paddings
[
0
],
paddings
[
1
]
,
paddings
[
1
]}
,
paddings
[
1
]
);
&
col
);
}
else
if
(
filter_shape_vec
.
size
()
==
3
)
{
}
else
if
(
filter_shape_vec
.
size
()
==
3
)
{
// vol2col
// vol2col
math
::
Vol2ColFunctor
<
Place
,
T
>
vol2col
;
vol2col
(
context
.
device_context
(),
in_slice
,
dilations
,
strides
,
vol2col
(
context
.
device_context
(),
in_slice
,
col
,
strides
[
0
],
paddings
,
&
col
);
strides
[
1
],
strides
[
2
],
paddings
[
0
],
paddings
[
1
],
paddings
[
2
]);
}
}
// gemm
// gemm
...
@@ -178,9 +199,10 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
...
@@ -178,9 +199,10 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
if
(
!
input_grad
&&
!
filter_grad
)
return
;
if
(
!
input_grad
&&
!
filter_grad
)
return
;
int
groups
=
context
.
Attr
<
int
>
(
"groups"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
int
groups
=
context
.
Attr
<
int
>
(
"group
s"
);
std
::
vector
<
int
>
dilations
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"dilation
s"
);
const
int
batch_size
=
static_cast
<
int
>
(
input
->
dims
()[
0
]);
const
int
batch_size
=
static_cast
<
int
>
(
input
->
dims
()[
0
]);
...
@@ -230,14 +252,17 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
...
@@ -230,14 +252,17 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
int
in_step
=
static_cast
<
int
>
(
input
->
dims
()[
1
])
/
groups
;
int
in_step
=
static_cast
<
int
>
(
input
->
dims
()[
1
])
/
groups
;
int
out_step
=
static_cast
<
int
>
(
output_grad
->
dims
()[
1
])
/
groups
;
int
out_step
=
static_cast
<
int
>
(
output_grad
->
dims
()[
1
])
/
groups
;
bool
is_expand
=
IsExpand
(
filter_shape_vec
,
strides
,
paddings
,
dilations
);
Tensor
col
;
Tensor
col
;
// col_matrix shares the same piece of data with col,
// col_matrix shares the same piece of data with col,
// but will be reshaped into a two-dimensional matrix shape
// but will be reshaped into a two-dimensional matrix shape
// to call the matrix multiplication interface.
// to call the matrix multiplication interface.
Tensor
col_matrix
;
Tensor
col_matrix
;
col
.
mutable_data
<
T
>
(
col_shape
,
context
.
GetPlace
());
if
(
is_expand
)
{
col_matrix
.
ShareDataWith
(
col
);
col
.
mutable_data
<
T
>
(
col_shape
,
context
.
GetPlace
());
col_matrix
.
Resize
(
col_matrix_shape
);
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
Resize
(
col_matrix_shape
);
}
math
::
SetConstant
<
Place
,
T
>
set_zero
;
math
::
SetConstant
<
Place
,
T
>
set_zero
;
...
@@ -245,6 +270,9 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
...
@@ -245,6 +270,9 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
set_zero
(
context
.
device_context
(),
input_grad
,
static_cast
<
T
>
(
0
));
set_zero
(
context
.
device_context
(),
input_grad
,
static_cast
<
T
>
(
0
));
math
::
Col2VolFunctor
<
Place
,
T
>
col2vol
;
math
::
Col2ImFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
col2im
;
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
Tensor
out_grad_batch
=
Tensor
out_grad_batch
=
output_grad
->
Slice
(
i
,
i
+
1
).
Resize
(
output_matrix_shape
);
output_grad
->
Slice
(
i
,
i
+
1
).
Resize
(
output_matrix_shape
);
...
@@ -254,24 +282,26 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
...
@@ -254,24 +282,26 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
Tensor
out_grad_slice
=
Tensor
out_grad_slice
=
out_grad_batch
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
out_grad_batch
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
Tensor
filter_slice
=
filter
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
Tensor
filter_slice
=
filter
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
math
::
matmul
<
Place
,
T
>
(
context
.
device_context
(),
filter_slice
,
true
,
out_grad_slice
,
false
,
T
(
1.0
),
&
col_matrix
,
T
(
0.0
));
// col2im
Tensor
in_grad_slice
=
Tensor
in_grad_slice
=
in_grad_batch
.
Slice
(
g
*
in_step
,
(
g
+
1
)
*
in_step
);
in_grad_batch
.
Slice
(
g
*
in_step
,
(
g
+
1
)
*
in_step
);
if
(
filter_shape_vec
.
size
()
==
2
)
{
if
(
!
is_expand
)
{
math
::
Col2ImFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
col2im
;
col_matrix
.
ShareDataWith
(
in_grad_slice
);
col2im
(
context
.
device_context
(),
in_grad_slice
,
col
,
strides
[
0
],
col_matrix
.
Resize
(
col_matrix_shape
);
strides
[
1
],
paddings
[
0
],
paddings
[
0
],
paddings
[
1
],
}
paddings
[
1
]);
math
::
matmul
<
Place
,
T
>
(
context
.
device_context
(),
filter_slice
,
true
,
out_grad_slice
,
false
,
T
(
1.0
),
&
col_matrix
,
T
(
0.0
));
}
else
if
(
filter_shape_vec
.
size
()
==
3
)
{
if
(
is_expand
&&
filter_shape_vec
.
size
()
==
2
)
{
math
::
Col2VolFunctor
<
Place
,
T
>
col2vol
;
col2im
(
context
.
device_context
(),
col
,
dilations
,
strides
,
col2vol
(
context
.
device_context
(),
in_grad_slice
,
col
,
strides
[
0
],
std
::
vector
<
int
>
{
paddings
[
0
],
paddings
[
1
],
paddings
[
0
],
strides
[
1
],
strides
[
2
],
paddings
[
0
],
paddings
[
1
],
paddings
[
1
]},
paddings
[
2
]);
&
in_grad_slice
);
}
else
if
(
is_expand
&&
filter_shape_vec
.
size
()
==
3
)
{
col2vol
(
context
.
device_context
(),
col
,
dilations
,
strides
,
paddings
,
&
in_grad_slice
);
}
}
}
}
}
}
...
@@ -282,7 +312,8 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
...
@@ -282,7 +312,8 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
Tensor
filter_grad_
=
*
filter_grad
;
Tensor
filter_grad_
=
*
filter_grad
;
filter_grad_
.
Resize
(
filter_matrix_shape
);
filter_grad_
.
Resize
(
filter_matrix_shape
);
set_zero
(
context
.
device_context
(),
filter_grad
,
static_cast
<
T
>
(
0
));
set_zero
(
context
.
device_context
(),
filter_grad
,
static_cast
<
T
>
(
0
));
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
im2col
;
math
::
Vol2ColFunctor
<
Place
,
T
>
vol2col
;
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
Tensor
out_grad_batch
=
Tensor
out_grad_batch
=
output_grad
->
Slice
(
i
,
i
+
1
).
Resize
(
output_matrix_shape
);
output_grad
->
Slice
(
i
,
i
+
1
).
Resize
(
output_matrix_shape
);
...
@@ -293,16 +324,18 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
...
@@ -293,16 +324,18 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
out_grad_batch
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
out_grad_batch
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
Tensor
in_slice
=
in_batch
.
Slice
(
g
*
in_step
,
(
g
+
1
)
*
in_step
);
Tensor
in_slice
=
in_batch
.
Slice
(
g
*
in_step
,
(
g
+
1
)
*
in_step
);
if
(
filter_shape_vec
.
size
()
==
2
)
{
if
(
!
is_expand
)
{
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
im2col
;
col
.
ShareDataWith
(
in_slice
);
im2col
(
context
.
device_context
(),
in_slice
,
col
,
strides
[
0
],
col_matrix
.
ShareDataWith
(
col
);
strides
[
1
],
paddings
[
0
],
paddings
[
0
],
paddings
[
1
],
col_matrix
.
Resize
(
col_matrix_shape
);
paddings
[
1
]);
}
else
if
(
filter_shape_vec
.
size
()
==
2
)
{
im2col
(
context
.
device_context
(),
in_slice
,
dilations
,
strides
,
std
::
vector
<
int
>
{
paddings
[
0
],
paddings
[
1
],
paddings
[
0
],
paddings
[
1
]},
&
col
);
}
else
if
(
filter_shape_vec
.
size
()
==
3
)
{
}
else
if
(
filter_shape_vec
.
size
()
==
3
)
{
math
::
Vol2ColFunctor
<
Place
,
T
>
vol2col
;
vol2col
(
context
.
device_context
(),
in_slice
,
dilations
,
strides
,
vol2col
(
context
.
device_context
(),
in_slice
,
col
,
strides
[
0
],
paddings
,
&
col
);
strides
[
1
],
strides
[
2
],
paddings
[
0
],
paddings
[
1
],
paddings
[
2
]);
}
}
// gemm
// gemm
...
...
paddle/operators/conv_transpose_op.cc
浏览文件 @
4fc9f55e
...
@@ -51,7 +51,7 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
...
@@ -51,7 +51,7 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
"as the number of filters."
);
"as the number of filters."
);
std
::
vector
<
int64_t
>
output_shape
({
in_dims
[
0
],
filter_dims
[
1
]});
std
::
vector
<
int64_t
>
output_shape
({
in_dims
[
0
],
filter_dims
[
1
]});
for
(
size_t
i
=
0
;
i
<
padding
s
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
stride
s
.
size
();
++
i
)
{
output_shape
.
push_back
((
in_dims
[
i
+
2
]
-
1
)
*
strides
[
i
]
+
output_shape
.
push_back
((
in_dims
[
i
+
2
]
-
1
)
*
strides
[
i
]
+
filter_dims
[
i
+
2
]);
filter_dims
[
i
+
2
]);
}
}
...
@@ -79,11 +79,13 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(
...
@@ -79,11 +79,13 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(
"The format of output tensor is also NCHW."
);
"The format of output tensor is also NCHW."
);
AddAttr
<
std
::
vector
<
int
>>
(
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
"strides"
,
"(vector defalut:{1, 1}), strides of convolution transpose operator."
)
"(vector<int> defalut:{1, 1}), the strides(h_stride, w_stride) of "
"convolution transpose operator."
)
.
SetDefault
({
1
,
1
});
.
SetDefault
({
1
,
1
});
AddAttr
<
std
::
vector
<
int
>>
(
AddAttr
<
std
::
vector
<
int
>>
(
"paddings"
,
"paddings"
,
"(vector defalut:{0, 0}), paddings of convolution transpose operator."
)
"(vector<int> defalut:{0, 0}), the paddings(h_pad, w_pad) of convolution "
"transpose operator."
)
.
SetDefault
({
0
,
0
});
.
SetDefault
({
0
,
0
});
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Convolution2D Transpose Operator.
Convolution2D Transpose Operator.
...
@@ -132,13 +134,14 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker(
...
@@ -132,13 +134,14 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker(
"Where N is batch size, C is "
"Where N is batch size, C is "
"the number of channels, D is the depth of the feature, H is the "
"the number of channels, D is the depth of the feature, H is the "
"height of the feature, and W is the width of the feature."
);
"height of the feature, and W is the width of the feature."
);
AddAttr
<
std
::
vector
<
int
>>
(
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
"strides"
,
"(vector<int> defalut:{1, 1, 1}), the "
"(vector defalut:{1, 1, 1}), strides of convolution transpose operator."
)
"strides{d_stride, h_stride, w_stride} of "
"convolution transpose operator."
)
.
SetDefault
({
1
,
1
,
1
});
.
SetDefault
({
1
,
1
,
1
});
AddAttr
<
std
::
vector
<
int
>>
(
AddAttr
<
std
::
vector
<
int
>>
(
"paddings"
,
"paddings"
,
"(vector<int> defalut:{0, 0, 0}), paddings(d_pad, "
"(vector defalut:{0, 0, 0}), paddings
of convolution transpose operator."
)
"h_pad, w_pad)
of convolution transpose operator."
)
.
SetDefault
({
0
,
0
,
0
});
.
SetDefault
({
0
,
0
,
0
});
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Convolution3D Transpose Operator.
Convolution3D Transpose Operator.
...
...
paddle/operators/conv_transpose_op.h
浏览文件 @
4fc9f55e
...
@@ -43,16 +43,12 @@ class Conv3DTransposeOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -43,16 +43,12 @@ class Conv3DTransposeOpMaker : public framework::OpProtoAndCheckerMaker {
class
ConvTransposeOp
:
public
framework
::
OperatorWithKernel
{
class
ConvTransposeOp
:
public
framework
::
OperatorWithKernel
{
public:
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
protected:
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
};
};
class
ConvTransposeOpGrad
:
public
framework
::
OperatorWithKernel
{
class
ConvTransposeOpGrad
:
public
framework
::
OperatorWithKernel
{
public:
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
protected:
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
};
};
...
@@ -66,6 +62,8 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
...
@@ -66,6 +62,8 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
Tensor
*
output
=
context
.
Output
<
Tensor
>
(
"Output"
);
Tensor
*
output
=
context
.
Output
<
Tensor
>
(
"Output"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
// Actually, no paddings and groups allowed in conv transpose.
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
// TODO(Zhuoyuan): Paddings can be added in future.
// TODO(Zhuoyuan): Paddings can be added in future.
// groups will alway be disabled in conv2dtranspose.
// groups will alway be disabled in conv2dtranspose.
...
@@ -120,6 +118,10 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
...
@@ -120,6 +118,10 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
math
::
SetConstant
<
Place
,
T
>
set_zero
;
math
::
SetConstant
<
Place
,
T
>
set_zero
;
set_zero
(
context
.
device_context
(),
output
,
static_cast
<
T
>
(
0
));
set_zero
(
context
.
device_context
(),
output
,
static_cast
<
T
>
(
0
));
math
::
Col2ImFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
col2im
;
math
::
Col2VolFunctor
<
Place
,
T
>
col2vol
;
std
::
vector
<
int
>
dilations
({
1
,
1
,
1
});
// convolution transpose: gemm + col2im or col2vol (similar to conv-backward
// convolution transpose: gemm + col2im or col2vol (similar to conv-backward
// on input)
// on input)
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
...
@@ -138,16 +140,16 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
...
@@ -138,16 +140,16 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
if
(
filter_shape_vec
.
size
()
==
2
)
{
if
(
filter_shape_vec
.
size
()
==
2
)
{
// col2im: col_matrix -> dy
// col2im: col_matrix -> dy
// from (c * k_h * k_w, h * w) to (c, o_h, o_w)
// from (c * k_h * k_w, h * w) to (c, o_h, o_w)
math
::
Col2ImFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
col2im
;
col2im
(
context
.
device_context
(),
col
,
std
::
vector
<
int
>
{
dilations
[
0
],
dilations
[
1
]},
strides
,
col2im
(
context
.
device_context
(),
output_batch
,
col
,
strides
[
0
],
std
::
vector
<
int
>
{
paddings
[
0
],
paddings
[
1
],
paddings
[
0
],
strides
[
1
],
0
,
0
,
0
,
0
);
paddings
[
1
]},
&
output_batch
);
}
else
if
(
filter_shape_vec
.
size
()
==
3
)
{
}
else
if
(
filter_shape_vec
.
size
()
==
3
)
{
// col2vol: col_matrix -> dy
// col2vol: col_matrix -> dy
// from (c * k_d * k_h * k_w, d * h * w) to (c, o_d, o_h, o_w)
// from (c * k_d * k_h * k_w, d * h * w) to (c, o_d, o_h, o_w)
math
::
Col2VolFunctor
<
Place
,
T
>
col2vol
;
col2vol
(
context
.
device_context
(),
col
,
dilations
,
strides
,
col2vol
(
context
.
device_context
(),
output_batch
,
col
,
strides
[
0
],
std
::
vector
<
int
>
{
0
,
0
,
0
},
&
output_batch
);
strides
[
1
],
strides
[
2
],
0
,
0
,
0
);
}
}
}
}
}
}
...
@@ -228,6 +230,10 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
...
@@ -228,6 +230,10 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
Tensor
filter_grad_
;
Tensor
filter_grad_
;
math
::
SetConstant
<
Place
,
T
>
set_zero
;
math
::
SetConstant
<
Place
,
T
>
set_zero
;
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
im2col
;
math
::
Vol2ColFunctor
<
Place
,
T
>
vol2col
;
std
::
vector
<
int
>
dilations
({
1
,
1
,
1
});
if
(
input_grad
)
{
if
(
input_grad
)
{
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
set_zero
(
context
.
device_context
(),
input_grad
,
static_cast
<
T
>
(
0
));
set_zero
(
context
.
device_context
(),
input_grad
,
static_cast
<
T
>
(
0
));
...
@@ -247,17 +253,16 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
...
@@ -247,17 +253,16 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
if
(
filter_shape_vec
.
size
()
==
2
)
{
if
(
filter_shape_vec
.
size
()
==
2
)
{
// im2col: dy -> col matrix
// im2col: dy -> col matrix
// from (c, o_h, o_w) to (c * k_h * k_w, h * w)
// from (c, o_h, o_w) to (c * k_h * k_w, h * w)
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
im2col
;
im2col
(
context
.
device_context
(),
output_grad_batch
,
im2col
(
context
.
device_context
(),
output_grad_batch
,
col
,
strides
[
0
],
std
::
vector
<
int
>
{
dilations
[
0
],
dilations
[
1
]},
strides
,
strides
[
1
],
paddings
[
0
],
paddings
[
0
],
paddings
[
1
],
std
::
vector
<
int
>
{
paddings
[
0
],
paddings
[
1
],
paddings
[
0
],
paddings
[
1
]);
paddings
[
1
]},
&
col
);
}
else
if
(
filter_shape_vec
.
size
()
==
3
)
{
}
else
if
(
filter_shape_vec
.
size
()
==
3
)
{
// vol2col: dy -> col_matrix
// vol2col: dy -> col_matrix
// from (c, o_d, o_h, o_w) to (c * k_d * k_h * k_w, d * h * w)
// from (c, o_d, o_h, o_w) to (c * k_d * k_h * k_w, d * h * w)
math
::
Vol2ColFunctor
<
Place
,
T
>
vol2col
;
vol2col
(
context
.
device_context
(),
output_grad_batch
,
dilations
,
vol2col
(
context
.
device_context
(),
output_grad_batch
,
col
,
strides
[
0
],
strides
,
paddings
,
&
col
);
strides
[
1
],
strides
[
2
],
paddings
[
0
],
paddings
[
1
],
paddings
[
2
]);
}
}
if
(
input_grad
)
{
if
(
input_grad
)
{
...
...
paddle/operators/math/context_project.h
浏览文件 @
4fc9f55e
...
@@ -88,13 +88,18 @@ template <typename Place, typename T>
...
@@ -88,13 +88,18 @@ template <typename Place, typename T>
class
ContextProjectFunctor
{
class
ContextProjectFunctor
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
LoDTensor
&
in
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
LoDTensor
&
in
,
const
Tensor
&
padding_data
,
Tensor
&
col
,
const
Tensor
&
padding_data
,
bool
padding_trainable
,
bool
padding_trainable
,
int
context_start
,
int
context_length
,
const
int
context_start
,
const
int
context_length
,
int
context_stride
,
int
up_pad
,
int
down_pad
)
{
const
int
context_stride
,
const
int
up_pad
,
const
int
down_pad
,
Tensor
*
col
)
{
auto
lod_level_0
=
in
.
lod
()[
0
];
auto
lod_level_0
=
in
.
lod
()[
0
];
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kOCF
,
Place
,
float
>
im2col_ocf
;
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kOCF
,
Place
,
float
>
im2col_ocf
;
std
::
vector
<
int
>
dilation
({
1
,
1
});
std
::
vector
<
int
>
padding
({
up_pad
,
0
,
down_pad
,
0
});
std
::
vector
<
int
>
stride
({
context_stride
,
1
});
int
input_row_begin
,
input_row_end
;
int
input_row_begin
,
input_row_end
;
int
sequence_height
,
sequence_width
;
int
sequence_height
,
sequence_width
;
sequence_width
=
in
.
dims
()[
1
];
sequence_width
=
in
.
dims
()[
1
];
...
@@ -105,8 +110,8 @@ class ContextProjectFunctor {
...
@@ -105,8 +110,8 @@ class ContextProjectFunctor {
:
static_cast
<
int
>
(
lod_level_0
[
i
]);
:
static_cast
<
int
>
(
lod_level_0
[
i
]);
input_row_end
=
static_cast
<
int
>
(
lod_level_0
[
i
+
1
]);
input_row_end
=
static_cast
<
int
>
(
lod_level_0
[
i
+
1
]);
Tensor
out_t
=
col
.
Slice
(
static_cast
<
int
>
(
lod_level_0
[
i
]),
Tensor
out_t
=
col
->
Slice
(
static_cast
<
int
>
(
lod_level_0
[
i
]),
static_cast
<
int
>
(
lod_level_0
[
i
+
1
]));
static_cast
<
int
>
(
lod_level_0
[
i
+
1
]));
sequence_height
=
static_cast
<
int
>
(
out_t
.
dims
()[
0
]);
sequence_height
=
static_cast
<
int
>
(
out_t
.
dims
()[
0
]);
...
@@ -123,17 +128,14 @@ class ContextProjectFunctor {
...
@@ -123,17 +128,14 @@ class ContextProjectFunctor {
{
1
,
input_row_end
-
input_row_begin
,
{
1
,
input_row_end
-
input_row_begin
,
sequence_width
});
// input_channels, input_height, input_width
sequence_width
});
// input_channels, input_height, input_width
in_t
.
Resize
(
framework
::
make_ddim
(
input_shape
));
in_t
.
Resize
(
framework
::
make_ddim
(
input_shape
));
im2col_ocf
(
context
,
in_t
,
dilation
,
stride
,
padding
,
&
out_t
);
im2col_ocf
(
context
,
in_t
,
out_t
,
/*stride_height*/
context_stride
,
/*stride_width*/
1
,
up_pad
,
down_pad
,
0
,
0
);
out_t
.
Resize
({
sequence_height
,
context_length
*
sequence_width
});
out_t
.
Resize
({
sequence_height
,
context_length
*
sequence_width
});
}
}
}
}
if
(
padding_trainable
)
{
if
(
padding_trainable
)
{
for
(
int
i
=
0
;
i
<
static_cast
<
int
>
(
lod_level_0
.
size
())
-
1
;
++
i
)
{
for
(
int
i
=
0
;
i
<
static_cast
<
int
>
(
lod_level_0
.
size
())
-
1
;
++
i
)
{
Tensor
out_t
=
col
.
Slice
(
static_cast
<
int
>
(
lod_level_0
[
i
]),
Tensor
out_t
=
col
->
Slice
(
static_cast
<
int
>
(
lod_level_0
[
i
]),
static_cast
<
int
>
(
lod_level_0
[
i
+
1
]));
static_cast
<
int
>
(
lod_level_0
[
i
+
1
]));
sequence_height
=
static_cast
<
int
>
(
out_t
.
dims
()[
0
]);
sequence_height
=
static_cast
<
int
>
(
out_t
.
dims
()[
0
]);
...
@@ -196,14 +198,19 @@ class ContextProjectFunctor {
...
@@ -196,14 +198,19 @@ class ContextProjectFunctor {
template
<
typename
Place
,
typename
T
>
template
<
typename
Place
,
typename
T
>
class
ContextProjectGradFunctor
{
class
ContextProjectGradFunctor
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
LoDTensor
&
in
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
LoDTensor
&
in
,
Tensor
&
padding_data
,
Tensor
&
col
,
bool
padding_trainable
,
bool
padding_trainable
,
const
int
context_start
,
int
context_start
,
int
context_length
,
int
context_stride
,
const
int
context_length
,
const
int
context_stride
,
int
up_pad
,
int
down_pad
,
bool
input_grad
,
bool
pad_grad
)
{
const
int
up_pad
,
const
int
down_pad
,
bool
pad_grad
,
bool
input_grad
,
Tensor
*
padding_data
,
Tensor
*
col
)
{
auto
lod_level_0
=
in
.
lod
()[
0
];
auto
lod_level_0
=
in
.
lod
()[
0
];
math
::
Col2ImFunctor
<
math
::
ColFormat
::
kOCF
,
Place
,
float
>
col2im_ocf
;
math
::
Col2ImFunctor
<
math
::
ColFormat
::
kOCF
,
Place
,
float
>
col2im_ocf
;
std
::
vector
<
int
>
dilation
({
1
,
1
});
std
::
vector
<
int
>
padding
({
up_pad
,
0
,
down_pad
,
0
});
std
::
vector
<
int
>
stride
({
context_stride
,
1
});
int
input_row_begin
,
input_row_end
;
int
input_row_begin
,
input_row_end
;
int
sequence_height
,
sequence_width
;
int
sequence_height
,
sequence_width
;
sequence_width
=
in
.
dims
()[
1
];
sequence_width
=
in
.
dims
()[
1
];
...
@@ -215,8 +222,8 @@ class ContextProjectGradFunctor {
...
@@ -215,8 +222,8 @@ class ContextProjectGradFunctor {
:
static_cast
<
int
>
(
lod_level_0
[
i
]);
:
static_cast
<
int
>
(
lod_level_0
[
i
]);
input_row_end
=
static_cast
<
int
>
(
lod_level_0
[
i
+
1
]);
input_row_end
=
static_cast
<
int
>
(
lod_level_0
[
i
+
1
]);
Tensor
out_t
=
col
.
Slice
(
static_cast
<
int
>
(
lod_level_0
[
i
]),
Tensor
out_t
=
col
->
Slice
(
static_cast
<
int
>
(
lod_level_0
[
i
]),
static_cast
<
int
>
(
lod_level_0
[
i
+
1
]));
static_cast
<
int
>
(
lod_level_0
[
i
+
1
]));
sequence_height
=
static_cast
<
int
>
(
out_t
.
dims
()[
0
]);
sequence_height
=
static_cast
<
int
>
(
out_t
.
dims
()[
0
]);
...
@@ -234,9 +241,7 @@ class ContextProjectGradFunctor {
...
@@ -234,9 +241,7 @@ class ContextProjectGradFunctor {
sequence_width
});
// input_channels, input_height, input_width
sequence_width
});
// input_channels, input_height, input_width
in_t
.
Resize
(
framework
::
make_ddim
(
input_shape
));
in_t
.
Resize
(
framework
::
make_ddim
(
input_shape
));
col2im_ocf
(
context
,
in_t
,
out_t
,
col2im_ocf
(
context
,
out_t
,
dilation
,
stride
,
padding
,
&
in_t
);
/*stride_height*/
context_stride
,
/*stride_width*/
1
,
up_pad
,
down_pad
,
0
,
0
);
out_t
.
Resize
({
sequence_height
,
context_length
*
sequence_width
});
out_t
.
Resize
({
sequence_height
,
context_length
*
sequence_width
});
}
}
}
}
...
@@ -244,8 +249,8 @@ class ContextProjectGradFunctor {
...
@@ -244,8 +249,8 @@ class ContextProjectGradFunctor {
if
(
pad_grad
)
{
if
(
pad_grad
)
{
if
(
padding_trainable
)
{
if
(
padding_trainable
)
{
for
(
int
i
=
0
;
i
<
static_cast
<
int
>
(
lod_level_0
.
size
())
-
1
;
++
i
)
{
for
(
int
i
=
0
;
i
<
static_cast
<
int
>
(
lod_level_0
.
size
())
-
1
;
++
i
)
{
Tensor
out_t
=
col
.
Slice
(
static_cast
<
int
>
(
lod_level_0
[
i
]),
Tensor
out_t
=
col
->
Slice
(
static_cast
<
int
>
(
lod_level_0
[
i
]),
static_cast
<
int
>
(
lod_level_0
[
i
+
1
]));
static_cast
<
int
>
(
lod_level_0
[
i
+
1
]));
sequence_height
=
static_cast
<
int
>
(
out_t
.
dims
()[
0
]);
sequence_height
=
static_cast
<
int
>
(
out_t
.
dims
()[
0
]);
out_t
.
Resize
({
sequence_height
*
context_length
,
sequence_width
});
out_t
.
Resize
({
sequence_height
*
context_length
,
sequence_width
});
...
@@ -259,7 +264,7 @@ class ContextProjectGradFunctor {
...
@@ -259,7 +264,7 @@ class ContextProjectGradFunctor {
k
+
context_length
<
up_pad
?
context_length
:
up_pad
-
k
;
k
+
context_length
<
up_pad
?
context_length
:
up_pad
-
k
;
Tensor
out_t_sub
=
out_t
.
Slice
(
k
*
context_length
,
Tensor
out_t_sub
=
out_t
.
Slice
(
k
*
context_length
,
k
*
context_length
+
padding_size
);
k
*
context_length
+
padding_size
);
Tensor
w_sub
=
padding_data
.
Slice
(
k
,
k
+
padding_size
);
Tensor
w_sub
=
padding_data
->
Slice
(
k
,
k
+
padding_size
);
auto
out_t_sub_e
=
EigenMatrix
<
T
>::
From
(
out_t_sub
);
auto
out_t_sub_e
=
EigenMatrix
<
T
>::
From
(
out_t_sub
);
auto
w_sub_e
=
EigenMatrix
<
T
>::
From
(
w_sub
);
auto
w_sub_e
=
EigenMatrix
<
T
>::
From
(
w_sub
);
w_sub_e
.
device
(
*
context
.
GetEigenDevice
<
Place
>
())
=
w_sub_e
.
device
(
*
context
.
GetEigenDevice
<
Place
>
())
=
...
@@ -292,7 +297,7 @@ class ContextProjectGradFunctor {
...
@@ -292,7 +297,7 @@ class ContextProjectGradFunctor {
Tensor
out_t_sub
=
out_t
.
Slice
(
Tensor
out_t_sub
=
out_t
.
Slice
(
(
down_pad_begin_row
+
t
)
*
context_length
-
padding_size
,
(
down_pad_begin_row
+
t
)
*
context_length
-
padding_size
,
(
down_pad_begin_row
+
t
)
*
context_length
);
(
down_pad_begin_row
+
t
)
*
context_length
);
Tensor
w_sub
=
padding_data
.
Slice
(
Tensor
w_sub
=
padding_data
->
Slice
(
up_pad
+
padding_idx
,
up_pad
+
padding_idx
+
padding_size
);
up_pad
+
padding_idx
,
up_pad
+
padding_idx
+
padding_size
);
auto
out_t_sub_e
=
EigenMatrix
<
T
>::
From
(
out_t_sub
);
auto
out_t_sub_e
=
EigenMatrix
<
T
>::
From
(
out_t_sub
);
auto
w_sub_e
=
EigenMatrix
<
T
>::
From
(
w_sub
);
auto
w_sub_e
=
EigenMatrix
<
T
>::
From
(
w_sub
);
...
...
paddle/operators/math/im2col.cc
浏览文件 @
4fc9f55e
...
@@ -28,57 +28,55 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
...
@@ -28,57 +28,55 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
platform
::
CPUPlace
,
T
>
{
platform
::
CPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
im
,
framework
::
Tensor
&
col
,
const
framework
::
Tensor
&
im
,
const
std
::
vector
<
int
>&
dilation
,
int
stride_height
,
int
stride_width
,
int
padding_up
,
const
std
::
vector
<
int
>&
stride
,
int
padding_down
,
int
padding_left
,
int
padding_right
)
{
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
col
)
{
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
PADDLE_ENFORCE
(
col
->
dims
().
size
()
==
5
);
int
i
nput
_channels
=
im
.
dims
()[
0
];
int
i
m
_channels
=
im
.
dims
()[
0
];
int
i
nput
_height
=
im
.
dims
()[
1
];
int
i
m
_height
=
im
.
dims
()[
1
];
int
i
nput
_width
=
im
.
dims
()[
2
];
int
i
m
_width
=
im
.
dims
()[
2
];
int
filter_height
=
col
.
dims
()[
1
];
int
filter_height
=
col
->
dims
()[
1
];
int
filter_width
=
col
.
dims
()[
2
];
int
filter_width
=
col
->
dims
()[
2
];
int
output_height
=
col
.
dims
()[
3
];
int
col_height
=
col
->
dims
()[
3
];
int
output_width
=
col
.
dims
()[
4
];
int
col_width
=
col
->
dims
()[
4
];
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
(
im_height
+
padding
[
0
]
+
padding
[
2
]
-
(
input_height
+
padding_up
+
padding_down
-
filter_height
)
/
((
dilation
[
0
]
*
(
filter_height
-
1
)
+
1
))
)
/
stride_height
+
stride
[
0
]
+
1
,
1
,
output
_height
,
col
_height
,
"Output_height and padding(padding_up, padding_down) are "
"Output_height and padding(padding_up, padding_down) are "
"inconsistent."
);
"inconsistent."
);
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
(
im_width
+
padding
[
1
]
+
padding
[
3
]
-
(
input_width
+
padding_left
+
padding_right
-
filter_width
)
/
((
dilation
[
1
]
*
(
filter_width
-
1
)
+
1
))
)
/
stride_width
+
stride
[
1
]
+
1
,
1
,
output
_width
,
col
_width
,
"output_width and padding(padding_left, padding_right
) are "
"Output_height and padding(padding_up, padding_down
) are "
"inconsistent."
);
"inconsistent."
);
int
channels_col
=
i
nput
_channels
*
filter_height
*
filter_width
;
int
channels_col
=
i
m
_channels
*
filter_height
*
filter_width
;
const
T
*
im_data
=
im
.
data
<
T
>
();
const
T
*
im_data
=
im
.
data
<
T
>
();
T
*
col_data
=
col
.
data
<
T
>
();
T
*
col_data
=
col
->
data
<
T
>
();
for
(
int
c
=
0
;
c
<
channels_col
;
++
c
)
{
for
(
int
c
=
0
;
c
<
channels_col
;
++
c
)
{
int
w_offset
=
c
%
filter_width
;
int
w_offset
=
c
%
filter_width
;
int
h_offset
=
(
c
/
filter_width
)
%
filter_height
;
int
h_offset
=
(
c
/
filter_width
)
%
filter_height
;
int
c_im
=
c
/
filter_width
/
filter_height
;
int
c_im
=
c
/
filter_width
/
filter_height
;
for
(
int
h
=
0
;
h
<
output_height
;
++
h
)
{
for
(
int
h
=
0
;
h
<
col_height
;
++
h
)
{
for
(
int
w
=
0
;
w
<
output_width
;
++
w
)
{
for
(
int
w
=
0
;
w
<
col_width
;
++
w
)
{
int
im_row_idx
=
h
*
stride_height
+
h_offset
-
padding_up
;
int
im_row_idx
=
h
*
stride
[
0
]
-
padding
[
0
]
+
h_offset
*
dilation
[
0
];
int
im_col_idx
=
w
*
stride_width
+
w_offset
-
padding_left
;
int
im_col_idx
=
w
*
stride
[
1
]
-
padding
[
1
]
+
w_offset
*
dilation
[
1
];
int
col_idx
=
(
c
*
col_height
+
h
)
*
col_width
+
w
;
int
im_idx
=
(
im_row_idx
+
c_im
*
im_height
)
*
im_width
+
im_col_idx
;
if
(
im_row_idx
<
0
||
im_row_idx
>=
input_height
||
im_col_idx
<
0
||
col_data
[
col_idx
]
=
(
im_row_idx
<
0
||
im_row_idx
>=
im_height
||
im_col_idx
>=
input_width
)
{
im_col_idx
<
0
||
im_col_idx
>=
im_width
)
col_data
[(
c
*
output_height
+
h
)
*
output_width
+
w
]
=
T
(
0
);
?
static_cast
<
T
>
(
0
)
}
else
{
:
im_data
[
im_idx
];
im_row_idx
+=
c_im
*
input_height
;
col_data
[(
c
*
output_height
+
h
)
*
output_width
+
w
]
=
im_data
[
im_row_idx
*
input_width
+
im_col_idx
];
}
}
}
}
}
}
}
...
@@ -94,54 +92,55 @@ template <class T>
...
@@ -94,54 +92,55 @@ template <class T>
class
Col2ImFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kCFO
,
class
Col2ImFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kCFO
,
platform
::
CPUPlace
,
T
>
{
platform
::
CPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
im
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
col
,
int
stride_height
,
const
framework
::
Tensor
&
col
,
int
stride_width
,
int
padding_up
,
int
padding_down
,
const
std
::
vector
<
int
>&
dilation
,
int
padding_left
,
int
padding_right
)
{
const
std
::
vector
<
int
>&
stride
,
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
im
)
{
PADDLE_ENFORCE
(
im
->
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
int
i
nput_channels
=
im
.
dims
()[
0
];
int
i
m_channels
=
im
->
dims
()[
0
];
int
i
nput_height
=
im
.
dims
()[
1
];
int
i
m_height
=
im
->
dims
()[
1
];
int
i
nput_width
=
im
.
dims
()[
2
];
int
i
m_width
=
im
->
dims
()[
2
];
int
filter_height
=
col
.
dims
()[
1
];
int
filter_height
=
col
.
dims
()[
1
];
int
filter_width
=
col
.
dims
()[
2
];
int
filter_width
=
col
.
dims
()[
2
];
int
output
_height
=
col
.
dims
()[
3
];
int
col
_height
=
col
.
dims
()[
3
];
int
output
_width
=
col
.
dims
()[
4
];
int
col
_width
=
col
.
dims
()[
4
];
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
(
im_height
+
padding
[
0
]
+
padding
[
2
]
-
(
input_height
+
padding_up
+
padding_down
-
filter_height
)
/
((
dilation
[
0
]
*
(
filter_height
-
1
)
+
1
))
)
/
stride_height
+
stride
[
0
]
+
1
,
1
,
output
_height
,
col
_height
,
"Output_height and padding(padding_up, padding_down) are "
"Output_height and padding(padding_up, padding_down) are "
"inconsistent."
);
"inconsistent."
);
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
(
im_width
+
padding
[
1
]
+
padding
[
3
]
-
(
input_width
+
padding_left
+
padding_right
-
filter_width
)
/
((
dilation
[
1
]
*
(
filter_width
-
1
)
+
1
))
)
/
stride_width
+
stride
[
1
]
+
1
,
1
,
output
_width
,
col
_width
,
"output_width and padding(padding_left, padding_right
) are "
"Output_height and padding(padding_up, padding_down
) are "
"inconsistent."
);
"inconsistent."
);
int
channels_col
=
i
nput
_channels
*
filter_height
*
filter_width
;
int
channels_col
=
i
m
_channels
*
filter_height
*
filter_width
;
T
*
im_data
=
im
.
data
<
T
>
();
T
*
im_data
=
im
->
data
<
T
>
();
const
T
*
col_data
=
col
.
data
<
T
>
();
const
T
*
col_data
=
col
.
data
<
T
>
();
for
(
int
c
=
0
;
c
<
channels_col
;
++
c
)
{
for
(
int
c
=
0
;
c
<
channels_col
;
++
c
)
{
int
w_offset
=
c
%
filter_width
;
int
w_offset
=
c
%
filter_width
;
int
h_offset
=
(
c
/
filter_width
)
%
filter_height
;
int
h_offset
=
(
c
/
filter_width
)
%
filter_height
;
int
c_im
=
c
/
filter_width
/
filter_height
;
int
c_im
=
c
/
filter_width
/
filter_height
;
for
(
int
h
=
0
;
h
<
output
_height
;
++
h
)
{
for
(
int
h
=
0
;
h
<
col
_height
;
++
h
)
{
for
(
int
w
=
0
;
w
<
output
_width
;
++
w
)
{
for
(
int
w
=
0
;
w
<
col
_width
;
++
w
)
{
int
im_row_idx
=
h
*
stride
_height
+
h_offset
-
padding_up
;
int
im_row_idx
=
h
*
stride
[
0
]
-
padding
[
0
]
+
h_offset
*
dilation
[
0
]
;
int
im_col_idx
=
w
*
stride
_width
+
w_offset
-
padding_left
;
int
im_col_idx
=
w
*
stride
[
1
]
-
padding
[
1
]
+
w_offset
*
dilation
[
1
]
;
if
((
im_row_idx
)
>=
0
&&
(
im_row_idx
)
<
i
nput
_height
&&
if
((
im_row_idx
)
>=
0
&&
(
im_row_idx
)
<
i
m
_height
&&
(
im_col_idx
)
>=
0
&&
(
im_col_idx
)
<
i
nput
_width
)
{
(
im_col_idx
)
>=
0
&&
(
im_col_idx
)
<
i
m
_width
)
{
im_row_idx
+=
c_im
*
i
nput
_height
;
im_row_idx
+=
c_im
*
i
m
_height
;
im_data
[
im_row_idx
*
i
nput
_width
+
im_col_idx
]
+=
im_data
[
im_row_idx
*
i
m
_width
+
im_col_idx
]
+=
col_data
[(
c
*
output_height
+
h
)
*
output
_width
+
w
];
col_data
[(
c
*
col_height
+
h
)
*
col
_width
+
w
];
}
}
}
}
}
}
...
@@ -168,64 +167,59 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
...
@@ -168,64 +167,59 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
platform
::
CPUPlace
,
T
>
{
platform
::
CPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
im
,
framework
::
Tensor
&
col
,
const
framework
::
Tensor
&
im
,
const
std
::
vector
<
int
>&
dilation
,
int
stride_height
,
int
stride_width
,
int
padding_up
,
const
std
::
vector
<
int
>&
stride
,
int
padding_down
,
int
padding_left
,
int
padding_right
)
{
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
col
)
{
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
PADDLE_ENFORCE
(
col
->
dims
().
size
()
==
5
);
int
i
nput
_channels
=
im
.
dims
()[
0
];
int
i
m
_channels
=
im
.
dims
()[
0
];
int
i
nput
_height
=
im
.
dims
()[
1
];
int
i
m
_height
=
im
.
dims
()[
1
];
int
i
nput
_width
=
im
.
dims
()[
2
];
int
i
m
_width
=
im
.
dims
()[
2
];
int
filter_height
=
col
.
dims
()[
3
];
int
filter_height
=
col
->
dims
()[
3
];
int
filter_width
=
col
.
dims
()[
4
];
int
filter_width
=
col
->
dims
()[
4
];
int
output_height
=
col
.
dims
()[
0
];
int
col_height
=
col
->
dims
()[
0
];
int
output_width
=
col
.
dims
()[
1
];
int
col_width
=
col
->
dims
()[
1
];
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
(
input_height
+
padding_up
+
padding_down
-
filter_height
)
/
(
im_height
+
padding
[
0
]
+
padding
[
2
]
-
filter_height
)
/
stride
[
0
]
+
1
,
stride_height
+
col_height
,
1
,
output_height
,
"Output_height and padding(padding_up, padding_down) are "
"Output_height and padding(padding_up, padding_down) are "
"inconsistent."
);
"inconsistent."
);
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
(
input_width
+
padding_left
+
padding_right
-
filter_width
)
/
(
im_width
+
padding
[
1
]
+
padding
[
3
]
-
filter_width
)
/
stride
[
1
]
+
1
,
stride_width
+
col_width
,
1
,
"col_width and padding(padding_left, padding_right) are "
output_width
,
"output_width and padding(padding_left, padding_right) are "
"inconsistent."
);
"inconsistent."
);
const
T
*
im_data
=
im
.
data
<
T
>
();
const
T
*
im_data
=
im
.
data
<
T
>
();
T
*
col_data
=
col
.
data
<
T
>
();
T
*
col_data
=
col
->
data
<
T
>
();
for
(
int
col_row_idx
=
0
;
col_row_idx
<
output
_height
;
++
col_row_idx
)
{
for
(
int
col_row_idx
=
0
;
col_row_idx
<
col
_height
;
++
col_row_idx
)
{
for
(
int
col_col_idx
=
0
;
col_col_idx
<
output
_width
;
++
col_col_idx
)
{
for
(
int
col_col_idx
=
0
;
col_col_idx
<
col
_width
;
++
col_col_idx
)
{
for
(
int
channel
=
0
;
channel
<
i
nput
_channels
;
++
channel
)
{
for
(
int
channel
=
0
;
channel
<
i
m
_channels
;
++
channel
)
{
for
(
int
filter_row_idx
=
0
;
filter_row_idx
<
filter_height
;
for
(
int
filter_row_idx
=
0
;
filter_row_idx
<
filter_height
;
++
filter_row_idx
)
{
++
filter_row_idx
)
{
for
(
int
filter_col_idx
=
0
;
filter_col_idx
<
filter_width
;
for
(
int
filter_col_idx
=
0
;
filter_col_idx
<
filter_width
;
++
filter_col_idx
)
{
++
filter_col_idx
)
{
int
im_row_offset
=
int
im_row_offset
=
col_row_idx
*
stride
_height
+
filter_row_idx
-
padding_up
;
col_row_idx
*
stride
[
0
]
+
filter_row_idx
-
padding
[
0
]
;
int
im_col_offset
=
int
im_col_offset
=
col_col_idx
*
stride_width
+
filter_col_idx
-
padding_left
;
col_col_idx
*
stride
[
1
]
+
filter_col_idx
-
padding
[
1
];
int
col_offset
=
((((
col_row_idx
)
*
output_width
+
col_col_idx
)
*
int
col_offset
=
input_channels
+
((((
col_row_idx
)
*
col_width
+
col_col_idx
)
*
im_channels
+
channel
)
*
channel
)
*
filter_height
+
filter_height
+
filter_row_idx
)
*
filter_row_idx
)
*
filter_width
+
filter_width
+
filter_col_idx
;
filter_col_idx
;
if
(
im_row_offset
<
0
||
im_row_offset
>=
input_height
||
im_col_offset
<
0
||
im_col_offset
>=
input_width
)
{
int
im_offset
=
(
channel
*
im_height
+
im_row_offset
)
*
im_width
+
col_data
[
col_offset
]
=
T
(
0
);
im_col_offset
;
}
else
{
col_data
[
col_offset
]
=
int
im_offset
=
(
im_row_offset
<
0
||
im_row_offset
>=
im_height
||
(
channel
*
input_height
+
im_row_offset
)
*
input_width
+
im_col_offset
<
0
||
im_col_offset
>=
im_width
)
im_col_offset
;
?
static_cast
<
T
>
(
0
)
col_data
[
col_offset
]
=
im_data
[
im_offset
];
:
im_data
[
im_offset
];
}
}
}
}
}
}
}
...
@@ -243,60 +237,57 @@ template <class T>
...
@@ -243,60 +237,57 @@ template <class T>
class
Col2ImFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
class
Col2ImFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
platform
::
CPUPlace
,
T
>
{
platform
::
CPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
im
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
col
,
int
stride_height
,
const
framework
::
Tensor
&
col
,
int
stride_width
,
int
padding_up
,
int
padding_down
,
const
std
::
vector
<
int
>&
dilation
,
int
padding_left
,
int
padding_right
)
{
const
std
::
vector
<
int
>&
stride
,
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
im
)
{
PADDLE_ENFORCE
(
im
->
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
int
i
nput_channels
=
im
.
dims
()[
0
];
int
i
m_channels
=
im
->
dims
()[
0
];
int
i
nput_height
=
im
.
dims
()[
1
];
int
i
m_height
=
im
->
dims
()[
1
];
int
i
nput_width
=
im
.
dims
()[
2
];
int
i
m_width
=
im
->
dims
()[
2
];
int
filter_height
=
col
.
dims
()[
3
];
int
filter_height
=
col
.
dims
()[
3
];
int
filter_width
=
col
.
dims
()[
4
];
int
filter_width
=
col
.
dims
()[
4
];
int
output
_height
=
col
.
dims
()[
0
];
int
col
_height
=
col
.
dims
()[
0
];
int
output
_width
=
col
.
dims
()[
1
];
int
col
_width
=
col
.
dims
()[
1
];
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
(
input_height
+
padding_up
+
padding_down
-
filter_height
)
/
(
im_height
+
padding
[
0
]
+
padding
[
2
]
-
filter_height
)
/
stride
[
0
]
+
1
,
stride_height
+
col_height
,
1
,
output_height
,
"Output_height and padding(padding_up, padding_down) are "
"Output_height and padding(padding_up, padding_down) are "
"inconsistent."
);
"inconsistent."
);
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
(
input_width
+
padding_left
+
padding_right
-
filter_width
)
/
(
im_width
+
padding
[
1
]
+
padding
[
3
]
-
filter_width
)
/
stride
[
1
]
+
1
,
stride_width
+
col_width
,
1
,
"col_width and padding(padding_left, padding_right) are "
output_width
,
"output_width and padding(padding_left, padding_right) are "
"inconsistent."
);
"inconsistent."
);
T
*
im_data
=
im
.
data
<
T
>
();
T
*
im_data
=
im
->
data
<
T
>
();
const
T
*
col_data
=
col
.
data
<
T
>
();
const
T
*
col_data
=
col
.
data
<
T
>
();
for
(
int
col_row_idx
=
0
;
col_row_idx
<
output
_height
;
++
col_row_idx
)
{
for
(
int
col_row_idx
=
0
;
col_row_idx
<
col
_height
;
++
col_row_idx
)
{
for
(
int
col_col_idx
=
0
;
col_col_idx
<
output
_width
;
++
col_col_idx
)
{
for
(
int
col_col_idx
=
0
;
col_col_idx
<
col
_width
;
++
col_col_idx
)
{
for
(
int
channel
=
0
;
channel
<
i
nput
_channels
;
++
channel
)
{
for
(
int
channel
=
0
;
channel
<
i
m
_channels
;
++
channel
)
{
for
(
int
filter_row_idx
=
0
;
filter_row_idx
<
filter_height
;
for
(
int
filter_row_idx
=
0
;
filter_row_idx
<
filter_height
;
++
filter_row_idx
)
{
++
filter_row_idx
)
{
for
(
int
filter_col_idx
=
0
;
filter_col_idx
<
filter_width
;
for
(
int
filter_col_idx
=
0
;
filter_col_idx
<
filter_width
;
++
filter_col_idx
)
{
++
filter_col_idx
)
{
int
im_row_offset
=
int
im_row_offset
=
col_row_idx
*
stride
_height
+
filter_row_idx
-
padding_up
;
col_row_idx
*
stride
[
0
]
+
filter_row_idx
-
padding
[
0
]
;
int
im_col_offset
=
int
im_col_offset
=
col_col_idx
*
stride
_width
+
filter_col_idx
-
padding_left
;
col_col_idx
*
stride
[
1
]
+
filter_col_idx
-
padding
[
1
]
;
int
col_offset
=
(((
col_row_idx
*
output_width
+
col_col_idx
)
*
int
col_offset
=
input
_channels
+
(((
col_row_idx
*
col_width
+
col_col_idx
)
*
im
_channels
+
channel
)
*
channel
)
*
filter_height
+
filter_height
+
filter_row_idx
)
*
filter_row_idx
)
*
filter_width
+
filter_width
+
filter_col_idx
;
filter_col_idx
;
if
(
im_row_offset
>=
0
&&
im_row_offset
<
i
nput
_height
&&
if
(
im_row_offset
>=
0
&&
im_row_offset
<
i
m
_height
&&
im_col_offset
>=
0
&&
im_col_offset
<
i
nput
_width
)
{
im_col_offset
>=
0
&&
im_col_offset
<
i
m
_width
)
{
int
im_offset
=
int
im_offset
=
(
channel
*
i
nput_height
+
im_row_offset
)
*
input
_width
+
(
channel
*
i
m_height
+
im_row_offset
)
*
im
_width
+
im_col_offset
;
im_col_offset
;
im_data
[
im_offset
]
+=
col_data
[
col_offset
];
im_data
[
im_offset
]
+=
col_data
[
col_offset
];
}
}
...
...
paddle/operators/math/im2col.cu
浏览文件 @
4fc9f55e
...
@@ -20,36 +20,32 @@ namespace operators {
...
@@ -20,36 +20,32 @@ namespace operators {
namespace
math
{
namespace
math
{
template
<
class
T
>
template
<
class
T
>
__global__
void
im2col
(
const
T
*
data_im
,
int
num_outs
,
int
height
,
int
width
,
__global__
void
im2col
(
const
T
*
data_im
,
int
num_outs
,
int
im_height
,
int
im_width
,
int
dilation_h
,
int
dilation_w
,
int
filter_height
,
int
filter_width
,
int
stride_height
,
int
filter_height
,
int
filter_width
,
int
stride_height
,
int
stride_width
,
int
padding_height
,
int
padding_width
,
int
stride_width
,
int
padding_height
,
int
padding_width
,
int
output_height
,
int
output_width
,
T
*
data_col
)
{
int
col_height
,
int
col_width
,
T
*
data_col
)
{
int
index
=
(
blockIdx
.
x
*
gridDim
.
y
+
blockIdx
.
y
)
*
blockDim
.
x
+
threadIdx
.
x
;
const
int
index
=
(
blockIdx
.
x
*
gridDim
.
y
+
blockIdx
.
y
)
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
index
<
num_outs
)
{
if
(
index
<
num_outs
)
{
int
w_out
=
index
%
output_width
;
int
w_out
=
index
%
col_width
;
index
/=
output_width
;
int
h_out
=
(
index
/
col_width
)
%
col_height
;
int
h_out
=
index
%
output_height
;
int
channel_in
=
index
/
col_width
/
col_height
;
int
channel_in
=
index
/
output_height
;
int
channel_out
=
channel_in
*
filter_height
*
filter_width
;
int
channel_out
=
channel_in
*
filter_height
*
filter_width
;
int
h_in
=
h_out
*
stride_height
;
int
h_in
=
h_out
*
stride_height
-
padding_height
;
int
w_in
=
w_out
*
stride_width
;
int
w_in
=
w_out
*
stride_width
-
padding_width
;
data_col
+=
(
channel_out
*
output_height
+
h_out
)
*
output_width
+
w_out
;
data_col
+=
(
channel_out
*
col_height
+
h_out
)
*
col_width
+
w_out
;
data_im
+=
(
channel_in
*
im_height
+
h_in
)
*
im_width
+
w_in
;
for
(
int
i
=
0
;
i
<
filter_height
;
++
i
)
{
for
(
int
i
=
0
;
i
<
filter_height
;
++
i
)
{
for
(
int
j
=
0
;
j
<
filter_width
;
++
j
)
{
for
(
int
j
=
0
;
j
<
filter_width
;
++
j
)
{
int
rIdx
=
int
(
h_in
+
i
);
int
rIdx
=
h_in
+
i
*
dilation_h
;
int
cIdx
=
int
(
w_in
+
j
);
int
cIdx
=
w_in
+
j
*
dilation_w
;
if
((
rIdx
-
(
int
)
padding_height
)
>=
(
int
)
height
||
*
data_col
=
(
rIdx
-
(
int
)
padding_height
)
<
0
||
(
rIdx
>=
im_height
||
rIdx
<
0
||
cIdx
>=
im_width
||
cIdx
<
0
)
(
cIdx
-
(
int
)
padding_width
)
>=
(
int
)
width
||
?
0
(
cIdx
-
(
int
)
padding_width
)
<
0
)
{
:
data_im
[
i
*
dilation_h
*
im_width
+
j
*
dilation_w
];
*
data_col
=
0
;
data_col
+=
col_height
*
col_width
;
}
else
{
rIdx
=
rIdx
+
channel_in
*
height
-
padding_height
;
cIdx
=
cIdx
-
padding_width
;
*
data_col
=
data_im
[
rIdx
*
width
+
cIdx
];
}
data_col
+=
output_height
*
output_width
;
}
}
}
}
}
}
...
@@ -65,30 +61,36 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
...
@@ -65,30 +61,36 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
platform
::
GPUPlace
,
T
>
{
platform
::
GPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
im
,
framework
::
Tensor
&
col
,
const
framework
::
Tensor
&
im
,
const
std
::
vector
<
int
>&
dilation
,
int
stride_height
,
int
stride_width
,
int
padding_up
,
const
std
::
vector
<
int
>&
stride
,
int
padding_down
,
int
padding_left
,
int
padding_right
)
{
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
col
)
{
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
PADDLE_ENFORCE
(
col
->
dims
().
size
()
==
5
);
int
input_channels
=
im
.
dims
()[
0
];
int
im_channels
=
im
.
dims
()[
0
];
int
input_height
=
im
.
dims
()[
1
];
int
im_height
=
im
.
dims
()[
1
];
int
input_width
=
im
.
dims
()[
2
];
int
im_width
=
im
.
dims
()[
2
];
int
filter_height
=
col
.
dims
()[
1
];
int
filter_height
=
col
->
dims
()[
1
];
int
filter_width
=
col
.
dims
()[
2
];
int
filter_width
=
col
->
dims
()[
2
];
int
output_height
=
col
.
dims
()[
3
];
int
col_height
=
col
->
dims
()[
3
];
int
output_width
=
col
.
dims
()[
4
];
int
col_width
=
col
->
dims
()[
4
];
PADDLE_ENFORCE
((
input_height
+
padding_up
+
padding_down
-
filter_height
)
/
PADDLE_ENFORCE_EQ
((
im_height
+
padding
[
0
]
+
padding
[
2
]
-
stride_height
+
(
dilation
[
0
]
*
(
filter_height
-
1
)
+
1
))
/
1
==
stride
[
0
]
+
output_height
);
1
,
PADDLE_ENFORCE
((
input_width
+
padding_left
+
padding_right
-
filter_width
)
/
col_height
,
stride_width
+
"Output_height and padding(padding_up, padding_down) are "
1
==
"inconsistent."
);
output_width
);
PADDLE_ENFORCE_EQ
((
im_width
+
padding
[
1
]
+
padding
[
3
]
-
(
dilation
[
1
]
*
(
filter_width
-
1
)
+
1
))
/
int
num_outputs
=
input_channels
*
output_height
*
output_width
;
stride
[
1
]
+
1
,
col_width
,
"col_width and padding(padding_left, padding_right) are "
"inconsistent."
);
int
num_outputs
=
im_channels
*
col_height
*
col_width
;
int
blocks
=
(
num_outputs
+
1024
-
1
)
/
1024
;
int
blocks
=
(
num_outputs
+
1024
-
1
)
/
1024
;
int
block_x
=
512
;
int
block_x
=
512
;
int
block_y
=
(
blocks
+
512
-
1
)
/
512
;
int
block_y
=
(
blocks
+
512
-
1
)
/
512
;
...
@@ -97,56 +99,57 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
...
@@ -97,56 +99,57 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
im2col
<
T
><<<
grid
,
threads
,
0
,
im2col
<
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
.
stream
()
>>>
(
im
.
data
<
T
>
(),
num_outputs
,
i
nput_height
,
input_width
,
filter_height
,
im
.
data
<
T
>
(),
num_outputs
,
i
m_height
,
im_width
,
dilation
[
0
]
,
filter_width
,
stride_height
,
stride_width
,
padding_up
,
padding_left
,
dilation
[
1
],
filter_height
,
filter_width
,
stride
[
0
],
stride
[
1
]
,
output_height
,
output_width
,
col
.
data
<
T
>
());
padding
[
0
],
padding
[
1
],
col_height
,
col_width
,
col
->
data
<
T
>
());
}
}
};
};
template
<
class
T
>
template
<
class
T
>
__global__
void
col2im
(
size_t
n
,
const
T
*
data_col
,
size_t
height
,
size_t
width
,
__global__
void
col2im
(
int
n
,
const
T
*
data_col
,
int
im_height
,
int
im_width
,
size_t
channels
,
size_t
filter_height
,
int
dilation_h
,
int
dilation_w
,
int
filter_height
,
size_t
filter_width
,
size_t
stride_height
,
int
filter_width
,
int
stride_height
,
int
stride_width
,
size_t
stride_width
,
size_t
padding_height
,
int
padding_height
,
int
padding_width
,
int
col_height
,
size_t
padding_width
,
size_t
output_height
,
int
col_width
,
T
*
data_im
)
{
size_t
output_width
,
T
*
data_im
)
{
const
int
index
=
size_t
index
=
(
blockIdx
.
x
*
gridDim
.
y
+
blockIdx
.
y
)
*
blockDim
.
x
+
threadIdx
.
x
;
(
blockIdx
.
x
*
gridDim
.
y
+
blockIdx
.
y
)
*
blockDim
.
x
+
threadIdx
.
x
;
const
int
d_filter_height
=
dilation_h
*
(
filter_height
-
1
)
+
1
;
const
int
d_filter_width
=
dilation_w
*
(
filter_width
-
1
)
+
1
;
if
(
index
<
n
)
{
if
(
index
<
n
)
{
T
val
=
0
;
T
val
=
0
;
int
w
=
int
(
index
%
width
);
int
w
=
index
%
im_width
;
int
h
=
int
((
index
/
width
)
%
height
);
int
h
=
(
index
/
im_width
)
%
im_height
;
int
c
=
int
(
index
/
(
width
*
height
));
int
c
=
index
/
(
im_width
*
im_height
);
if
((
w
-
(
int
)
padding_width
)
>=
0
&&
(
w
-
(
int
)
padding_width
)
<
(
width
-
2
*
padding_width
)
&&
// compute the start and end of the output
(
h
-
(
int
)
padding_height
)
>=
0
&&
int
w_col_start
=
(
h
-
padding_height
)
<
(
height
-
2
*
padding_height
))
{
(
w
<
d_filter_width
)
?
0
:
(
w
-
d_filter_width
)
/
stride_width
+
1
;
// compute the start and end of the output
int
w_col_end
=
min
(
w
/
stride_width
+
1
,
col_width
);
int
w_col_start
=
(
w
<
(
int
)
filter_width
)
int
h_col_start
=
?
0
(
h
<
d_filter_height
)
?
0
:
(
h
-
d_filter_height
)
/
stride_height
+
1
;
:
(
w
-
int
(
filter_width
))
/
(
int
)
stride_width
+
1
;
int
h_col_end
=
min
(
h
/
stride_height
+
1
,
col_height
);
int
w_col_end
=
min
((
int
)(
w
/
(
int
)
stride_width
+
1
),
(
int
)(
output_width
));
for
(
int
h_col
=
h_col_start
;
h_col
<
h_col_end
;
++
h_col
)
{
int
h_col_start
=
(
h
<
(
int
)
filter_height
)
for
(
int
w_col
=
w_col_start
;
w_col
<
w_col_end
;
++
w_col
)
{
?
0
int
h_off
=
(
h
-
h_col
*
stride_height
);
:
(
h
-
(
int
)
filter_height
)
/
(
int
)
stride_height
+
1
;
int
w_off
=
(
w
-
w_col
*
stride_width
);
int
h_col_end
=
min
(
int
(
h
/
stride_height
+
1
),
int
(
output_height
));
if
(
h_off
%
dilation_h
==
0
&&
w_off
%
dilation_w
==
0
)
{
for
(
int
h_col
=
h_col_start
;
h_col
<
h_col_end
;
++
h_col
)
{
h_off
/=
dilation_h
;
for
(
int
w_col
=
w_col_start
;
w_col
<
w_col_end
;
++
w_col
)
{
w_off
/=
dilation_w
;
// the col location: [c * width * height + h_out, w_out]
int
data_col_index
=
int
c_col
=
int
(
c
*
filter_height
*
filter_width
)
+
(((
c
*
filter_height
+
h_off
)
*
filter_width
+
w_off
)
*
(
h
-
h_col
*
(
int
)
stride_height
)
*
(
int
)
filter_width
+
col_height
+
(
w
-
w_col
*
(
int
)
stride_width
);
h_col
)
*
val
+=
col_width
+
data_col
[(
c_col
*
output_height
+
h_col
)
*
output_width
+
w_col
];
w_col
;
val
+=
data_col
[
data_col_index
];
}
}
}
}
h
-=
padding_height
;
w
-=
padding_width
;
data_im
[
c
*
((
width
-
2
*
padding_width
)
*
(
height
-
2
*
padding_height
))
+
h
*
(
width
-
2
*
padding_width
)
+
w
]
+=
val
;
}
}
data_im
[
index
]
=
val
;
}
}
}
}
...
@@ -159,33 +162,38 @@ template <class T>
...
@@ -159,33 +162,38 @@ template <class T>
class
Col2ImFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kCFO
,
class
Col2ImFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kCFO
,
platform
::
GPUPlace
,
T
>
{
platform
::
GPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
im
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
col
,
int
stride_height
,
const
framework
::
Tensor
&
col
,
int
stride_width
,
int
padding_up
,
int
padding_down
,
const
std
::
vector
<
int
>&
dilation
,
int
padding_left
,
int
padding_right
)
{
const
std
::
vector
<
int
>&
stride
,
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
im
)
{
PADDLE_ENFORCE
(
im
->
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
int
i
nput_channels
=
im
.
dims
()[
0
];
int
i
m_channels
=
im
->
dims
()[
0
];
int
i
nput_height
=
im
.
dims
()[
1
];
int
i
m_height
=
im
->
dims
()[
1
];
int
i
nput_width
=
im
.
dims
()[
2
];
int
i
m_width
=
im
->
dims
()[
2
];
int
filter_height
=
col
.
dims
()[
1
];
int
filter_height
=
col
.
dims
()[
1
];
int
filter_width
=
col
.
dims
()[
2
];
int
filter_width
=
col
.
dims
()[
2
];
int
output_height
=
col
.
dims
()[
3
];
int
col_height
=
col
.
dims
()[
3
];
int
output_width
=
col
.
dims
()[
4
];
int
col_width
=
col
.
dims
()[
4
];
PADDLE_ENFORCE
((
input_height
+
padding_up
+
padding_down
-
filter_height
)
/
PADDLE_ENFORCE_EQ
((
im_height
+
padding
[
0
]
+
padding
[
2
]
-
stride_height
+
(
dilation
[
0
]
*
(
filter_height
-
1
)
+
1
))
/
1
==
stride
[
0
]
+
output_height
);
1
,
PADDLE_ENFORCE
((
input_width
+
padding_left
+
padding_right
-
filter_width
)
/
col_height
,
stride_width
+
"Output_height and padding(padding_up, padding_down) are "
1
==
"inconsistent."
);
output_width
);
PADDLE_ENFORCE_EQ
((
im_width
+
padding
[
1
]
+
padding
[
3
]
-
(
dilation
[
1
]
*
(
filter_width
-
1
)
+
1
))
/
size_t
num_kernels
=
input_channels
*
stride
[
1
]
+
(
input_height
+
padding_up
+
padding_down
)
*
1
,
(
input_width
+
padding_left
+
padding_right
);
col_width
,
"col_width and padding(padding_left, padding_right) are "
"inconsistent."
);
size_t
num_kernels
=
im_channels
*
im_height
*
im_width
;
size_t
blocks
=
(
num_kernels
+
1024
-
1
)
/
1024
;
size_t
blocks
=
(
num_kernels
+
1024
-
1
)
/
1024
;
size_t
block_x
=
512
;
size_t
block_x
=
512
;
...
@@ -198,10 +206,9 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
...
@@ -198,10 +206,9 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
col2im
<
T
><<<
grid
,
threads
,
0
,
col2im
<
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
.
stream
()
>>>
(
num_kernels
,
col
.
data
<
T
>
(),
input_height
+
padding_up
+
padding_down
,
num_kernels
,
col
.
data
<
T
>
(),
im_height
,
im_width
,
dilation
[
0
],
input_width
+
padding_left
+
padding_left
,
input_channels
,
dilation
[
1
],
filter_height
,
filter_width
,
stride
[
0
],
stride
[
1
],
filter_height
,
filter_width
,
stride_height
,
stride_width
,
padding_up
,
padding
[
0
],
padding
[
2
],
col_height
,
col_width
,
im
->
data
<
T
>
());
padding_left
,
output_height
,
output_width
,
im
.
data
<
T
>
());
}
}
};
};
...
@@ -215,33 +222,32 @@ template class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
...
@@ -215,33 +222,32 @@ template class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
platform
::
GPUPlace
,
double
>;
platform
::
GPUPlace
,
double
>;
template
<
class
T
>
template
<
class
T
>
__global__
void
im2colOCF
(
const
T
*
im_data
,
T
*
col_data
,
int
input_channels
,
__global__
void
im2colOCF
(
const
T
*
im_data
,
int
im_channels
,
int
im_height
,
int
i
nput_height
,
int
input_width
,
int
filter_height
,
int
i
m_width
,
int
filter_height
,
int
filter_width
,
int
filter_width
,
int
stride_height
,
int
stride_width
,
int
stride_height
,
int
stride_width
,
int
padding_height
,
int
padding_width
,
int
padding_height
,
int
padding_width
,
int
col_height
,
int
output_height
,
int
output_width
)
{
int
col_width
,
T
*
col_data
)
{
int
swid
=
blockIdx
.
x
;
int
swid
=
blockIdx
.
x
;
int
shid
=
blockIdx
.
y
;
int
shid
=
blockIdx
.
y
;
for
(
int
channelid
=
threadIdx
.
z
;
channelid
<
i
nput
_channels
;
for
(
int
channelid
=
threadIdx
.
z
;
channelid
<
i
m
_channels
;
channelid
+=
blockDim
.
z
)
{
channelid
+=
blockDim
.
z
)
{
for
(
int
idy
=
threadIdx
.
y
;
idy
<
filter_height
;
idy
+=
blockDim
.
y
)
{
for
(
int
idy
=
threadIdx
.
y
;
idy
<
filter_height
;
idy
+=
blockDim
.
y
)
{
for
(
int
idx
=
threadIdx
.
x
;
idx
<
filter_width
;
idx
+=
blockDim
.
x
)
{
for
(
int
idx
=
threadIdx
.
x
;
idx
<
filter_width
;
idx
+=
blockDim
.
x
)
{
int
width_offset
=
idx
+
swid
*
stride_width
-
padding_width
;
int
width_offset
=
idx
+
swid
*
stride_width
-
padding_width
;
int
height_offset
=
idy
+
shid
*
stride_height
-
padding_height
;
int
height_offset
=
idy
+
shid
*
stride_height
-
padding_height
;
int
im_offset
=
width_offset
+
height_offset
*
i
nput
_width
+
int
im_offset
=
width_offset
+
height_offset
*
i
m
_width
+
channelid
*
i
nput_height
*
input
_width
;
channelid
*
i
m_height
*
im
_width
;
int
col_offset
=
idx
+
idy
*
filter_width
+
int
col_offset
=
idx
+
idy
*
filter_width
+
channelid
*
filter_height
*
filter_width
+
channelid
*
filter_height
*
filter_width
+
(
shid
*
output_width
+
swid
)
*
(
shid
*
col_width
+
swid
)
*
(
input_channels
*
filter_height
*
filter_width
);
(
im_channels
*
filter_height
*
filter_width
);
if
(
height_offset
>=
input_height
||
height_offset
<
0
||
col_data
[
col_offset
]
=
width_offset
>=
input_width
||
width_offset
<
0
)
{
(
height_offset
>=
im_height
||
height_offset
<
0
||
col_data
[
col_offset
]
=
T
(
0
);
width_offset
>=
im_width
||
width_offset
<
0
)
}
else
{
?
T
(
0
)
col_data
[
col_offset
]
=
im_data
[
im_offset
];
:
im_data
[
im_offset
];
}
}
}
}
}
}
}
...
@@ -257,27 +263,33 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
...
@@ -257,27 +263,33 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
platform
::
GPUPlace
,
T
>
{
platform
::
GPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
im
,
framework
::
Tensor
&
col
,
const
framework
::
Tensor
&
im
,
const
std
::
vector
<
int
>&
dilation
,
int
stride_height
,
int
stride_width
,
int
padding_up
,
const
std
::
vector
<
int
>&
stride
,
int
padding_down
,
int
padding_left
,
int
padding_right
)
{
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
col
)
{
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
PADDLE_ENFORCE
(
col
->
dims
().
size
()
==
5
);
int
input_channels
=
im
.
dims
()[
0
];
int
im_channels
=
im
.
dims
()[
0
];
int
input_height
=
im
.
dims
()[
1
];
int
im_height
=
im
.
dims
()[
1
];
int
input_width
=
im
.
dims
()[
2
];
int
im_width
=
im
.
dims
()[
2
];
int
filter_height
=
col
.
dims
()[
3
];
int
filter_height
=
col
->
dims
()[
3
];
int
filter_width
=
col
.
dims
()[
4
];
int
filter_width
=
col
->
dims
()[
4
];
int
output_height
=
col
.
dims
()[
0
];
int
col_height
=
col
->
dims
()[
0
];
int
output_width
=
col
.
dims
()[
1
];
int
col_width
=
col
->
dims
()[
1
];
PADDLE_ENFORCE
((
input_height
+
padding_up
+
padding_down
-
filter_height
)
/
PADDLE_ENFORCE_EQ
((
im_height
+
padding
[
0
]
+
padding
[
2
]
-
stride_height
+
(
dilation
[
0
]
*
(
filter_height
-
1
)
+
1
))
/
1
==
stride
[
0
]
+
output_height
);
1
,
PADDLE_ENFORCE
((
input_width
+
padding_left
+
padding_right
-
filter_width
)
/
col_height
,
stride_width
+
"Output_height and padding(padding_up, padding_down) are "
1
==
"inconsistent."
);
output_width
);
PADDLE_ENFORCE_EQ
((
im_width
+
padding
[
1
]
+
padding
[
3
]
-
(
dilation
[
1
]
*
(
filter_width
-
1
)
+
1
))
/
stride
[
1
]
+
1
,
col_width
,
"col_width and padding(padding_left, padding_right) are "
"inconsistent."
);
int
block_dim_x
=
0
;
int
block_dim_x
=
0
;
int
block_dim_y
=
0
;
int
block_dim_y
=
0
;
...
@@ -296,42 +308,41 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
...
@@ -296,42 +308,41 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
}
}
int
block_dim_z
=
1024
/
block_dim_x
/
block_dim_y
;
int
block_dim_z
=
1024
/
block_dim_x
/
block_dim_y
;
dim3
threads
(
block_dim_x
,
block_dim_y
,
dim3
threads
(
block_dim_x
,
block_dim_y
,
std
::
min
(
block_dim_z
,
im_channels
));
std
::
min
(
block_dim_z
,
input_channels
));
dim3
grid
(
col_width
,
col_height
);
dim3
grid
(
output_width
,
output_height
);
im2colOCF
<
T
><<<
grid
,
threads
,
0
,
im2colOCF
<
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
.
stream
()
>>>
(
im
.
data
<
T
>
(),
col
.
data
<
T
>
(),
input_channels
,
input_height
,
input_width
,
im
.
data
<
T
>
(),
im_channels
,
im_height
,
im_width
,
filter_height
,
filter_
height
,
filter_width
,
stride_height
,
stride_width
,
padding_up
,
filter_
width
,
stride
[
0
],
stride
[
1
],
padding
[
0
],
padding
[
1
],
col_height
,
padding_left
,
output_height
,
output_width
);
col_width
,
col
->
data
<
T
>
()
);
}
}
};
};
template
<
class
T
>
template
<
class
T
>
__global__
void
col2imOCF
(
T
*
im_data
,
const
T
*
col_data
,
int
input_channels
,
__global__
void
col2imOCF
(
const
T
*
col_data
,
int
im_channels
,
int
im_height
,
int
i
nput_height
,
int
input_width
,
int
filter_height
,
int
i
m_width
,
int
filter_height
,
int
filter_width
,
int
filter_width
,
int
stride_height
,
int
stride_width
,
int
stride_height
,
int
stride_width
,
int
padding_height
,
int
padding_width
,
int
padding_height
,
int
padding_width
,
int
col_height
,
int
output_height
,
int
output_width
)
{
int
col_width
,
T
*
im_data
)
{
int
swid
=
blockIdx
.
x
;
int
swid
=
blockIdx
.
x
;
int
shid
=
blockIdx
.
y
;
int
shid
=
blockIdx
.
y
;
for
(
int
channelid
=
threadIdx
.
z
;
channelid
<
i
nput
_channels
;
for
(
int
channelid
=
threadIdx
.
z
;
channelid
<
i
m
_channels
;
channelid
+=
blockDim
.
z
)
{
channelid
+=
blockDim
.
z
)
{
for
(
int
idy
=
threadIdx
.
y
;
idy
<
filter_height
;
idy
+=
blockDim
.
y
)
{
for
(
int
idy
=
threadIdx
.
y
;
idy
<
filter_height
;
idy
+=
blockDim
.
y
)
{
for
(
int
idx
=
threadIdx
.
x
;
idx
<
filter_width
;
idx
+=
blockDim
.
x
)
{
for
(
int
idx
=
threadIdx
.
x
;
idx
<
filter_width
;
idx
+=
blockDim
.
x
)
{
int
width_offset
=
idx
+
swid
*
stride_width
-
padding_width
;
int
width_offset
=
idx
+
swid
*
stride_width
-
padding_width
;
int
height_offset
=
idy
+
shid
*
stride_height
-
padding_height
;
int
height_offset
=
idy
+
shid
*
stride_height
-
padding_height
;
int
im_offset
=
width_offset
+
height_offset
*
i
nput
_width
+
int
im_offset
=
width_offset
+
height_offset
*
i
m
_width
+
channelid
*
i
nput_height
*
input
_width
;
channelid
*
i
m_height
*
im
_width
;
int
col_offset
=
idx
+
idy
*
filter_width
+
int
col_offset
=
idx
+
idy
*
filter_width
+
channelid
*
filter_height
*
filter_width
+
channelid
*
filter_height
*
filter_width
+
(
shid
*
output
_width
+
swid
)
*
(
shid
*
col
_width
+
swid
)
*
(
i
nput
_channels
*
filter_height
*
filter_width
);
(
i
m
_channels
*
filter_height
*
filter_width
);
if
(
height_offset
>=
0
&&
height_offset
<
i
nput
_height
&&
if
(
height_offset
>=
0
&&
height_offset
<
i
m
_height
&&
width_offset
>=
0
&&
width_offset
<
i
nput
_width
)
{
width_offset
>=
0
&&
width_offset
<
i
m
_width
)
{
paddle
::
platform
::
CudaAtomicAdd
(
im_data
+
im_offset
,
paddle
::
platform
::
CudaAtomicAdd
(
im_data
+
im_offset
,
col_data
[
col_offset
]);
col_data
[
col_offset
]);
}
}
...
@@ -349,28 +360,35 @@ template <class T>
...
@@ -349,28 +360,35 @@ template <class T>
class
Col2ImFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
class
Col2ImFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
platform
::
GPUPlace
,
T
>
{
platform
::
GPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
im
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
col
,
int
stride_height
,
const
framework
::
Tensor
&
col
,
int
stride_width
,
int
padding_up
,
int
padding_down
,
const
std
::
vector
<
int
>&
dilation
,
int
padding_left
,
int
padding_right
)
{
const
std
::
vector
<
int
>&
stride
,
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
im
)
{
PADDLE_ENFORCE
(
im
->
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
int
i
nput_channels
=
im
.
dims
()[
0
];
int
i
m_channels
=
im
->
dims
()[
0
];
int
i
nput_height
=
im
.
dims
()[
1
];
int
i
m_height
=
im
->
dims
()[
1
];
int
i
nput_width
=
im
.
dims
()[
2
];
int
i
m_width
=
im
->
dims
()[
2
];
int
filter_height
=
col
.
dims
()[
3
];
int
filter_height
=
col
.
dims
()[
3
];
int
filter_width
=
col
.
dims
()[
4
];
int
filter_width
=
col
.
dims
()[
4
];
int
output_height
=
col
.
dims
()[
0
];
int
col_height
=
col
.
dims
()[
0
];
int
output_width
=
col
.
dims
()[
1
];
int
col_width
=
col
.
dims
()[
1
];
PADDLE_ENFORCE
((
input_height
+
padding_up
+
padding_down
-
filter_height
)
/
PADDLE_ENFORCE_EQ
((
im_height
+
padding
[
0
]
+
padding
[
2
]
-
stride_height
+
(
dilation
[
0
]
*
(
filter_height
-
1
)
+
1
))
/
1
==
stride
[
0
]
+
output_height
);
1
,
PADDLE_ENFORCE
((
input_width
+
padding_left
+
padding_right
-
filter_width
)
/
col_height
,
stride_width
+
"Output_height and padding(padding_up, padding_down) are "
1
==
"inconsistent."
);
output_width
);
PADDLE_ENFORCE_EQ
((
im_width
+
padding
[
1
]
+
padding
[
3
]
-
(
dilation
[
1
]
*
(
filter_width
-
1
)
+
1
))
/
stride
[
1
]
+
1
,
col_width
,
"col_width and padding(padding_left, padding_right) are "
"inconsistent."
);
int
block_dim_x
=
0
;
int
block_dim_x
=
0
;
int
block_dim_y
=
0
;
int
block_dim_y
=
0
;
...
@@ -389,15 +407,14 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
...
@@ -389,15 +407,14 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
}
}
int
block_dim_z
=
1024
/
block_dim_x
/
block_dim_y
;
int
block_dim_z
=
1024
/
block_dim_x
/
block_dim_y
;
dim3
threads
(
block_dim_x
,
block_dim_y
,
dim3
threads
(
block_dim_x
,
block_dim_y
,
std
::
min
(
block_dim_z
,
im_channels
));
std
::
min
(
block_dim_z
,
input_channels
));
dim3
grid
(
col_width
,
col_height
);
dim3
grid
(
output_width
,
output_height
);
col2imOCF
<
T
><<<
grid
,
threads
,
0
,
col2imOCF
<
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
.
stream
()
>>>
(
im
.
data
<
T
>
(),
col
.
data
<
T
>
(),
input_channels
,
input_height
,
input_width
,
col
.
data
<
T
>
(),
im_channels
,
im_height
,
im_width
,
filter_height
,
filter_
height
,
filter_width
,
stride_height
,
stride_width
,
padding_up
,
filter_
width
,
stride
[
0
],
stride
[
1
],
padding
[
0
],
padding
[
1
],
col_height
,
padding_left
,
output_height
,
output_width
);
col_width
,
im
->
data
<
T
>
()
);
}
}
};
};
...
...
paddle/operators/math/im2col.h
浏览文件 @
4fc9f55e
...
@@ -35,6 +35,15 @@ enum class ColFormat { kCFO = 0, kOCF = 1 };
...
@@ -35,6 +35,15 @@ enum class ColFormat { kCFO = 0, kOCF = 1 };
* \param colData Column data.
* \param colData Column data.
* \param colShape The shape of colData.
* \param colShape The shape of colData.
*
*
* \param dilations dilation data.
* \param 2-dimension [dilation_height, dilation_width].
*
* \param strides stride data.
* \param 2-dimension [stride_height, stride_width].
*
* \param paddings padding data.
* \param 4-dimension [up_pad, left_pad, down_pad, right_pad].
*
* If the template argument Format is kCFO, the shape of colData is:
* If the template argument Format is kCFO, the shape of colData is:
* [input_channels, filter_height, filter_width, output_height, output_width]
* [input_channels, filter_height, filter_width, output_height, output_width]
* So, it is easy to reshape into a convolution matrix for convolution
* So, it is easy to reshape into a convolution matrix for convolution
...
@@ -73,18 +82,19 @@ template <ColFormat Format, typename Place, typename T>
...
@@ -73,18 +82,19 @@ template <ColFormat Format, typename Place, typename T>
class
Im2ColFunctor
{
class
Im2ColFunctor
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
im
,
framework
::
Tensor
&
col
,
const
framework
::
Tensor
&
im
,
const
std
::
vector
<
int
>&
dilation
,
int
stride_height
,
int
stride_width
,
int
padding_up
,
const
std
::
vector
<
int
>&
stride
,
int
padding_down
,
int
padding_left
,
int
padding_right
);
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
col
);
};
};
template
<
ColFormat
Format
,
typename
Place
,
typename
T
>
template
<
ColFormat
Format
,
typename
Place
,
typename
T
>
class
Col2ImFunctor
{
class
Col2ImFunctor
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
im
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
col
,
int
stride_height
,
const
framework
::
Tensor
&
col
,
int
stride_width
,
int
padding_up
,
int
padding_down
,
const
std
::
vector
<
int
>&
dilation
,
int
padding_left
,
int
padding_right
);
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
padding
,
framework
::
Tensor
*
im
);
};
};
}
// namespace math
}
// namespace math
...
...
paddle/operators/math/im2col_test.cc
浏览文件 @
4fc9f55e
...
@@ -45,10 +45,14 @@ void testIm2col() {
...
@@ -45,10 +45,14 @@ void testIm2col() {
int
input_height
=
2
;
int
input_height
=
2
;
int
input_width
=
3
;
int
input_width
=
3
;
int
filter_size
=
2
;
int
filter_size
=
2
;
int
stride
=
1
;
std
::
vector
<
int
>
stride
({
1
,
1
});
// stride_y, stride_x
int
padding
=
0
;
std
::
vector
<
int
>
padding
(
int
output_height
=
(
input_height
-
filter_size
+
2
*
padding
)
/
stride
+
1
;
{
0
,
0
,
0
,
0
});
// up_pad, left_pad, down_pad, right_pad
int
output_width
=
(
input_width
-
filter_size
+
2
*
padding
)
/
stride
+
1
;
std
::
vector
<
int
>
dilation
({
1
,
1
});
// dilation_y, dilation_x
int
output_height
=
(
input_height
-
filter_size
+
padding
[
0
]
+
padding
[
1
])
/
stride
[
0
]
+
1
;
int
output_width
=
(
input_width
-
filter_size
+
padding
[
2
]
+
padding
[
3
])
/
stride
[
1
]
+
1
;
float
*
input_ptr
=
input_tmp
.
mutable_data
<
float
>
(
float
*
input_ptr
=
input_tmp
.
mutable_data
<
float
>
(
{
1
,
input_height
,
input_width
},
paddle
::
platform
::
CPUPlace
());
{
1
,
input_height
,
input_width
},
paddle
::
platform
::
CPUPlace
());
float
arr
[
6
]
=
{
0
,
1
,
2
,
3
,
4
,
5
};
float
arr
[
6
]
=
{
0
,
1
,
2
,
3
,
4
,
5
};
...
@@ -85,10 +89,8 @@ void testIm2col() {
...
@@ -85,10 +89,8 @@ void testIm2col() {
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
Place
,
float
>
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
Place
,
float
>
im2col_ocf
;
im2col_ocf
;
im2col
(
*
context
,
input
,
output_cfo
,
stride
,
stride
,
padding
,
padding
,
padding
,
im2col
(
*
context
,
input
,
dilation
,
stride
,
padding
,
&
output_cfo
);
padding
);
im2col_ocf
(
*
context
,
input
,
dilation
,
stride
,
padding
,
&
output_ocf
);
im2col_ocf
(
*
context
,
input
,
output_ocf
,
stride
,
stride
,
padding
,
padding
,
padding
,
padding
);
float
out_cfo_data
[]
=
{
0
,
1
,
1
,
2
,
3
,
4
,
4
,
5
};
float
out_cfo_data
[]
=
{
0
,
1
,
1
,
2
,
3
,
4
,
4
,
5
};
float
out_ocf_data
[]
=
{
0
,
1
,
3
,
4
,
1
,
2
,
4
,
5
};
float
out_ocf_data
[]
=
{
0
,
1
,
3
,
4
,
1
,
2
,
4
,
5
};
...
@@ -131,8 +133,7 @@ void testIm2col() {
...
@@ -131,8 +133,7 @@ void testIm2col() {
input
.
CopyFrom
(
input_tmp
,
*
place
,
*
context
);
input
.
CopyFrom
(
input_tmp
,
*
place
,
*
context
);
}
}
col2im
(
*
context
,
input
,
output_cfo
,
stride
,
stride
,
padding
,
padding
,
padding
,
col2im
(
*
context
,
output_cfo
,
dilation
,
stride
,
padding
,
&
input
);
padding
);
float
*
in_ptr
;
float
*
in_ptr
;
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
...
@@ -153,8 +154,7 @@ void testIm2col() {
...
@@ -153,8 +154,7 @@ void testIm2col() {
input
.
CopyFrom
(
input_tmp
,
*
place
,
*
context
);
input
.
CopyFrom
(
input_tmp
,
*
place
,
*
context
);
}
}
col2im_ocf
(
*
context
,
input
,
output_ocf
,
stride
,
stride
,
padding
,
padding
,
col2im_ocf
(
*
context
,
output_ocf
,
dilation
,
stride
,
padding
,
&
input
);
padding
,
padding
);
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
in_ptr
=
input
.
data
<
float
>
();
in_ptr
=
input
.
data
<
float
>
();
...
...
paddle/operators/math/vol2col.cc
浏览文件 @
4fc9f55e
...
@@ -28,28 +28,51 @@ template <class T>
...
@@ -28,28 +28,51 @@ template <class T>
class
Vol2ColFunctor
<
platform
::
CPUPlace
,
T
>
{
class
Vol2ColFunctor
<
platform
::
CPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
vol
,
framework
::
Tensor
&
col
,
const
framework
::
Tensor
&
vol
,
int
stride_depth
,
int
stride_height
,
int
stride_width
,
const
std
::
vector
<
int
>&
dilations
,
int
padding_depth
,
int
padding_height
,
const
std
::
vector
<
int
>&
strides
,
int
padding_width
)
const
{
const
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
col
)
const
{
PADDLE_ENFORCE
(
vol
.
dims
().
size
()
==
4
);
PADDLE_ENFORCE
(
vol
.
dims
().
size
()
==
4
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
7
);
PADDLE_ENFORCE
(
col
->
dims
().
size
()
==
7
);
int
input_channels
=
vol
.
dims
()[
0
];
int
input_channels
=
vol
.
dims
()[
0
];
int
input_depth
=
vol
.
dims
()[
1
];
int
input_depth
=
vol
.
dims
()[
1
];
int
input_height
=
vol
.
dims
()[
2
];
int
input_height
=
vol
.
dims
()[
2
];
int
input_width
=
vol
.
dims
()[
3
];
int
input_width
=
vol
.
dims
()[
3
];
int
filter_depth
=
col
.
dims
()[
1
];
int
filter_depth
=
col
->
dims
()[
1
];
int
filter_height
=
col
.
dims
()[
2
];
int
filter_height
=
col
->
dims
()[
2
];
int
filter_width
=
col
.
dims
()[
3
];
int
filter_width
=
col
->
dims
()[
3
];
int
output_depth
=
col
.
dims
()[
4
];
int
output_depth
=
col
->
dims
()[
4
];
int
output_height
=
col
.
dims
()[
5
];
int
output_height
=
col
->
dims
()[
5
];
int
output_width
=
col
.
dims
()[
6
];
int
output_width
=
col
->
dims
()[
6
];
int
channels_col
=
int
channels_col
=
input_channels
*
filter_depth
*
filter_height
*
filter_width
;
input_channels
*
filter_depth
*
filter_height
*
filter_width
;
PADDLE_ENFORCE_EQ
((
input_depth
+
2
*
paddings
[
0
]
-
((
dilations
[
0
]
*
(
filter_depth
-
1
)
+
1
)))
/
strides
[
0
]
+
1
,
output_depth
,
"input_depth and output_depth are "
"mismatching."
);
PADDLE_ENFORCE_EQ
((
input_height
+
2
*
paddings
[
1
]
-
((
dilations
[
1
]
*
(
filter_height
-
1
)
+
1
)))
/
strides
[
1
]
+
1
,
output_height
,
"input_height and output_height are "
"mismatching."
);
PADDLE_ENFORCE_EQ
((
input_width
+
2
*
paddings
[
2
]
-
((
dilations
[
2
]
*
(
filter_width
-
1
)
+
1
)))
/
strides
[
2
]
+
1
,
output_width
,
"input_width and output_width are "
"mismatching."
);
const
T
*
vol_data
=
vol
.
data
<
T
>
();
const
T
*
vol_data
=
vol
.
data
<
T
>
();
T
*
col_data
=
col
.
data
<
T
>
();
T
*
col_data
=
col
->
data
<
T
>
();
for
(
int
c
=
0
;
c
<
channels_col
;
++
c
)
{
for
(
int
c
=
0
;
c
<
channels_col
;
++
c
)
{
int
w_offset
=
c
%
filter_width
;
int
w_offset
=
c
%
filter_width
;
...
@@ -57,24 +80,23 @@ class Vol2ColFunctor<platform::CPUPlace, T> {
...
@@ -57,24 +80,23 @@ class Vol2ColFunctor<platform::CPUPlace, T> {
int
d_offset
=
(
c
/
filter_width
/
filter_height
)
%
filter_depth
;
int
d_offset
=
(
c
/
filter_width
/
filter_height
)
%
filter_depth
;
int
c_in
=
c
/
filter_width
/
filter_height
/
filter_depth
;
int
c_in
=
c
/
filter_width
/
filter_height
/
filter_depth
;
for
(
int
d
=
0
;
d
<
output_depth
;
++
d
)
{
for
(
int
d
=
0
;
d
<
output_depth
;
++
d
)
{
int
d_pad
=
d
*
stride
_depth
-
padding_depth
+
d_offset
;
int
d_pad
=
d
*
stride
s
[
0
]
-
paddings
[
0
]
+
d_offset
*
dilations
[
0
]
;
for
(
int
h
=
0
;
h
<
output_height
;
++
h
)
{
for
(
int
h
=
0
;
h
<
output_height
;
++
h
)
{
int
h_pad
=
h
*
stride
_height
-
padding_height
+
h_offset
;
int
h_pad
=
h
*
stride
s
[
1
]
-
paddings
[
1
]
+
h_offset
*
dilations
[
1
]
;
for
(
int
w
=
0
;
w
<
output_width
;
++
w
)
{
for
(
int
w
=
0
;
w
<
output_width
;
++
w
)
{
int
w_pad
=
w
*
stride
_width
-
padding_width
+
w_offset
;
int
w_pad
=
w
*
stride
s
[
2
]
-
paddings
[
2
]
+
w_offset
*
dilations
[
2
]
;
int
col_idx
=
int
col_idx
=
((
c
*
output_depth
+
d
)
*
output_height
+
h
)
*
output_width
+
w
;
((
c
*
output_depth
+
d
)
*
output_height
+
h
)
*
output_width
+
w
;
if
(
h_pad
<
0
||
h_pad
>=
input_height
||
w_pad
<
0
||
int
vol_idx
=
w_pad
>=
input_width
||
d_pad
<
0
||
d_pad
>=
input_depth
)
{
((
c_in
*
input_depth
+
d_pad
)
*
input_height
+
h_pad
)
*
col_data
[
col_idx
]
=
static_cast
<
T
>
(
0
);
input_width
+
}
else
{
w_pad
;
int
vol_idx
=
col_data
[
col_idx
]
=
((
c_in
*
input_depth
+
d_pad
)
*
input_height
+
h_pad
)
*
(
h_pad
<
0
||
h_pad
>=
input_height
||
w_pad
<
0
||
input_width
+
w_pad
>=
input_width
||
d_pad
<
0
||
d_pad
>=
input_depth
)
w_pad
;
?
static_cast
<
T
>
(
0
)
col_data
[
col_idx
]
=
vol_data
[
vol_idx
];
:
vol_data
[
vol_idx
];
}
}
}
}
}
}
}
...
@@ -92,17 +114,18 @@ template <class T>
...
@@ -92,17 +114,18 @@ template <class T>
class
Col2VolFunctor
<
platform
::
CPUPlace
,
T
>
{
class
Col2VolFunctor
<
platform
::
CPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
vol
,
const
framework
::
Tensor
&
col
,
const
framework
::
Tensor
&
col
,
int
stride_depth
,
int
stride_height
,
int
stride_width
,
const
std
::
vector
<
int
>&
dilations
,
int
padding_depth
,
int
padding_height
,
const
std
::
vector
<
int
>&
strides
,
int
padding_width
)
const
{
const
std
::
vector
<
int
>&
paddings
,
PADDLE_ENFORCE
(
vol
.
dims
().
size
()
==
4
);
framework
::
Tensor
*
vol
)
const
{
PADDLE_ENFORCE
(
vol
->
dims
().
size
()
==
4
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
7
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
7
);
int
input_channels
=
vol
.
dims
()[
0
];
int
input_channels
=
vol
->
dims
()[
0
];
int
input_depth
=
vol
.
dims
()[
1
];
int
input_depth
=
vol
->
dims
()[
1
];
int
input_height
=
vol
.
dims
()[
2
];
int
input_height
=
vol
->
dims
()[
2
];
int
input_width
=
vol
.
dims
()[
3
];
int
input_width
=
vol
->
dims
()[
3
];
int
filter_depth
=
col
.
dims
()[
1
];
int
filter_depth
=
col
.
dims
()[
1
];
int
filter_height
=
col
.
dims
()[
2
];
int
filter_height
=
col
.
dims
()[
2
];
int
filter_width
=
col
.
dims
()[
3
];
int
filter_width
=
col
.
dims
()[
3
];
...
@@ -112,7 +135,28 @@ class Col2VolFunctor<platform::CPUPlace, T> {
...
@@ -112,7 +135,28 @@ class Col2VolFunctor<platform::CPUPlace, T> {
int
channels_col
=
int
channels_col
=
input_channels
*
filter_depth
*
filter_height
*
filter_width
;
input_channels
*
filter_depth
*
filter_height
*
filter_width
;
T
*
vol_data
=
vol
.
data
<
T
>
();
PADDLE_ENFORCE_EQ
((
input_depth
+
2
*
paddings
[
0
]
-
((
dilations
[
0
]
*
(
filter_depth
-
1
)
+
1
)))
/
strides
[
0
]
+
1
,
output_depth
,
"input_depth and output_depth are "
"mismatching."
);
PADDLE_ENFORCE_EQ
((
input_height
+
2
*
paddings
[
1
]
-
((
dilations
[
1
]
*
(
filter_height
-
1
)
+
1
)))
/
strides
[
1
]
+
1
,
output_height
,
"input_height and output_height are "
"mismatching."
);
PADDLE_ENFORCE_EQ
((
input_width
+
2
*
paddings
[
2
]
-
((
dilations
[
2
]
*
(
filter_width
-
1
)
+
1
)))
/
strides
[
2
]
+
1
,
output_width
,
"input_width and output_width are "
"mismatching."
);
T
*
vol_data
=
vol
->
data
<
T
>
();
const
T
*
col_data
=
col
.
data
<
T
>
();
const
T
*
col_data
=
col
.
data
<
T
>
();
for
(
int
c
=
0
;
c
<
channels_col
;
++
c
)
{
for
(
int
c
=
0
;
c
<
channels_col
;
++
c
)
{
...
@@ -121,11 +165,11 @@ class Col2VolFunctor<platform::CPUPlace, T> {
...
@@ -121,11 +165,11 @@ class Col2VolFunctor<platform::CPUPlace, T> {
int
d_offset
=
(
c
/
filter_width
/
filter_height
)
%
filter_depth
;
int
d_offset
=
(
c
/
filter_width
/
filter_height
)
%
filter_depth
;
int
cIm
=
c
/
filter_width
/
filter_height
/
filter_depth
;
int
cIm
=
c
/
filter_width
/
filter_height
/
filter_depth
;
for
(
int
d
=
0
;
d
<
output_depth
;
++
d
)
{
for
(
int
d
=
0
;
d
<
output_depth
;
++
d
)
{
int
d_pad
=
d
*
stride
_depth
-
padding_depth
+
d_offset
;
int
d_pad
=
d
*
stride
s
[
0
]
-
paddings
[
0
]
+
d_offset
*
dilations
[
0
]
;
for
(
int
h
=
0
;
h
<
output_height
;
++
h
)
{
for
(
int
h
=
0
;
h
<
output_height
;
++
h
)
{
int
h_pad
=
h
*
stride
_height
-
padding_height
+
h_offset
;
int
h_pad
=
h
*
stride
s
[
1
]
-
paddings
[
1
]
+
h_offset
*
dilations
[
1
]
;
for
(
int
w
=
0
;
w
<
output_width
;
++
w
)
{
for
(
int
w
=
0
;
w
<
output_width
;
++
w
)
{
int
w_pad
=
w
*
stride
_width
-
padding_width
+
w_offset
;
int
w_pad
=
w
*
stride
s
[
2
]
-
paddings
[
2
]
+
w_offset
*
dilations
[
2
]
;
if
(
h_pad
>=
0
&&
h_pad
<
input_height
&&
w_pad
>=
0
&&
if
(
h_pad
>=
0
&&
h_pad
<
input_height
&&
w_pad
>=
0
&&
w_pad
<
input_width
&&
d_pad
>=
0
&&
d_pad
<
input_depth
)
{
w_pad
<
input_width
&&
d_pad
>=
0
&&
d_pad
<
input_depth
)
{
...
@@ -133,6 +177,7 @@ class Col2VolFunctor<platform::CPUPlace, T> {
...
@@ -133,6 +177,7 @@ class Col2VolFunctor<platform::CPUPlace, T> {
((
cIm
*
input_depth
+
d_pad
)
*
input_height
+
h_pad
)
*
((
cIm
*
input_depth
+
d_pad
)
*
input_height
+
h_pad
)
*
input_width
+
input_width
+
w_pad
;
w_pad
;
int
col_idx
=
int
col_idx
=
((
c
*
output_depth
+
d
)
*
output_height
+
h
)
*
output_width
+
((
c
*
output_depth
+
d
)
*
output_height
+
h
)
*
output_width
+
w
;
w
;
...
...
paddle/operators/math/vol2col.cu
浏览文件 @
4fc9f55e
...
@@ -21,11 +21,12 @@ namespace math {
...
@@ -21,11 +21,12 @@ namespace math {
template
<
class
T
>
template
<
class
T
>
__global__
void
vol2col
(
int
num_kernels
,
const
T
*
data_vol
,
int
depth
,
__global__
void
vol2col
(
int
num_kernels
,
const
T
*
data_vol
,
int
depth
,
int
height
,
int
width
,
int
filter_depth
,
int
height
,
int
width
,
int
dilation_d
,
int
dilation_h
,
int
filter_height
,
int
filter_width
,
int
stride_depth
,
int
dilation_w
,
int
filter_depth
,
int
filter_height
,
int
stride_height
,
int
stride_width
,
int
padding_depth
,
int
filter_width
,
int
stride_depth
,
int
stride_height
,
int
padding_height
,
int
padding_width
,
int
output_detph
,
int
stride_width
,
int
padding_depth
,
int
padding_height
,
int
output_height
,
int
output_width
,
T
*
data_col
)
{
int
padding_width
,
int
output_detph
,
int
output_height
,
int
output_width
,
T
*
data_col
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
num_kernels
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
num_kernels
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
w_out
=
index
%
output_width
;
int
w_out
=
index
%
output_width
;
...
@@ -44,12 +45,14 @@ __global__ void vol2col(int num_kernels, const T* data_vol, int depth,
...
@@ -44,12 +45,14 @@ __global__ void vol2col(int num_kernels, const T* data_vol, int depth,
for
(
int
k
=
0
;
k
<
filter_depth
;
++
k
)
{
for
(
int
k
=
0
;
k
<
filter_depth
;
++
k
)
{
for
(
int
i
=
0
;
i
<
filter_height
;
++
i
)
{
for
(
int
i
=
0
;
i
<
filter_height
;
++
i
)
{
for
(
int
j
=
0
;
j
<
filter_width
;
++
j
)
{
for
(
int
j
=
0
;
j
<
filter_width
;
++
j
)
{
int
d
=
d_in
+
k
;
int
d
=
d_in
+
k
*
dilation_d
;
int
h
=
h_in
+
i
;
int
h
=
h_in
+
i
*
dilation_h
;
int
w
=
w_in
+
j
;
int
w
=
w_in
+
j
*
dilation_w
;
int
col_idx
=
(
k
*
dilation_d
*
height
+
i
*
dilation_h
)
*
width
+
j
*
dilation_w
;
*
data_col
=
(
d
>=
0
&&
d
<
depth
&&
h
>=
0
&&
h
<
height
&&
w
>=
0
&&
*
data_col
=
(
d
>=
0
&&
d
<
depth
&&
h
>=
0
&&
h
<
height
&&
w
>=
0
&&
w
<
width
)
w
<
width
)
?
data_vol
[
(
k
*
height
+
i
)
*
width
+
j
]
?
data_vol
[
col_idx
]
:
0
;
:
0
;
data_col
+=
output_detph
*
output_height
*
output_width
;
data_col
+=
output_detph
*
output_height
*
output_width
;
}
}
...
@@ -68,23 +71,46 @@ template <class T>
...
@@ -68,23 +71,46 @@ template <class T>
class
Vol2ColFunctor
<
platform
::
GPUPlace
,
T
>
{
class
Vol2ColFunctor
<
platform
::
GPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
vol
,
framework
::
Tensor
&
col
,
const
framework
::
Tensor
&
vol
,
int
stride_depth
,
int
stride_height
,
int
stride_width
,
const
std
::
vector
<
int
>&
dilations
,
int
padding_depth
,
int
padding_height
,
const
std
::
vector
<
int
>&
strides
,
int
padding_width
)
const
{
const
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
col
)
const
{
PADDLE_ENFORCE
(
vol
.
dims
().
size
()
==
4
);
PADDLE_ENFORCE
(
vol
.
dims
().
size
()
==
4
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
7
);
PADDLE_ENFORCE
(
col
->
dims
().
size
()
==
7
);
int
input_channels
=
vol
.
dims
()[
0
];
int
input_channels
=
vol
.
dims
()[
0
];
int
input_depth
=
vol
.
dims
()[
1
];
int
input_depth
=
vol
.
dims
()[
1
];
int
input_height
=
vol
.
dims
()[
2
];
int
input_height
=
vol
.
dims
()[
2
];
int
input_width
=
vol
.
dims
()[
3
];
int
input_width
=
vol
.
dims
()[
3
];
int
filter_depth
=
col
.
dims
()[
1
];
int
filter_depth
=
col
->
dims
()[
1
];
int
filter_height
=
col
.
dims
()[
2
];
int
filter_height
=
col
->
dims
()[
2
];
int
filter_width
=
col
.
dims
()[
3
];
int
filter_width
=
col
->
dims
()[
3
];
int
output_depth
=
col
.
dims
()[
4
];
int
output_depth
=
col
->
dims
()[
4
];
int
output_height
=
col
.
dims
()[
5
];
int
output_height
=
col
->
dims
()[
5
];
int
output_width
=
col
.
dims
()[
6
];
int
output_width
=
col
->
dims
()[
6
];
PADDLE_ENFORCE_EQ
((
input_depth
+
2
*
paddings
[
0
]
-
((
dilations
[
0
]
*
(
filter_depth
-
1
)
+
1
)))
/
strides
[
0
]
+
1
,
output_depth
,
"input_depth and output_depth are "
"Mismatching."
);
PADDLE_ENFORCE_EQ
((
input_height
+
2
*
paddings
[
1
]
-
((
dilations
[
1
]
*
(
filter_height
-
1
)
+
1
)))
/
strides
[
1
]
+
1
,
output_height
,
"input_height and output_height are "
"Mismatching."
);
PADDLE_ENFORCE_EQ
((
input_width
+
2
*
paddings
[
2
]
-
((
dilations
[
2
]
*
(
filter_width
-
1
)
+
1
)))
/
strides
[
2
]
+
1
,
output_width
,
"input_width and output_width are "
"Mismatching."
);
int
num_outputs
=
int
num_outputs
=
input_channels
*
output_depth
*
output_height
*
output_width
;
input_channels
*
output_depth
*
output_height
*
output_width
;
...
@@ -95,19 +121,25 @@ class Vol2ColFunctor<platform::GPUPlace, T> {
...
@@ -95,19 +121,25 @@ class Vol2ColFunctor<platform::GPUPlace, T> {
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
.
stream
()
>>>
(
num_outputs
,
vol
.
data
<
T
>
(),
input_depth
,
input_height
,
input_width
,
num_outputs
,
vol
.
data
<
T
>
(),
input_depth
,
input_height
,
input_width
,
filter_depth
,
filter_height
,
filter_width
,
stride_depth
,
stride_height
,
dilations
[
0
],
dilations
[
1
],
dilations
[
2
],
filter_depth
,
filter_height
,
stride_width
,
padding_depth
,
padding_height
,
padding_width
,
filter_width
,
strides
[
0
],
strides
[
1
],
strides
[
2
],
paddings
[
0
],
output_depth
,
output_height
,
output_width
,
col
.
data
<
T
>
());
paddings
[
1
],
paddings
[
2
],
output_depth
,
output_height
,
output_width
,
col
->
data
<
T
>
());
}
}
};
};
template
<
class
T
>
template
<
class
T
>
__global__
void
col2vol
(
int
num_kernels
,
const
T
*
data_col
,
int
depth
,
__global__
void
col2vol
(
int
num_kernels
,
const
T
*
data_col
,
int
depth
,
int
height
,
int
width
,
int
filter_depth
,
int
height
,
int
width
,
int
dilation_d
,
int
dilation_h
,
int
filter_height
,
int
filter_width
,
int
stride_depth
,
int
dilation_w
,
int
filter_depth
,
int
filter_height
,
int
stride_height
,
int
stride_width
,
int
padding_depth
,
int
filter_width
,
int
stride_depth
,
int
stride_height
,
int
padding_height
,
int
padding_width
,
int
output_detph
,
int
stride_width
,
int
padding_depth
,
int
padding_height
,
int
output_height
,
int
output_width
,
T
*
data_vol
)
{
int
padding_width
,
int
output_detph
,
int
output_height
,
int
output_width
,
T
*
data_vol
)
{
const
int
d_filter_depth
=
dilation_d
*
(
filter_depth
-
1
)
+
1
;
const
int
d_filter_height
=
dilation_h
*
(
filter_height
-
1
)
+
1
;
const
int
d_filter_width
=
dilation_w
*
(
filter_width
-
1
)
+
1
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
num_kernels
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
num_kernels
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
T
src_val
=
0
;
T
src_val
=
0
;
...
@@ -115,35 +147,41 @@ __global__ void col2vol(int num_kernels, const T* data_col, int depth,
...
@@ -115,35 +147,41 @@ __global__ void col2vol(int num_kernels, const T* data_col, int depth,
int
h
=
(
index
/
width
)
%
height
+
padding_height
;
int
h
=
(
index
/
width
)
%
height
+
padding_height
;
int
d
=
(
index
/
width
/
height
)
%
depth
+
padding_depth
;
int
d
=
(
index
/
width
/
height
)
%
depth
+
padding_depth
;
int
c
=
index
/
width
/
height
/
depth
;
int
c
=
index
/
width
/
height
/
depth
;
// compute the start and end of the output
// compute the start and end of the output
int
w_col_start
=
int
w_col_start
=
(
w
<
filter_width
)
?
0
:
(
w
-
filter_width
)
/
stride_width
+
1
;
(
w
<
d_filter_width
)
?
0
:
(
w
-
d_
filter_width
)
/
stride_width
+
1
;
int
w_col_end
=
min
(
w
/
stride_width
+
1
,
output_width
);
int
w_col_end
=
min
(
w
/
stride_width
+
1
,
output_width
);
int
h_col_start
=
int
h_col_start
=
(
h
<
filter_height
)
?
0
:
(
h
-
filter_height
)
/
stride_height
+
1
;
(
h
<
d_filter_height
)
?
0
:
(
h
-
d_
filter_height
)
/
stride_height
+
1
;
int
h_col_end
=
min
(
h
/
stride_height
+
1
,
output_height
);
int
h_col_end
=
min
(
h
/
stride_height
+
1
,
output_height
);
int
d_col_start
=
int
d_col_start
=
(
d
<
filter_depth
)
?
0
:
(
d
-
filter_depth
)
/
stride_depth
+
1
;
(
d
<
d_filter_depth
)
?
0
:
(
d
-
d_
filter_depth
)
/
stride_depth
+
1
;
int
d_col_end
=
min
(
d
/
stride_depth
+
1
,
output_detph
);
int
d_col_end
=
min
(
d
/
stride_depth
+
1
,
output_detph
);
int
offset
=
(
c
*
filter_depth
*
filter_height
*
filter_width
+
d
*
filter_width
*
filter_height
+
h
*
filter_width
+
w
)
*
output_detph
*
output_height
*
output_width
;
int
coeff_d_col
=
(
1
-
stride_depth
*
filter_width
*
filter_height
*
output_detph
)
*
output_height
*
output_width
;
int
coeff_h_col
=
(
1
-
stride_height
*
filter_width
*
output_detph
*
output_height
)
*
output_width
;
int
coeff_w_col
=
(
1
-
stride_width
*
output_detph
*
output_height
*
output_width
);
for
(
int
d_col
=
d_col_start
;
d_col
<
d_col_end
;
++
d_col
)
{
for
(
int
d_col
=
d_col_start
;
d_col
<
d_col_end
;
++
d_col
)
{
for
(
int
h_col
=
h_col_start
;
h_col
<
h_col_end
;
++
h_col
)
{
for
(
int
h_col
=
h_col_start
;
h_col
<
h_col_end
;
++
h_col
)
{
for
(
int
w_col
=
w_col_start
;
w_col
<
w_col_end
;
++
w_col
)
{
for
(
int
w_col
=
w_col_start
;
w_col
<
w_col_end
;
++
w_col
)
{
src_val
+=
data_col
[
offset
+
d_col
*
coeff_d_col
+
int
d_off
=
(
d
-
d_col
*
stride_depth
);
h_col
*
coeff_h_col
+
w_col
*
coeff_w_col
];
int
h_off
=
(
h
-
h_col
*
stride_height
);
int
w_off
=
(
w
-
w_col
*
stride_width
);
if
(
d_off
%
dilation_d
==
0
&&
h_off
%
dilation_h
==
0
&&
w_off
%
dilation_w
==
0
)
{
d_off
/=
dilation_d
;
h_off
/=
dilation_h
;
w_off
/=
dilation_w
;
int
data_col_index
=
(((((
c
*
filter_depth
+
d_off
)
*
filter_height
+
h_off
)
*
filter_width
+
w_off
)));
data_col_index
=
((
data_col_index
*
output_detph
+
d_col
)
*
output_height
+
h_col
)
*
output_width
+
w_col
;
src_val
+=
data_col
[
data_col_index
];
}
}
}
}
}
}
}
...
@@ -161,17 +199,18 @@ template <class T>
...
@@ -161,17 +199,18 @@ template <class T>
class
Col2VolFunctor
<
platform
::
GPUPlace
,
T
>
{
class
Col2VolFunctor
<
platform
::
GPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
vol
,
const
framework
::
Tensor
&
col
,
const
framework
::
Tensor
&
col
,
int
stride_depth
,
int
stride_height
,
int
stride_width
,
const
std
::
vector
<
int
>&
dilations
,
int
padding_depth
,
int
padding_height
,
const
std
::
vector
<
int
>&
strides
,
int
padding_width
)
const
{
const
std
::
vector
<
int
>&
paddings
,
PADDLE_ENFORCE
(
vol
.
dims
().
size
()
==
4
);
framework
::
Tensor
*
vol
)
const
{
PADDLE_ENFORCE
(
vol
->
dims
().
size
()
==
4
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
7
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
7
);
int
input_channels
=
vol
.
dims
()[
0
];
int
input_channels
=
vol
->
dims
()[
0
];
int
input_depth
=
vol
.
dims
()[
1
];
int
input_depth
=
vol
->
dims
()[
1
];
int
input_height
=
vol
.
dims
()[
2
];
int
input_height
=
vol
->
dims
()[
2
];
int
input_width
=
vol
.
dims
()[
3
];
int
input_width
=
vol
->
dims
()[
3
];
int
filter_depth
=
col
.
dims
()[
1
];
int
filter_depth
=
col
.
dims
()[
1
];
int
filter_height
=
col
.
dims
()[
2
];
int
filter_height
=
col
.
dims
()[
2
];
int
filter_width
=
col
.
dims
()[
3
];
int
filter_width
=
col
.
dims
()[
3
];
...
@@ -179,6 +218,28 @@ class Col2VolFunctor<platform::GPUPlace, T> {
...
@@ -179,6 +218,28 @@ class Col2VolFunctor<platform::GPUPlace, T> {
int
output_height
=
col
.
dims
()[
5
];
int
output_height
=
col
.
dims
()[
5
];
int
output_width
=
col
.
dims
()[
6
];
int
output_width
=
col
.
dims
()[
6
];
PADDLE_ENFORCE_EQ
((
input_depth
+
2
*
paddings
[
0
]
-
((
dilations
[
0
]
*
(
filter_depth
-
1
)
+
1
)))
/
strides
[
0
]
+
1
,
output_depth
,
"input_depth and output_depth are "
"Mismatching."
);
PADDLE_ENFORCE_EQ
((
input_height
+
2
*
paddings
[
1
]
-
((
dilations
[
1
]
*
(
filter_height
-
1
)
+
1
)))
/
strides
[
1
]
+
1
,
output_height
,
"input_height and output_height are "
"Mismatching."
);
PADDLE_ENFORCE_EQ
((
input_width
+
2
*
paddings
[
2
]
-
((
dilations
[
2
]
*
(
filter_width
-
1
)
+
1
)))
/
strides
[
2
]
+
1
,
output_width
,
"input_width and output_width are "
"Mismatching."
);
int
num_kernels
=
input_channels
*
input_depth
*
input_height
*
input_width
;
int
num_kernels
=
input_channels
*
input_depth
*
input_height
*
input_width
;
const
int
threads
=
1024
;
const
int
threads
=
1024
;
...
@@ -188,9 +249,10 @@ class Col2VolFunctor<platform::GPUPlace, T> {
...
@@ -188,9 +249,10 @@ class Col2VolFunctor<platform::GPUPlace, T> {
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
.
stream
()
>>>
(
num_kernels
,
col
.
data
<
T
>
(),
input_depth
,
input_height
,
input_width
,
num_kernels
,
col
.
data
<
T
>
(),
input_depth
,
input_height
,
input_width
,
filter_depth
,
filter_height
,
filter_width
,
stride_depth
,
stride_height
,
dilations
[
0
],
dilations
[
1
],
dilations
[
2
],
filter_depth
,
filter_height
,
stride_width
,
padding_depth
,
padding_height
,
padding_width
,
filter_width
,
strides
[
0
],
strides
[
1
],
strides
[
2
],
paddings
[
0
],
output_depth
,
output_height
,
output_width
,
vol
.
data
<
T
>
());
paddings
[
1
],
paddings
[
2
],
output_depth
,
output_height
,
output_width
,
vol
->
data
<
T
>
());
}
}
};
};
...
...
paddle/operators/math/vol2col.h
浏览文件 @
4fc9f55e
...
@@ -31,6 +31,15 @@ namespace math {
...
@@ -31,6 +31,15 @@ namespace math {
* \param colData Column data.
* \param colData Column data.
* \param colShape The shape of colData.
* \param colShape The shape of colData.
*
*
* \param dilations dilation data.
* \param 3-dimension [dilation_depth, dilation_height, dilation_width].
*
* \param strides stride data.
* \param 3-dimension [stride_depth, stride_height, stride_width].
*
* \param paddings padding data.
* \param 3-dimension [d_pad, h_pad, w_pad].
*
* The shape of colData is:
* The shape of colData is:
* [input_channels, filter_depth, filter_height, filter_width, output_depth,
* [input_channels, filter_depth, filter_height, filter_width, output_depth,
* output_height, output_width]
* output_height, output_width]
...
@@ -57,20 +66,22 @@ template <typename Place, typename T>
...
@@ -57,20 +66,22 @@ template <typename Place, typename T>
class
Vol2ColFunctor
{
class
Vol2ColFunctor
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
vol
,
framework
::
Tensor
&
col
,
const
framework
::
Tensor
&
vol
,
int
stride_depth
,
int
stride_height
,
int
stride_width
,
const
std
::
vector
<
int
>&
dilations
,
int
padding_depth
,
int
padding_height
,
const
std
::
vector
<
int
>&
strides
,
int
padding_width
)
const
;
const
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
col
)
const
;
};
};
template
<
typename
Place
,
typename
T
>
template
<
typename
Place
,
typename
T
>
class
Col2VolFunctor
{
class
Col2VolFunctor
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
vol
,
const
framework
::
Tensor
&
col
,
const
framework
::
Tensor
&
col
,
int
stride_depth
,
int
stride_height
,
int
stride_width
,
const
std
::
vector
<
int
>&
dilations
,
int
padding_depth
,
int
padding_height
,
const
std
::
vector
<
int
>&
strides
,
int
padding_width
)
const
;
const
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
vol
)
const
;
};
};
}
// namespace math
}
// namespace math
...
...
paddle/operators/math/vol2col_test.cc
浏览文件 @
4fc9f55e
...
@@ -62,11 +62,15 @@ void testVol2col() {
...
@@ -62,11 +62,15 @@ void testVol2col() {
int
input_height
=
2
;
int
input_height
=
2
;
int
input_width
=
3
;
int
input_width
=
3
;
int
filter_size
=
2
;
int
filter_size
=
2
;
int
stride
=
1
;
std
::
vector
<
int
>
strides
({
1
,
1
,
1
});
int
padding
=
0
;
std
::
vector
<
int
>
paddings
({
0
,
0
,
0
});
int
output_depth
=
(
input_depth
-
filter_size
+
2
*
padding
)
/
stride
+
1
;
std
::
vector
<
int
>
dilations
({
1
,
1
,
1
});
int
output_height
=
(
input_height
-
filter_size
+
2
*
padding
)
/
stride
+
1
;
int
output_depth
=
int
output_width
=
(
input_width
-
filter_size
+
2
*
padding
)
/
stride
+
1
;
(
input_depth
-
filter_size
+
2
*
paddings
[
0
])
/
strides
[
0
]
+
1
;
int
output_height
=
(
input_height
-
filter_size
+
2
*
paddings
[
1
])
/
strides
[
1
]
+
1
;
int
output_width
=
(
input_width
-
filter_size
+
2
*
paddings
[
2
])
/
strides
[
2
]
+
1
;
// Vol2Col test
// Vol2Col test
float
*
input_ptr
=
float
*
input_ptr
=
...
@@ -85,8 +89,7 @@ void testVol2col() {
...
@@ -85,8 +89,7 @@ void testVol2col() {
*
place
);
*
place
);
paddle
::
operators
::
math
::
Vol2ColFunctor
<
Place
,
float
>
vol2col
;
paddle
::
operators
::
math
::
Vol2ColFunctor
<
Place
,
float
>
vol2col
;
vol2col
(
*
context
,
input
,
output
,
stride
,
stride
,
stride
,
padding
,
padding
,
vol2col
(
*
context
,
input
,
dilations
,
strides
,
paddings
,
&
output
);
padding
);
float
vol_2_col
[]
=
{
0
,
1
,
1
,
2
,
3
,
4
,
4
,
5
,
6
,
7
,
7
,
8
,
9
,
10
,
10
,
11
};
float
vol_2_col
[]
=
{
0
,
1
,
1
,
2
,
3
,
4
,
4
,
5
,
6
,
7
,
7
,
8
,
9
,
10
,
10
,
11
};
float
*
out_cfo_ptr
;
float
*
out_cfo_ptr
;
...
@@ -111,8 +114,7 @@ void testVol2col() {
...
@@ -111,8 +114,7 @@ void testVol2col() {
}
}
paddle
::
operators
::
math
::
Col2VolFunctor
<
Place
,
float
>
col2vol
;
paddle
::
operators
::
math
::
Col2VolFunctor
<
Place
,
float
>
col2vol
;
col2vol
(
*
context
,
input
,
output
,
stride
,
stride
,
stride
,
padding
,
padding
,
col2vol
(
*
context
,
output
,
dilations
,
strides
,
paddings
,
&
input
);
padding
);
float
*
in_ptr
;
float
*
in_ptr
;
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
...
...
paddle/operators/sequence_conv_op.h
浏览文件 @
4fc9f55e
...
@@ -62,9 +62,9 @@ class SequenceConvKernel : public framework::OpKernel<T> {
...
@@ -62,9 +62,9 @@ class SequenceConvKernel : public framework::OpKernel<T> {
math
::
ContextProjectFunctor
<
Place
,
T
>
seq_project_functor
;
math
::
ContextProjectFunctor
<
Place
,
T
>
seq_project_functor
;
seq_project_functor
(
context
.
device_context
(),
*
in
,
*
padding_data
,
col
,
seq_project_functor
(
context
.
device_context
(),
*
in
,
*
padding_data
,
padding_trainable
,
context_start
,
context_length
,
padding_trainable
,
context_start
,
context_length
,
context_stride
,
up_pad
,
down_pad
);
context_stride
,
up_pad
,
down_pad
,
&
col
);
math
::
matmul
<
Place
,
T
>
(
context
.
device_context
(),
col
,
false
,
filter
,
false
,
math
::
matmul
<
Place
,
T
>
(
context
.
device_context
(),
col
,
false
,
filter
,
false
,
static_cast
<
T
>
(
1.0
),
out
,
static_cast
<
T
>
(
0.0
));
static_cast
<
T
>
(
1.0
),
out
,
static_cast
<
T
>
(
0.0
));
...
@@ -117,10 +117,10 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
...
@@ -117,10 +117,10 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
in_g
->
set_lod
(
in
->
lod
());
in_g
->
set_lod
(
in
->
lod
());
set_zero
(
context
.
device_context
(),
in_g
,
static_cast
<
T
>
(
0
));
set_zero
(
context
.
device_context
(),
in_g
,
static_cast
<
T
>
(
0
));
seq_project_grad_functor
(
context
.
device_context
(),
*
in_g
,
*
padding_data_g
,
seq_project_grad_functor
(
context
.
device_context
(),
*
in_g
,
col
,
padding_trainable
,
context_start
,
padding_trainable
,
context_start
,
context_length
,
context_
length
,
context_stride
,
up_pad
,
down_pad
,
context_
stride
,
up_pad
,
down_pad
,
false
,
true
,
true
,
false
);
padding_data_g
,
&
col
);
}
}
if
(
padding_trainable
&&
padding_data_g
)
{
if
(
padding_trainable
&&
padding_data_g
)
{
...
@@ -129,9 +129,9 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
...
@@ -129,9 +129,9 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
LoDTensor
*
input
=
const_cast
<
LoDTensor
*>
(
in
);
LoDTensor
*
input
=
const_cast
<
LoDTensor
*>
(
in
);
seq_project_grad_functor
(
context
.
device_context
(),
*
input
,
seq_project_grad_functor
(
context
.
device_context
(),
*
input
,
*
padding_data_g
,
col
,
padding_trainable
,
padding_trainable
,
context_start
,
context_length
,
context_st
art
,
context_length
,
context_strid
e
,
context_st
ride
,
up_pad
,
down_pad
,
true
,
fals
e
,
up_pad
,
down_pad
,
false
,
true
);
padding_data_g
,
&
col
);
}
}
if
(
filter_g
)
{
if
(
filter_g
)
{
...
@@ -146,9 +146,9 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
...
@@ -146,9 +146,9 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
padding_data
=
context
.
Input
<
Tensor
>
(
"PaddingData"
);
padding_data
=
context
.
Input
<
Tensor
>
(
"PaddingData"
);
}
}
seq_project_functor
(
context
.
device_context
(),
*
in
,
*
padding_data
,
col
,
seq_project_functor
(
context
.
device_context
(),
*
in
,
*
padding_data
,
padding_trainable
,
context_start
,
context_length
,
padding_trainable
,
context_start
,
context_length
,
context_stride
,
up_pad
,
down_pad
);
context_stride
,
up_pad
,
down_pad
,
&
col
);
math
::
matmul
<
Place
,
T
>
(
context
.
device_context
(),
col
,
true
,
out_grad
,
math
::
matmul
<
Place
,
T
>
(
context
.
device_context
(),
col
,
true
,
out_grad
,
false
,
T
(
1.0
),
&
filter_grad
,
T
(
1.0
));
false
,
T
(
1.0
),
&
filter_grad
,
T
(
1.0
));
...
...
python/paddle/v2/fluid/tests/test_conv2d_op.py
浏览文件 @
4fc9f55e
...
@@ -10,23 +10,33 @@ def conv2d_forward_naive(input, filter, group, conv_param):
...
@@ -10,23 +10,33 @@ def conv2d_forward_naive(input, filter, group, conv_param):
assert
np
.
mod
(
out_c
,
group
)
==
0
assert
np
.
mod
(
out_c
,
group
)
==
0
sub_out_c
=
out_c
/
group
sub_out_c
=
out_c
/
group
stride
,
pad
=
conv_param
[
'stride'
],
conv_param
[
'pad'
]
stride
,
pad
,
dilation
=
conv_param
[
'stride'
],
conv_param
[
'pad'
],
conv_param
[
out_h
=
1
+
(
in_h
+
2
*
pad
[
0
]
-
f_h
)
/
stride
[
0
]
'dilation'
]
out_w
=
1
+
(
in_w
+
2
*
pad
[
1
]
-
f_w
)
/
stride
[
1
]
out_h
=
1
+
(
in_h
+
2
*
pad
[
0
]
-
(
dilation
[
0
]
*
(
f_h
-
1
)
+
1
))
/
stride
[
0
]
out_w
=
1
+
(
in_w
+
2
*
pad
[
1
]
-
(
dilation
[
1
]
*
(
f_w
-
1
)
+
1
))
/
stride
[
1
]
out
=
np
.
zeros
((
in_n
,
out_c
,
out_h
,
out_w
))
out
=
np
.
zeros
((
in_n
,
out_c
,
out_h
,
out_w
))
d_bolck_w
=
(
dilation
[
0
]
*
(
f_h
-
1
)
+
1
)
d_bolck_h
=
(
dilation
[
1
]
*
(
f_w
-
1
)
+
1
)
input_pad
=
np
.
pad
(
input
,
((
0
,
),
(
0
,
),
(
pad
[
0
],
),
(
pad
[
1
],
)),
input_pad
=
np
.
pad
(
input
,
((
0
,
),
(
0
,
),
(
pad
[
0
],
),
(
pad
[
1
],
)),
mode
=
'constant'
,
mode
=
'constant'
,
constant_values
=
0
)
constant_values
=
0
)
filter_dilation
=
np
.
zeros
((
out_c
,
f_c
,
d_bolck_h
,
d_bolck_w
))
filter_dilation
[:,
:,
0
:
d_bolck_h
:
dilation
[
0
],
0
:
d_bolck_w
:
dilation
[
1
]]
=
filter
for
i
in
range
(
out_h
):
for
i
in
range
(
out_h
):
for
j
in
range
(
out_w
):
for
j
in
range
(
out_w
):
for
g
in
range
(
group
):
for
g
in
range
(
group
):
input_pad_masked
=
\
input_pad_masked
=
\
input_pad
[:,
g
*
f_c
:(
g
+
1
)
*
f_c
,
input_pad
[:,
g
*
f_c
:(
g
+
1
)
*
f_c
,
i
*
stride
[
0
]:
i
*
stride
[
0
]
+
f
_h
,
i
*
stride
[
0
]:
i
*
stride
[
0
]
+
d_bolck
_h
,
j
*
stride
[
1
]:
j
*
stride
[
1
]
+
f
_w
]
j
*
stride
[
1
]:
j
*
stride
[
1
]
+
d_bolck
_w
]
f_sub
=
filter
[
g
*
sub_out_c
:(
g
+
1
)
*
sub_out_c
,
:,
:,
:]
f_sub
=
filter_dilation
[
g
*
sub_out_c
:(
g
+
1
)
*
sub_out_c
,
:,
:,
:]
for
k
in
range
(
sub_out_c
):
for
k
in
range
(
sub_out_c
):
out
[:,
g
*
sub_out_c
+
k
,
i
,
j
]
=
\
out
[:,
g
*
sub_out_c
+
k
,
i
,
j
]
=
\
np
.
sum
(
input_pad_masked
*
f_sub
[
k
,
:,
:,
:],
np
.
sum
(
input_pad_masked
*
f_sub
[
k
,
:,
:,
:],
...
@@ -39,9 +49,14 @@ class TestConv2dOp(OpTest):
...
@@ -39,9 +49,14 @@ class TestConv2dOp(OpTest):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
init_op_type
()
self
.
init_op_type
()
self
.
init_group
()
self
.
init_group
()
self
.
init_dilation
()
self
.
init_test_case
()
self
.
init_test_case
()
conv2d_param
=
{
'stride'
:
self
.
stride
,
'pad'
:
self
.
pad
}
conv2d_param
=
{
'stride'
:
self
.
stride
,
'pad'
:
self
.
pad
,
'dilation'
:
self
.
dilations
}
input
=
np
.
random
.
random
(
self
.
input_size
).
astype
(
"float32"
)
input
=
np
.
random
.
random
(
self
.
input_size
).
astype
(
"float32"
)
filter
=
np
.
random
.
random
(
self
.
filter_size
).
astype
(
"float32"
)
filter
=
np
.
random
.
random
(
self
.
filter_size
).
astype
(
"float32"
)
output
=
conv2d_forward_naive
(
input
,
filter
,
self
.
groups
,
output
=
conv2d_forward_naive
(
input
,
filter
,
self
.
groups
,
...
@@ -80,12 +95,14 @@ class TestConv2dOp(OpTest):
...
@@ -80,12 +95,14 @@ class TestConv2dOp(OpTest):
def
init_test_case
(
self
):
def
init_test_case
(
self
):
self
.
pad
=
[
0
,
0
]
self
.
pad
=
[
0
,
0
]
self
.
stride
=
[
1
,
1
]
self
.
stride
=
[
1
,
1
]
self
.
dilations
=
[
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
]
# NCHW
self
.
input_size
=
[
2
,
3
,
5
,
5
]
# NCHW
assert
np
.
mod
(
self
.
input_size
[
1
],
self
.
groups
)
==
0
assert
np
.
mod
(
self
.
input_size
[
1
],
self
.
groups
)
==
0
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
3
,
3
]
self
.
filter_size
=
[
6
,
f_c
,
3
,
3
]
def
init_dilation
(
self
):
self
.
dilations
=
[
1
,
1
]
def
init_group
(
self
):
def
init_group
(
self
):
self
.
groups
=
1
self
.
groups
=
1
...
@@ -101,24 +118,66 @@ class TestWithGroup(TestConv2dOp):
...
@@ -101,24 +118,66 @@ class TestWithGroup(TestConv2dOp):
self
.
op_type
=
"conv2d"
self
.
op_type
=
"conv2d"
#----------------Conv2dCudnn----------------
class
TestWith1x1
(
TestConv2dOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
0
,
0
]
self
.
stride
=
[
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
]
# NCHW
assert
np
.
mod
(
self
.
input_size
[
1
],
self
.
groups
)
==
0
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
1
,
1
]
def
init_dilation
(
self
):
self
.
dilations
=
[
1
,
1
]
class
TestCudnn
(
TestConv2dOp
):
def
init_group
(
self
):
def
init_group
(
self
):
self
.
groups
=
1
self
.
groups
=
3
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv
_cudnn
"
self
.
op_type
=
"conv
2d
"
class
TestCudnnWithGroup
(
TestConv2dOp
):
class
TestWithDilation
(
TestConv2dOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
0
,
0
]
self
.
stride
=
[
1
,
1
]
self
.
input_size
=
[
2
,
3
,
10
,
10
]
# NCHW
assert
np
.
mod
(
self
.
input_size
[
1
],
self
.
groups
)
==
0
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
3
,
3
]
def
init_dilation
(
self
):
self
.
dilations
=
[
2
,
2
]
def
init_group
(
self
):
def
init_group
(
self
):
self
.
groups
=
3
self
.
groups
=
3
def
init_op_type
(
self
):
self
.
op_type
=
"conv2d"
#----------------Conv2dCudnn----------------
class
TestCudnn
(
TestConv2dOp
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv_cudnn"
class
TestCudnnWithGroup
(
TestWithGroup
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv_cudnn"
self
.
op_type
=
"conv_cudnn"
class
TestCudnnWith1x1
(
TestWith1x1
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv_cudnn"
# cudnn v5 does not support dilation conv.
# class TestCudnnWithDilation(TestWithDilation):
# def init_op_type(self):
# self.op_type = "conv_cudnn"
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/v2/fluid/tests/test_conv3d_op.py
浏览文件 @
4fc9f55e
...
@@ -10,27 +10,40 @@ def conv3d_forward_naive(input, filter, group, conv_param):
...
@@ -10,27 +10,40 @@ def conv3d_forward_naive(input, filter, group, conv_param):
assert
np
.
mod
(
out_c
,
group
)
==
0
assert
np
.
mod
(
out_c
,
group
)
==
0
sub_out_c
=
out_c
/
group
sub_out_c
=
out_c
/
group
stride
,
pad
=
conv_param
[
'stride'
],
conv_param
[
'pad'
]
stride
,
pad
,
dilation
=
conv_param
[
'stride'
],
conv_param
[
'pad'
],
conv_param
[
out_d
=
1
+
(
in_d
+
2
*
pad
[
0
]
-
f_h
)
/
stride
[
0
]
'dilations'
]
out_h
=
1
+
(
in_h
+
2
*
pad
[
1
]
-
f_h
)
/
stride
[
1
]
out_w
=
1
+
(
in_w
+
2
*
pad
[
2
]
-
f_w
)
/
stride
[
2
]
out_d
=
1
+
(
in_d
+
2
*
pad
[
0
]
-
(
dilation
[
0
]
*
(
f_d
-
1
)
+
1
))
/
stride
[
0
]
out_h
=
1
+
(
in_h
+
2
*
pad
[
1
]
-
(
dilation
[
1
]
*
(
f_h
-
1
)
+
1
))
/
stride
[
1
]
out_w
=
1
+
(
in_w
+
2
*
pad
[
2
]
-
(
dilation
[
2
]
*
(
f_w
-
1
)
+
1
))
/
stride
[
2
]
out
=
np
.
zeros
((
in_n
,
out_c
,
out_d
,
out_h
,
out_w
))
out
=
np
.
zeros
((
in_n
,
out_c
,
out_d
,
out_h
,
out_w
))
d_bolck_d
=
(
dilation
[
0
]
*
(
f_d
-
1
)
+
1
)
d_bolck_h
=
(
dilation
[
1
]
*
(
f_h
-
1
)
+
1
)
d_bolck_w
=
(
dilation
[
2
]
*
(
f_w
-
1
)
+
1
)
input_pad
=
np
.
pad
(
input
,
((
0
,
),
(
0
,
),
(
pad
[
0
],
),
(
pad
[
1
],
),
input_pad
=
np
.
pad
(
input
,
((
0
,
),
(
0
,
),
(
pad
[
0
],
),
(
pad
[
1
],
),
(
pad
[
2
],
)),
(
pad
[
2
],
)),
mode
=
'constant'
,
mode
=
'constant'
,
constant_values
=
0
)
constant_values
=
0
)
filter_dilation
=
np
.
zeros
((
out_c
,
f_c
,
d_bolck_d
,
d_bolck_h
,
d_bolck_w
))
filter_dilation
[:,
:,
0
:
d_bolck_d
:
dilation
[
0
],
0
:
d_bolck_h
:
dilation
[
1
],
0
:
d_bolck_w
:
dilation
[
2
]]
=
filter
for
d
in
range
(
out_d
):
for
d
in
range
(
out_d
):
for
i
in
range
(
out_h
):
for
i
in
range
(
out_h
):
for
j
in
range
(
out_w
):
for
j
in
range
(
out_w
):
for
g
in
range
(
group
):
for
g
in
range
(
group
):
input_pad_masked
=
\
input_pad_masked
=
\
input_pad
[:,
g
*
f_c
:(
g
+
1
)
*
f_c
,
input_pad
[:,
g
*
f_c
:(
g
+
1
)
*
f_c
,
d
*
stride
[
0
]:
d
*
stride
[
0
]
+
f_d
,
d
*
stride
[
0
]:
d
*
stride
[
0
]
+
d_bolck_d
,
i
*
stride
[
1
]:
i
*
stride
[
1
]
+
f_h
,
i
*
stride
[
1
]:
i
*
stride
[
1
]
+
d_bolck_h
,
j
*
stride
[
2
]:
j
*
stride
[
2
]
+
f_w
]
j
*
stride
[
2
]:
j
*
stride
[
2
]
+
d_bolck_w
]
f_sub
=
filter
[
g
*
sub_out_c
:(
g
+
1
)
*
sub_out_c
,
:,
:,
:,
:]
f_sub
=
filter_dilation
[
g
*
sub_out_c
:(
g
+
1
)
*
sub_out_c
,
:,
:,
:,
:]
for
k
in
range
(
sub_out_c
):
for
k
in
range
(
sub_out_c
):
out
[:,
g
*
sub_out_c
+
k
,
d
,
i
,
j
]
=
\
out
[:,
g
*
sub_out_c
+
k
,
d
,
i
,
j
]
=
\
np
.
sum
(
input_pad_masked
*
f_sub
[
k
,
:,
:,
:,
:],
np
.
sum
(
input_pad_masked
*
f_sub
[
k
,
:,
:,
:,
:],
...
@@ -43,9 +56,14 @@ class TestConv3dOp(OpTest):
...
@@ -43,9 +56,14 @@ class TestConv3dOp(OpTest):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
init_group
()
self
.
init_group
()
self
.
init_op_type
()
self
.
init_op_type
()
self
.
init_dilation
()
self
.
init_test_case
()
self
.
init_test_case
()
conv3d_param
=
{
'stride'
:
self
.
stride
,
'pad'
:
self
.
pad
}
conv3d_param
=
{
'stride'
:
self
.
stride
,
'pad'
:
self
.
pad
,
'dilations'
:
self
.
dilations
}
input
=
np
.
random
.
random
(
self
.
input_size
).
astype
(
"float32"
)
input
=
np
.
random
.
random
(
self
.
input_size
).
astype
(
"float32"
)
filter
=
np
.
random
.
random
(
self
.
filter_size
).
astype
(
"float32"
)
filter
=
np
.
random
.
random
(
self
.
filter_size
).
astype
(
"float32"
)
output
=
conv3d_forward_naive
(
input
,
filter
,
self
.
groups
,
output
=
conv3d_forward_naive
(
input
,
filter
,
self
.
groups
,
...
@@ -55,7 +73,8 @@ class TestConv3dOp(OpTest):
...
@@ -55,7 +73,8 @@ class TestConv3dOp(OpTest):
self
.
attrs
=
{
self
.
attrs
=
{
'strides'
:
self
.
stride
,
'strides'
:
self
.
stride
,
'paddings'
:
self
.
pad
,
'paddings'
:
self
.
pad
,
'groups'
:
self
.
groups
'groups'
:
self
.
groups
,
'dilations'
:
self
.
dilations
}
}
self
.
outputs
=
{
'Output'
:
output
}
self
.
outputs
=
{
'Output'
:
output
}
...
@@ -88,6 +107,9 @@ class TestConv3dOp(OpTest):
...
@@ -88,6 +107,9 @@ class TestConv3dOp(OpTest):
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
3
,
3
,
3
]
self
.
filter_size
=
[
6
,
f_c
,
3
,
3
,
3
]
def
init_dilation
(
self
):
self
.
dilations
=
[
1
,
1
,
1
]
def
init_group
(
self
):
def
init_group
(
self
):
self
.
groups
=
1
self
.
groups
=
1
...
@@ -104,27 +126,47 @@ class TestCase1(TestConv3dOp):
...
@@ -104,27 +126,47 @@ class TestCase1(TestConv3dOp):
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
3
,
3
,
3
]
self
.
filter_size
=
[
6
,
f_c
,
3
,
3
,
3
]
def
init_group
(
self
):
self
.
groups
=
1
def
init_op_type
(
self
):
class
TestWithGroup1
(
TestConv3dOp
):
self
.
op_type
=
"conv3d"
def
init_group
(
self
):
self
.
groups
=
3
class
TestWithGroup
1
(
TestConv3dOp
):
class
TestWithGroup
2
(
TestCase1
):
def
init_group
(
self
):
def
init_group
(
self
):
self
.
groups
=
3
self
.
groups
=
3
def
init_op_type
(
self
):
self
.
op_type
=
"conv3d"
class
TestWith1x1
(
TestConv3dOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
0
,
0
,
0
]
self
.
stride
=
[
1
,
1
,
1
]
self
.
input_size
=
[
2
,
3
,
4
,
4
,
4
]
# NCHW
assert
np
.
mod
(
self
.
input_size
[
1
],
self
.
groups
)
==
0
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
1
,
1
,
1
]
def
init_dilation
(
self
):
self
.
dilations
=
[
1
,
1
,
1
]
class
TestWithGroup2
(
TestCase1
):
def
init_group
(
self
):
def
init_group
(
self
):
self
.
groups
=
3
self
.
groups
=
3
def
init_op_type
(
self
):
self
.
op_type
=
"conv3d"
class
TestWithDilation
(
TestConv3dOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
0
,
0
,
0
]
self
.
stride
=
[
1
,
1
,
1
]
self
.
input_size
=
[
2
,
3
,
6
,
6
,
6
]
# NCDHW
assert
np
.
mod
(
self
.
input_size
[
1
],
self
.
groups
)
==
0
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
2
,
2
,
2
]
def
init_dilation
(
self
):
self
.
dilations
=
[
2
,
2
,
2
]
def
init_group
(
self
):
self
.
groups
=
3
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录