Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
1a615b48
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
1a615b48
编写于
9月 04, 2017
作者:
H
hedaoyuan
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Gpu test of im2col.
上级
3f555001
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
65 addition
and
31 deletion
+65
-31
paddle/operators/math/im2col.cu
paddle/operators/math/im2col.cu
+12
-8
paddle/operators/math/im2col_test.cc
paddle/operators/math/im2col_test.cc
+53
-23
未找到文件。
paddle/operators/math/im2col.cu
浏览文件 @
1a615b48
...
@@ -83,8 +83,9 @@ class Im2ColFunctor<kCFO, platform::GPUPlace, T> {
...
@@ -83,8 +83,9 @@ class Im2ColFunctor<kCFO, platform::GPUPlace, T> {
int
block_y
=
(
blocks
+
512
-
1
)
/
512
;
int
block_y
=
(
blocks
+
512
-
1
)
/
512
;
dim3
threads
(
1024
,
1
);
dim3
threads
(
1024
,
1
);
dim3
grid
(
block_x
,
block_y
);
dim3
grid
(
block_x
,
block_y
);
// TODO(hedaoyuan): launch kernel on specified stream
im2col
<
T
><<<
im2col
<
T
><<<
grid
,
threads
>>>
(
grid
,
threads
,
0
,
reinterpret_cast
<
platform
::
CUDADeviceContext
*>
(
context
)
->
stream
()
>>>
(
im
.
data
<
T
>
(),
num_outputs
,
input_height
,
input_width
,
filter_height
,
im
.
data
<
T
>
(),
num_outputs
,
input_height
,
input_width
,
filter_height
,
filter_width
,
stride_height
,
stride_width
,
padding_height
,
filter_width
,
stride_height
,
stride_width
,
padding_height
,
padding_width
,
output_height
,
output_width
,
col
.
data
<
T
>
());
padding_width
,
output_height
,
output_width
,
col
.
data
<
T
>
());
...
@@ -171,8 +172,9 @@ class Col2ImFunctor<kCFO, platform::GPUPlace, T> {
...
@@ -171,8 +172,9 @@ class Col2ImFunctor<kCFO, platform::GPUPlace, T> {
// To avoid involving atomic operations, we will launch one kernel per
// To avoid involving atomic operations, we will launch one kernel per
// bottom dimension, and then in the kernel add up the top dimensions.
// bottom dimension, and then in the kernel add up the top dimensions.
// TODO(hedaoyuan): launch kernel on specified stream
col2im
<
T
><<<
col2im
<
T
><<<
grid
,
threads
>>>
(
grid
,
threads
,
0
,
reinterpret_cast
<
platform
::
CUDADeviceContext
*>
(
context
)
->
stream
()
>>>
(
num_kernels
,
col
.
data
<
T
>
(),
input_height
+
2
*
padding_height
,
num_kernels
,
col
.
data
<
T
>
(),
input_height
+
2
*
padding_height
,
input_width
+
2
*
padding_width
,
input_channels
,
filter_height
,
input_width
+
2
*
padding_width
,
input_channels
,
filter_height
,
filter_width
,
stride_height
,
stride_width
,
padding_height
,
filter_width
,
stride_height
,
stride_width
,
padding_height
,
...
@@ -259,8 +261,9 @@ class Im2ColFunctor<kOCF, platform::GPUPlace, T> {
...
@@ -259,8 +261,9 @@ class Im2ColFunctor<kOCF, platform::GPUPlace, T> {
dim3
threads
(
block_dim_x
,
block_dim_y
,
dim3
threads
(
block_dim_x
,
block_dim_y
,
std
::
min
(
block_dim_z
,
input_channels
));
std
::
min
(
block_dim_z
,
input_channels
));
dim3
grid
(
output_width
,
output_height
);
dim3
grid
(
output_width
,
output_height
);
// TODO(hedaoyuan): launch kernel on specified stream
im2colOCF
<
T
><<<
im2colOCF
<
T
><<<
grid
,
threads
>>>
(
grid
,
threads
,
0
,
reinterpret_cast
<
platform
::
CUDADeviceContext
*>
(
context
)
->
stream
()
>>>
(
im
.
data
<
T
>
(),
col
.
data
<
T
>
(),
input_channels
,
input_height
,
input_width
,
im
.
data
<
T
>
(),
col
.
data
<
T
>
(),
input_channels
,
input_height
,
input_width
,
filter_height
,
filter_width
,
stride_height
,
stride_width
,
filter_height
,
filter_width
,
stride_height
,
stride_width
,
padding_height
,
padding_width
,
output_height
,
output_width
);
padding_height
,
padding_width
,
output_height
,
output_width
);
...
@@ -340,8 +343,9 @@ class Col2ImFunctor<kOCF, platform::GPUPlace, T> {
...
@@ -340,8 +343,9 @@ class Col2ImFunctor<kOCF, platform::GPUPlace, T> {
dim3
threads
(
block_dim_x
,
block_dim_y
,
dim3
threads
(
block_dim_x
,
block_dim_y
,
std
::
min
(
block_dim_z
,
input_channels
));
std
::
min
(
block_dim_z
,
input_channels
));
dim3
grid
(
output_width
,
output_height
);
dim3
grid
(
output_width
,
output_height
);
// TODO(hedaoyuan): launch kernel on specified stream
col2imOCF
<
T
><<<
col2imOCF
<
T
><<<
grid
,
threads
>>>
(
grid
,
threads
,
0
,
reinterpret_cast
<
platform
::
CUDADeviceContext
*>
(
context
)
->
stream
()
>>>
(
im
.
data
<
T
>
(),
col
.
data
<
T
>
(),
input_channels
,
input_height
,
input_width
,
im
.
data
<
T
>
(),
col
.
data
<
T
>
(),
input_channels
,
input_height
,
input_width
,
filter_height
,
filter_width
,
stride_height
,
stride_width
,
filter_height
,
filter_width
,
stride_height
,
stride_width
,
padding_height
,
padding_width
,
output_height
,
output_width
);
padding_height
,
padding_width
,
output_height
,
output_width
);
...
...
paddle/operators/math/im2col_test.cc
浏览文件 @
1a615b48
...
@@ -16,19 +16,13 @@ limitations under the License. */
...
@@ -16,19 +16,13 @@ limitations under the License. */
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include <iostream>
#include <iostream>
TEST
(
math
,
im2col
)
{
template
<
typename
Place
>
void
testIm2col
()
{
paddle
::
framework
::
Tensor
input_tmp
;
paddle
::
framework
::
Tensor
input
;
paddle
::
framework
::
Tensor
input
;
paddle
::
framework
::
Tensor
output_cfo
;
paddle
::
framework
::
Tensor
output_cfo
;
paddle
::
framework
::
Tensor
output_ocf
;
paddle
::
framework
::
Tensor
output_ocf
;
paddle
::
framework
::
Tensor
input_check
;
paddle
::
framework
::
Tensor
output_tmp
;
int
input_height
=
2
;
int
input_width
=
3
;
int
filter_size
=
2
;
int
stride
=
1
;
int
padding
=
0
;
int
output_height
=
(
input_height
-
filter_size
+
2
*
padding
)
/
stride
+
1
;
int
output_width
=
(
input_width
-
filter_size
+
2
*
padding
)
/
stride
+
1
;
/**
/**
* input = [0, 1, 2,
* input = [0, 1, 2,
...
@@ -42,31 +36,54 @@ TEST(math, im2col) {
...
@@ -42,31 +36,54 @@ TEST(math, im2col) {
* output_ocf = [0, 1, 3, 4
* output_ocf = [0, 1, 3, 4
* 1, 2, 4, 5]
* 1, 2, 4, 5]
*/
*/
auto
*
cpu_place
=
new
paddle
::
platform
::
CPUPlace
();
int
input_height
=
2
;
float
*
input_ptr
=
int
input_width
=
3
;
input
.
mutable_data
<
float
>
({
1
,
input_height
,
input_width
},
*
cpu_place
);
int
filter_size
=
2
;
int
stride
=
1
;
int
padding
=
0
;
int
output_height
=
(
input_height
-
filter_size
+
2
*
padding
)
/
stride
+
1
;
int
output_width
=
(
input_width
-
filter_size
+
2
*
padding
)
/
stride
+
1
;
float
*
input_ptr
=
input_tmp
.
mutable_data
<
float
>
(
{
1
,
input_height
,
input_width
},
paddle
::
platform
::
CPUPlace
());
float
arr
[
6
]
=
{
0
,
1
,
2
,
3
,
4
,
5
};
float
arr
[
6
]
=
{
0
,
1
,
2
,
3
,
4
,
5
};
memcpy
(
input_ptr
,
arr
,
6
*
sizeof
(
float
));
memcpy
(
input_ptr
,
arr
,
6
*
sizeof
(
float
));
auto
*
place
=
new
Place
();
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
input
=
input_tmp
;
}
else
{
input
.
CopyFrom
<
float
>
(
input_tmp
,
*
place
);
}
output_cfo
.
mutable_data
<
float
>
(
output_cfo
.
mutable_data
<
float
>
(
{
1
,
filter_size
,
filter_size
,
output_height
,
output_width
},
*
cpu_
place
);
{
1
,
filter_size
,
filter_size
,
output_height
,
output_width
},
*
place
);
output_ocf
.
mutable_data
<
float
>
(
output_ocf
.
mutable_data
<
float
>
(
{
output_height
,
output_width
,
1
,
filter_size
,
filter_size
},
*
cpu_
place
);
{
output_height
,
output_width
,
1
,
filter_size
,
filter_size
},
*
place
);
paddle
::
operators
::
math
::
Im2ColFunctor
<
paddle
::
operators
::
math
::
Im2ColFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kCFO
,
paddle
::
platform
::
CPUPlace
,
paddle
::
operators
::
math
::
ColFormat
::
kCFO
,
Place
,
float
>
float
>
im2col
;
im2col
;
paddle
::
operators
::
math
::
Im2ColFunctor
<
paddle
::
operators
::
math
::
Im2ColFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
paddle
::
platform
::
CPUPlace
,
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
Place
,
float
>
float
>
im2col_ocf
;
im2col_ocf
;
paddle
::
platform
::
DeviceContext
*
context
=
paddle
::
platform
::
DeviceContext
*
context
;
new
paddle
::
platform
::
CPUDeviceContext
(
*
cpu_place
);
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
context
=
new
paddle
::
platform
::
CPUDeviceContext
(
paddle
::
platform
::
CPUPlace
());
}
else
{
context
=
new
paddle
::
platform
::
CUDADeviceContext
(
paddle
::
platform
::
GPUPlace
());
}
im2col
(
input
,
output_cfo
,
stride
,
stride
,
padding
,
padding
,
context
);
im2col
(
input
,
output_cfo
,
stride
,
stride
,
padding
,
padding
,
context
);
im2col_ocf
(
input
,
output_ocf
,
stride
,
stride
,
padding
,
padding
,
context
);
im2col_ocf
(
input
,
output_ocf
,
stride
,
stride
,
padding
,
padding
,
context
);
float
*
out_cfo_ptr
=
output_cfo
.
data
<
float
>
();
float
*
out_cfo_ptr
;
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
out_cfo_ptr
=
output_cfo
.
data
<
float
>
();
}
else
{
output_tmp
.
CopyFrom
<
float
>
(
output_cfo
,
paddle
::
platform
::
CPUPlace
());
out_cfo_ptr
=
output_tmp
.
data
<
float
>
();
}
EXPECT_EQ
(
out_cfo_ptr
[
0
],
0
);
EXPECT_EQ
(
out_cfo_ptr
[
0
],
0
);
EXPECT_EQ
(
out_cfo_ptr
[
1
],
1
);
EXPECT_EQ
(
out_cfo_ptr
[
1
],
1
);
EXPECT_EQ
(
out_cfo_ptr
[
2
],
1
);
EXPECT_EQ
(
out_cfo_ptr
[
2
],
1
);
...
@@ -76,7 +93,13 @@ TEST(math, im2col) {
...
@@ -76,7 +93,13 @@ TEST(math, im2col) {
EXPECT_EQ
(
out_cfo_ptr
[
6
],
4
);
EXPECT_EQ
(
out_cfo_ptr
[
6
],
4
);
EXPECT_EQ
(
out_cfo_ptr
[
7
],
5
);
EXPECT_EQ
(
out_cfo_ptr
[
7
],
5
);
float
*
out_ocf_ptr
=
output_ocf
.
data
<
float
>
();
float
*
out_ocf_ptr
;
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
out_ocf_ptr
=
output_ocf
.
data
<
float
>
();
}
else
{
output_tmp
.
CopyFrom
<
float
>
(
output_ocf
,
paddle
::
platform
::
CPUPlace
());
out_ocf_ptr
=
output_tmp
.
data
<
float
>
();
}
EXPECT_EQ
(
out_ocf_ptr
[
0
],
0
);
EXPECT_EQ
(
out_ocf_ptr
[
0
],
0
);
EXPECT_EQ
(
out_ocf_ptr
[
1
],
1
);
EXPECT_EQ
(
out_ocf_ptr
[
1
],
1
);
EXPECT_EQ
(
out_ocf_ptr
[
2
],
3
);
EXPECT_EQ
(
out_ocf_ptr
[
2
],
3
);
...
@@ -86,3 +109,10 @@ TEST(math, im2col) {
...
@@ -86,3 +109,10 @@ TEST(math, im2col) {
EXPECT_EQ
(
out_ocf_ptr
[
6
],
4
);
EXPECT_EQ
(
out_ocf_ptr
[
6
],
4
);
EXPECT_EQ
(
out_ocf_ptr
[
7
],
5
);
EXPECT_EQ
(
out_ocf_ptr
[
7
],
5
);
}
}
TEST
(
math
,
im2col
)
{
testIm2col
<
paddle
::
platform
::
CPUPlace
>
();
#ifndef PADDLE_ONLY_CPU
testIm2col
<
paddle
::
platform
::
GPUPlace
>
();
#endif
}
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录