Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
bcf260e1
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
694
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
bcf260e1
编写于
4月 26, 2018
作者:
F
fengjiayi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix several unit tests
上级
3cd99f4b
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
60 addition
and
60 deletion
+60
-60
paddle/fluid/operators/fetch_op.cc
paddle/fluid/operators/fetch_op.cc
+1
-2
paddle/fluid/operators/math/concat_test.cc
paddle/fluid/operators/math/concat_test.cc
+12
-12
paddle/fluid/operators/math/im2col_test.cc
paddle/fluid/operators/math/im2col_test.cc
+9
-7
paddle/fluid/operators/math/math_function_test.cu
paddle/fluid/operators/math/math_function_test.cu
+31
-31
paddle/fluid/operators/math/vol2col_test.cc
paddle/fluid/operators/math/vol2col_test.cc
+5
-4
paddle/fluid/operators/nccl_op_test.cu.cc
paddle/fluid/operators/nccl_op_test.cu.cc
+2
-4
未找到文件。
paddle/fluid/operators/fetch_op.cc
浏览文件 @
bcf260e1
...
@@ -59,8 +59,7 @@ class FetchOp : public framework::OperatorBase {
...
@@ -59,8 +59,7 @@ class FetchOp : public framework::OperatorBase {
// CPU outputs?
// CPU outputs?
auto
&
dev_ctx
=
*
pool
.
Get
(
src_item
.
place
());
auto
&
dev_ctx
=
*
pool
.
Get
(
src_item
.
place
());
TensorCopy
(
src_item
,
platform
::
CPUPlace
(),
dev_ctx
,
&
dst_item
);
TensorCopy
(
src_item
,
platform
::
CPUPlace
(),
dev_ctx
,
&
dst_item
,
true
);
dev_ctx
.
Wait
();
dst_item
.
set_lod
(
src_item
.
lod
());
dst_item
.
set_lod
(
src_item
.
lod
());
VLOG
(
3
)
<<
"Fetch variable "
<<
fetch_var_name
<<
" to "
<<
out_name
;
VLOG
(
3
)
<<
"Fetch variable "
<<
fetch_var_name
<<
" to "
<<
out_name
;
...
...
paddle/fluid/operators/math/concat_test.cc
浏览文件 @
bcf260e1
...
@@ -72,8 +72,8 @@ void testConcat() {
...
@@ -72,8 +72,8 @@ void testConcat() {
}
}
if
(
paddle
::
platform
::
is_gpu_place
(
Place
()))
{
if
(
paddle
::
platform
::
is_gpu_place
(
Place
()))
{
TensorCopy
(
input_a_cpu
,
Place
(),
*
context
,
&
input_a
);
TensorCopy
(
input_a_cpu
,
Place
(),
*
context
,
&
input_a
,
true
);
TensorCopy
(
input_b_cpu
,
Place
(),
*
context
,
&
input_b
);
TensorCopy
(
input_b_cpu
,
Place
(),
*
context
,
&
input_b
,
true
);
}
}
std
::
vector
<
Tensor
>
input
;
std
::
vector
<
Tensor
>
input
;
...
@@ -89,7 +89,7 @@ void testConcat() {
...
@@ -89,7 +89,7 @@ void testConcat() {
int
*
out_ptr
;
int
*
out_ptr
;
if
(
paddle
::
platform
::
is_gpu_place
(
Place
()))
{
if
(
paddle
::
platform
::
is_gpu_place
(
Place
()))
{
TensorCopy
(
out
,
CPUPlace
(),
*
context
,
&
out_cpu
);
TensorCopy
(
out
,
CPUPlace
(),
*
context
,
&
out_cpu
,
true
);
out_ptr
=
out_cpu
.
data
<
int
>
();
out_ptr
=
out_cpu
.
data
<
int
>
();
}
else
{
}
else
{
out_ptr
=
out
.
data
<
int
>
();
out_ptr
=
out
.
data
<
int
>
();
...
@@ -144,8 +144,8 @@ void testConcat() {
...
@@ -144,8 +144,8 @@ void testConcat() {
}
}
if
(
paddle
::
platform
::
is_gpu_place
(
Place
()))
{
if
(
paddle
::
platform
::
is_gpu_place
(
Place
()))
{
TensorCopy
(
input_a_cpu
,
Place
(),
*
context
,
&
input_a
);
TensorCopy
(
input_a_cpu
,
Place
(),
*
context
,
&
input_a
,
true
);
TensorCopy
(
input_b_cpu
,
Place
(),
*
context
,
&
input_b
);
TensorCopy
(
input_b_cpu
,
Place
(),
*
context
,
&
input_b
,
true
);
}
}
input
.
clear
();
input
.
clear
();
...
@@ -159,7 +159,7 @@ void testConcat() {
...
@@ -159,7 +159,7 @@ void testConcat() {
PADDLE_ENFORCE_EQ
(
input_b
.
dims
(),
dim_b
);
PADDLE_ENFORCE_EQ
(
input_b
.
dims
(),
dim_b
);
if
(
paddle
::
platform
::
is_gpu_place
(
Place
()))
{
if
(
paddle
::
platform
::
is_gpu_place
(
Place
()))
{
TensorCopy
(
out
,
CPUPlace
(),
*
context
,
&
out_cpu
);
TensorCopy
(
out
,
CPUPlace
(),
*
context
,
&
out_cpu
,
true
);
out_ptr
=
out_cpu
.
data
<
int
>
();
out_ptr
=
out_cpu
.
data
<
int
>
();
}
else
{
}
else
{
out_ptr
=
out
.
data
<
int
>
();
out_ptr
=
out
.
data
<
int
>
();
...
@@ -216,8 +216,8 @@ void testConcat() {
...
@@ -216,8 +216,8 @@ void testConcat() {
}
}
if
(
paddle
::
platform
::
is_gpu_place
(
Place
()))
{
if
(
paddle
::
platform
::
is_gpu_place
(
Place
()))
{
TensorCopy
(
input_a_cpu
,
Place
(),
*
context
,
&
input_a
);
TensorCopy
(
input_a_cpu
,
Place
(),
*
context
,
&
input_a
,
true
);
TensorCopy
(
input_b_cpu
,
Place
(),
*
context
,
&
input_b
);
TensorCopy
(
input_b_cpu
,
Place
(),
*
context
,
&
input_b
,
true
);
}
}
input
.
clear
();
input
.
clear
();
...
@@ -231,7 +231,7 @@ void testConcat() {
...
@@ -231,7 +231,7 @@ void testConcat() {
PADDLE_ENFORCE_EQ
(
input_b
.
dims
(),
dim_b
);
PADDLE_ENFORCE_EQ
(
input_b
.
dims
(),
dim_b
);
if
(
paddle
::
platform
::
is_gpu_place
(
Place
()))
{
if
(
paddle
::
platform
::
is_gpu_place
(
Place
()))
{
TensorCopy
(
out
,
CPUPlace
(),
*
context
,
&
out_cpu
);
TensorCopy
(
out
,
CPUPlace
(),
*
context
,
&
out_cpu
,
true
);
out_ptr
=
out_cpu
.
data
<
int
>
();
out_ptr
=
out_cpu
.
data
<
int
>
();
}
else
{
}
else
{
out_ptr
=
out
.
data
<
int
>
();
out_ptr
=
out
.
data
<
int
>
();
...
@@ -290,8 +290,8 @@ void testConcat() {
...
@@ -290,8 +290,8 @@ void testConcat() {
}
}
if
(
paddle
::
platform
::
is_gpu_place
(
Place
()))
{
if
(
paddle
::
platform
::
is_gpu_place
(
Place
()))
{
TensorCopy
(
input_a_cpu
,
Place
(),
*
context
,
&
input_a
);
TensorCopy
(
input_a_cpu
,
Place
(),
*
context
,
&
input_a
,
true
);
TensorCopy
(
input_b_cpu
,
Place
(),
*
context
,
&
input_b
);
TensorCopy
(
input_b_cpu
,
Place
(),
*
context
,
&
input_b
,
true
);
}
}
input
.
clear
();
input
.
clear
();
...
@@ -305,7 +305,7 @@ void testConcat() {
...
@@ -305,7 +305,7 @@ void testConcat() {
PADDLE_ENFORCE_EQ
(
input_b
.
dims
(),
dim_b
);
PADDLE_ENFORCE_EQ
(
input_b
.
dims
(),
dim_b
);
if
(
paddle
::
platform
::
is_gpu_place
(
Place
()))
{
if
(
paddle
::
platform
::
is_gpu_place
(
Place
()))
{
TensorCopy
(
out
,
CPUPlace
(),
*
context
,
&
out_cpu
);
TensorCopy
(
out
,
CPUPlace
(),
*
context
,
&
out_cpu
,
true
);
out_ptr
=
out_cpu
.
data
<
int
>
();
out_ptr
=
out_cpu
.
data
<
int
>
();
}
else
{
}
else
{
out_ptr
=
out
.
data
<
int
>
();
out_ptr
=
out
.
data
<
int
>
();
...
...
paddle/fluid/operators/math/im2col_test.cc
浏览文件 @
bcf260e1
...
@@ -62,7 +62,7 @@ void testIm2col() {
...
@@ -62,7 +62,7 @@ void testIm2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
input
=
input_tmp
;
input
=
input_tmp
;
}
else
{
}
else
{
TensorCopy
(
input_tmp
,
*
place
,
*
context
,
&
input
);
TensorCopy
(
input_tmp
,
*
place
,
*
context
,
&
input
,
true
);
}
}
output_cfo
.
mutable_data
<
float
>
(
output_cfo
.
mutable_data
<
float
>
(
{
1
,
filter_size
,
filter_size
,
output_height
,
output_width
},
*
place
);
{
1
,
filter_size
,
filter_size
,
output_height
,
output_width
},
*
place
);
...
@@ -87,7 +87,8 @@ void testIm2col() {
...
@@ -87,7 +87,8 @@ void testIm2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
out_cfo_ptr
=
output_cfo
.
data
<
float
>
();
out_cfo_ptr
=
output_cfo
.
data
<
float
>
();
}
else
{
}
else
{
TensorCopy
(
output_cfo
,
paddle
::
platform
::
CPUPlace
(),
*
context
,
&
output_tmp
);
TensorCopy
(
output_cfo
,
paddle
::
platform
::
CPUPlace
(),
*
context
,
&
output_tmp
,
true
);
out_cfo_ptr
=
output_tmp
.
data
<
float
>
();
out_cfo_ptr
=
output_tmp
.
data
<
float
>
();
}
}
for
(
int
i
=
0
;
i
<
6
;
++
i
)
{
for
(
int
i
=
0
;
i
<
6
;
++
i
)
{
...
@@ -98,7 +99,8 @@ void testIm2col() {
...
@@ -98,7 +99,8 @@ void testIm2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
out_ocf_ptr
=
output_ocf
.
data
<
float
>
();
out_ocf_ptr
=
output_ocf
.
data
<
float
>
();
}
else
{
}
else
{
TensorCopy
(
output_ocf
,
paddle
::
platform
::
CPUPlace
(),
*
context
,
&
output_tmp
);
TensorCopy
(
output_ocf
,
paddle
::
platform
::
CPUPlace
(),
*
context
,
&
output_tmp
,
true
);
out_ocf_ptr
=
output_tmp
.
data
<
float
>
();
out_ocf_ptr
=
output_tmp
.
data
<
float
>
();
}
}
...
@@ -119,7 +121,7 @@ void testIm2col() {
...
@@ -119,7 +121,7 @@ void testIm2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
input
=
input_tmp
;
input
=
input_tmp
;
}
else
{
}
else
{
TensorCopy
(
input_tmp
,
*
place
,
*
context
,
&
input
);
TensorCopy
(
input_tmp
,
*
place
,
*
context
,
&
input
,
true
);
}
}
col2im
(
*
context
,
output_cfo
,
dilation
,
stride
,
padding
,
&
input
);
col2im
(
*
context
,
output_cfo
,
dilation
,
stride
,
padding
,
&
input
);
...
@@ -128,7 +130,7 @@ void testIm2col() {
...
@@ -128,7 +130,7 @@ void testIm2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
in_ptr
=
input
.
data
<
float
>
();
in_ptr
=
input
.
data
<
float
>
();
}
else
{
}
else
{
TensorCopy
(
input
,
paddle
::
platform
::
CPUPlace
(),
*
context
,
&
input_tmp
);
TensorCopy
(
input
,
paddle
::
platform
::
CPUPlace
(),
*
context
,
&
input_tmp
,
true
);
in_ptr
=
input_tmp
.
data
<
float
>
();
in_ptr
=
input_tmp
.
data
<
float
>
();
}
}
for
(
int
i
=
0
;
i
<
6
;
++
i
)
{
for
(
int
i
=
0
;
i
<
6
;
++
i
)
{
...
@@ -140,7 +142,7 @@ void testIm2col() {
...
@@ -140,7 +142,7 @@ void testIm2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
input
=
input_tmp
;
input
=
input_tmp
;
}
else
{
}
else
{
TensorCopy
(
input_tmp
,
*
place
,
*
context
,
&
input
);
TensorCopy
(
input_tmp
,
*
place
,
*
context
,
&
input
,
true
);
}
}
col2im_ocf
(
*
context
,
output_ocf
,
dilation
,
stride
,
padding
,
&
input
);
col2im_ocf
(
*
context
,
output_ocf
,
dilation
,
stride
,
padding
,
&
input
);
...
@@ -148,7 +150,7 @@ void testIm2col() {
...
@@ -148,7 +150,7 @@ void testIm2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
in_ptr
=
input
.
data
<
float
>
();
in_ptr
=
input
.
data
<
float
>
();
}
else
{
}
else
{
TensorCopy
(
input
,
paddle
::
platform
::
CPUPlace
(),
*
context
,
&
input_tmp
);
TensorCopy
(
input
,
paddle
::
platform
::
CPUPlace
(),
*
context
,
&
input_tmp
,
true
);
in_ptr
=
input_tmp
.
data
<
float
>
();
in_ptr
=
input_tmp
.
data
<
float
>
();
}
}
for
(
int
i
=
0
;
i
<
6
;
++
i
)
{
for
(
int
i
=
0
;
i
<
6
;
++
i
)
{
...
...
paddle/fluid/operators/math/math_function_test.cu
浏览文件 @
bcf260e1
...
@@ -40,15 +40,15 @@ TEST(math_function, notrans_mul_trans_fp32) {
...
@@ -40,15 +40,15 @@ TEST(math_function, notrans_mul_trans_fp32) {
float
arr
[
6
]
=
{
0
,
1
,
2
,
3
,
4
,
5
};
float
arr
[
6
]
=
{
0
,
1
,
2
,
3
,
4
,
5
};
memcpy
(
input1_ptr
,
arr
,
6
*
sizeof
(
float
));
memcpy
(
input1_ptr
,
arr
,
6
*
sizeof
(
float
));
TensorCopy
(
input1
,
gpu_place
,
context
,
&
input1_gpu
);
TensorCopy
(
input1
,
gpu_place
,
context
,
&
input1_gpu
,
true
);
TensorCopy
(
input1
,
gpu_place
,
context
,
&
input2_gpu
);
TensorCopy
(
input1
,
gpu_place
,
context
,
&
input2_gpu
,
true
);
out_gpu
.
mutable_data
<
float
>
({
2
,
2
},
gpu_place
);
out_gpu
.
mutable_data
<
float
>
({
2
,
2
},
gpu_place
);
paddle
::
operators
::
math
::
matmul
<
CUDADeviceContext
,
float
>
(
paddle
::
operators
::
math
::
matmul
<
CUDADeviceContext
,
float
>
(
context
,
input1_gpu
,
false
,
input2_gpu
,
true
,
1
,
&
out_gpu
,
0
);
context
,
input1_gpu
,
false
,
input2_gpu
,
true
,
1
,
&
out_gpu
,
0
);
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
,
true
);
float
*
out_ptr
=
out
.
data
<
float
>
();
float
*
out_ptr
=
out
.
data
<
float
>
();
context
.
Wait
();
context
.
Wait
();
...
@@ -80,8 +80,8 @@ TEST(math_function, notrans_mul_trans_fp16) {
...
@@ -80,8 +80,8 @@ TEST(math_function, notrans_mul_trans_fp16) {
float16
*
input1_ptr
=
input1
.
mutable_data
<
float16
>
({
2
,
3
},
cpu_place
);
float16
*
input1_ptr
=
input1
.
mutable_data
<
float16
>
({
2
,
3
},
cpu_place
);
fill_fp16_data
(
input1_ptr
,
input1
.
numel
(),
{
0
,
1
,
2
,
3
,
4
,
5
});
fill_fp16_data
(
input1_ptr
,
input1
.
numel
(),
{
0
,
1
,
2
,
3
,
4
,
5
});
TensorCopy
(
input1
,
gpu_place
,
context
,
&
input1_gpu
);
TensorCopy
(
input1
,
gpu_place
,
context
,
&
input1_gpu
,
true
);
TensorCopy
(
input1
,
gpu_place
,
context
,
&
input2_gpu
);
TensorCopy
(
input1
,
gpu_place
,
context
,
&
input2_gpu
,
true
);
out_gpu
.
mutable_data
<
float16
>
({
2
,
2
},
gpu_place
);
out_gpu
.
mutable_data
<
float16
>
({
2
,
2
},
gpu_place
);
...
@@ -89,7 +89,7 @@ TEST(math_function, notrans_mul_trans_fp16) {
...
@@ -89,7 +89,7 @@ TEST(math_function, notrans_mul_trans_fp16) {
context
,
input1_gpu
,
false
,
input2_gpu
,
true
,
float16
(
1
),
&
out_gpu
,
context
,
input1_gpu
,
false
,
input2_gpu
,
true
,
float16
(
1
),
&
out_gpu
,
float16
(
0
));
float16
(
0
));
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
,
true
);
float16
*
out_ptr
=
out
.
data
<
float16
>
();
float16
*
out_ptr
=
out
.
data
<
float16
>
();
context
.
Wait
();
context
.
Wait
();
...
@@ -117,15 +117,15 @@ TEST(math_function, trans_mul_notrans_fp32) {
...
@@ -117,15 +117,15 @@ TEST(math_function, trans_mul_notrans_fp32) {
float
arr
[
6
]
=
{
0
,
1
,
2
,
3
,
4
,
5
};
float
arr
[
6
]
=
{
0
,
1
,
2
,
3
,
4
,
5
};
memcpy
(
input1_ptr
,
arr
,
6
*
sizeof
(
float
));
memcpy
(
input1_ptr
,
arr
,
6
*
sizeof
(
float
));
TensorCopy
(
input1
,
gpu_place
,
context
,
&
input1_gpu
);
TensorCopy
(
input1
,
gpu_place
,
context
,
&
input1_gpu
,
true
);
TensorCopy
(
input1
,
gpu_place
,
context
,
&
input2_gpu
);
TensorCopy
(
input1
,
gpu_place
,
context
,
&
input2_gpu
,
true
);
out_gpu
.
mutable_data
<
float
>
({
3
,
3
},
gpu_place
);
out_gpu
.
mutable_data
<
float
>
({
3
,
3
},
gpu_place
);
paddle
::
operators
::
math
::
matmul
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
(
paddle
::
operators
::
math
::
matmul
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
(
context
,
input1_gpu
,
true
,
input2_gpu
,
false
,
1
,
&
out_gpu
,
0
);
context
,
input1_gpu
,
true
,
input2_gpu
,
false
,
1
,
&
out_gpu
,
0
);
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
,
true
);
float
*
out_ptr
=
out
.
data
<
float
>
();
float
*
out_ptr
=
out
.
data
<
float
>
();
context
.
Wait
();
context
.
Wait
();
...
@@ -162,8 +162,8 @@ TEST(math_function, trans_mul_notrans_fp16) {
...
@@ -162,8 +162,8 @@ TEST(math_function, trans_mul_notrans_fp16) {
float16
*
input1_ptr
=
input1
.
mutable_data
<
float16
>
({
2
,
3
},
cpu_place
);
float16
*
input1_ptr
=
input1
.
mutable_data
<
float16
>
({
2
,
3
},
cpu_place
);
fill_fp16_data
(
input1_ptr
,
input1
.
numel
(),
{
0
,
1
,
2
,
3
,
4
,
5
});
fill_fp16_data
(
input1_ptr
,
input1
.
numel
(),
{
0
,
1
,
2
,
3
,
4
,
5
});
TensorCopy
(
input1
,
gpu_place
,
context
,
&
input1_gpu
);
TensorCopy
(
input1
,
gpu_place
,
context
,
&
input1_gpu
,
true
);
TensorCopy
(
input1
,
gpu_place
,
context
,
&
input2_gpu
);
TensorCopy
(
input1
,
gpu_place
,
context
,
&
input2_gpu
,
true
);
out_gpu
.
mutable_data
<
float16
>
({
3
,
3
},
gpu_place
);
out_gpu
.
mutable_data
<
float16
>
({
3
,
3
},
gpu_place
);
...
@@ -171,7 +171,7 @@ TEST(math_function, trans_mul_notrans_fp16) {
...
@@ -171,7 +171,7 @@ TEST(math_function, trans_mul_notrans_fp16) {
context
,
input1_gpu
,
true
,
input2_gpu
,
false
,
float16
(
1
),
&
out_gpu
,
context
,
input1_gpu
,
true
,
input2_gpu
,
false
,
float16
(
1
),
&
out_gpu
,
float16
(
0
));
float16
(
0
));
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
,
true
);
float16
*
out_ptr
=
out
.
data
<
float16
>
();
float16
*
out_ptr
=
out
.
data
<
float16
>
();
context
.
Wait
();
context
.
Wait
();
...
@@ -214,9 +214,9 @@ TEST(math_function, gemm_notrans_cublas_fp32) {
...
@@ -214,9 +214,9 @@ TEST(math_function, gemm_notrans_cublas_fp32) {
float
arr3
[
8
]
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
float
arr3
[
8
]
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
memcpy
(
input3_ptr
,
arr3
,
8
*
sizeof
(
float
));
memcpy
(
input3_ptr
,
arr3
,
8
*
sizeof
(
float
));
TensorCopy
(
input1
,
gpu_place
,
context
,
&
input1_gpu
);
TensorCopy
(
input1
,
gpu_place
,
context
,
&
input1_gpu
,
true
);
TensorCopy
(
input2
,
gpu_place
,
context
,
&
input2_gpu
);
TensorCopy
(
input2
,
gpu_place
,
context
,
&
input2_gpu
,
true
);
TensorCopy
(
input3
,
gpu_place
,
context
,
&
input3_gpu
);
TensorCopy
(
input3
,
gpu_place
,
context
,
&
input3_gpu
,
true
);
float
*
a
=
input1_gpu
.
data
<
float
>
();
float
*
a
=
input1_gpu
.
data
<
float
>
();
float
*
b
=
input2_gpu
.
data
<
float
>
();
float
*
b
=
input2_gpu
.
data
<
float
>
();
float
*
c
=
input3_gpu
.
mutable_data
<
float
>
(
gpu_place
);
float
*
c
=
input3_gpu
.
mutable_data
<
float
>
(
gpu_place
);
...
@@ -224,7 +224,7 @@ TEST(math_function, gemm_notrans_cublas_fp32) {
...
@@ -224,7 +224,7 @@ TEST(math_function, gemm_notrans_cublas_fp32) {
paddle
::
operators
::
math
::
gemm
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
(
paddle
::
operators
::
math
::
gemm
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
(
context
,
false
,
false
,
m
,
n
,
k
,
1
,
a
,
3
,
b
+
1
,
4
,
1
,
c
+
1
,
4
);
context
,
false
,
false
,
m
,
n
,
k
,
1
,
a
,
3
,
b
+
1
,
4
,
1
,
c
+
1
,
4
);
TensorCopy
(
input3_gpu
,
cpu_place
,
context
,
&
input3
);
TensorCopy
(
input3_gpu
,
cpu_place
,
context
,
&
input3
,
true
);
// numpy code:
// numpy code:
// a = np.arange(6).reshape(2, 3)
// a = np.arange(6).reshape(2, 3)
...
@@ -274,9 +274,9 @@ TEST(math_function, gemm_notrans_cublas_fp16) {
...
@@ -274,9 +274,9 @@ TEST(math_function, gemm_notrans_cublas_fp16) {
float16
*
input3_ptr
=
input3
.
mutable_data
<
float16
>
({
2
,
4
},
cpu_place
);
float16
*
input3_ptr
=
input3
.
mutable_data
<
float16
>
({
2
,
4
},
cpu_place
);
fill_fp16_data
(
input3_ptr
,
input3
.
numel
(),
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
});
fill_fp16_data
(
input3_ptr
,
input3
.
numel
(),
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
});
TensorCopy
(
input1
,
gpu_place
,
context
,
&
input1_gpu
);
TensorCopy
(
input1
,
gpu_place
,
context
,
&
input1_gpu
,
true
);
TensorCopy
(
input2
,
gpu_place
,
context
,
&
input2_gpu
);
TensorCopy
(
input2
,
gpu_place
,
context
,
&
input2_gpu
,
true
);
TensorCopy
(
input3
,
gpu_place
,
context
,
&
input3_gpu
);
TensorCopy
(
input3
,
gpu_place
,
context
,
&
input3_gpu
,
true
);
float16
*
a
=
input1_gpu
.
data
<
float16
>
();
float16
*
a
=
input1_gpu
.
data
<
float16
>
();
float16
*
b
=
input2_gpu
.
data
<
float16
>
();
float16
*
b
=
input2_gpu
.
data
<
float16
>
();
float16
*
c
=
input3_gpu
.
mutable_data
<
float16
>
(
gpu_place
);
float16
*
c
=
input3_gpu
.
mutable_data
<
float16
>
(
gpu_place
);
...
@@ -285,7 +285,7 @@ TEST(math_function, gemm_notrans_cublas_fp16) {
...
@@ -285,7 +285,7 @@ TEST(math_function, gemm_notrans_cublas_fp16) {
context
,
false
,
false
,
m
,
n
,
k
,
float16
(
1
),
a
,
3
,
b
+
1
,
4
,
float16
(
1
),
context
,
false
,
false
,
m
,
n
,
k
,
float16
(
1
),
a
,
3
,
b
+
1
,
4
,
float16
(
1
),
c
+
1
,
4
);
c
+
1
,
4
);
TensorCopy
(
input3_gpu
,
cpu_place
,
context
,
&
input3
);
TensorCopy
(
input3_gpu
,
cpu_place
,
context
,
&
input3
,
true
);
// numpy code:
// numpy code:
// a = np.arange(6).reshape(2, 3)
// a = np.arange(6).reshape(2, 3)
...
@@ -332,9 +332,9 @@ TEST(math_function, gemm_trans_cublas_fp32) {
...
@@ -332,9 +332,9 @@ TEST(math_function, gemm_trans_cublas_fp32) {
float
arr3
[
8
]
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
float
arr3
[
8
]
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
memcpy
(
input3_ptr
,
arr3
,
8
*
sizeof
(
float
));
memcpy
(
input3_ptr
,
arr3
,
8
*
sizeof
(
float
));
TensorCopy
(
input1
,
gpu_place
,
context
,
&
input1_gpu
);
TensorCopy
(
input1
,
gpu_place
,
context
,
&
input1_gpu
,
true
);
TensorCopy
(
input2
,
gpu_place
,
context
,
&
input2_gpu
);
TensorCopy
(
input2
,
gpu_place
,
context
,
&
input2_gpu
,
true
);
TensorCopy
(
input3
,
gpu_place
,
context
,
&
input3_gpu
);
TensorCopy
(
input3
,
gpu_place
,
context
,
&
input3_gpu
,
true
);
float
*
a
=
input1_gpu
.
data
<
float
>
();
float
*
a
=
input1_gpu
.
data
<
float
>
();
float
*
b
=
input2_gpu
.
data
<
float
>
();
float
*
b
=
input2_gpu
.
data
<
float
>
();
float
*
c
=
input3_gpu
.
mutable_data
<
float
>
(
gpu_place
);
float
*
c
=
input3_gpu
.
mutable_data
<
float
>
(
gpu_place
);
...
@@ -342,7 +342,7 @@ TEST(math_function, gemm_trans_cublas_fp32) {
...
@@ -342,7 +342,7 @@ TEST(math_function, gemm_trans_cublas_fp32) {
paddle
::
operators
::
math
::
gemm
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
(
paddle
::
operators
::
math
::
gemm
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
(
context
,
false
,
true
,
m
,
n
,
k
,
1
,
a
,
3
,
b
+
3
,
3
,
1
,
c
+
1
,
4
);
context
,
false
,
true
,
m
,
n
,
k
,
1
,
a
,
3
,
b
+
3
,
3
,
1
,
c
+
1
,
4
);
TensorCopy
(
input3_gpu
,
cpu_place
,
context
,
&
input3
);
TensorCopy
(
input3_gpu
,
cpu_place
,
context
,
&
input3
,
true
);
context
.
Wait
();
context
.
Wait
();
EXPECT_EQ
(
input3_ptr
[
0
],
0
);
EXPECT_EQ
(
input3_ptr
[
0
],
0
);
...
@@ -386,9 +386,9 @@ TEST(math_function, gemm_trans_cublas_fp16) {
...
@@ -386,9 +386,9 @@ TEST(math_function, gemm_trans_cublas_fp16) {
float16
*
input3_ptr
=
input3
.
mutable_data
<
float16
>
({
2
,
4
},
cpu_place
);
float16
*
input3_ptr
=
input3
.
mutable_data
<
float16
>
({
2
,
4
},
cpu_place
);
fill_fp16_data
(
input3_ptr
,
input3
.
numel
(),
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
});
fill_fp16_data
(
input3_ptr
,
input3
.
numel
(),
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
});
TensorCopy
(
input1
,
gpu_place
,
context
,
&
input1_gpu
);
TensorCopy
(
input1
,
gpu_place
,
context
,
&
input1_gpu
,
true
);
TensorCopy
(
input2
,
gpu_place
,
context
,
&
input2_gpu
);
TensorCopy
(
input2
,
gpu_place
,
context
,
&
input2_gpu
,
true
);
TensorCopy
(
input3
,
gpu_place
,
context
,
&
input3_gpu
);
TensorCopy
(
input3
,
gpu_place
,
context
,
&
input3_gpu
,
true
);
float16
*
a
=
input1_gpu
.
data
<
float16
>
();
float16
*
a
=
input1_gpu
.
data
<
float16
>
();
float16
*
b
=
input2_gpu
.
data
<
float16
>
();
float16
*
b
=
input2_gpu
.
data
<
float16
>
();
float16
*
c
=
input3_gpu
.
mutable_data
<
float16
>
(
gpu_place
);
float16
*
c
=
input3_gpu
.
mutable_data
<
float16
>
(
gpu_place
);
...
@@ -397,7 +397,7 @@ TEST(math_function, gemm_trans_cublas_fp16) {
...
@@ -397,7 +397,7 @@ TEST(math_function, gemm_trans_cublas_fp16) {
context
,
false
,
true
,
m
,
n
,
k
,
float16
(
1
),
a
,
3
,
b
+
3
,
3
,
float16
(
1
),
context
,
false
,
true
,
m
,
n
,
k
,
float16
(
1
),
a
,
3
,
b
+
3
,
3
,
float16
(
1
),
c
+
1
,
4
);
c
+
1
,
4
);
TensorCopy
(
input3_gpu
,
cpu_place
,
context
,
&
input3
);
TensorCopy
(
input3_gpu
,
cpu_place
,
context
,
&
input3
,
true
);
context
.
Wait
();
context
.
Wait
();
EXPECT_EQ
(
static_cast
<
float
>
(
input3_ptr
[
0
]),
0
);
EXPECT_EQ
(
static_cast
<
float
>
(
input3_ptr
[
0
]),
0
);
...
@@ -441,14 +441,14 @@ void GemvTest(int m, int n, bool trans) {
...
@@ -441,14 +441,14 @@ void GemvTest(int m, int n, bool trans) {
data_b
[
i
]
=
static_cast
<
T
>
(
i
);
data_b
[
i
]
=
static_cast
<
T
>
(
i
);
}
}
TensorCopy
(
mat_a
,
gpu_place
,
context
,
&
g_mat_a
);
TensorCopy
(
mat_a
,
gpu_place
,
context
,
&
g_mat_a
,
true
);
TensorCopy
(
vec_b
,
gpu_place
,
context
,
&
g_vec_b
);
TensorCopy
(
vec_b
,
gpu_place
,
context
,
&
g_vec_b
,
true
);
paddle
::
operators
::
math
::
gemv
<
CUDADeviceContext
,
T
>
(
paddle
::
operators
::
math
::
gemv
<
CUDADeviceContext
,
T
>
(
context
,
trans
,
static_cast
<
int
>
(
m
),
static_cast
<
int
>
(
n
),
1.
,
g_data_a
,
context
,
trans
,
static_cast
<
int
>
(
m
),
static_cast
<
int
>
(
n
),
1.
,
g_data_a
,
g_data_b
,
0.
,
g_data_c
);
g_data_b
,
0.
,
g_data_c
);
TensorCopy
(
g_vec_c
,
cpu_place
,
context
,
&
vec_c
);
TensorCopy
(
g_vec_c
,
cpu_place
,
context
,
&
vec_c
,
true
);
if
(
!
trans
)
{
if
(
!
trans
)
{
for
(
int
i
=
0
;
i
<
m
;
++
i
)
{
for
(
int
i
=
0
;
i
<
m
;
++
i
)
{
...
...
paddle/fluid/operators/math/vol2col_test.cc
浏览文件 @
bcf260e1
...
@@ -71,7 +71,7 @@ void testVol2col() {
...
@@ -71,7 +71,7 @@ void testVol2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
input
=
input_tmp
;
input
=
input_tmp
;
}
else
{
}
else
{
paddle
::
framework
::
TensorCopy
(
input_tmp
,
*
place
,
*
context
,
&
input
);
paddle
::
framework
::
TensorCopy
(
input_tmp
,
*
place
,
*
context
,
&
input
,
true
);
}
}
output
.
mutable_data
<
float
>
({
1
,
filter_size
,
filter_size
,
filter_size
,
output
.
mutable_data
<
float
>
({
1
,
filter_size
,
filter_size
,
filter_size
,
output_depth
,
output_height
,
output_width
},
output_depth
,
output_height
,
output_width
},
...
@@ -85,7 +85,8 @@ void testVol2col() {
...
@@ -85,7 +85,8 @@ void testVol2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
out_cfo_ptr
=
output
.
data
<
float
>
();
out_cfo_ptr
=
output
.
data
<
float
>
();
}
else
{
}
else
{
TensorCopy
(
output
,
paddle
::
platform
::
CPUPlace
(),
*
context
,
&
output_tmp
);
TensorCopy
(
output
,
paddle
::
platform
::
CPUPlace
(),
*
context
,
&
output_tmp
,
true
);
out_cfo_ptr
=
output_tmp
.
data
<
float
>
();
out_cfo_ptr
=
output_tmp
.
data
<
float
>
();
}
}
...
@@ -99,7 +100,7 @@ void testVol2col() {
...
@@ -99,7 +100,7 @@ void testVol2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
input
=
input_tmp
;
input
=
input_tmp
;
}
else
{
}
else
{
TensorCopy
(
input_tmp
,
*
place
,
*
context
,
&
input
);
TensorCopy
(
input_tmp
,
*
place
,
*
context
,
&
input
,
true
);
}
}
paddle
::
operators
::
math
::
Col2VolFunctor
<
DeviceContext
,
float
>
col2vol
;
paddle
::
operators
::
math
::
Col2VolFunctor
<
DeviceContext
,
float
>
col2vol
;
...
@@ -109,7 +110,7 @@ void testVol2col() {
...
@@ -109,7 +110,7 @@ void testVol2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
in_ptr
=
input
.
data
<
float
>
();
in_ptr
=
input
.
data
<
float
>
();
}
else
{
}
else
{
TensorCopy
(
input
,
paddle
::
platform
::
CPUPlace
(),
*
context
,
&
input_tmp
);
TensorCopy
(
input
,
paddle
::
platform
::
CPUPlace
(),
*
context
,
&
input_tmp
,
true
);
in_ptr
=
input_tmp
.
data
<
float
>
();
in_ptr
=
input_tmp
.
data
<
float
>
();
}
}
...
...
paddle/fluid/operators/nccl_op_test.cu.cc
浏览文件 @
bcf260e1
...
@@ -228,10 +228,8 @@ TEST_F(NCCLTester, ncclReduceOp) {
...
@@ -228,10 +228,8 @@ TEST_F(NCCLTester, ncclReduceOp) {
result_tensor
->
Resize
(
kDims
);
result_tensor
->
Resize
(
kDims
);
auto
*
ct
=
result_tensor
->
mutable_data
<
float
>
(
cpu_place
);
auto
*
ct
=
result_tensor
->
mutable_data
<
float
>
(
cpu_place
);
paddle
::
memory
::
Copy
(
paddle
::
memory
::
Copy
(
cpu_place
,
ct
,
p
::
CUDAPlace
(
gpu_list_
[
kRoot
]),
rt
,
cpu_place
,
ct
,
p
::
CUDAPlace
(
gpu_list_
[
kRoot
]),
rt
,
recv_tensor
.
numel
()
*
sizeof
(
float
),
nullptr
);
recv_tensor
.
numel
()
*
sizeof
(
float
),
static_cast
<
p
::
CUDADeviceContext
*>
(
dev_ctxs_
[
kRoot
])
->
stream
());
for
(
int64_t
j
=
0
;
j
<
f
::
product
(
kDims
);
++
j
)
{
for
(
int64_t
j
=
0
;
j
<
f
::
product
(
kDims
);
++
j
)
{
ASSERT_NEAR
(
ct
[
j
],
expected_result
,
1e-5
);
ASSERT_NEAR
(
ct
[
j
],
expected_result
,
1e-5
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录