Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
1d756746
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
1d756746
编写于
4月 21, 2018
作者:
Q
qiaolongfei
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into add-async-listen-and-serv-op
上级
0763ae9a
6402b59a
变更
45
展开全部
隐藏空白更改
内联
并排
Showing
45 changed file
with
801 addition
and
778 deletion
+801
-778
Dockerfile
Dockerfile
+3
-0
paddle/fluid/framework/block_desc.cc
paddle/fluid/framework/block_desc.cc
+1
-0
paddle/fluid/framework/data_device_transform_test.cu
paddle/fluid/framework/data_device_transform_test.cu
+10
-9
paddle/fluid/framework/data_layout_transform_test.cc
paddle/fluid/framework/data_layout_transform_test.cc
+21
-20
paddle/fluid/framework/data_type_transform_test.cc
paddle/fluid/framework/data_type_transform_test.cc
+76
-50
paddle/fluid/framework/data_type_transform_test.cu
paddle/fluid/framework/data_type_transform_test.cu
+119
-80
paddle/fluid/framework/details/CMakeLists.txt
paddle/fluid/framework/details/CMakeLists.txt
+9
-8
paddle/fluid/framework/details/broadcast_op_handle.cc
paddle/fluid/framework/details/broadcast_op_handle.cc
+13
-8
paddle/fluid/framework/details/broadcast_op_handle_test.cc
paddle/fluid/framework/details/broadcast_op_handle_test.cc
+18
-6
paddle/fluid/framework/details/cow_ptr.h
paddle/fluid/framework/details/cow_ptr.h
+2
-2
paddle/fluid/framework/details/gather_op_handle.cc
paddle/fluid/framework/details/gather_op_handle.cc
+10
-5
paddle/fluid/framework/details/gather_op_handle_test.cc
paddle/fluid/framework/details/gather_op_handle_test.cc
+15
-6
paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc
paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc
+5
-5
paddle/fluid/framework/details/op_registry.h
paddle/fluid/framework/details/op_registry.h
+3
-0
paddle/fluid/framework/details/reduce_and_gather.h
paddle/fluid/framework/details/reduce_and_gather.h
+4
-4
paddle/fluid/framework/details/reduce_op_handle.cc
paddle/fluid/framework/details/reduce_op_handle.cc
+76
-68
paddle/fluid/framework/details/reduce_op_handle.h
paddle/fluid/framework/details/reduce_op_handle.h
+6
-3
paddle/fluid/framework/details/reduce_op_handle_test.cc
paddle/fluid/framework/details/reduce_op_handle_test.cc
+34
-23
paddle/fluid/framework/op_registry_test.cc
paddle/fluid/framework/op_registry_test.cc
+3
-2
paddle/fluid/framework/operator_test.cc
paddle/fluid/framework/operator_test.cc
+7
-9
paddle/fluid/framework/program_desc.cc
paddle/fluid/framework/program_desc.cc
+5
-1
paddle/fluid/framework/program_desc.h
paddle/fluid/framework/program_desc.h
+2
-0
paddle/fluid/framework/threadpool_test.cc
paddle/fluid/framework/threadpool_test.cc
+4
-4
paddle/fluid/inference/io.cc
paddle/fluid/inference/io.cc
+12
-11
paddle/fluid/inference/io.h
paddle/fluid/inference/io.h
+5
-5
paddle/fluid/inference/tests/test_helper.h
paddle/fluid/inference/tests/test_helper.h
+2
-2
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+5
-0
paddle/fluid/operators/activation_op.cc
paddle/fluid/operators/activation_op.cc
+199
-361
paddle/fluid/operators/activation_op.cu
paddle/fluid/operators/activation_op.cu
+0
-1
paddle/fluid/operators/activation_op.h
paddle/fluid/operators/activation_op.h
+48
-10
paddle/fluid/operators/dropout_op.cu
paddle/fluid/operators/dropout_op.cu
+29
-21
paddle/fluid/operators/dropout_op.h
paddle/fluid/operators/dropout_op.h
+6
-6
paddle/fluid/operators/dropout_op_test.cc
paddle/fluid/operators/dropout_op_test.cc
+12
-17
paddle/fluid/operators/mkldnn_activation_op.h
paddle/fluid/operators/mkldnn_activation_op.h
+1
-1
paddle/fluid/pybind/protobuf.cc
paddle/fluid/pybind/protobuf.cc
+1
-0
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+4
-0
paddle/scripts/docker/build.sh
paddle/scripts/docker/build.sh
+2
-2
python/paddle/fluid/io.py
python/paddle/fluid/io.py
+4
-2
python/paddle/fluid/layer_helper.py
python/paddle/fluid/layer_helper.py
+5
-1
python/paddle/fluid/layers/io.py
python/paddle/fluid/layers/io.py
+8
-3
python/paddle/fluid/tests/unittests/test_activation_op.py
python/paddle/fluid/tests/unittests/test_activation_op.py
+3
-13
python/paddle/fluid/tests/unittests/test_batch_norm_op.py
python/paddle/fluid/tests/unittests/test_batch_norm_op.py
+4
-1
python/paddle/fluid/tests/unittests/test_multi_file_reader.py
...on/paddle/fluid/tests/unittests/test_multi_file_reader.py
+0
-1
python/paddle/fluid/tests/unittests/test_multi_pass_reader.py
...on/paddle/fluid/tests/unittests/test_multi_pass_reader.py
+2
-4
python/paddle/fluid/tests/unittests/test_recordio_reader.py
python/paddle/fluid/tests/unittests/test_recordio_reader.py
+3
-3
未找到文件。
Dockerfile
浏览文件 @
1d756746
# A image for building paddle binaries
# A image for building paddle binaries
# Use cuda devel base image for both cpu and gpu environment
# Use cuda devel base image for both cpu and gpu environment
# When you modify it, please be aware of cudnn-runtime version
# and libcudnn.so.x in paddle/scripts/docker/build.sh
FROM
nvidia/cuda:8.0-cudnn7-devel-ubuntu16.04
FROM
nvidia/cuda:8.0-cudnn7-devel-ubuntu16.04
MAINTAINER
PaddlePaddle Authors <paddle-dev@baidu.com>
MAINTAINER
PaddlePaddle Authors <paddle-dev@baidu.com>
...
...
paddle/fluid/framework/block_desc.cc
浏览文件 @
1d756746
...
@@ -146,6 +146,7 @@ void BlockDesc::RemoveOp(size_t s, size_t e) {
...
@@ -146,6 +146,7 @@ void BlockDesc::RemoveOp(size_t s, size_t e) {
if
(
ops_
.
begin
()
+
s
==
ops_
.
end
()
||
ops_
.
begin
()
+
e
==
ops_
.
end
())
{
if
(
ops_
.
begin
()
+
s
==
ops_
.
end
()
||
ops_
.
begin
()
+
e
==
ops_
.
end
())
{
return
;
return
;
}
}
need_update_
=
true
;
ops_
.
erase
(
ops_
.
begin
()
+
s
,
ops_
.
begin
()
+
e
);
ops_
.
erase
(
ops_
.
begin
()
+
s
,
ops_
.
begin
()
+
e
);
}
}
...
...
paddle/fluid/framework/data_device_transform_test.cu
浏览文件 @
1d756746
...
@@ -103,9 +103,7 @@ static void BuildVar(const std::string& param_name,
...
@@ -103,9 +103,7 @@ static void BuildVar(const std::string& param_name,
}
}
TEST
(
Operator
,
CPUtoGPU
)
{
TEST
(
Operator
,
CPUtoGPU
)
{
using
namespace
paddle
::
framework
;
paddle
::
framework
::
InitDevices
(
true
);
using
namespace
paddle
::
platform
;
InitDevices
(
true
);
paddle
::
framework
::
Scope
scope
;
paddle
::
framework
::
Scope
scope
;
paddle
::
platform
::
CPUPlace
cpu_place
;
paddle
::
platform
::
CPUPlace
cpu_place
;
...
@@ -118,8 +116,9 @@ TEST(Operator, CPUtoGPU) {
...
@@ -118,8 +116,9 @@ TEST(Operator, CPUtoGPU) {
auto
cpu_op
=
paddle
::
framework
::
OpRegistry
::
CreateOp
(
cpu_op_desc
);
auto
cpu_op
=
paddle
::
framework
::
OpRegistry
::
CreateOp
(
cpu_op_desc
);
// prepare input
// prepare input
auto
*
in_t
=
scope
.
Var
(
"IN1"
)
->
GetMutable
<
LoDTensor
>
();
auto
*
in_t
=
scope
.
Var
(
"IN1"
)
->
GetMutable
<
paddle
::
framework
::
LoDTensor
>
();
auto
*
src_ptr
=
in_t
->
mutable_data
<
float
>
({
2
,
3
},
CPUPlace
());
auto
*
src_ptr
=
in_t
->
mutable_data
<
float
>
({
2
,
3
},
paddle
::
platform
::
CPUPlace
());
for
(
int
i
=
0
;
i
<
2
*
3
;
++
i
)
{
for
(
int
i
=
0
;
i
<
2
*
3
;
++
i
)
{
src_ptr
[
i
]
=
static_cast
<
float
>
(
i
);
src_ptr
[
i
]
=
static_cast
<
float
>
(
i
);
}
}
...
@@ -128,7 +127,7 @@ TEST(Operator, CPUtoGPU) {
...
@@ -128,7 +127,7 @@ TEST(Operator, CPUtoGPU) {
auto
*
output
=
scope
.
Var
(
"OUT1"
);
auto
*
output
=
scope
.
Var
(
"OUT1"
);
cpu_op
->
Run
(
scope
,
cpu_place
);
cpu_op
->
Run
(
scope
,
cpu_place
);
auto
*
output_ptr
=
output
->
Get
<
LoDTensor
>
().
data
<
float
>
();
auto
*
output_ptr
=
output
->
Get
<
paddle
::
framework
::
LoDTensor
>
().
data
<
float
>
();
for
(
int
i
=
0
;
i
<
2
*
3
;
++
i
)
{
for
(
int
i
=
0
;
i
<
2
*
3
;
++
i
)
{
ASSERT_EQ
(
output_ptr
[
i
],
static_cast
<
float
>
(
i
)
*
2
);
ASSERT_EQ
(
output_ptr
[
i
],
static_cast
<
float
>
(
i
)
*
2
);
}
}
...
@@ -153,12 +152,14 @@ TEST(Operator, CPUtoGPU) {
...
@@ -153,12 +152,14 @@ TEST(Operator, CPUtoGPU) {
VLOG
(
3
)
<<
"after gpu_op run"
;
VLOG
(
3
)
<<
"after gpu_op run"
;
// auto* output2_ptr = output2->Get<LoDTensor>().data<float>();
// auto* output2_ptr = output2->Get<LoDTensor>().data<float>();
DeviceContextPool
&
pool
=
DeviceContextPool
::
Instance
();
paddle
::
platform
::
DeviceContextPool
&
pool
=
paddle
::
platform
::
DeviceContextPool
::
Instance
();
auto
dev_ctx
=
pool
.
Get
(
cuda_place
);
auto
dev_ctx
=
pool
.
Get
(
cuda_place
);
paddle
::
framework
::
Tensor
output_tensor
;
paddle
::
framework
::
Tensor
output_tensor
;
TensorCopy
(
output2
->
Get
<
LoDTensor
>
(),
paddle
::
platform
::
CPUPlace
(),
*
dev_ctx
,
paddle
::
framework
::
TensorCopy
(
output2
->
Get
<
paddle
::
framework
::
LoDTensor
>
(),
&
output_tensor
);
paddle
::
platform
::
CPUPlace
(),
*
dev_ctx
,
&
output_tensor
);
dev_ctx
->
Wait
();
dev_ctx
->
Wait
();
float
*
output2_ptr
=
output_tensor
.
data
<
float
>
();
float
*
output2_ptr
=
output_tensor
.
data
<
float
>
();
...
...
paddle/fluid/framework/data_layout_transform_test.cc
浏览文件 @
1d756746
...
@@ -18,27 +18,28 @@
...
@@ -18,27 +18,28 @@
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/device_context.h"
TEST
(
DataTransform
,
DataLayoutFunction
)
{
TEST
(
DataTransform
,
DataLayoutFunction
)
{
using
namespace
paddle
::
framework
;
auto
place
=
paddle
::
platform
::
CPUPlace
();
using
namespace
paddle
::
platform
;
paddle
::
framework
::
Tensor
in
=
paddle
::
framework
::
Tensor
();
paddle
::
framework
::
Tensor
out
=
paddle
::
framework
::
Tensor
();
auto
place
=
CPUPlace
();
in
.
mutable_data
<
double
>
(
paddle
::
framework
::
make_ddim
({
2
,
3
,
1
,
2
}),
place
);
Tensor
in
=
Tensor
();
in
.
set_layout
(
paddle
::
framework
::
DataLayout
::
kNHWC
);
Tensor
out
=
Tensor
();
in
.
mutable_data
<
double
>
(
make_ddim
({
2
,
3
,
1
,
2
}),
place
);
auto
kernel_nhwc
=
paddle
::
framework
::
OpKernelType
(
in
.
set_layout
(
DataLayout
::
kNHWC
);
paddle
::
framework
::
proto
::
VarType
::
FP32
,
place
,
paddle
::
framework
::
DataLayout
::
kNHWC
,
auto
kernel_nhwc
=
OpKernelType
(
proto
::
VarType
::
FP32
,
place
,
paddle
::
framework
::
LibraryType
::
kPlain
);
DataLayout
::
kNHWC
,
LibraryType
::
kPlain
);
auto
kernel_ncwh
=
paddle
::
framework
::
OpKernelType
(
auto
kernel_ncwh
=
OpKernelType
(
proto
::
VarType
::
FP32
,
place
,
paddle
::
framework
::
proto
::
VarType
::
FP32
,
place
,
DataLayout
::
kNCHW
,
LibraryType
::
kPlain
);
paddle
::
framework
::
DataLayout
::
kNCHW
,
paddle
::
framework
::
LibraryType
::
kPlain
);
TransDataLayout
(
kernel_nhwc
,
kernel_ncwh
,
in
,
&
out
);
paddle
::
framework
::
TransDataLayout
(
kernel_nhwc
,
kernel_ncwh
,
in
,
&
out
);
EXPECT_TRUE
(
out
.
layout
()
==
DataLayout
::
kNCHW
);
EXPECT_TRUE
(
out
.
dims
()
==
make_ddim
({
2
,
2
,
3
,
1
}));
EXPECT_TRUE
(
out
.
layout
()
==
paddle
::
framework
::
DataLayout
::
kNCHW
);
EXPECT_TRUE
(
out
.
dims
()
==
paddle
::
framework
::
make_ddim
({
2
,
2
,
3
,
1
}));
TransDataLayout
(
kernel_ncwh
,
kernel_nhwc
,
in
,
&
out
);
TransDataLayout
(
kernel_ncwh
,
kernel_nhwc
,
in
,
&
out
);
EXPECT_TRUE
(
in
.
layout
()
==
DataLayout
::
kNHWC
);
EXPECT_TRUE
(
in
.
layout
()
==
paddle
::
framework
::
DataLayout
::
kNHWC
);
EXPECT_TRUE
(
in
.
dims
()
==
make_ddim
({
2
,
3
,
1
,
2
}));
EXPECT_TRUE
(
in
.
dims
()
==
paddle
::
framework
::
make_ddim
({
2
,
3
,
1
,
2
}));
}
}
paddle/fluid/framework/data_type_transform_test.cc
浏览文件 @
1d756746
...
@@ -17,43 +17,58 @@ limitations under the License. */
...
@@ -17,43 +17,58 @@ limitations under the License. */
#include "gtest/gtest.h"
#include "gtest/gtest.h"
TEST
(
DataTypeTransform
,
CPUTransform
)
{
TEST
(
DataTypeTransform
,
CPUTransform
)
{
using
namespace
paddle
::
framework
;
auto
place
=
paddle
::
platform
::
CPUPlace
();
using
namespace
paddle
::
platform
;
auto
kernel_fp16
=
paddle
::
framework
::
OpKernelType
(
auto
place
=
CPUPlace
();
paddle
::
framework
::
proto
::
VarType
::
FP16
,
place
,
paddle
::
framework
::
DataLayout
::
kAnyLayout
,
auto
kernel_fp16
=
OpKernelType
(
proto
::
VarType
::
FP16
,
place
,
paddle
::
framework
::
LibraryType
::
kPlain
);
DataLayout
::
kAnyLayout
,
LibraryType
::
kPlain
);
auto
kernel_fp32
=
OpKernelType
(
proto
::
VarType
::
FP32
,
place
,
auto
kernel_fp32
=
paddle
::
framework
::
OpKernelType
(
DataLayout
::
kAnyLayout
,
LibraryType
::
kPlain
);
paddle
::
framework
::
proto
::
VarType
::
FP32
,
place
,
auto
kernel_fp64
=
OpKernelType
(
proto
::
VarType
::
FP64
,
place
,
paddle
::
framework
::
DataLayout
::
kAnyLayout
,
DataLayout
::
kAnyLayout
,
LibraryType
::
kPlain
);
paddle
::
framework
::
LibraryType
::
kPlain
);
auto
kernel_int32
=
OpKernelType
(
proto
::
VarType
::
INT32
,
place
,
DataLayout
::
kAnyLayout
,
LibraryType
::
kPlain
);
auto
kernel_fp64
=
paddle
::
framework
::
OpKernelType
(
auto
kernel_int64
=
OpKernelType
(
proto
::
VarType
::
INT64
,
place
,
paddle
::
framework
::
proto
::
VarType
::
FP64
,
place
,
DataLayout
::
kAnyLayout
,
LibraryType
::
kPlain
);
paddle
::
framework
::
DataLayout
::
kAnyLayout
,
auto
kernel_bool
=
OpKernelType
(
proto
::
VarType
::
BOOL
,
place
,
paddle
::
framework
::
LibraryType
::
kPlain
);
DataLayout
::
kAnyLayout
,
LibraryType
::
kPlain
);
auto
kernel_int32
=
paddle
::
framework
::
OpKernelType
(
paddle
::
framework
::
proto
::
VarType
::
INT32
,
place
,
paddle
::
framework
::
DataLayout
::
kAnyLayout
,
paddle
::
framework
::
LibraryType
::
kPlain
);
auto
kernel_int64
=
paddle
::
framework
::
OpKernelType
(
paddle
::
framework
::
proto
::
VarType
::
INT64
,
place
,
paddle
::
framework
::
DataLayout
::
kAnyLayout
,
paddle
::
framework
::
LibraryType
::
kPlain
);
auto
kernel_bool
=
paddle
::
framework
::
OpKernelType
(
paddle
::
framework
::
proto
::
VarType
::
BOOL
,
place
,
paddle
::
framework
::
DataLayout
::
kAnyLayout
,
paddle
::
framework
::
LibraryType
::
kPlain
);
// data type transform from float32
// data type transform from float32
{
{
Tensor
in
;
paddle
::
framework
::
Tensor
in
;
Tensor
out
;
paddle
::
framework
::
Tensor
out
;
float
*
ptr
=
in
.
mutable_data
<
float
>
(
make_ddim
({
2
,
3
}),
place
);
float
*
ptr
=
in
.
mutable_data
<
float
>
(
paddle
::
framework
::
make_ddim
({
2
,
3
}),
place
);
int
data_number
=
2
*
3
;
int
data_number
=
2
*
3
;
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
ptr
[
i
]
=
i
/
3
;
ptr
[
i
]
=
i
/
3
;
}
}
TransDataType
(
kernel_fp32
,
kernel_fp64
,
in
,
&
out
);
paddle
::
framework
::
TransDataType
(
kernel_fp32
,
kernel_fp64
,
in
,
&
out
);
double
*
out_data_double
=
out
.
data
<
double
>
();
double
*
out_data_double
=
out
.
data
<
double
>
();
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
EXPECT_EQ
(
out_data_double
[
i
],
static_cast
<
double
>
(
i
/
3
));
EXPECT_EQ
(
out_data_double
[
i
],
static_cast
<
double
>
(
i
/
3
));
}
}
TransDataType
(
kernel_fp32
,
kernel_int32
,
in
,
&
out
);
paddle
::
framework
::
TransDataType
(
kernel_fp32
,
kernel_int32
,
in
,
&
out
);
int
*
out_data_int
=
out
.
data
<
int
>
();
int
*
out_data_int
=
out
.
data
<
int
>
();
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
EXPECT_EQ
(
out_data_int
[
i
],
static_cast
<
int
>
(
i
/
3
));
EXPECT_EQ
(
out_data_int
[
i
],
static_cast
<
int
>
(
i
/
3
));
...
@@ -62,10 +77,11 @@ TEST(DataTypeTransform, CPUTransform) {
...
@@ -62,10 +77,11 @@ TEST(DataTypeTransform, CPUTransform) {
// data type transform from/to float16
// data type transform from/to float16
{
{
Tensor
in
;
paddle
::
framework
::
Tensor
in
;
Tensor
out
;
paddle
::
framework
::
Tensor
out
;
float16
*
ptr
=
in
.
mutable_data
<
float16
>
(
make_ddim
({
2
,
3
}),
place
);
paddle
::
platform
::
float16
*
ptr
=
in
.
mutable_data
<
paddle
::
platform
::
float16
>
(
paddle
::
framework
::
make_ddim
({
2
,
3
}),
place
);
int
data_number
=
2
*
3
;
int
data_number
=
2
*
3
;
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
...
@@ -73,94 +89,104 @@ TEST(DataTypeTransform, CPUTransform) {
...
@@ -73,94 +89,104 @@ TEST(DataTypeTransform, CPUTransform) {
}
}
// transform from float16 to other data types
// transform from float16 to other data types
TransDataType
(
kernel_fp16
,
kernel_fp32
,
in
,
&
out
);
paddle
::
framework
::
TransDataType
(
kernel_fp16
,
kernel_fp32
,
in
,
&
out
);
float
*
out_data_float
=
out
.
data
<
float
>
();
float
*
out_data_float
=
out
.
data
<
float
>
();
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
EXPECT_EQ
(
out_data_float
[
i
],
static_cast
<
float
>
(
ptr
[
i
]));
EXPECT_EQ
(
out_data_float
[
i
],
static_cast
<
float
>
(
ptr
[
i
]));
}
}
TransDataType
(
kernel_fp16
,
kernel_fp64
,
in
,
&
out
);
paddle
::
framework
::
TransDataType
(
kernel_fp16
,
kernel_fp64
,
in
,
&
out
);
double
*
out_data_double
=
out
.
data
<
double
>
();
double
*
out_data_double
=
out
.
data
<
double
>
();
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
EXPECT_EQ
(
out_data_double
[
i
],
static_cast
<
double
>
(
ptr
[
i
]));
EXPECT_EQ
(
out_data_double
[
i
],
static_cast
<
double
>
(
ptr
[
i
]));
}
}
TransDataType
(
kernel_fp16
,
kernel_int32
,
in
,
&
out
);
paddle
::
framework
::
TransDataType
(
kernel_fp16
,
kernel_int32
,
in
,
&
out
);
int
*
out_data_int
=
out
.
data
<
int
>
();
int
*
out_data_int
=
out
.
data
<
int
>
();
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
EXPECT_EQ
(
out_data_int
[
i
],
static_cast
<
int
>
(
ptr
[
i
]));
EXPECT_EQ
(
out_data_int
[
i
],
static_cast
<
int
>
(
ptr
[
i
]));
}
}
TransDataType
(
kernel_fp16
,
kernel_int64
,
in
,
&
out
);
paddle
::
framework
::
TransDataType
(
kernel_fp16
,
kernel_int64
,
in
,
&
out
);
int64_t
*
out_data_int64
=
out
.
data
<
int64_t
>
();
int64_t
*
out_data_int64
=
out
.
data
<
int64_t
>
();
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
EXPECT_EQ
(
out_data_int64
[
i
],
static_cast
<
int64_t
>
(
ptr
[
i
]));
EXPECT_EQ
(
out_data_int64
[
i
],
static_cast
<
int64_t
>
(
ptr
[
i
]));
}
}
TransDataType
(
kernel_fp16
,
kernel_bool
,
in
,
&
out
);
paddle
::
framework
::
TransDataType
(
kernel_fp16
,
kernel_bool
,
in
,
&
out
);
bool
*
out_data_bool
=
out
.
data
<
bool
>
();
bool
*
out_data_bool
=
out
.
data
<
bool
>
();
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
EXPECT_EQ
(
out_data_bool
[
i
],
static_cast
<
bool
>
(
ptr
[
i
]));
EXPECT_EQ
(
out_data_bool
[
i
],
static_cast
<
bool
>
(
ptr
[
i
]));
}
}
// transform float to float16
// transform float to float16
float
*
in_data_float
=
in
.
mutable_data
<
float
>
(
make_ddim
({
2
,
3
}),
place
);
float
*
in_data_float
=
in
.
mutable_data
<
float
>
(
paddle
::
framework
::
make_ddim
({
2
,
3
}),
place
);
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
in_data_float
[
i
]
=
i
;
in_data_float
[
i
]
=
i
;
}
}
TransDataType
(
kernel_fp32
,
kernel_fp16
,
in
,
&
out
);
paddle
::
framework
::
TransDataType
(
kernel_fp32
,
kernel_fp16
,
in
,
&
out
);
ptr
=
out
.
data
<
float16
>
();
ptr
=
out
.
data
<
paddle
::
platform
::
float16
>
();
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
EXPECT_EQ
(
ptr
[
i
].
x
,
static_cast
<
float16
>
(
in_data_float
[
i
]).
x
);
EXPECT_EQ
(
ptr
[
i
].
x
,
static_cast
<
paddle
::
platform
::
float16
>
(
in_data_float
[
i
]).
x
);
}
}
// transform double to float16
// transform double to float16
double
*
in_data_double
=
in
.
mutable_data
<
double
>
(
make_ddim
({
2
,
3
}),
place
);
double
*
in_data_double
=
in
.
mutable_data
<
double
>
(
paddle
::
framework
::
make_ddim
({
2
,
3
}),
place
);
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
in_data_double
[
i
]
=
i
;
in_data_double
[
i
]
=
i
;
}
}
TransDataType
(
kernel_fp64
,
kernel_fp16
,
in
,
&
out
);
paddle
::
framework
::
TransDataType
(
kernel_fp64
,
kernel_fp16
,
in
,
&
out
);
ptr
=
out
.
data
<
float16
>
();
ptr
=
out
.
data
<
paddle
::
platform
::
float16
>
();
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
EXPECT_EQ
(
ptr
[
i
].
x
,
static_cast
<
float16
>
(
in_data_double
[
i
]).
x
);
EXPECT_EQ
(
ptr
[
i
].
x
,
static_cast
<
paddle
::
platform
::
float16
>
(
in_data_double
[
i
]).
x
);
}
}
// transform int to float16
// transform int to float16
int
*
in_data_int
=
in
.
mutable_data
<
int
>
(
make_ddim
({
2
,
3
}),
place
);
int
*
in_data_int
=
in
.
mutable_data
<
int
>
(
paddle
::
framework
::
make_ddim
({
2
,
3
}),
place
);
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
in_data_int
[
i
]
=
i
;
in_data_int
[
i
]
=
i
;
}
}
TransDataType
(
kernel_int32
,
kernel_fp16
,
in
,
&
out
);
paddle
::
framework
::
TransDataType
(
kernel_int32
,
kernel_fp16
,
in
,
&
out
);
ptr
=
out
.
data
<
float16
>
();
ptr
=
out
.
data
<
paddle
::
platform
::
float16
>
();
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
EXPECT_EQ
(
ptr
[
i
].
x
,
static_cast
<
float16
>
(
in_data_int
[
i
]).
x
);
EXPECT_EQ
(
ptr
[
i
].
x
,
static_cast
<
paddle
::
platform
::
float16
>
(
in_data_int
[
i
]).
x
);
}
}
// transform int64 to float16
// transform int64 to float16
int64_t
*
in_data_int64
=
in
.
mutable_data
<
int64_t
>
(
make_ddim
({
2
,
3
}),
place
);
int64_t
*
in_data_int64
=
in
.
mutable_data
<
int64_t
>
(
paddle
::
framework
::
make_ddim
({
2
,
3
}),
place
);
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
in_data_int64
[
i
]
=
i
;
in_data_int64
[
i
]
=
i
;
}
}
TransDataType
(
kernel_int64
,
kernel_fp16
,
in
,
&
out
);
paddle
::
framework
::
TransDataType
(
kernel_int64
,
kernel_fp16
,
in
,
&
out
);
ptr
=
out
.
data
<
float16
>
();
ptr
=
out
.
data
<
paddle
::
platform
::
float16
>
();
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
EXPECT_EQ
(
ptr
[
i
].
x
,
static_cast
<
float16
>
(
in_data_int64
[
i
]).
x
);
EXPECT_EQ
(
ptr
[
i
].
x
,
static_cast
<
paddle
::
platform
::
float16
>
(
in_data_int64
[
i
]).
x
);
}
}
// transform bool to float16
// transform bool to float16
bool
*
in_data_bool
=
in
.
mutable_data
<
bool
>
(
make_ddim
({
2
,
3
}),
place
);
bool
*
in_data_bool
=
in
.
mutable_data
<
bool
>
(
paddle
::
framework
::
make_ddim
({
2
,
3
}),
place
);
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
in_data_bool
[
i
]
=
i
;
in_data_bool
[
i
]
=
i
;
}
}
TransDataType
(
kernel_bool
,
kernel_fp16
,
in
,
&
out
);
paddle
::
framework
::
TransDataType
(
kernel_bool
,
kernel_fp16
,
in
,
&
out
);
ptr
=
out
.
data
<
float16
>
();
ptr
=
out
.
data
<
paddle
::
platform
::
float16
>
();
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
EXPECT_EQ
(
ptr
[
i
].
x
,
static_cast
<
float16
>
(
in_data_bool
[
i
]).
x
);
EXPECT_EQ
(
ptr
[
i
].
x
,
static_cast
<
paddle
::
platform
::
float16
>
(
in_data_bool
[
i
]).
x
);
}
}
}
}
}
}
paddle/fluid/framework/data_type_transform_test.cu
浏览文件 @
1d756746
...
@@ -18,42 +18,58 @@ limitations under the License. */
...
@@ -18,42 +18,58 @@ limitations under the License. */
#include "gtest/gtest.h"
#include "gtest/gtest.h"
TEST
(
DataTypeTransform
,
GPUTransform
)
{
TEST
(
DataTypeTransform
,
GPUTransform
)
{
using
namespace
paddle
::
framework
;
auto
cpu_place
=
paddle
::
platform
::
CPUPlace
();
using
namespace
paddle
::
platform
;
auto
gpu_place
=
paddle
::
platform
::
CUDAPlace
(
0
);
paddle
::
platform
::
CUDADeviceContext
context
(
gpu_place
);
auto
cpu_place
=
CPUPlace
();
auto
gpu_place
=
CUDAPlace
(
0
);
auto
kernel_fp16
=
paddle
::
framework
::
OpKernelType
(
CUDADeviceContext
context
(
gpu_place
);
paddle
::
framework
::
proto
::
VarType
::
FP16
,
gpu_place
,
paddle
::
framework
::
DataLayout
::
kAnyLayout
,
auto
kernel_fp16
=
OpKernelType
(
proto
::
VarType
::
FP16
,
gpu_place
,
paddle
::
framework
::
LibraryType
::
kPlain
);
DataLayout
::
kAnyLayout
,
LibraryType
::
kPlain
);
auto
kernel_fp32
=
OpKernelType
(
proto
::
VarType
::
FP32
,
gpu_place
,
auto
kernel_fp32
=
paddle
::
framework
::
OpKernelType
(
DataLayout
::
kAnyLayout
,
LibraryType
::
kPlain
);
paddle
::
framework
::
proto
::
VarType
::
FP32
,
gpu_place
,
auto
kernel_fp64
=
OpKernelType
(
proto
::
VarType
::
FP64
,
gpu_place
,
paddle
::
framework
::
DataLayout
::
kAnyLayout
,
DataLayout
::
kAnyLayout
,
LibraryType
::
kPlain
);
paddle
::
framework
::
LibraryType
::
kPlain
);
auto
kernel_int32
=
OpKernelType
(
proto
::
VarType
::
INT32
,
gpu_place
,
DataLayout
::
kAnyLayout
,
LibraryType
::
kPlain
);
auto
kernel_fp64
=
paddle
::
framework
::
OpKernelType
(
auto
kernel_int64
=
OpKernelType
(
proto
::
VarType
::
INT64
,
gpu_place
,
paddle
::
framework
::
proto
::
VarType
::
FP64
,
gpu_place
,
DataLayout
::
kAnyLayout
,
LibraryType
::
kPlain
);
paddle
::
framework
::
DataLayout
::
kAnyLayout
,
auto
kernel_bool
=
OpKernelType
(
proto
::
VarType
::
BOOL
,
gpu_place
,
paddle
::
framework
::
LibraryType
::
kPlain
);
DataLayout
::
kAnyLayout
,
LibraryType
::
kPlain
);
auto
kernel_int32
=
paddle
::
framework
::
OpKernelType
(
paddle
::
framework
::
proto
::
VarType
::
INT32
,
gpu_place
,
paddle
::
framework
::
DataLayout
::
kAnyLayout
,
paddle
::
framework
::
LibraryType
::
kPlain
);
auto
kernel_int64
=
paddle
::
framework
::
OpKernelType
(
paddle
::
framework
::
proto
::
VarType
::
INT64
,
gpu_place
,
paddle
::
framework
::
DataLayout
::
kAnyLayout
,
paddle
::
framework
::
LibraryType
::
kPlain
);
auto
kernel_bool
=
paddle
::
framework
::
OpKernelType
(
paddle
::
framework
::
proto
::
VarType
::
BOOL
,
gpu_place
,
paddle
::
framework
::
DataLayout
::
kAnyLayout
,
paddle
::
framework
::
LibraryType
::
kPlain
);
// data type transform from float32
// data type transform from float32
{
{
Tensor
in
;
paddle
::
framework
::
Tensor
in
;
Tensor
in_gpu
;
paddle
::
framework
::
Tensor
in_gpu
;
Tensor
out_gpu
;
paddle
::
framework
::
Tensor
out_gpu
;
Tensor
out
;
paddle
::
framework
::
Tensor
out
;
float
*
in_ptr
=
in
.
mutable_data
<
float
>
(
make_ddim
({
2
,
3
}),
cpu_place
);
float
*
in_ptr
=
in
.
mutable_data
<
float
>
(
paddle
::
framework
::
make_ddim
({
2
,
3
}),
cpu_place
);
float
arr
[
6
]
=
{
0
,
1
,
2
,
3
,
4
,
5
};
float
arr
[
6
]
=
{
0
,
1
,
2
,
3
,
4
,
5
};
int
data_number
=
sizeof
(
arr
)
/
sizeof
(
arr
[
0
]);
int
data_number
=
sizeof
(
arr
)
/
sizeof
(
arr
[
0
]);
memcpy
(
in_ptr
,
arr
,
sizeof
(
arr
));
memcpy
(
in_ptr
,
arr
,
sizeof
(
arr
));
TensorCopy
(
in
,
gpu_place
,
context
,
&
in_gpu
);
paddle
::
framework
::
TensorCopy
(
in
,
gpu_place
,
context
,
&
in_gpu
);
context
.
Wait
();
context
.
Wait
();
TransDataType
(
kernel_fp32
,
kernel_fp64
,
in_gpu
,
&
out_gpu
);
paddle
::
framework
::
TransDataType
(
kernel_fp32
,
kernel_fp64
,
in_gpu
,
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
&
out_gpu
);
paddle
::
framework
::
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
context
.
Wait
();
context
.
Wait
();
double
*
out_data_double
=
out
.
data
<
double
>
();
double
*
out_data_double
=
out
.
data
<
double
>
();
...
@@ -61,8 +77,9 @@ TEST(DataTypeTransform, GPUTransform) {
...
@@ -61,8 +77,9 @@ TEST(DataTypeTransform, GPUTransform) {
EXPECT_EQ
(
out_data_double
[
i
],
static_cast
<
double
>
(
arr
[
i
]));
EXPECT_EQ
(
out_data_double
[
i
],
static_cast
<
double
>
(
arr
[
i
]));
}
}
TransDataType
(
kernel_fp32
,
kernel_int32
,
in_gpu
,
&
out_gpu
);
paddle
::
framework
::
TransDataType
(
kernel_fp32
,
kernel_int32
,
in_gpu
,
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
&
out_gpu
);
paddle
::
framework
::
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
context
.
Wait
();
context
.
Wait
();
int
*
out_data_int
=
out
.
data
<
int
>
();
int
*
out_data_int
=
out
.
data
<
int
>
();
...
@@ -73,22 +90,27 @@ TEST(DataTypeTransform, GPUTransform) {
...
@@ -73,22 +90,27 @@ TEST(DataTypeTransform, GPUTransform) {
// data type transform from/to float16
// data type transform from/to float16
{
{
Tensor
in
;
paddle
::
framework
::
Tensor
in
;
Tensor
in_gpu
;
paddle
::
framework
::
Tensor
in_gpu
;
Tensor
out_gpu
;
paddle
::
framework
::
Tensor
out_gpu
;
Tensor
out
;
paddle
::
framework
::
Tensor
out
;
float16
*
ptr
=
in
.
mutable_data
<
float16
>
(
make_ddim
({
2
,
3
}),
cpu_place
);
paddle
::
platform
::
float16
*
ptr
=
in
.
mutable_data
<
paddle
::
platform
::
float16
>
(
float16
arr
[
6
]
=
{
float16
(
0
),
float16
(
1
),
float16
(
2
),
paddle
::
framework
::
make_ddim
({
2
,
3
}),
cpu_place
);
float16
(
3
),
float16
(
4
),
float16
(
5
)};
paddle
::
platform
::
float16
arr
[
6
]
=
{
paddle
::
platform
::
float16
(
0
),
paddle
::
platform
::
float16
(
1
),
paddle
::
platform
::
float16
(
2
),
paddle
::
platform
::
float16
(
3
),
paddle
::
platform
::
float16
(
4
),
paddle
::
platform
::
float16
(
5
)};
int
data_number
=
sizeof
(
arr
)
/
sizeof
(
arr
[
0
]);
int
data_number
=
sizeof
(
arr
)
/
sizeof
(
arr
[
0
]);
memcpy
(
ptr
,
arr
,
sizeof
(
arr
));
memcpy
(
ptr
,
arr
,
sizeof
(
arr
));
TensorCopy
(
in
,
gpu_place
,
context
,
&
in_gpu
);
paddle
::
framework
::
TensorCopy
(
in
,
gpu_place
,
context
,
&
in_gpu
);
context
.
Wait
();
context
.
Wait
();
// transform from float16 to other data types
// transform from float16 to other data types
TransDataType
(
kernel_fp16
,
kernel_fp32
,
in_gpu
,
&
out_gpu
);
paddle
::
framework
::
TransDataType
(
kernel_fp16
,
kernel_fp32
,
in_gpu
,
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
&
out_gpu
);
paddle
::
framework
::
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
context
.
Wait
();
context
.
Wait
();
float
*
out_data_float
=
out
.
data
<
float
>
();
float
*
out_data_float
=
out
.
data
<
float
>
();
...
@@ -96,8 +118,9 @@ TEST(DataTypeTransform, GPUTransform) {
...
@@ -96,8 +118,9 @@ TEST(DataTypeTransform, GPUTransform) {
EXPECT_EQ
(
out_data_float
[
i
],
static_cast
<
float
>
(
ptr
[
i
]));
EXPECT_EQ
(
out_data_float
[
i
],
static_cast
<
float
>
(
ptr
[
i
]));
}
}
TransDataType
(
kernel_fp16
,
kernel_fp64
,
in_gpu
,
&
out_gpu
);
paddle
::
framework
::
TransDataType
(
kernel_fp16
,
kernel_fp64
,
in_gpu
,
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
&
out_gpu
);
paddle
::
framework
::
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
context
.
Wait
();
context
.
Wait
();
double
*
out_data_double
=
out
.
data
<
double
>
();
double
*
out_data_double
=
out
.
data
<
double
>
();
...
@@ -105,8 +128,9 @@ TEST(DataTypeTransform, GPUTransform) {
...
@@ -105,8 +128,9 @@ TEST(DataTypeTransform, GPUTransform) {
EXPECT_EQ
(
out_data_double
[
i
],
static_cast
<
double
>
(
ptr
[
i
]));
EXPECT_EQ
(
out_data_double
[
i
],
static_cast
<
double
>
(
ptr
[
i
]));
}
}
TransDataType
(
kernel_fp16
,
kernel_int32
,
in_gpu
,
&
out_gpu
);
paddle
::
framework
::
TransDataType
(
kernel_fp16
,
kernel_int32
,
in_gpu
,
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
&
out_gpu
);
paddle
::
framework
::
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
context
.
Wait
();
context
.
Wait
();
int
*
out_data_int
=
out
.
data
<
int
>
();
int
*
out_data_int
=
out
.
data
<
int
>
();
...
@@ -114,8 +138,9 @@ TEST(DataTypeTransform, GPUTransform) {
...
@@ -114,8 +138,9 @@ TEST(DataTypeTransform, GPUTransform) {
EXPECT_EQ
(
out_data_int
[
i
],
static_cast
<
int
>
(
ptr
[
i
]));
EXPECT_EQ
(
out_data_int
[
i
],
static_cast
<
int
>
(
ptr
[
i
]));
}
}
TransDataType
(
kernel_fp16
,
kernel_int64
,
in_gpu
,
&
out_gpu
);
paddle
::
framework
::
TransDataType
(
kernel_fp16
,
kernel_int64
,
in_gpu
,
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
&
out_gpu
);
paddle
::
framework
::
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
context
.
Wait
();
context
.
Wait
();
int64_t
*
out_data_int64
=
out
.
data
<
int64_t
>
();
int64_t
*
out_data_int64
=
out
.
data
<
int64_t
>
();
...
@@ -123,8 +148,9 @@ TEST(DataTypeTransform, GPUTransform) {
...
@@ -123,8 +148,9 @@ TEST(DataTypeTransform, GPUTransform) {
EXPECT_EQ
(
out_data_int64
[
i
],
static_cast
<
int64_t
>
(
ptr
[
i
]));
EXPECT_EQ
(
out_data_int64
[
i
],
static_cast
<
int64_t
>
(
ptr
[
i
]));
}
}
TransDataType
(
kernel_fp16
,
kernel_bool
,
in_gpu
,
&
out_gpu
);
paddle
::
framework
::
TransDataType
(
kernel_fp16
,
kernel_bool
,
in_gpu
,
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
&
out_gpu
);
paddle
::
framework
::
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
context
.
Wait
();
context
.
Wait
();
bool
*
out_data_bool
=
out
.
data
<
bool
>
();
bool
*
out_data_bool
=
out
.
data
<
bool
>
();
...
@@ -133,90 +159,103 @@ TEST(DataTypeTransform, GPUTransform) {
...
@@ -133,90 +159,103 @@ TEST(DataTypeTransform, GPUTransform) {
}
}
// transform float to float16
// transform float to float16
float
*
in_data_float
=
in
.
mutable_data
<
float
>
(
make_ddim
({
2
,
3
}),
cpu_place
);
float
*
in_data_float
=
in
.
mutable_data
<
float
>
(
paddle
::
framework
::
make_ddim
({
2
,
3
}),
cpu_place
);
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
in_data_float
[
i
]
=
i
;
in_data_float
[
i
]
=
i
;
}
}
TensorCopy
(
in
,
gpu_place
,
context
,
&
in_gpu
);
paddle
::
framework
::
TensorCopy
(
in
,
gpu_place
,
context
,
&
in_gpu
);
context
.
Wait
();
context
.
Wait
();
TransDataType
(
kernel_fp32
,
kernel_fp16
,
in_gpu
,
&
out_gpu
);
paddle
::
framework
::
TransDataType
(
kernel_fp32
,
kernel_fp16
,
in_gpu
,
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
&
out_gpu
);
paddle
::
framework
::
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
context
.
Wait
();
context
.
Wait
();
ptr
=
out
.
data
<
float16
>
();
ptr
=
out
.
data
<
paddle
::
platform
::
float16
>
();
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
EXPECT_EQ
(
ptr
[
i
].
x
,
static_cast
<
float16
>
(
in_data_float
[
i
]).
x
);
EXPECT_EQ
(
ptr
[
i
].
x
,
static_cast
<
paddle
::
platform
::
float16
>
(
in_data_float
[
i
]).
x
);
}
}
// transform double to float16
// transform double to float16
double
*
in_data_double
=
double
*
in_data_double
=
in
.
mutable_data
<
double
>
(
in
.
mutable_data
<
double
>
(
make_ddim
({
2
,
3
}),
cpu_place
);
paddle
::
framework
::
make_ddim
({
2
,
3
}),
cpu_place
);
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
in_data_double
[
i
]
=
i
;
in_data_double
[
i
]
=
i
;
}
}
TensorCopy
(
in
,
gpu_place
,
context
,
&
in_gpu
);
paddle
::
framework
::
TensorCopy
(
in
,
gpu_place
,
context
,
&
in_gpu
);
context
.
Wait
();
context
.
Wait
();
TransDataType
(
kernel_fp64
,
kernel_fp16
,
in_gpu
,
&
out_gpu
);
paddle
::
framework
::
TransDataType
(
kernel_fp64
,
kernel_fp16
,
in_gpu
,
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
&
out_gpu
);
paddle
::
framework
::
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
context
.
Wait
();
context
.
Wait
();
ptr
=
out
.
data
<
float16
>
();
ptr
=
out
.
data
<
paddle
::
platform
::
float16
>
();
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
EXPECT_EQ
(
ptr
[
i
].
x
,
static_cast
<
float16
>
(
in_data_double
[
i
]).
x
);
EXPECT_EQ
(
ptr
[
i
].
x
,
static_cast
<
paddle
::
platform
::
float16
>
(
in_data_double
[
i
]).
x
);
}
}
// transform int to float16
// transform int to float16
int
*
in_data_int
=
in
.
mutable_data
<
int
>
(
make_ddim
({
2
,
3
}),
cpu_place
);
int
*
in_data_int
=
in
.
mutable_data
<
int
>
(
paddle
::
framework
::
make_ddim
({
2
,
3
}),
cpu_place
);
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
in_data_int
[
i
]
=
i
;
in_data_int
[
i
]
=
i
;
}
}
TensorCopy
(
in
,
gpu_place
,
context
,
&
in_gpu
);
paddle
::
framework
::
TensorCopy
(
in
,
gpu_place
,
context
,
&
in_gpu
);
context
.
Wait
();
context
.
Wait
();
TransDataType
(
kernel_int32
,
kernel_fp16
,
in_gpu
,
&
out_gpu
);
paddle
::
framework
::
TransDataType
(
kernel_int32
,
kernel_fp16
,
in_gpu
,
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
&
out_gpu
);
paddle
::
framework
::
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
context
.
Wait
();
context
.
Wait
();
ptr
=
out
.
data
<
float16
>
();
ptr
=
out
.
data
<
paddle
::
platform
::
float16
>
();
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
EXPECT_EQ
(
ptr
[
i
].
x
,
static_cast
<
float16
>
(
in_data_int
[
i
]).
x
);
EXPECT_EQ
(
ptr
[
i
].
x
,
static_cast
<
paddle
::
platform
::
float16
>
(
in_data_int
[
i
]).
x
);
}
}
// transform int64 to float16
// transform int64 to float16
int64_t
*
in_data_int64
=
int64_t
*
in_data_int64
=
in
.
mutable_data
<
int64_t
>
(
in
.
mutable_data
<
int64_t
>
(
make_ddim
({
2
,
3
}),
cpu_place
);
paddle
::
framework
::
make_ddim
({
2
,
3
}),
cpu_place
);
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
in_data_int64
[
i
]
=
i
;
in_data_int64
[
i
]
=
i
;
}
}
TensorCopy
(
in
,
gpu_place
,
context
,
&
in_gpu
);
paddle
::
framework
::
TensorCopy
(
in
,
gpu_place
,
context
,
&
in_gpu
);
context
.
Wait
();
context
.
Wait
();
TransDataType
(
kernel_int64
,
kernel_fp16
,
in_gpu
,
&
out_gpu
);
paddle
::
framework
::
TransDataType
(
kernel_int64
,
kernel_fp16
,
in_gpu
,
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
&
out_gpu
);
paddle
::
framework
::
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
context
.
Wait
();
context
.
Wait
();
ptr
=
out
.
data
<
float16
>
();
ptr
=
out
.
data
<
paddle
::
platform
::
float16
>
();
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
EXPECT_EQ
(
ptr
[
i
].
x
,
static_cast
<
float16
>
(
in_data_int64
[
i
]).
x
);
EXPECT_EQ
(
ptr
[
i
].
x
,
static_cast
<
paddle
::
platform
::
float16
>
(
in_data_int64
[
i
]).
x
);
}
}
// transform bool to float16
// transform bool to float16
bool
*
in_data_bool
=
in
.
mutable_data
<
bool
>
(
make_ddim
({
2
,
3
}),
cpu_place
);
bool
*
in_data_bool
=
in
.
mutable_data
<
bool
>
(
paddle
::
framework
::
make_ddim
({
2
,
3
}),
cpu_place
);
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
in_data_bool
[
i
]
=
i
;
in_data_bool
[
i
]
=
i
;
}
}
TensorCopy
(
in
,
gpu_place
,
context
,
&
in_gpu
);
paddle
::
framework
::
TensorCopy
(
in
,
gpu_place
,
context
,
&
in_gpu
);
context
.
Wait
();
context
.
Wait
();
TransDataType
(
kernel_bool
,
kernel_fp16
,
in_gpu
,
&
out_gpu
);
paddle
::
framework
::
TransDataType
(
kernel_bool
,
kernel_fp16
,
in_gpu
,
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
&
out_gpu
);
paddle
::
framework
::
TensorCopy
(
out_gpu
,
cpu_place
,
context
,
&
out
);
context
.
Wait
();
context
.
Wait
();
ptr
=
out
.
data
<
float16
>
();
ptr
=
out
.
data
<
paddle
::
platform
::
float16
>
();
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
data_number
;
++
i
)
{
EXPECT_EQ
(
ptr
[
i
].
x
,
static_cast
<
float16
>
(
in_data_bool
[
i
]).
x
);
EXPECT_EQ
(
ptr
[
i
].
x
,
static_cast
<
paddle
::
platform
::
float16
>
(
in_data_bool
[
i
]).
x
);
}
}
}
}
}
}
paddle/fluid/framework/details/CMakeLists.txt
浏览文件 @
1d756746
...
@@ -8,27 +8,28 @@ cc_library(send_op_handle SRCS send_op_handle.cc DEPS framework_proto scope plac
...
@@ -8,27 +8,28 @@ cc_library(send_op_handle SRCS send_op_handle.cc DEPS framework_proto scope plac
cc_library
(
ssa_graph SRCS ssa_graph.cc DEPS var_handle op_handle_base
)
cc_library
(
ssa_graph SRCS ssa_graph.cc DEPS var_handle op_handle_base
)
cc_library
(
ssa_graph_builder SRCS ssa_graph_builder.cc DEPS ssa_graph
)
cc_library
(
ssa_graph_builder SRCS ssa_graph_builder.cc DEPS ssa_graph
)
cc_library
(
variable_visitor SRCS variable_visitor.cc DEPS lod_tensor selected_rows
)
if
(
WITH_GPU
)
if
(
WITH_GPU
)
nv_library
(
nccl_all_reduce_op_handle SRCS nccl_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
nv_library
(
nccl_all_reduce_op_handle SRCS nccl_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
dynload_cuda
)
dynload_cuda
)
set
(
multi_devices_graph_builder_deps nccl_all_reduce_op_handle
)
set
(
multi_devices_graph_builder_deps nccl_all_reduce_op_handle
)
nv_library
(
reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base scope ddim dynload_cuda
)
nv_library
(
reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base
variable_visitor
scope ddim dynload_cuda
)
else
()
else
()
set
(
multi_devices_graph_builder_deps
)
set
(
multi_devices_graph_builder_deps
)
cc_library
(
reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base scope ddim
)
cc_library
(
reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base
variable_visitor
scope ddim
)
endif
()
endif
()
cc_library
(
broadcast_op_handle SRCS broadcast_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor
)
cc_library
(
gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor
)
cc_library
(
multi_devices_graph_builder SRCS multi_devices_graph_builder.cc DEPS ssa_graph_builder computation_op_handle
cc_library
(
multi_devices_graph_builder SRCS multi_devices_graph_builder.cc DEPS ssa_graph_builder computation_op_handle
scale_loss_grad_op_handle send_op_handle
${
multi_devices_graph_builder_deps
}
)
scale_loss_grad_op_handle send_op_handle
${
multi_devices_graph_builder_deps
}
reduce_op_handle broadcast_op_handle
)
cc_library
(
ssa_graph_executor SRCS ssa_graph_executor.cc DEPS ssa_graph framework_proto
)
cc_library
(
ssa_graph_executor SRCS ssa_graph_executor.cc DEPS ssa_graph framework_proto
)
cc_library
(
threaded_ssa_graph_executor SRCS threaded_ssa_graph_executor.cc DEPS fetch_op_handle ssa_graph_executor scope
cc_library
(
threaded_ssa_graph_executor SRCS threaded_ssa_graph_executor.cc DEPS fetch_op_handle ssa_graph_executor scope
simple_threadpool device_context
)
simple_threadpool device_context
)
cc_library
(
variable_visitor SRCS variable_visitor.cc DEPS lod_tensor selected_rows
)
cc_library
(
broadcast_op_handle SRCS broadcast_op_handle.cc DEPS op_handle_base variable_visitor scope ddim memory
)
cc_library
(
gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope variable_visitor ddim memory
)
cc_test
(
broadcast_op_test SRCS broadcast_op_handle_test.cc DEPS var_handle op_handle_base scope ddim memory
cc_test
(
broadcast_op_test SRCS broadcast_op_handle_test.cc DEPS var_handle op_handle_base scope ddim memory
device_context broadcast_op_handle
)
device_context broadcast_op_handle
)
cc_test
(
gather_op_test SRCS gather_op_handle_test.cc DEPS var_handle op_handle_base scope ddim memory
cc_test
(
gather_op_test SRCS gather_op_handle_test.cc DEPS var_handle op_handle_base scope ddim memory
...
...
paddle/fluid/framework/details/broadcast_op_handle.cc
浏览文件 @
1d756746
...
@@ -44,9 +44,15 @@ void BroadcastOpHandle::RunImpl() {
...
@@ -44,9 +44,15 @@ void BroadcastOpHandle::RunImpl() {
// &in_place;
// &in_place;
WaitInputVarGenerated
(
*
in_var_handle
);
WaitInputVarGenerated
(
*
in_var_handle
);
auto
*
in_var
=
local_scopes_
.
at
(
in_var_handle
->
scope_idx_
)
std
::
vector
<
const
Scope
*>
var_scopes
;
->
FindVar
(
in_var_handle
->
name_
);
for
(
auto
*
s
:
local_scopes_
)
{
var_scopes
.
emplace_back
(
s
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
());
}
auto
*
in_var
=
var_scopes
.
at
(
in_var_handle
->
scope_idx_
)
->
FindVar
(
in_var_handle
->
name_
);
PADDLE_ENFORCE_NOT_NULL
(
in_var
);
PADDLE_ENFORCE_NOT_NULL
(
in_var
);
Tensor
&
in_tensor
=
VariableVisitor
::
GetMutableTensor
(
in_var
);
Tensor
&
in_tensor
=
VariableVisitor
::
GetMutableTensor
(
in_var
);
for
(
auto
*
out
:
out_var_handles
)
{
for
(
auto
*
out
:
out_var_handles
)
{
...
@@ -55,17 +61,16 @@ void BroadcastOpHandle::RunImpl() {
...
@@ -55,17 +61,16 @@ void BroadcastOpHandle::RunImpl() {
}
}
auto
&
out_p
=
out
->
place_
;
auto
&
out_p
=
out
->
place_
;
auto
*
out_var
=
local_scopes_
.
at
(
out
->
scope_idx_
)
->
FindVar
(
out
->
name_
);
auto
*
out_var
=
var_scopes
.
at
(
out
->
scope_idx_
)
->
FindVar
(
out
->
name_
);
PADDLE_ENFORCE_NOT_NULL
(
out_var
);
PADDLE_ENFORCE_EQ
(
out_p
.
which
(),
in_var_handle
->
place_
.
which
(),
PADDLE_ENFORCE_EQ
(
out_p
.
which
(),
in_var_handle
->
place_
.
which
(),
"Places must be all on CPU or all on CUDA."
);
"Places must be all on CPU or all on CUDA."
);
VariableVisitor
::
ShareDimsAndLoD
(
*
in_var
,
out_var
);
VariableVisitor
::
ShareDimsAndLoD
(
*
in_var
,
out_var
);
VariableVisitor
::
GetMutableTensor
(
out_var
)
VariableVisitor
::
GetMutableTensor
(
out_var
).
mutable_data
(
out_p
,
.
Resize
(
in_tensor
.
dims
())
in_tensor
.
type
());
.
mutable_data
(
out_p
,
in_tensor
.
type
());
auto
dev_ctx
=
dev_ctxes_
[
out_p
]
;
auto
dev_ctx
=
dev_ctxes_
.
at
(
out_p
)
;
RunAndRecordEvent
(
out_p
,
[
in_tensor
,
out_var
,
dev_ctx
,
out_p
]
{
RunAndRecordEvent
(
out_p
,
[
in_tensor
,
out_var
,
dev_ctx
,
out_p
]
{
paddle
::
framework
::
TensorCopy
(
paddle
::
framework
::
TensorCopy
(
in_tensor
,
out_p
,
*
(
dev_ctx
),
in_tensor
,
out_p
,
*
(
dev_ctx
),
...
...
paddle/fluid/framework/details/broadcast_op_handle_test.cc
浏览文件 @
1d756746
...
@@ -30,6 +30,7 @@ const f::DDim kDims = {20, 20};
...
@@ -30,6 +30,7 @@ const f::DDim kDims = {20, 20};
struct
TestBroadcastOpHandle
{
struct
TestBroadcastOpHandle
{
std
::
vector
<
std
::
unique_ptr
<
p
::
DeviceContext
>>
ctxs_
;
std
::
vector
<
std
::
unique_ptr
<
p
::
DeviceContext
>>
ctxs_
;
std
::
vector
<
Scope
*>
local_scopes_
;
std
::
vector
<
Scope
*>
local_scopes_
;
std
::
vector
<
Scope
*>
param_scopes_
;
Scope
g_scope_
;
Scope
g_scope_
;
std
::
unique_ptr
<
OpHandleBase
>
op_handle_
;
std
::
unique_ptr
<
OpHandleBase
>
op_handle_
;
std
::
vector
<
std
::
unique_ptr
<
VarHandleBase
>>
vars_
;
std
::
vector
<
std
::
unique_ptr
<
VarHandleBase
>>
vars_
;
...
@@ -72,11 +73,17 @@ struct TestBroadcastOpHandle {
...
@@ -72,11 +73,17 @@ struct TestBroadcastOpHandle {
void
InitBroadcastOp
(
size_t
input_scope_idx
)
{
void
InitBroadcastOp
(
size_t
input_scope_idx
)
{
for
(
size_t
j
=
0
;
j
<
gpu_list_
.
size
();
++
j
)
{
for
(
size_t
j
=
0
;
j
<
gpu_list_
.
size
();
++
j
)
{
local_scopes_
.
push_back
(
&
(
g_scope_
.
NewScope
()));
local_scopes_
.
push_back
(
&
(
g_scope_
.
NewScope
()));
local_scopes_
[
j
]
->
Var
(
"out"
);
Scope
&
local_scope
=
local_scopes_
.
back
()
->
NewScope
();
*
local_scopes_
.
back
()
->
Var
(
details
::
kLocalExecScopeName
)
->
GetMutable
<
Scope
*>
()
=
&
local_scope
;
local_scope
.
Var
(
"out"
);
param_scopes_
.
emplace_back
(
&
local_scope
);
}
}
local
_scopes_
[
input_scope_idx
]
->
Var
(
"input"
);
param
_scopes_
[
input_scope_idx
]
->
Var
(
"input"
);
op_handle_
.
reset
(
new
BroadcastOpHandle
(
local_scopes_
,
gpu_list_
));
op_handle_
.
reset
(
new
BroadcastOpHandle
(
local_scopes_
,
gpu_list_
));
auto
*
in_var_handle
=
auto
*
in_var_handle
=
new
VarHandle
(
1
,
input_scope_idx
,
"input"
,
gpu_list_
[
input_scope_idx
]);
new
VarHandle
(
1
,
input_scope_idx
,
"input"
,
gpu_list_
[
input_scope_idx
]);
vars_
.
emplace_back
(
in_var_handle
);
vars_
.
emplace_back
(
in_var_handle
);
...
@@ -105,7 +112,8 @@ struct TestBroadcastOpHandle {
...
@@ -105,7 +112,8 @@ struct TestBroadcastOpHandle {
}
}
void
TestBroadcastLodTensor
(
size_t
input_scope_idx
)
{
void
TestBroadcastLodTensor
(
size_t
input_scope_idx
)
{
auto
in_var
=
local_scopes_
[
input_scope_idx
]
->
Var
(
"input"
);
auto
in_var
=
param_scopes_
[
input_scope_idx
]
->
FindVar
(
"input"
);
PADDLE_ENFORCE_NOT_NULL
(
in_var
);
auto
in_lod_tensor
=
in_var
->
GetMutable
<
f
::
LoDTensor
>
();
auto
in_lod_tensor
=
in_var
->
GetMutable
<
f
::
LoDTensor
>
();
in_lod_tensor
->
mutable_data
<
float
>
(
kDims
,
gpu_list_
[
input_scope_idx
]);
in_lod_tensor
->
mutable_data
<
float
>
(
kDims
,
gpu_list_
[
input_scope_idx
]);
...
@@ -117,6 +125,7 @@ struct TestBroadcastOpHandle {
...
@@ -117,6 +125,7 @@ struct TestBroadcastOpHandle {
paddle
::
framework
::
TensorFromVector
<
float
>
(
paddle
::
framework
::
TensorFromVector
<
float
>
(
send_vector
,
*
(
ctxs_
[
input_scope_idx
]),
in_lod_tensor
);
send_vector
,
*
(
ctxs_
[
input_scope_idx
]),
in_lod_tensor
);
in_lod_tensor
->
set_lod
(
lod
);
in_lod_tensor
->
set_lod
(
lod
);
in_lod_tensor
->
Resize
(
kDims
);
op_handle_
->
Run
(
false
);
op_handle_
->
Run
(
false
);
...
@@ -124,7 +133,8 @@ struct TestBroadcastOpHandle {
...
@@ -124,7 +133,8 @@ struct TestBroadcastOpHandle {
p
::
CPUPlace
cpu_place
;
p
::
CPUPlace
cpu_place
;
for
(
size_t
j
=
0
;
j
<
gpu_list_
.
size
();
++
j
)
{
for
(
size_t
j
=
0
;
j
<
gpu_list_
.
size
();
++
j
)
{
auto
out_var
=
local_scopes_
[
j
]
->
Var
(
"out"
);
auto
out_var
=
param_scopes_
[
j
]
->
FindVar
(
"out"
);
PADDLE_ENFORCE_NOT_NULL
(
out_var
);
auto
out_tensor
=
out_var
->
Get
<
f
::
LoDTensor
>
();
auto
out_tensor
=
out_var
->
Get
<
f
::
LoDTensor
>
();
PADDLE_ENFORCE_EQ
(
out_tensor
.
lod
(),
lod
,
"lod is not equal."
);
PADDLE_ENFORCE_EQ
(
out_tensor
.
lod
(),
lod
,
"lod is not equal."
);
...
@@ -139,7 +149,8 @@ struct TestBroadcastOpHandle {
...
@@ -139,7 +149,8 @@ struct TestBroadcastOpHandle {
}
}
void
TestBroadcastSelectedRows
(
size_t
input_scope_idx
)
{
void
TestBroadcastSelectedRows
(
size_t
input_scope_idx
)
{
auto
in_var
=
local_scopes_
[
input_scope_idx
]
->
Var
(
"input"
);
auto
in_var
=
param_scopes_
[
input_scope_idx
]
->
FindVar
(
"input"
);
PADDLE_ENFORCE_NOT_NULL
(
in_var
);
auto
in_selected_rows
=
in_var
->
GetMutable
<
f
::
SelectedRows
>
();
auto
in_selected_rows
=
in_var
->
GetMutable
<
f
::
SelectedRows
>
();
auto
value
=
in_selected_rows
->
mutable_value
();
auto
value
=
in_selected_rows
->
mutable_value
();
value
->
mutable_data
<
float
>
(
kDims
,
gpu_list_
[
input_scope_idx
]);
value
->
mutable_data
<
float
>
(
kDims
,
gpu_list_
[
input_scope_idx
]);
...
@@ -162,7 +173,8 @@ struct TestBroadcastOpHandle {
...
@@ -162,7 +173,8 @@ struct TestBroadcastOpHandle {
p
::
CPUPlace
cpu_place
;
p
::
CPUPlace
cpu_place
;
for
(
size_t
j
=
0
;
j
<
gpu_list_
.
size
();
++
j
)
{
for
(
size_t
j
=
0
;
j
<
gpu_list_
.
size
();
++
j
)
{
auto
out_var
=
local_scopes_
[
j
]
->
Var
(
"out"
);
auto
out_var
=
param_scopes_
[
j
]
->
FindVar
(
"out"
);
PADDLE_ENFORCE_NOT_NULL
(
out_var
);
auto
&
out_select_rows
=
out_var
->
Get
<
f
::
SelectedRows
>
();
auto
&
out_select_rows
=
out_var
->
Get
<
f
::
SelectedRows
>
();
auto
rt
=
out_select_rows
.
value
();
auto
rt
=
out_select_rows
.
value
();
...
...
paddle/fluid/framework/details/cow_ptr.h
浏览文件 @
1d756746
...
@@ -14,7 +14,7 @@
...
@@ -14,7 +14,7 @@
#pragma once
#pragma once
#include <memory>
#include <memory>
#include <thread>
#include <thread>
// NOLINT
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
...
@@ -23,7 +23,7 @@ namespace details {
...
@@ -23,7 +23,7 @@ namespace details {
// Change it to thread safe flags if needed.
// Change it to thread safe flags if needed.
class
ThreadUnsafeOwnershipFlags
{
class
ThreadUnsafeOwnershipFlags
{
public:
public:
ThreadUnsafeOwnershipFlags
(
bool
flag
)
:
flag_
(
flag
)
{}
explicit
ThreadUnsafeOwnershipFlags
(
bool
flag
)
:
flag_
(
flag
)
{}
ThreadUnsafeOwnershipFlags
(
const
ThreadUnsafeOwnershipFlags
&
other
)
=
delete
;
ThreadUnsafeOwnershipFlags
(
const
ThreadUnsafeOwnershipFlags
&
other
)
=
delete
;
ThreadUnsafeOwnershipFlags
&
operator
=
(
ThreadUnsafeOwnershipFlags
&
operator
=
(
...
...
paddle/fluid/framework/details/gather_op_handle.cc
浏览文件 @
1d756746
...
@@ -41,14 +41,19 @@ void GatherOpHandle::RunImpl() {
...
@@ -41,14 +41,19 @@ void GatherOpHandle::RunImpl() {
out_var_handle
=
out_var_handles
.
front
();
out_var_handle
=
out_var_handles
.
front
();
}
}
std
::
vector
<
const
Scope
*>
var_scopes
;
for
(
auto
*
s
:
local_scopes_
)
{
var_scopes
.
emplace_back
(
s
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
());
}
auto
in_0_handle
=
in_var_handles
[
0
];
auto
in_0_handle
=
in_var_handles
[
0
];
auto
pre_in_var
=
auto
pre_in_var
=
local_scopes_
[
in_0_handle
->
scope_idx_
]
->
FindVar
(
in_0_handle
->
name_
);
var_scopes
.
at
(
in_0_handle
->
scope_idx_
)
->
FindVar
(
in_0_handle
->
name_
);
auto
pre_place
=
in_0_handle
->
place_
;
PADDLE_ENFORCE_NOT_NULL
(
pre_in_var
);
PADDLE_ENFORCE
(
pre_in_var
->
IsType
<
framework
::
SelectedRows
>
(),
PADDLE_ENFORCE
(
pre_in_var
->
IsType
<
framework
::
SelectedRows
>
(),
"Currently, gather_op only can gather SelectedRows."
);
"Currently, gather_op only can gather SelectedRows."
);
auto
pre_place
=
in_0_handle
->
place_
;
PADDLE_ENFORCE_EQ
(
out_var_handle
->
place_
.
which
(),
pre_place
.
which
(),
PADDLE_ENFORCE_EQ
(
out_var_handle
->
place_
.
which
(),
pre_place
.
which
(),
"The place of input and output should be the same."
);
"The place of input and output should be the same."
);
...
@@ -67,7 +72,7 @@ void GatherOpHandle::RunImpl() {
...
@@ -67,7 +72,7 @@ void GatherOpHandle::RunImpl() {
PADDLE_ENFORCE_EQ
(
in_p
.
which
(),
pre_place
.
which
(),
PADDLE_ENFORCE_EQ
(
in_p
.
which
(),
pre_place
.
which
(),
"Places must be all on CPU or all on CUDA."
);
"Places must be all on CPU or all on CUDA."
);
auto
*
in_var
=
auto
*
in_var
=
local_scopes_
.
at
(
in_handle
->
scope_idx_
)
->
FindVar
(
in_handle
->
name_
);
var_scopes
.
at
(
in_handle
->
scope_idx_
)
->
FindVar
(
in_handle
->
name_
);
auto
&
in_sr
=
in_var
->
Get
<
framework
::
SelectedRows
>
();
auto
&
in_sr
=
in_var
->
Get
<
framework
::
SelectedRows
>
();
PADDLE_ENFORCE_EQ
(
in_sr
.
value
().
type
(),
pre_in
.
value
().
type
(),
PADDLE_ENFORCE_EQ
(
in_sr
.
value
().
type
(),
pre_in
.
value
().
type
(),
...
@@ -86,7 +91,7 @@ void GatherOpHandle::RunImpl() {
...
@@ -86,7 +91,7 @@ void GatherOpHandle::RunImpl() {
// write the output
// write the output
auto
&
out_place
=
out_var_handle
->
place_
;
auto
&
out_place
=
out_var_handle
->
place_
;
auto
out_scope_idx
=
out_var_handle
->
scope_idx_
;
auto
out_scope_idx
=
out_var_handle
->
scope_idx_
;
auto
out_var
=
local_scopes_
[
out_scope_idx
]
->
FindVar
(
out_var_handle
->
name_
);
auto
out_var
=
var_scopes
.
at
(
out_scope_idx
)
->
FindVar
(
out_var_handle
->
name_
);
auto
out
=
out_var
->
GetMutable
<
framework
::
SelectedRows
>
();
auto
out
=
out_var
->
GetMutable
<
framework
::
SelectedRows
>
();
out
->
set_height
(
pre_in
.
height
());
out
->
set_height
(
pre_in
.
height
());
...
...
paddle/fluid/framework/details/gather_op_handle_test.cc
浏览文件 @
1d756746
...
@@ -29,6 +29,7 @@ const f::DDim kDims = {20, 20};
...
@@ -29,6 +29,7 @@ const f::DDim kDims = {20, 20};
struct
TestGatherOpHandle
{
struct
TestGatherOpHandle
{
std
::
vector
<
std
::
unique_ptr
<
p
::
DeviceContext
>>
ctxs_
;
std
::
vector
<
std
::
unique_ptr
<
p
::
DeviceContext
>>
ctxs_
;
std
::
vector
<
Scope
*>
local_scopes_
;
std
::
vector
<
Scope
*>
local_scopes_
;
std
::
vector
<
Scope
*>
param_scopes_
;
Scope
g_scope_
;
Scope
g_scope_
;
std
::
unique_ptr
<
OpHandleBase
>
op_handle_
;
std
::
unique_ptr
<
OpHandleBase
>
op_handle_
;
std
::
vector
<
std
::
unique_ptr
<
VarHandleBase
>>
vars_
;
std
::
vector
<
std
::
unique_ptr
<
VarHandleBase
>>
vars_
;
...
@@ -71,9 +72,14 @@ struct TestGatherOpHandle {
...
@@ -71,9 +72,14 @@ struct TestGatherOpHandle {
void
InitGatherOp
(
size_t
input_scope_idx
)
{
void
InitGatherOp
(
size_t
input_scope_idx
)
{
for
(
size_t
j
=
0
;
j
<
gpu_list_
.
size
();
++
j
)
{
for
(
size_t
j
=
0
;
j
<
gpu_list_
.
size
();
++
j
)
{
local_scopes_
.
push_back
(
&
(
g_scope_
.
NewScope
()));
local_scopes_
.
push_back
(
&
(
g_scope_
.
NewScope
()));
local_scopes_
[
j
]
->
Var
(
"out"
);
Scope
&
local_scope
=
local_scopes_
.
back
()
->
NewScope
();
*
local_scopes_
.
back
()
->
Var
(
details
::
kLocalExecScopeName
)
->
GetMutable
<
Scope
*>
()
=
&
local_scope
;
local_scope
.
Var
(
"input"
);
param_scopes_
.
emplace_back
(
&
local_scope
);
}
}
local_scopes_
[
input_scope_idx
]
->
Var
(
"inp
ut"
);
param_scopes_
[
input_scope_idx
]
->
Var
(
"o
ut"
);
op_handle_
.
reset
(
new
GatherOpHandle
(
local_scopes_
,
gpu_list_
));
op_handle_
.
reset
(
new
GatherOpHandle
(
local_scopes_
,
gpu_list_
));
// add input
// add input
...
@@ -115,7 +121,8 @@ struct TestGatherOpHandle {
...
@@ -115,7 +121,8 @@ struct TestGatherOpHandle {
for
(
size_t
input_scope_idx
=
0
;
input_scope_idx
<
gpu_list_
.
size
();
for
(
size_t
input_scope_idx
=
0
;
input_scope_idx
<
gpu_list_
.
size
();
++
input_scope_idx
)
{
++
input_scope_idx
)
{
auto
in_var
=
local_scopes_
[
input_scope_idx
]
->
Var
(
"input"
);
auto
in_var
=
param_scopes_
.
at
(
input_scope_idx
)
->
FindVar
(
"input"
);
PADDLE_ENFORCE_NOT_NULL
(
in_var
);
auto
in_selected_rows
=
in_var
->
GetMutable
<
f
::
SelectedRows
>
();
auto
in_selected_rows
=
in_var
->
GetMutable
<
f
::
SelectedRows
>
();
auto
value
=
in_selected_rows
->
mutable_value
();
auto
value
=
in_selected_rows
->
mutable_value
();
value
->
mutable_data
<
float
>
(
kDims
,
gpu_list_
[
input_scope_idx
]);
value
->
mutable_data
<
float
>
(
kDims
,
gpu_list_
[
input_scope_idx
]);
...
@@ -128,10 +135,11 @@ struct TestGatherOpHandle {
...
@@ -128,10 +135,11 @@ struct TestGatherOpHandle {
value
->
Resize
(
kDims
);
value
->
Resize
(
kDims
);
}
}
auto
out_var
=
local_scopes_
[
output_scope_idx
]
->
Var
(
"out"
);
auto
out_var
=
param_scopes_
.
at
(
output_scope_idx
)
->
FindVar
(
"out"
);
PADDLE_ENFORCE_NOT_NULL
(
out_var
);
auto
out_selected_rows
=
out_var
->
GetMutable
<
f
::
SelectedRows
>
();
auto
out_selected_rows
=
out_var
->
GetMutable
<
f
::
SelectedRows
>
();
auto
in_var
=
local_scopes_
[
output_scope_idx
]
->
Var
(
"input"
);
auto
in_var
=
param_scopes_
.
at
(
output_scope_idx
)
->
Find
Var
(
"input"
);
auto
in_selected_rows
=
in_var
->
GetMutable
<
f
::
SelectedRows
>
();
auto
in_selected_rows
=
in_var
->
GetMutable
<
f
::
SelectedRows
>
();
out_selected_rows
->
mutable_value
()
->
ShareDataWith
(
out_selected_rows
->
mutable_value
()
->
ShareDataWith
(
...
@@ -155,7 +163,8 @@ struct TestGatherOpHandle {
...
@@ -155,7 +163,8 @@ struct TestGatherOpHandle {
f
::
TensorCopy
(
rt
,
cpu_place
,
*
(
ctxs_
[
output_scope_idx
]),
&
result_tensor
);
f
::
TensorCopy
(
rt
,
cpu_place
,
*
(
ctxs_
[
output_scope_idx
]),
&
result_tensor
);
float
*
ct
=
result_tensor
.
data
<
float
>
();
float
*
ct
=
result_tensor
.
data
<
float
>
();
for
(
int64_t
j
=
0
;
j
<
f
::
product
(
kDims
);
++
j
)
{
for
(
int64_t
j
=
0
;
j
<
f
::
product
(
kDims
)
*
static_cast
<
int64_t
>
(
gpu_list_
.
size
());
++
j
)
{
ASSERT_NEAR
(
ct
[
j
],
send_vector
[
j
%
send_vector
.
size
()],
1e-5
);
ASSERT_NEAR
(
ct
[
j
],
send_vector
[
j
%
send_vector
.
size
()],
1e-5
);
}
}
}
}
...
...
paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc
浏览文件 @
1d756746
...
@@ -43,21 +43,21 @@ void NCCLAllReduceOpHandle::RunImpl() {
...
@@ -43,21 +43,21 @@ void NCCLAllReduceOpHandle::RunImpl() {
int
dtype
=
-
1
;
int
dtype
=
-
1
;
size_t
numel
=
0
;
size_t
numel
=
0
;
std
::
vector
<
LoDTensor
>
lod_tensors
;
std
::
vector
<
const
LoDTensor
*
>
lod_tensors
;
for
(
size_t
i
=
0
;
i
<
local_scopes_
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
local_scopes_
.
size
();
++
i
)
{
auto
*
s
=
local_scopes_
[
i
];
auto
*
s
=
local_scopes_
[
i
];
auto
&
local_scope
=
*
s
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
();
auto
&
local_scope
=
*
s
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
();
auto
&
lod_tensor
=
local_scope
.
FindVar
(
var_name
)
->
Get
<
LoDTensor
>
();
auto
&
lod_tensor
=
local_scope
.
FindVar
(
var_name
)
->
Get
<
LoDTensor
>
();
lod_tensors
.
emplace_back
(
lod_tensor
);
lod_tensors
.
emplace_back
(
&
lod_tensor
);
}
}
if
(
platform
::
is_gpu_place
(
lod_tensors
[
0
]
.
place
()))
{
if
(
platform
::
is_gpu_place
(
lod_tensors
[
0
]
->
place
()))
{
std
::
vector
<
std
::
function
<
void
()
>>
all_reduce_calls
;
std
::
vector
<
std
::
function
<
void
()
>>
all_reduce_calls
;
for
(
size_t
i
=
0
;
i
<
local_scopes_
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
local_scopes_
.
size
();
++
i
)
{
auto
&
p
=
places_
[
i
];
auto
&
p
=
places_
[
i
];
auto
&
lod_tensor
=
lod_tensors
[
i
];
auto
&
lod_tensor
=
*
lod_tensors
[
i
];
void
*
buffer
=
const_cast
<
void
*>
(
lod_tensor
.
data
<
void
>
());
void
*
buffer
=
const_cast
<
void
*>
(
lod_tensor
.
data
<
void
>
());
if
(
dtype
==
-
1
)
{
if
(
dtype
==
-
1
)
{
...
@@ -93,7 +93,7 @@ void NCCLAllReduceOpHandle::RunImpl() {
...
@@ -93,7 +93,7 @@ void NCCLAllReduceOpHandle::RunImpl() {
// Reduce All Tensor to trg in CPU
// Reduce All Tensor to trg in CPU
ReduceLoDTensor
func
(
lod_tensors
,
&
trg
);
ReduceLoDTensor
func
(
lod_tensors
,
&
trg
);
VisitDataType
(
ToDataType
(
lod_tensors
[
0
]
.
type
()),
func
);
VisitDataType
(
ToDataType
(
lod_tensors
[
0
]
->
type
()),
func
);
for
(
size_t
i
=
0
;
i
<
local_scopes_
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
local_scopes_
.
size
();
++
i
)
{
auto
&
scope
=
auto
&
scope
=
...
...
paddle/fluid/framework/details/op_registry.h
浏览文件 @
1d756746
...
@@ -14,6 +14,9 @@ limitations under the License. */
...
@@ -14,6 +14,9 @@ limitations under the License. */
#pragma once
#pragma once
#include <string>
#include <tuple>
#include <vector>
#include "paddle/fluid/framework/grad_op_desc_maker.h"
#include "paddle/fluid/framework/grad_op_desc_maker.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/op_proto_maker.h"
#include "paddle/fluid/framework/op_proto_maker.h"
...
...
paddle/fluid/framework/details/reduce_and_gather.h
浏览文件 @
1d756746
...
@@ -24,23 +24,23 @@ namespace framework {
...
@@ -24,23 +24,23 @@ namespace framework {
namespace
details
{
namespace
details
{
struct
ReduceLoDTensor
{
struct
ReduceLoDTensor
{
const
std
::
vector
<
LoDTensor
>
&
src_tensors_
;
const
std
::
vector
<
const
LoDTensor
*
>
&
src_tensors_
;
LoDTensor
&
dst_tensor_
;
LoDTensor
&
dst_tensor_
;
ReduceLoDTensor
(
const
std
::
vector
<
LoDTensor
>
&
src
,
LoDTensor
*
dst
)
ReduceLoDTensor
(
const
std
::
vector
<
const
LoDTensor
*
>
&
src
,
LoDTensor
*
dst
)
:
src_tensors_
(
src
),
dst_tensor_
(
*
dst
)
{}
:
src_tensors_
(
src
),
dst_tensor_
(
*
dst
)
{}
template
<
typename
T
>
template
<
typename
T
>
void
operator
()()
const
{
void
operator
()()
const
{
PADDLE_ENFORCE
(
!
src_tensors_
.
empty
());
PADDLE_ENFORCE
(
!
src_tensors_
.
empty
());
auto
&
t0
=
src_tensors_
[
0
];
auto
&
t0
=
*
src_tensors_
[
0
];
PADDLE_ENFORCE_NE
(
t0
.
numel
(),
0
);
PADDLE_ENFORCE_NE
(
t0
.
numel
(),
0
);
dst_tensor_
.
Resize
(
t0
.
dims
());
dst_tensor_
.
Resize
(
t0
.
dims
());
T
*
dst
=
dst_tensor_
.
mutable_data
<
T
>
(
platform
::
CPUPlace
());
T
*
dst
=
dst_tensor_
.
mutable_data
<
T
>
(
platform
::
CPUPlace
());
std
::
copy
(
t0
.
data
<
T
>
(),
t0
.
data
<
T
>
()
+
t0
.
numel
(),
dst
);
std
::
copy
(
t0
.
data
<
T
>
(),
t0
.
data
<
T
>
()
+
t0
.
numel
(),
dst
);
for
(
size_t
i
=
1
;
i
<
src_tensors_
.
size
();
++
i
)
{
for
(
size_t
i
=
1
;
i
<
src_tensors_
.
size
();
++
i
)
{
auto
&
t
=
src_tensors_
[
i
];
auto
&
t
=
*
src_tensors_
[
i
];
PADDLE_ENFORCE_EQ
(
t
.
dims
(),
t0
.
dims
());
PADDLE_ENFORCE_EQ
(
t
.
dims
(),
t0
.
dims
());
PADDLE_ENFORCE_EQ
(
t
.
type
(),
t0
.
type
());
PADDLE_ENFORCE_EQ
(
t
.
type
(),
t0
.
type
());
std
::
transform
(
t
.
data
<
T
>
(),
t
.
data
<
T
>
()
+
t
.
numel
(),
dst
,
dst
,
std
::
transform
(
t
.
data
<
T
>
(),
t
.
data
<
T
>
()
+
t
.
numel
(),
dst
,
dst
,
...
...
paddle/fluid/framework/details/reduce_op_handle.cc
浏览文件 @
1d756746
...
@@ -13,7 +13,9 @@
...
@@ -13,7 +13,9 @@
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/framework/details/reduce_op_handle.h"
#include "paddle/fluid/framework/details/reduce_op_handle.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/reduce_and_gather.h"
#include "paddle/fluid/framework/details/reduce_and_gather.h"
#include "paddle/fluid/framework/details/variable_visitor.h"
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
...
@@ -21,85 +23,84 @@ namespace details {
...
@@ -21,85 +23,84 @@ namespace details {
void
ReduceOpHandle
::
RunImpl
()
{
void
ReduceOpHandle
::
RunImpl
()
{
// the input and output may have dummy var.
// the input and output may have dummy var.
std
::
vector
<
VarHandle
*>
in_var_handles
=
GetValidVarHandles
(
inputs_
);
auto
in_var_handles
=
DynamicCast
<
VarHandle
>
(
inputs_
);
std
::
vector
<
VarHandle
*>
out_var_handles
=
GetValidVarHandles
(
outputs_
);
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
in_var_handles
.
size
(),
places_
.
size
(),
in_var_handles
.
size
(),
places_
.
size
(),
"The number of output should equal to the number of places."
);
"The number of output should equal to the number of places."
);
PADDLE_ENFORCE_EQ
(
out_var_handles
.
size
(),
1
,
"The number of output should be one."
);
// Wait input done, this Wait is asynchronous operation
VarHandle
*
out_var_handle
;
WaitEvents
(
in_var_handles
);
{
auto
out_var_handles
=
DynamicCast
<
VarHandle
>
(
outputs_
);
PADDLE_ENFORCE_EQ
(
out_var_handles
.
size
(),
1
,
"The number of output should be one."
);
out_var_handle
=
out_var_handles
.
front
();
}
// check in the same place
auto
in_0_handle
=
in_var_handles
[
0
];
auto
in_0_handle
=
in_var_handles
[
0
];
auto
pre_place
=
in_0_handle
->
place_
;
std
::
vector
<
const
Scope
*>
var_scopes
;
for
(
auto
*
s
:
local_scopes_
)
{
var_scopes
.
emplace_back
(
s
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
());
}
auto
pre_in_var
=
var_scopes
.
at
(
in_0_handle
->
scope_idx_
)
->
FindVar
(
in_0_handle
->
name_
);
PADDLE_ENFORCE_NOT_NULL
(
pre_in_var
);
// Wait input done, this Wait is asynchronous operation
WaitInputVarGenerated
(
in_var_handles
);
auto
pre_place
=
in_0_handle
->
place_
;
std
::
vector
<
platform
::
Place
>
in_places
;
std
::
vector
<
platform
::
Place
>
in_places
;
auto
pre_in_tensor
=
VariableVisitor
::
GetMutableTensor
(
pre_in_var
);
for
(
auto
*
in_handle
:
in_var_handles
)
{
for
(
auto
*
in_handle
:
in_var_handles
)
{
auto
in_p
=
in_handle
->
place_
;
auto
in_p
=
in_handle
->
place_
;
PADDLE_ENFORCE_EQ
(
in_p
.
which
(),
pre_place
.
which
(),
PADDLE_ENFORCE_EQ
(
in_p
.
which
(),
pre_place
.
which
(),
"Places must be all on CPU or all on CUDA."
);
"Places must be all on CPU or all on CUDA."
);
in_places
.
emplace_back
(
in_p
);
in_places
.
emplace_back
(
in_p
);
}
auto
out_var
=
local_scopes_
[
out_var_handles
[
0
]
->
scope_idx_
]
->
FindVar
(
auto
in_var
=
out_var_handles
[
0
]
->
name_
);
var_scopes
.
at
(
in_handle
->
scope_idx_
)
->
FindVar
(
in_handle
->
name_
);
PADDLE_ENFORCE_NOT_NULL
(
in_var
);
auto
pre_in_var
=
auto
in_tensor
=
VariableVisitor
::
GetMutableTensor
(
in_var
);
local_scopes_
[
in_0_handle
->
scope_idx_
]
->
FindVar
(
in_0_handle
->
name_
);
PADDLE_ENFORCE_EQ
(
in_tensor
.
type
(),
pre_in_tensor
.
type
(),
"The type of input is not consistent."
);
if
(
pre_in_var
->
IsType
<
framework
::
SelectedRows
>
())
{
}
auto
&
pre_in
=
pre_in_var
->
Get
<
framework
::
SelectedRows
>
();
std
::
vector
<
const
SelectedRows
*>
in_selected_rows
;
for
(
auto
*
in_handle
:
in_var_handles
)
{
auto
out_var
=
auto
in_var
=
var_scopes
.
at
(
out_var_handle
->
scope_idx_
)
->
FindVar
(
out_var_handle
->
name_
);
local_scopes_
.
at
(
in_handle
->
scope_idx_
)
->
FindVar
(
in_handle
->
name_
);
PADDLE_ENFORCE_NOT_NULL
(
out_var
);
auto
&
in_sr
=
in_var
->
Get
<
framework
::
SelectedRows
>
();
PADDLE_ENFORCE_EQ
(
in_sr
.
value
().
type
(),
pre_in
.
value
().
type
(),
if
(
pre_in_var
->
IsType
<
framework
::
SelectedRows
>
())
{
"The type of input is not consistent."
);
std
::
vector
<
const
SelectedRows
*>
in_selected_rows
=
GetInputValues
<
SelectedRows
>
(
in_var_handles
,
var_scopes
);
in_selected_rows
.
emplace_back
(
&
in_sr
);
}
auto
trg
=
out_var
->
GetMutable
<
framework
::
SelectedRows
>
();
GatherSelectedRows
(
in_selected_rows
,
in_places
,
dev_ctxes_
,
GatherSelectedRows
(
in_selected_rows
,
in_places
,
dev_ctxes_
,
out_var_handles
[
0
]
->
place_
,
trg
);
out_var_handle
->
place_
,
out_var
->
GetMutable
<
framework
::
SelectedRows
>
());
}
else
{
}
else
{
auto
pre_in
=
pre_in_var
->
Get
<
framework
::
LoDTensor
>
();
std
::
vector
<
const
LoDTensor
*>
lod_tensors
=
std
::
vector
<
LoDTensor
>
lod_tensors
;
GetInputValues
<
LoDTensor
>
(
in_var_handles
,
var_scopes
);
// can be refined
for
(
auto
*
in_handle
:
in_var_handles
)
{
auto
in_var
=
local_scopes_
.
at
(
in_handle
->
scope_idx_
)
->
FindVar
(
in_handle
->
name_
);
auto
&
in_sr
=
in_var
->
Get
<
framework
::
LoDTensor
>
();
PADDLE_ENFORCE_EQ
(
in_sr
.
type
(),
pre_in
.
type
(),
"The type of input is not consistent."
);
lod_tensors
.
emplace_back
(
in_sr
);
}
auto
trg
=
out_var
->
GetMutable
<
framework
::
LoDTensor
>
();
trg
->
Resize
(
pre_in
.
dims
());
trg
->
mutable_data
(
out_var_handles
[
0
]
->
place_
,
pre_in
.
type
());
if
(
paddle
::
platform
::
is_cpu_place
(
pre_place
))
{
if
(
paddle
::
platform
::
is_cpu_place
(
pre_place
))
{
ReduceLoDTensor
func
(
lod_tensors
,
trg
);
ReduceLoDTensor
func
(
lod_tensors
,
VisitDataType
(
ToDataType
(
lod_tensors
[
0
].
type
()),
func
);
out_var
->
GetMutable
<
framework
::
LoDTensor
>
());
VisitDataType
(
ToDataType
(
lod_tensors
[
0
]
->
type
()),
func
);
}
else
if
(
paddle
::
platform
::
is_gpu_place
(
pre_place
))
{
}
else
if
(
paddle
::
platform
::
is_gpu_place
(
pre_place
))
{
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
auto
out_p
=
out_var_handles
[
0
]
->
place_
;
auto
pre_in
=
pre_in_var
->
Get
<
framework
::
LoDTensor
>
();
int
root
=
boost
::
get
<
platform
::
CUDAPlace
>
(
out_p
).
device
;
VariableVisitor
::
ShareDimsAndLoD
(
*
pre_in_var
,
out_var
);
VariableVisitor
::
GetMutableTensor
(
out_var
).
mutable_data
(
out_var_handle
->
place_
,
pre_in
.
type
());
auto
out_p
=
out_var_handle
->
place_
;
int
root
=
boost
::
get
<
platform
::
CUDAPlace
>
(
out_p
).
device
;
std
::
vector
<
std
::
function
<
void
()
>>
all_reduce_calls
;
std
::
vector
<
std
::
function
<
void
()
>>
all_reduce_calls
;
for
(
size_t
i
=
0
;
i
<
local_scopes_
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
var_scopes
.
size
();
++
i
)
{
auto
&
p
=
in_places
[
i
];
auto
&
p
=
in_places
[
i
];
auto
&
lod_tensor
=
lod_tensors
[
i
];
auto
&
lod_tensor
=
*
lod_tensors
[
i
];
int
dev_id
=
boost
::
get
<
platform
::
CUDAPlace
>
(
p
).
device
;
int
dev_id
=
boost
::
get
<
platform
::
CUDAPlace
>
(
p
).
device
;
auto
&
nccl_ctx
=
nccl_ctxs_
->
at
(
dev_id
);
auto
&
nccl_ctx
=
nccl_ctxs_
->
at
(
dev_id
);
...
@@ -109,14 +110,16 @@ void ReduceOpHandle::RunImpl() {
...
@@ -109,14 +110,16 @@ void ReduceOpHandle::RunImpl() {
void
*
buffer
=
const_cast
<
void
*>
(
lod_tensor
.
data
<
void
>
());
void
*
buffer
=
const_cast
<
void
*>
(
lod_tensor
.
data
<
void
>
());
void
*
recvbuffer
=
nullptr
;
void
*
recvbuffer
=
nullptr
;
if
(
root
==
dev_id
)
{
if
(
root
==
dev_id
)
{
recvbuffer
=
trg
->
mutable_data
(
out_var_handles
[
0
]
->
place_
);
recvbuffer
=
out_var
->
GetMutable
<
framework
::
LoDTensor
>
()
->
mutable_data
(
out_var_handle
->
place_
);
}
}
int
type
=
platform
::
ToNCCLDataType
(
lod_tensor
.
type
());
all_reduce_calls
.
emplace_back
([
=
]
{
all_reduce_calls
.
emplace_back
([
=
]
{
PADDLE_ENFORCE
(
platform
::
dynload
::
ncclReduce
(
PADDLE_ENFORCE
(
platform
::
dynload
::
ncclReduce
(
buffer
,
recvbuffer
,
static_cast
<
size_t
>
(
lod_tensor
.
numel
()),
buffer
,
recvbuffer
,
static_cast
<
size_t
>
(
lod_tensor
.
numel
()),
platform
::
ToNCCLDataType
(
lod_tensor
.
type
()),
ncclSum
,
root
,
comm
,
static_cast
<
ncclDataType_t
>
(
type
),
ncclSum
,
root
,
comm
,
stream
));
stream
));
});
});
}
}
...
@@ -135,26 +138,31 @@ void ReduceOpHandle::RunImpl() {
...
@@ -135,26 +138,31 @@ void ReduceOpHandle::RunImpl() {
}
}
}
}
void
ReduceOpHandle
::
WaitEvents
(
template
<
typename
T
>
const
std
::
vector
<
VarHandle
*>
&
in_var_handles
)
{
std
::
vector
<
const
T
*>
ReduceOpHandle
::
GetInputValues
(
if
(
in_var_handles
[
0
]
->
generated_op_
)
{
const
std
::
vector
<
VarHandle
*>
&
in_var_handles
,
for
(
auto
*
in
:
in_var_handles
)
{
const
std
::
vector
<
const
Scope
*>
&
var_scopes
)
const
{
in_var_handles
[
0
]
->
generated_op_
->
Wait
(
dev_ctxes_
[
in
->
place_
]);
std
::
vector
<
const
T
*>
in_selected_rows
;
}
for
(
auto
*
in_handle
:
in_var_handles
)
{
auto
&
in_sr
=
var_scopes
.
at
(
in_handle
->
scope_idx_
)
->
FindVar
(
in_handle
->
name_
)
->
Get
<
T
>
();
in_selected_rows
.
emplace_back
(
&
in_sr
);
}
}
return
in_selected_rows
;
}
}
std
::
vector
<
VarHandle
*>
ReduceOpHandle
::
GetValidVarHandles
(
void
ReduceOpHandle
::
WaitInputVarGenerated
(
const
std
::
vector
<
VarHandle
Base
*>
&
input
s
)
{
const
std
::
vector
<
VarHandle
*>
&
in_var_handle
s
)
{
std
::
vector
<
VarHandle
*>
in_var_handles
;
for
(
auto
*
in
:
in_var_handles
)
{
for
(
auto
*
in
:
inputs
)
{
if
(
in
->
generated_op_
)
{
auto
*
in_handle
=
dynamic_cast
<
VarHandle
*>
(
in
);
for
(
auto
pair
:
dev_ctxes_
)
{
if
(
in_handle
)
{
in
->
generated_op_
->
Wait
(
pair
.
second
);
in_var_handles
.
push_back
(
in_handle
);
}
}
}
}
}
return
in_var_handles
;
}
}
std
::
string
ReduceOpHandle
::
Name
()
const
{
return
"reduce"
;
}
std
::
string
ReduceOpHandle
::
Name
()
const
{
return
"reduce"
;
}
}
// namespace details
}
// namespace details
}
// namespace framework
}
// namespace framework
...
...
paddle/fluid/framework/details/reduce_op_handle.h
浏览文件 @
1d756746
...
@@ -59,10 +59,13 @@ struct ReduceOpHandle : public OpHandleBase {
...
@@ -59,10 +59,13 @@ struct ReduceOpHandle : public OpHandleBase {
protected:
protected:
void
RunImpl
()
override
;
void
RunImpl
()
override
;
std
::
vector
<
VarHandle
*>
GetValidVarHandles
(
const
std
::
vector
<
VarHandleBase
*>
&
inputs
);
void
WaitEvents
(
const
std
::
vector
<
VarHandle
*>
&
in_var_handles
);
void
WaitInputVarGenerated
(
const
std
::
vector
<
VarHandle
*>
&
in_var_handles
);
template
<
typename
T
>
std
::
vector
<
const
T
*>
GetInputValues
(
const
std
::
vector
<
VarHandle
*>
&
in_var_handles
,
const
std
::
vector
<
const
Scope
*>
&
var_scopes
)
const
;
};
};
}
// namespace details
}
// namespace details
...
...
paddle/fluid/framework/details/reduce_op_handle_test.cc
浏览文件 @
1d756746
...
@@ -14,7 +14,6 @@
...
@@ -14,7 +14,6 @@
#include "paddle/fluid/framework/details/reduce_op_handle.h"
#include "paddle/fluid/framework/details/reduce_op_handle.h"
#include "gtest/gtest.h"
#include "gtest/gtest.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/device_context.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -30,6 +29,7 @@ struct TestReduceOpHandle {
...
@@ -30,6 +29,7 @@ struct TestReduceOpHandle {
bool
use_gpu_
;
bool
use_gpu_
;
Scope
g_scope_
;
Scope
g_scope_
;
std
::
vector
<
Scope
*>
local_scopes_
;
std
::
vector
<
Scope
*>
local_scopes_
;
std
::
vector
<
Scope
*>
param_scopes_
;
std
::
unique_ptr
<
OpHandleBase
>
op_handle_
;
std
::
unique_ptr
<
OpHandleBase
>
op_handle_
;
std
::
vector
<
std
::
unique_ptr
<
VarHandleBase
>>
vars_
;
std
::
vector
<
std
::
unique_ptr
<
VarHandleBase
>>
vars_
;
std
::
vector
<
p
::
Place
>
gpu_list_
;
std
::
vector
<
p
::
Place
>
gpu_list_
;
...
@@ -83,12 +83,18 @@ struct TestReduceOpHandle {
...
@@ -83,12 +83,18 @@ struct TestReduceOpHandle {
}
}
}
}
void
InitReduceOp
(
size_t
input_scope_idx
)
{
void
InitReduceOp
(
size_t
out_scope_idx
)
{
// init scope
for
(
size_t
j
=
0
;
j
<
gpu_list_
.
size
();
++
j
)
{
for
(
size_t
j
=
0
;
j
<
gpu_list_
.
size
();
++
j
)
{
local_scopes_
.
push_back
(
&
(
g_scope_
.
NewScope
()));
local_scopes_
.
push_back
(
&
(
g_scope_
.
NewScope
()));
local_scopes_
[
j
]
->
Var
(
"out"
);
Scope
&
local_scope
=
local_scopes_
.
back
()
->
NewScope
();
*
local_scopes_
.
back
()
->
Var
(
details
::
kLocalExecScopeName
)
->
GetMutable
<
Scope
*>
()
=
&
local_scope
;
local_scope
.
Var
(
"input"
);
param_scopes_
.
emplace_back
(
&
local_scope
);
}
}
local_scopes_
[
input_scope_idx
]
->
Var
(
"inp
ut"
);
param_scopes_
[
out_scope_idx
]
->
Var
(
"o
ut"
);
if
(
use_gpu_
)
{
if
(
use_gpu_
)
{
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
...
@@ -106,6 +112,7 @@ struct TestReduceOpHandle {
...
@@ -106,6 +112,7 @@ struct TestReduceOpHandle {
#endif
#endif
}
}
// init op handle
// add input
// add input
for
(
size_t
j
=
0
;
j
<
gpu_list_
.
size
();
++
j
)
{
for
(
size_t
j
=
0
;
j
<
gpu_list_
.
size
();
++
j
)
{
if
(
!
use_gpu_
)
{
if
(
!
use_gpu_
)
{
...
@@ -126,7 +133,7 @@ struct TestReduceOpHandle {
...
@@ -126,7 +133,7 @@ struct TestReduceOpHandle {
// add output
// add output
auto
*
out_var_handle
=
auto
*
out_var_handle
=
new
VarHandle
(
2
,
input_scope_idx
,
"out"
,
gpu_list_
[
inp
ut_scope_idx
]);
new
VarHandle
(
2
,
out_scope_idx
,
"out"
,
gpu_list_
[
o
ut_scope_idx
]);
vars_
.
emplace_back
(
out_var_handle
);
vars_
.
emplace_back
(
out_var_handle
);
op_handle_
->
AddOutput
(
out_var_handle
);
op_handle_
->
AddOutput
(
out_var_handle
);
...
@@ -148,7 +155,8 @@ struct TestReduceOpHandle {
...
@@ -148,7 +155,8 @@ struct TestReduceOpHandle {
for
(
size_t
input_scope_idx
=
0
;
input_scope_idx
<
gpu_list_
.
size
();
for
(
size_t
input_scope_idx
=
0
;
input_scope_idx
<
gpu_list_
.
size
();
++
input_scope_idx
)
{
++
input_scope_idx
)
{
auto
in_var
=
local_scopes_
[
input_scope_idx
]
->
Var
(
"input"
);
auto
in_var
=
param_scopes_
[
input_scope_idx
]
->
FindVar
(
"input"
);
PADDLE_ENFORCE_NOT_NULL
(
in_var
);
auto
in_selected_rows
=
in_var
->
GetMutable
<
f
::
SelectedRows
>
();
auto
in_selected_rows
=
in_var
->
GetMutable
<
f
::
SelectedRows
>
();
auto
value
=
in_selected_rows
->
mutable_value
();
auto
value
=
in_selected_rows
->
mutable_value
();
value
->
mutable_data
<
float
>
(
kDims
,
gpu_list_
[
input_scope_idx
]);
value
->
mutable_data
<
float
>
(
kDims
,
gpu_list_
[
input_scope_idx
]);
...
@@ -161,10 +169,11 @@ struct TestReduceOpHandle {
...
@@ -161,10 +169,11 @@ struct TestReduceOpHandle {
value
->
Resize
(
kDims
);
value
->
Resize
(
kDims
);
}
}
auto
out_var
=
local_scopes_
[
output_scope_idx
]
->
Var
(
"out"
);
auto
out_var
=
param_scopes_
[
output_scope_idx
]
->
FindVar
(
"out"
);
PADDLE_ENFORCE_NOT_NULL
(
out_var
);
auto
out_selected_rows
=
out_var
->
GetMutable
<
f
::
SelectedRows
>
();
auto
out_selected_rows
=
out_var
->
GetMutable
<
f
::
SelectedRows
>
();
auto
in_var
=
local_scopes_
[
output_scope_idx
]
->
Var
(
"input"
);
auto
in_var
=
param_scopes_
[
output_scope_idx
]
->
Find
Var
(
"input"
);
auto
in_selected_rows
=
in_var
->
GetMutable
<
f
::
SelectedRows
>
();
auto
in_selected_rows
=
in_var
->
GetMutable
<
f
::
SelectedRows
>
();
out_selected_rows
->
mutable_value
()
->
ShareDataWith
(
out_selected_rows
->
mutable_value
()
->
ShareDataWith
(
...
@@ -202,7 +211,8 @@ struct TestReduceOpHandle {
...
@@ -202,7 +211,8 @@ struct TestReduceOpHandle {
for
(
size_t
input_scope_idx
=
0
;
input_scope_idx
<
gpu_list_
.
size
();
for
(
size_t
input_scope_idx
=
0
;
input_scope_idx
<
gpu_list_
.
size
();
++
input_scope_idx
)
{
++
input_scope_idx
)
{
auto
in_var
=
local_scopes_
[
input_scope_idx
]
->
Var
(
"input"
);
auto
in_var
=
param_scopes_
[
input_scope_idx
]
->
FindVar
(
"input"
);
PADDLE_ENFORCE_NOT_NULL
(
in_var
);
auto
in_lod_tensor
=
in_var
->
GetMutable
<
f
::
LoDTensor
>
();
auto
in_lod_tensor
=
in_var
->
GetMutable
<
f
::
LoDTensor
>
();
in_lod_tensor
->
mutable_data
<
float
>
(
kDims
,
gpu_list_
[
input_scope_idx
]);
in_lod_tensor
->
mutable_data
<
float
>
(
kDims
,
gpu_list_
[
input_scope_idx
]);
in_lod_tensor
->
set_lod
(
lod
);
in_lod_tensor
->
set_lod
(
lod
);
...
@@ -211,10 +221,11 @@ struct TestReduceOpHandle {
...
@@ -211,10 +221,11 @@ struct TestReduceOpHandle {
send_vector
,
*
(
ctxs_
[
input_scope_idx
]),
in_lod_tensor
);
send_vector
,
*
(
ctxs_
[
input_scope_idx
]),
in_lod_tensor
);
}
}
auto
out_var
=
local_scopes_
[
output_scope_idx
]
->
Var
(
"out"
);
auto
out_var
=
param_scopes_
[
output_scope_idx
]
->
FindVar
(
"out"
);
PADDLE_ENFORCE_NOT_NULL
(
out_var
);
auto
out_lodtensor
=
out_var
->
GetMutable
<
f
::
LoDTensor
>
();
auto
out_lodtensor
=
out_var
->
GetMutable
<
f
::
LoDTensor
>
();
auto
in_var
=
local_scopes_
[
output_scope_idx
]
->
Var
(
"input"
);
auto
in_var
=
param_scopes_
[
output_scope_idx
]
->
Find
Var
(
"input"
);
auto
in_lodtensor
=
in_var
->
Get
<
f
::
LoDTensor
>
();
auto
in_lodtensor
=
in_var
->
Get
<
f
::
LoDTensor
>
();
out_lodtensor
->
ShareDataWith
(
in_lodtensor
);
out_lodtensor
->
ShareDataWith
(
in_lodtensor
);
...
@@ -239,34 +250,34 @@ struct TestReduceOpHandle {
...
@@ -239,34 +250,34 @@ struct TestReduceOpHandle {
TEST
(
ReduceTester
,
TestCPUReduceTestSelectedRows
)
{
TEST
(
ReduceTester
,
TestCPUReduceTestSelectedRows
)
{
TestReduceOpHandle
test_op
;
TestReduceOpHandle
test_op
;
size_t
inp
ut_scope_idx
=
0
;
size_t
o
ut_scope_idx
=
0
;
test_op
.
InitCtxOnGpu
(
false
);
test_op
.
InitCtxOnGpu
(
false
);
test_op
.
InitReduceOp
(
inp
ut_scope_idx
);
test_op
.
InitReduceOp
(
o
ut_scope_idx
);
test_op
.
TestReduceSelectedRows
(
inp
ut_scope_idx
);
test_op
.
TestReduceSelectedRows
(
o
ut_scope_idx
);
}
}
TEST
(
ReduceTester
,
TestCPUReduceTestLodTensor
)
{
TEST
(
ReduceTester
,
TestCPUReduceTestLodTensor
)
{
TestReduceOpHandle
test_op
;
TestReduceOpHandle
test_op
;
size_t
inp
ut_scope_idx
=
0
;
size_t
o
ut_scope_idx
=
0
;
test_op
.
InitCtxOnGpu
(
false
);
test_op
.
InitCtxOnGpu
(
false
);
test_op
.
InitReduceOp
(
inp
ut_scope_idx
);
test_op
.
InitReduceOp
(
o
ut_scope_idx
);
test_op
.
TestReduceLodTensors
(
inp
ut_scope_idx
);
test_op
.
TestReduceLodTensors
(
o
ut_scope_idx
);
}
}
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
TEST
(
ReduceTester
,
TestGPUReduceTestSelectedRows
)
{
TEST
(
ReduceTester
,
TestGPUReduceTestSelectedRows
)
{
TestReduceOpHandle
test_op
;
TestReduceOpHandle
test_op
;
size_t
inp
ut_scope_idx
=
0
;
size_t
o
ut_scope_idx
=
0
;
test_op
.
InitCtxOnGpu
(
true
);
test_op
.
InitCtxOnGpu
(
true
);
test_op
.
InitReduceOp
(
inp
ut_scope_idx
);
test_op
.
InitReduceOp
(
o
ut_scope_idx
);
test_op
.
TestReduceSelectedRows
(
inp
ut_scope_idx
);
test_op
.
TestReduceSelectedRows
(
o
ut_scope_idx
);
}
}
TEST
(
ReduceTester
,
TestGPUReduceTestLodTensor
)
{
TEST
(
ReduceTester
,
TestGPUReduceTestLodTensor
)
{
TestReduceOpHandle
test_op
;
TestReduceOpHandle
test_op
;
size_t
inp
ut_scope_idx
=
0
;
size_t
o
ut_scope_idx
=
0
;
test_op
.
InitCtxOnGpu
(
true
);
test_op
.
InitCtxOnGpu
(
true
);
test_op
.
InitReduceOp
(
inp
ut_scope_idx
);
test_op
.
InitReduceOp
(
o
ut_scope_idx
);
test_op
.
TestReduceLodTensors
(
inp
ut_scope_idx
);
test_op
.
TestReduceLodTensors
(
o
ut_scope_idx
);
}
}
#endif
#endif
...
...
paddle/fluid/framework/op_registry_test.cc
浏览文件 @
1d756746
...
@@ -202,8 +202,9 @@ class CosineOpComplete : public paddle::framework::CosineOp {
...
@@ -202,8 +202,9 @@ class CosineOpComplete : public paddle::framework::CosineOp {
};
};
TEST
(
OperatorRegistrar
,
Test
)
{
TEST
(
OperatorRegistrar
,
Test
)
{
using
namespace
paddle
::
framework
;
paddle
::
framework
::
OperatorRegistrar
<
OperatorRegistrar
<
CosineOpComplete
,
CosineOpProtoAndCheckerMaker
>
reg
(
"cos"
);
CosineOpComplete
,
paddle
::
framework
::
CosineOpProtoAndCheckerMaker
>
reg
(
"cos"
);
}
}
namespace
paddle
{
namespace
paddle
{
...
...
paddle/fluid/framework/operator_test.cc
浏览文件 @
1d756746
...
@@ -226,10 +226,8 @@ REGISTER_OP_CPU_KERNEL(op_multi_inputs_with_kernel,
...
@@ -226,10 +226,8 @@ REGISTER_OP_CPU_KERNEL(op_multi_inputs_with_kernel,
// test with multi inputs
// test with multi inputs
TEST
(
OpKernel
,
multi_inputs
)
{
TEST
(
OpKernel
,
multi_inputs
)
{
using
namespace
paddle
::
framework
;
paddle
::
framework
::
InitDevices
(
true
);
paddle
::
framework
::
InitDevices
(
true
);
proto
::
OpDesc
op_desc
;
p
addle
::
framework
::
p
roto
::
OpDesc
op_desc
;
op_desc
.
set_type
(
"op_multi_inputs_with_kernel"
);
op_desc
.
set_type
(
"op_multi_inputs_with_kernel"
);
BuildVar
(
"xs"
,
{
"x0"
,
"x1"
,
"x2"
},
op_desc
.
add_inputs
());
BuildVar
(
"xs"
,
{
"x0"
,
"x1"
,
"x2"
},
op_desc
.
add_inputs
());
...
@@ -243,12 +241,12 @@ TEST(OpKernel, multi_inputs) {
...
@@ -243,12 +241,12 @@ TEST(OpKernel, multi_inputs) {
paddle
::
platform
::
CPUPlace
cpu_place
;
paddle
::
platform
::
CPUPlace
cpu_place
;
paddle
::
framework
::
Scope
scope
;
paddle
::
framework
::
Scope
scope
;
scope
.
Var
(
"x0"
)
->
GetMutable
<
LoDTensor
>
();
scope
.
Var
(
"x0"
)
->
GetMutable
<
paddle
::
framework
::
LoDTensor
>
();
scope
.
Var
(
"x1"
)
->
GetMutable
<
LoDTensor
>
();
scope
.
Var
(
"x1"
)
->
GetMutable
<
paddle
::
framework
::
LoDTensor
>
();
scope
.
Var
(
"x2"
)
->
GetMutable
<
LoDTensor
>
();
scope
.
Var
(
"x2"
)
->
GetMutable
<
paddle
::
framework
::
LoDTensor
>
();
scope
.
Var
(
"k0"
)
->
GetMutable
<
LoDTensor
>
();
scope
.
Var
(
"k0"
)
->
GetMutable
<
paddle
::
framework
::
LoDTensor
>
();
scope
.
Var
(
"y0"
)
->
GetMutable
<
LoDTensor
>
();
scope
.
Var
(
"y0"
)
->
GetMutable
<
paddle
::
framework
::
LoDTensor
>
();
scope
.
Var
(
"y1"
)
->
GetMutable
<
LoDTensor
>
();
scope
.
Var
(
"y1"
)
->
GetMutable
<
paddle
::
framework
::
LoDTensor
>
();
auto
op
=
paddle
::
framework
::
OpRegistry
::
CreateOp
(
op_desc
);
auto
op
=
paddle
::
framework
::
OpRegistry
::
CreateOp
(
op_desc
);
op
->
Run
(
scope
,
cpu_place
);
op
->
Run
(
scope
,
cpu_place
);
...
...
paddle/fluid/framework/program_desc.cc
浏览文件 @
1d756746
...
@@ -27,10 +27,14 @@ BlockDesc *ProgramDesc::AppendBlock(const BlockDesc &parent) {
...
@@ -27,10 +27,14 @@ BlockDesc *ProgramDesc::AppendBlock(const BlockDesc &parent) {
return
blocks_
.
back
().
get
();
return
blocks_
.
back
().
get
();
}
}
proto
::
ProgramDesc
*
ProgramDesc
::
Proto
()
{
void
ProgramDesc
::
Flush
()
{
for
(
auto
&
block
:
blocks_
)
{
for
(
auto
&
block
:
blocks_
)
{
block
->
Flush
();
block
->
Flush
();
}
}
}
proto
::
ProgramDesc
*
ProgramDesc
::
Proto
()
{
Flush
();
return
&
desc_
;
return
&
desc_
;
}
}
...
...
paddle/fluid/framework/program_desc.h
浏览文件 @
1d756746
...
@@ -51,6 +51,8 @@ class ProgramDesc {
...
@@ -51,6 +51,8 @@ class ProgramDesc {
size_t
Size
()
const
{
return
blocks_
.
size
();
}
size_t
Size
()
const
{
return
blocks_
.
size
();
}
void
Flush
();
proto
::
ProgramDesc
*
Proto
();
proto
::
ProgramDesc
*
Proto
();
// The output variable of feed_op is referenced as feed_target.
// The output variable of feed_op is referenced as feed_target.
...
...
paddle/fluid/framework/threadpool_test.cc
浏览文件 @
1d756746
...
@@ -15,14 +15,14 @@ limitations under the License. */
...
@@ -15,14 +15,14 @@ limitations under the License. */
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include <atomic>
#include <atomic>
#include "threadpool.h"
#include "
paddle/fluid/framework/
threadpool.h"
namespace
framework
=
paddle
::
framework
;
namespace
framework
=
paddle
::
framework
;
void
do_sum
(
framework
::
ThreadPool
*
pool
,
std
::
atomic
<
int
>
&
sum
,
int
cnt
)
{
void
do_sum
(
framework
::
ThreadPool
*
pool
,
std
::
atomic
<
int
>
*
sum
,
int
cnt
)
{
std
::
vector
<
std
::
future
<
void
>>
fs
;
std
::
vector
<
std
::
future
<
void
>>
fs
;
for
(
int
i
=
0
;
i
<
cnt
;
++
i
)
{
for
(
int
i
=
0
;
i
<
cnt
;
++
i
)
{
fs
.
push_back
(
framework
::
Async
([
&
sum
]()
{
sum
.
fetch_add
(
1
);
}));
fs
.
push_back
(
framework
::
Async
([
sum
]()
{
sum
->
fetch_add
(
1
);
}));
}
}
}
}
...
@@ -46,7 +46,7 @@ TEST(ThreadPool, ConcurrentRun) {
...
@@ -46,7 +46,7 @@ TEST(ThreadPool, ConcurrentRun) {
int
n
=
50
;
int
n
=
50
;
// sum = (n * (n + 1)) / 2
// sum = (n * (n + 1)) / 2
for
(
int
i
=
1
;
i
<=
n
;
++
i
)
{
for
(
int
i
=
1
;
i
<=
n
;
++
i
)
{
std
::
thread
t
(
do_sum
,
pool
,
std
::
ref
(
sum
)
,
i
);
std
::
thread
t
(
do_sum
,
pool
,
&
sum
,
i
);
threads
.
push_back
(
std
::
move
(
t
));
threads
.
push_back
(
std
::
move
(
t
));
}
}
for
(
auto
&
t
:
threads
)
{
for
(
auto
&
t
:
threads
)
{
...
...
paddle/fluid/inference/io.cc
浏览文件 @
1d756746
...
@@ -14,6 +14,7 @@ limitations under the License. */
...
@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/inference/io.h"
#include <algorithm>
#include <fstream>
#include <fstream>
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
...
@@ -27,14 +28,14 @@ namespace inference {
...
@@ -27,14 +28,14 @@ namespace inference {
// linking the inference shared library.
// linking the inference shared library.
void
Init
(
bool
init_p2p
)
{
framework
::
InitDevices
(
init_p2p
);
}
void
Init
(
bool
init_p2p
)
{
framework
::
InitDevices
(
init_p2p
);
}
void
ReadBinaryFile
(
const
std
::
string
&
filename
,
std
::
string
&
contents
)
{
void
ReadBinaryFile
(
const
std
::
string
&
filename
,
std
::
string
*
contents
)
{
std
::
ifstream
fin
(
filename
,
std
::
ios
::
in
|
std
::
ios
::
binary
);
std
::
ifstream
fin
(
filename
,
std
::
ios
::
in
|
std
::
ios
::
binary
);
PADDLE_ENFORCE
(
static_cast
<
bool
>
(
fin
),
"Cannot open file %s"
,
filename
);
PADDLE_ENFORCE
(
static_cast
<
bool
>
(
fin
),
"Cannot open file %s"
,
filename
);
fin
.
seekg
(
0
,
std
::
ios
::
end
);
fin
.
seekg
(
0
,
std
::
ios
::
end
);
contents
.
clear
();
contents
->
clear
();
contents
.
resize
(
fin
.
tellg
());
contents
->
resize
(
fin
.
tellg
());
fin
.
seekg
(
0
,
std
::
ios
::
beg
);
fin
.
seekg
(
0
,
std
::
ios
::
beg
);
fin
.
read
(
&
contents
[
0
],
contents
.
size
());
fin
.
read
(
&
(
contents
->
at
(
0
)),
contents
->
size
());
fin
.
close
();
fin
.
close
();
}
}
...
@@ -47,7 +48,7 @@ bool IsPersistable(const framework::VarDesc* var) {
...
@@ -47,7 +48,7 @@ bool IsPersistable(const framework::VarDesc* var) {
return
false
;
return
false
;
}
}
void
LoadPersistables
(
framework
::
Executor
&
executor
,
framework
::
Scope
&
scope
,
void
LoadPersistables
(
framework
::
Executor
*
executor
,
framework
::
Scope
*
scope
,
const
framework
::
ProgramDesc
&
main_program
,
const
framework
::
ProgramDesc
&
main_program
,
const
std
::
string
&
dirname
,
const
std
::
string
&
dirname
,
const
std
::
string
&
param_filename
)
{
const
std
::
string
&
param_filename
)
{
...
@@ -92,18 +93,18 @@ void LoadPersistables(framework::Executor& executor, framework::Scope& scope,
...
@@ -92,18 +93,18 @@ void LoadPersistables(framework::Executor& executor, framework::Scope& scope,
op
->
CheckAttrs
();
op
->
CheckAttrs
();
}
}
executor
.
Run
(
*
load_program
,
&
scope
,
0
,
true
,
true
);
executor
->
Run
(
*
load_program
,
scope
,
0
,
true
,
true
);
delete
load_program
;
delete
load_program
;
}
}
std
::
unique_ptr
<
framework
::
ProgramDesc
>
Load
(
framework
::
Executor
&
executor
,
std
::
unique_ptr
<
framework
::
ProgramDesc
>
Load
(
framework
::
Executor
*
executor
,
framework
::
Scope
&
scope
,
framework
::
Scope
*
scope
,
const
std
::
string
&
dirname
)
{
const
std
::
string
&
dirname
)
{
std
::
string
model_filename
=
dirname
+
"/__model__"
;
std
::
string
model_filename
=
dirname
+
"/__model__"
;
std
::
string
program_desc_str
;
std
::
string
program_desc_str
;
VLOG
(
3
)
<<
"loading model from "
<<
model_filename
;
VLOG
(
3
)
<<
"loading model from "
<<
model_filename
;
ReadBinaryFile
(
model_filename
,
program_desc_str
);
ReadBinaryFile
(
model_filename
,
&
program_desc_str
);
std
::
unique_ptr
<
framework
::
ProgramDesc
>
main_program
(
std
::
unique_ptr
<
framework
::
ProgramDesc
>
main_program
(
new
framework
::
ProgramDesc
(
program_desc_str
));
new
framework
::
ProgramDesc
(
program_desc_str
));
...
@@ -113,11 +114,11 @@ std::unique_ptr<framework::ProgramDesc> Load(framework::Executor& executor,
...
@@ -113,11 +114,11 @@ std::unique_ptr<framework::ProgramDesc> Load(framework::Executor& executor,
}
}
std
::
unique_ptr
<
framework
::
ProgramDesc
>
Load
(
std
::
unique_ptr
<
framework
::
ProgramDesc
>
Load
(
framework
::
Executor
&
executor
,
framework
::
Scope
&
scope
,
framework
::
Executor
*
executor
,
framework
::
Scope
*
scope
,
const
std
::
string
&
prog_filename
,
const
std
::
string
&
param_filename
)
{
const
std
::
string
&
prog_filename
,
const
std
::
string
&
param_filename
)
{
std
::
string
model_filename
=
prog_filename
;
std
::
string
model_filename
=
prog_filename
;
std
::
string
program_desc_str
;
std
::
string
program_desc_str
;
ReadBinaryFile
(
model_filename
,
program_desc_str
);
ReadBinaryFile
(
model_filename
,
&
program_desc_str
);
std
::
unique_ptr
<
framework
::
ProgramDesc
>
main_program
(
std
::
unique_ptr
<
framework
::
ProgramDesc
>
main_program
(
new
framework
::
ProgramDesc
(
program_desc_str
));
new
framework
::
ProgramDesc
(
program_desc_str
));
...
...
paddle/fluid/inference/io.h
浏览文件 @
1d756746
...
@@ -27,17 +27,17 @@ namespace inference {
...
@@ -27,17 +27,17 @@ namespace inference {
void
Init
(
bool
init_p2p
);
void
Init
(
bool
init_p2p
);
void
LoadPersistables
(
framework
::
Executor
&
executor
,
framework
::
Scope
&
scope
,
void
LoadPersistables
(
framework
::
Executor
*
executor
,
framework
::
Scope
*
scope
,
const
framework
::
ProgramDesc
&
main_program
,
const
framework
::
ProgramDesc
&
main_program
,
const
std
::
string
&
dirname
,
const
std
::
string
&
dirname
,
const
std
::
string
&
param_filename
);
const
std
::
string
&
param_filename
);
std
::
unique_ptr
<
framework
::
ProgramDesc
>
Load
(
framework
::
Executor
&
executor
,
std
::
unique_ptr
<
framework
::
ProgramDesc
>
Load
(
framework
::
Executor
*
executor
,
framework
::
Scope
&
scope
,
framework
::
Scope
*
scope
,
const
std
::
string
&
dirname
);
const
std
::
string
&
dirname
);
std
::
unique_ptr
<
framework
::
ProgramDesc
>
Load
(
framework
::
Executor
&
executor
,
std
::
unique_ptr
<
framework
::
ProgramDesc
>
Load
(
framework
::
Executor
*
executor
,
framework
::
Scope
&
scope
,
framework
::
Scope
*
scope
,
const
std
::
string
&
prog_filename
,
const
std
::
string
&
prog_filename
,
const
std
::
string
&
param_filename
);
const
std
::
string
&
param_filename
);
...
...
paddle/fluid/inference/tests/test_helper.h
浏览文件 @
1d756746
...
@@ -133,12 +133,12 @@ void TestInference(const std::string& dirname,
...
@@ -133,12 +133,12 @@ void TestInference(const std::string& dirname,
std
::
string
prog_filename
=
"__model_combined__"
;
std
::
string
prog_filename
=
"__model_combined__"
;
std
::
string
param_filename
=
"__params_combined__"
;
std
::
string
param_filename
=
"__params_combined__"
;
inference_program
=
paddle
::
inference
::
Load
(
inference_program
=
paddle
::
inference
::
Load
(
executor
,
*
scope
,
dirname
+
"/"
+
prog_filename
,
&
executor
,
scope
,
dirname
+
"/"
+
prog_filename
,
dirname
+
"/"
+
param_filename
);
dirname
+
"/"
+
param_filename
);
}
else
{
}
else
{
// Parameters are saved in separate files sited in the specified
// Parameters are saved in separate files sited in the specified
// `dirname`.
// `dirname`.
inference_program
=
paddle
::
inference
::
Load
(
executor
,
*
scope
,
dirname
);
inference_program
=
paddle
::
inference
::
Load
(
&
executor
,
scope
,
dirname
);
}
}
}
}
// Disable the profiler and print the timing information
// Disable the profiler and print the timing information
...
...
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
1d756746
...
@@ -163,7 +163,12 @@ function(op_library TARGET)
...
@@ -163,7 +163,12 @@ function(op_library TARGET)
# pybind USE_OP
# pybind USE_OP
if
(
${
pybind_flag
}
EQUAL 0
)
if
(
${
pybind_flag
}
EQUAL 0
)
# NOTE(*): activation use macro to regist the kernels, set use_op manually.
if
(
${
TARGET
}
STREQUAL
"activation"
)
file
(
APPEND
${
pybind_file
}
"USE_OP(relu);
\n
"
)
else
()
file
(
APPEND
${
pybind_file
}
"USE_OP(
${
TARGET
}
);
\n
"
)
file
(
APPEND
${
pybind_file
}
"USE_OP(
${
TARGET
}
);
\n
"
)
endif
()
endif
()
endif
()
endfunction
()
endfunction
()
...
...
paddle/fluid/operators/activation_op.cc
浏览文件 @
1d756746
此差异已折叠。
点击以展开。
paddle/fluid/operators/activation_op.cu
浏览文件 @
1d756746
...
@@ -9,7 +9,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -9,7 +9,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/float16.h"
...
...
paddle/fluid/operators/activation_op.h
浏览文件 @
1d756746
...
@@ -10,6 +10,9 @@ See the License for the specific language governing permissions and
...
@@ -10,6 +10,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#include <glog/logging.h>
#include <string>
#include <unordered_set>
#include <utility>
#include <utility>
#include <vector>
#include <vector>
...
@@ -25,6 +28,16 @@ limitations under the License. */
...
@@ -25,6 +28,16 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
/* Use ugly global variable, for the using in python layer side
Please refer to the layer_helper.py and get the details.
*/
static
std
::
unordered_set
<
std
::
string
>
InplaceOpSet
=
{
"sigmoid"
,
"exp"
,
"relu"
,
"tanh"
,
"sqrt"
,
"ceil"
,
"floor"
,
"reciprocal"
,
"relu6"
,
"soft_relu"
,
"hard_sigmoid"
,
};
static
bool
IsInplace
(
std
::
string
op
)
{
return
InplaceOpSet
.
count
(
op
);
}
template
<
typename
DeviceContext
,
typename
Functor
>
template
<
typename
DeviceContext
,
typename
Functor
>
class
ActivationKernel
class
ActivationKernel
:
public
framework
::
OpKernel
<
typename
Functor
::
ELEMENT_TYPE
>
{
:
public
framework
::
OpKernel
<
typename
Functor
::
ELEMENT_TYPE
>
{
...
@@ -60,7 +73,6 @@ class ActivationGradKernel
...
@@ -60,7 +73,6 @@ class ActivationGradKernel
public:
public:
using
T
=
typename
Functor
::
ELEMENT_TYPE
;
using
T
=
typename
Functor
::
ELEMENT_TYPE
;
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
X
=
context
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
*
Out
=
context
.
Input
<
framework
::
Tensor
>
(
"Out"
);
auto
*
Out
=
context
.
Input
<
framework
::
Tensor
>
(
"Out"
);
auto
*
dOut
=
auto
*
dOut
=
context
.
Input
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
context
.
Input
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
...
@@ -68,7 +80,6 @@ class ActivationGradKernel
...
@@ -68,7 +80,6 @@ class ActivationGradKernel
dX
->
mutable_data
<
T
>
(
context
.
GetPlace
());
dX
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
dout
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
dOut
);
auto
dout
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
dOut
);
auto
x
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
X
);
auto
out
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
Out
);
auto
out
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
Out
);
auto
dx
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
dX
);
auto
dx
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
dX
);
auto
*
place
=
auto
*
place
=
...
@@ -78,7 +89,16 @@ class ActivationGradKernel
...
@@ -78,7 +89,16 @@ class ActivationGradKernel
for
(
auto
&
attr
:
attrs
)
{
for
(
auto
&
attr
:
attrs
)
{
*
attr
.
second
=
context
.
Attr
<
float
>
(
attr
.
first
);
*
attr
.
second
=
context
.
Attr
<
float
>
(
attr
.
first
);
}
}
functor
(
*
place
,
x
,
out
,
dout
,
dx
);
bool
inplace
=
functor
.
Inplace
();
if
(
!
inplace
)
{
auto
*
X
=
context
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
x
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
X
);
functor
(
*
place
,
x
,
out
,
dout
,
dx
);
}
else
{
VLOG
(
10
)
<<
" Inplace activation "
;
auto
x
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
dX
);
functor
(
*
place
,
x
,
out
,
dout
,
dx
);
}
}
}
};
};
...
@@ -89,6 +109,14 @@ struct BaseActivationFunctor {
...
@@ -89,6 +109,14 @@ struct BaseActivationFunctor {
using
AttrPair
=
std
::
vector
<
std
::
pair
<
const
char
*
,
float
*>>
;
using
AttrPair
=
std
::
vector
<
std
::
pair
<
const
char
*
,
float
*>>
;
AttrPair
GetAttrs
()
{
return
AttrPair
();
}
AttrPair
GetAttrs
()
{
return
AttrPair
();
}
/* NOTE(*): Output reuse X memory if X is not dependented by its Gradient.
For example, sigmoid op's gradient didn't involve x, so its output can
reuse
input memory. But abs op's gradient use x, it can not be inplaced.
gradient did use x.
*/
bool
Inplace
()
const
{
return
false
;
}
};
};
// sigmoid(x) = 1 / (1 + exp(-x))
// sigmoid(x) = 1 / (1 + exp(-x))
...
@@ -102,6 +130,7 @@ struct SigmoidFunctor : public BaseActivationFunctor<T> {
...
@@ -102,6 +130,7 @@ struct SigmoidFunctor : public BaseActivationFunctor<T> {
template
<
typename
T
>
template
<
typename
T
>
struct
SigmoidGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
struct
SigmoidGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
bool
Inplace
()
const
{
return
IsInplace
(
"sigmoid"
);
}
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
...
@@ -156,6 +185,7 @@ struct ExpFunctor : public BaseActivationFunctor<T> {
...
@@ -156,6 +185,7 @@ struct ExpFunctor : public BaseActivationFunctor<T> {
template
<
typename
T
>
template
<
typename
T
>
struct
ExpGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
struct
ExpGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
bool
Inplace
()
const
{
return
IsInplace
(
"exp"
);
}
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
...
@@ -174,10 +204,11 @@ struct ReluFunctor : public BaseActivationFunctor<T> {
...
@@ -174,10 +204,11 @@ struct ReluFunctor : public BaseActivationFunctor<T> {
template
<
typename
T
>
template
<
typename
T
>
struct
ReluGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
struct
ReluGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
bool
Inplace
()
const
{
return
IsInplace
(
"relu"
);
}
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
(
x
>
static_cast
<
T
>
(
0
)).
template
cast
<
T
>();
dx
.
device
(
d
)
=
dout
*
(
out
>
static_cast
<
T
>
(
0
)).
template
cast
<
T
>();
}
}
};
};
...
@@ -192,6 +223,7 @@ struct TanhFunctor : public BaseActivationFunctor<T> {
...
@@ -192,6 +223,7 @@ struct TanhFunctor : public BaseActivationFunctor<T> {
template
<
typename
T
>
template
<
typename
T
>
struct
TanhGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
struct
TanhGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
bool
Inplace
()
const
{
return
IsInplace
(
"tanh"
);
}
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
...
@@ -297,6 +329,7 @@ struct SqrtFunctor : public BaseActivationFunctor<T> {
...
@@ -297,6 +329,7 @@ struct SqrtFunctor : public BaseActivationFunctor<T> {
template
<
typename
T
>
template
<
typename
T
>
struct
SqrtGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
struct
SqrtGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
bool
Inplace
()
const
{
return
IsInplace
(
"sqrt"
);
}
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
...
@@ -316,10 +349,11 @@ struct CeilFunctor : public BaseActivationFunctor<T> {
...
@@ -316,10 +349,11 @@ struct CeilFunctor : public BaseActivationFunctor<T> {
template
<
typename
T
>
template
<
typename
T
>
struct
ZeroGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
struct
ZeroGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
bool
Inplace
()
const
{
return
IsInplace
(
"ceil"
);
}
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
static_cast
<
T
>
(
0
)
/
x
;
dx
.
device
(
d
)
=
static_cast
<
T
>
(
0
)
/
out
;
}
}
};
};
...
@@ -432,6 +466,7 @@ struct ReciprocalFunctor : public BaseActivationFunctor<T> {
...
@@ -432,6 +466,7 @@ struct ReciprocalFunctor : public BaseActivationFunctor<T> {
template
<
typename
T
>
template
<
typename
T
>
struct
ReciprocalGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
struct
ReciprocalGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
bool
Inplace
()
const
{
return
IsInplace
(
"reciprocal"
);
}
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
...
@@ -531,12 +566,14 @@ struct Relu6GradFunctor : public BaseActivationFunctor<T> {
...
@@ -531,12 +566,14 @@ struct Relu6GradFunctor : public BaseActivationFunctor<T> {
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"threshold"
,
&
threshold
}};
return
{{
"threshold"
,
&
threshold
}};
}
}
bool
Inplace
()
const
{
return
IsInplace
(
"relu6"
);
}
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
dx
.
device
(
d
)
=
((
x
>
static_cast
<
T
>
(
0
))
*
(
x
<
static_cast
<
T
>
(
threshold
)))
dout
*
.
template
cast
<
T
>();
((
out
>
static_cast
<
T
>
(
0
))
*
(
out
<
static_cast
<
T
>
(
threshold
)))
.
template
cast
<
T
>();
}
}
};
};
...
@@ -611,11 +648,12 @@ struct SoftReluGradFunctor : public BaseActivationFunctor<T> {
...
@@ -611,11 +648,12 @@ struct SoftReluGradFunctor : public BaseActivationFunctor<T> {
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"threshold"
,
&
threshold
}};
return
{{
"threshold"
,
&
threshold
}};
}
}
bool
Inplace
()
const
{
return
IsInplace
(
"soft_relu"
);
}
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
auto
tmp
=
static_cast
<
T
>
(
threshold
);
auto
tmp
=
static_cast
<
T
>
(
threshold
);
auto
temp
=
((
x
>
-
tmp
)
*
(
x
<
tmp
)).
template
cast
<
T
>().
eval
();
auto
temp
=
((
out
>
-
tmp
)
*
(
out
<
tmp
)).
template
cast
<
T
>().
eval
();
dx
.
device
(
d
)
=
dout
*
(
static_cast
<
T
>
(
1
)
-
(
-
out
).
exp
())
*
temp
;
dx
.
device
(
d
)
=
dout
*
(
static_cast
<
T
>
(
1
)
-
(
-
out
).
exp
())
*
temp
;
}
}
};
};
...
@@ -791,7 +829,7 @@ struct HardSigmoidGradFunctor : public BaseActivationFunctor<T> {
...
@@ -791,7 +829,7 @@ struct HardSigmoidGradFunctor : public BaseActivationFunctor<T> {
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"slope"
,
&
slope
},
{
"offset"
,
&
offset
}};
return
{{
"slope"
,
&
slope
},
{
"offset"
,
&
offset
}};
}
}
bool
Inplace
()
{
return
IsInplace
(
"hard_sigmoid"
);
}
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
...
...
paddle/fluid/operators/dropout_op.cu
浏览文件 @
1d756746
...
@@ -24,12 +24,34 @@ namespace paddle {
...
@@ -24,12 +24,34 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
template
<
typename
T
>
template
<
typename
T
>
__global__
void
RandomGenerator
(
const
size_t
n
,
const
T
*
src
,
__global__
void
RandomGenerator
(
const
size_t
n
,
const
int
seed
,
const
T
*
cpu_mask_data
,
T
*
mask_data
,
T
*
dst
)
{
const
float
dropout_prob
,
const
T
*
src
,
T
*
mask_data
,
T
*
dst
)
{
thrust
::
minstd_rand
rng
;
rng
.
seed
(
seed
);
thrust
::
uniform_real_distribution
<
float
>
dist
(
0
,
1
);
int
idx
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
int
idx
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
int
step_size
=
0
;
T
mask
;
T
dest
;
for
(;
idx
<
n
;
idx
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(;
idx
<
n
;
idx
+=
blockDim
.
x
*
gridDim
.
x
)
{
mask_data
[
idx
]
=
cpu_mask_data
[
idx
];
T
s
=
src
[
idx
];
dst
[
idx
]
=
mask_data
[
idx
]
*
src
[
idx
];
if
(
step_size
==
0
)
{
rng
.
discard
(
idx
);
step_size
=
blockDim
.
x
*
gridDim
.
x
;
}
else
{
rng
.
discard
(
step_size
);
}
if
(
dist
(
rng
)
<
dropout_prob
)
{
mask
=
static_cast
<
T
>
(
0
);
}
else
{
mask
=
static_cast
<
T
>
(
1
);
}
dest
=
s
*
mask
;
mask_data
[
idx
]
=
mask
;
dst
[
idx
]
=
dest
;
}
}
}
}
...
@@ -56,27 +78,15 @@ class GPUDropoutKernel : public framework::OpKernel<T> {
...
@@ -56,27 +78,15 @@ class GPUDropoutKernel : public framework::OpKernel<T> {
std
::
random_device
rnd
;
std
::
random_device
rnd
;
int
seed
=
int
seed
=
context
.
Attr
<
bool
>
(
"fix_seed"
)
?
context
.
Attr
<
int
>
(
"seed"
)
:
rnd
();
context
.
Attr
<
bool
>
(
"fix_seed"
)
?
context
.
Attr
<
int
>
(
"seed"
)
:
rnd
();
std
::
minstd_rand
engine
;
engine
.
seed
(
seed
);
std
::
uniform_real_distribution
<
float
>
dist
(
0
,
1
);
framework
::
Vector
<
T
>
cpu_mask
(
size
);
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
{
if
(
dist
(
engine
)
<
dropout_prob
)
{
cpu_mask
[
i
]
=
static_cast
<
T
>
(
0
);
}
else
{
cpu_mask
[
i
]
=
static_cast
<
T
>
(
1
);
}
}
int
threads
=
512
;
int
threads
=
512
;
int
grid
=
(
x
->
numel
()
+
threads
-
1
)
/
threads
;
int
grid
=
(
x
->
numel
()
+
threads
-
1
)
/
threads
;
RandomGenerator
<
RandomGenerator
<
T
><<<
grid
,
threads
,
0
,
context
.
cuda_device_context
().
stream
()
>>>
(
T
><<<
grid
,
threads
,
0
,
context
.
cuda_device_context
().
stream
()
>>>
(
size
,
x_data
,
cpu_mask
.
CUDAData
(
context
.
GetPlace
()),
mask_data
,
size
,
seed
,
dropout_prob
,
x_data
,
mask_data
,
y_data
);
y_data
);
}
else
{
}
else
{
auto
X
=
Eigen
Vector
<
T
>::
Flatten
(
*
x
);
auto
X
=
Eigen
Matrix
<
T
>::
Reshape
(
*
x
,
1
);
auto
Y
=
Eigen
Vector
<
T
>::
Flatten
(
*
y
);
auto
Y
=
Eigen
Matrix
<
T
>::
Reshape
(
*
y
,
1
);
Y
.
device
(
place
)
=
X
*
static_cast
<
T
>
(
1.0
f
-
dropout_prob
);
Y
.
device
(
place
)
=
X
*
static_cast
<
T
>
(
1.0
f
-
dropout_prob
);
}
}
}
}
...
@@ -89,8 +99,6 @@ namespace ops = paddle::operators;
...
@@ -89,8 +99,6 @@ namespace ops = paddle::operators;
namespace
plat
=
paddle
::
platform
;
namespace
plat
=
paddle
::
platform
;
REGISTER_OP_CUDA_KERNEL
(
REGISTER_OP_CUDA_KERNEL
(
dropout
,
ops
::
GPUDropoutKernel
<
plat
::
CUDADeviceContext
,
float
>
,
dropout
,
ops
::
GPUDropoutKernel
<
plat
::
CUDADeviceContext
,
float
>
,
ops
::
GPUDropoutKernel
<
plat
::
CUDADeviceContext
,
double
>
,
ops
::
GPUDropoutKernel
<
plat
::
CUDADeviceContext
,
plat
::
float16
>
);
ops
::
GPUDropoutKernel
<
plat
::
CUDADeviceContext
,
plat
::
float16
>
);
REGISTER_OP_CUDA_KERNEL
(
dropout_grad
,
REGISTER_OP_CUDA_KERNEL
(
dropout_grad
,
ops
::
DropoutGradKernel
<
plat
::
CUDADeviceContext
,
double
>
,
ops
::
DropoutGradKernel
<
plat
::
CUDADeviceContext
,
float
>
);
ops
::
DropoutGradKernel
<
plat
::
CUDADeviceContext
,
float
>
);
paddle/fluid/operators/dropout_op.h
浏览文件 @
1d756746
...
@@ -24,7 +24,7 @@ namespace operators {
...
@@ -24,7 +24,7 @@ namespace operators {
using
Tensor
=
framework
::
Tensor
;
using
Tensor
=
framework
::
Tensor
;
template
<
typename
T
,
int
MajorType
=
Eigen
::
RowMajor
,
template
<
typename
T
,
int
MajorType
=
Eigen
::
RowMajor
,
typename
IndexType
=
Eigen
::
DenseIndex
>
typename
IndexType
=
Eigen
::
DenseIndex
>
using
Eigen
Vector
=
framework
::
EigenVector
<
T
,
MajorType
,
IndexType
>
;
using
Eigen
Matrix
=
framework
::
EigenMatrix
<
T
,
MajorType
,
IndexType
>
;
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
>
class
CPUDropoutKernel
:
public
framework
::
OpKernel
<
T
>
{
class
CPUDropoutKernel
:
public
framework
::
OpKernel
<
T
>
{
...
@@ -60,8 +60,8 @@ class CPUDropoutKernel : public framework::OpKernel<T> {
...
@@ -60,8 +60,8 @@ class CPUDropoutKernel : public framework::OpKernel<T> {
}
}
}
}
}
else
{
}
else
{
auto
X
=
Eigen
Vector
<
T
>::
Flatten
(
*
x
);
auto
X
=
Eigen
Matrix
<
T
>::
Reshape
(
*
x
,
1
);
auto
Y
=
Eigen
Vector
<
T
>::
Flatten
(
*
y
);
auto
Y
=
Eigen
Matrix
<
T
>::
Reshape
(
*
y
,
1
);
auto
&
place
=
auto
&
place
=
*
context
.
template
device_context
<
DeviceContext
>().
eigen_device
();
*
context
.
template
device_context
<
DeviceContext
>().
eigen_device
();
Y
.
device
(
place
)
=
X
*
(
1.0
f
-
dropout_prob
);
Y
.
device
(
place
)
=
X
*
(
1.0
f
-
dropout_prob
);
...
@@ -81,9 +81,9 @@ class DropoutGradKernel : public framework::OpKernel<T> {
...
@@ -81,9 +81,9 @@ class DropoutGradKernel : public framework::OpKernel<T> {
auto
*
mask
=
context
.
Input
<
Tensor
>
(
"Mask"
);
auto
*
mask
=
context
.
Input
<
Tensor
>
(
"Mask"
);
grad_x
->
mutable_data
<
T
>
(
context
.
GetPlace
());
grad_x
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
M
=
Eigen
Vector
<
T
>::
Flatten
(
*
mask
);
auto
M
=
Eigen
Matrix
<
T
>::
Reshape
(
*
mask
,
1
);
auto
dX
=
Eigen
Vector
<
T
>::
Flatten
(
*
grad_x
);
auto
dX
=
Eigen
Matrix
<
T
>::
Reshape
(
*
grad_x
,
1
);
auto
dY
=
Eigen
Vector
<
T
>::
Flatten
(
*
grad_y
);
auto
dY
=
Eigen
Matrix
<
T
>::
Reshape
(
*
grad_y
,
1
);
auto
&
place
=
auto
&
place
=
*
context
.
template
device_context
<
DeviceContext
>().
eigen_device
();
*
context
.
template
device_context
<
DeviceContext
>().
eigen_device
();
...
...
paddle/fluid/operators/dropout_op_test.cc
浏览文件 @
1d756746
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <unistd.h>
#include <unistd.h>
#include <iostream>
#include <string>
#include <string>
#include <thread> // NOLINT
#include <thread> // NOLINT
...
@@ -33,16 +32,14 @@ namespace m = paddle::operators::math;
...
@@ -33,16 +32,14 @@ namespace m = paddle::operators::math;
USE_OP
(
dropout
);
USE_OP
(
dropout
);
static
paddle
::
framework
::
DDim
dims
=
{
10
,
10
};
void
Compare
(
f
::
Scope
*
scope
,
const
p
::
DeviceContext
&
ctx
)
{
void
Compare
(
f
::
Scope
*
scope
,
const
p
::
DeviceContext
&
ctx
)
{
// init
// init
auto
var
=
scope
->
Var
(
"X"
);
auto
var
=
scope
->
Var
(
"X"
);
auto
tensor
=
var
->
GetMutable
<
f
::
LoDTensor
>
();
auto
tensor
=
var
->
GetMutable
<
f
::
LoDTensor
>
();
tensor
->
Resize
(
dims
);
tensor
->
Resize
(
{
10
,
10
}
);
std
::
vector
<
float
>
init
;
std
::
vector
<
float
>
init
;
for
(
int64_t
i
=
0
;
i
<
f
::
product
(
dims
)
;
++
i
)
{
for
(
int64_t
i
=
0
;
i
<
10
*
10
;
++
i
)
{
init
.
push_back
(
1.0
);
init
.
push_back
(
1.0
);
}
}
...
@@ -51,19 +48,18 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
...
@@ -51,19 +48,18 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
auto
place
=
ctx
.
GetPlace
();
auto
place
=
ctx
.
GetPlace
();
auto
out_var
=
scope
->
Var
(
"Out"
);
auto
out_var
=
scope
->
Var
(
"Out"
);
auto
out_tensor
=
out_var
->
GetMutable
<
f
::
LoDTensor
>
();
auto
out_tensor
=
out_var
->
GetMutable
<
f
::
LoDTensor
>
();
out_tensor
->
Resize
(
dims
);
out_tensor
->
Resize
(
{
10
,
10
}
);
out_tensor
->
mutable_data
<
float
>
(
place
);
// allocate
out_tensor
->
mutable_data
<
float
>
(
place
);
// allocate
auto
mask_var
=
scope
->
Var
(
"Mask"
);
auto
mask_var
=
scope
->
Var
(
"Mask"
);
auto
mask_tensor
=
mask_var
->
GetMutable
<
f
::
LoDTensor
>
();
auto
mask_tensor
=
mask_var
->
GetMutable
<
f
::
LoDTensor
>
();
mask_tensor
->
Resize
(
dims
);
mask_tensor
->
Resize
(
{
10
,
10
}
);
mask_tensor
->
mutable_data
<
float
>
(
place
);
// allocate
mask_tensor
->
mutable_data
<
float
>
(
place
);
// allocate
// run
// run
f
::
AttributeMap
attrs
;
f
::
AttributeMap
attrs
;
float
dropout_prob
=
0.5
;
float
dropout_prob
=
0.5
;
attrs
.
insert
({
"is_test"
,
false
});
attrs
.
insert
({
"fix_seed"
,
1
});
attrs
.
insert
({
"fix_seed"
,
true
});
attrs
.
insert
({
"seed"
,
3
});
attrs
.
insert
({
"seed"
,
3
});
attrs
.
insert
({
"dropout_prob"
,
dropout_prob
});
attrs
.
insert
({
"dropout_prob"
,
dropout_prob
});
auto
dropout_op
=
f
::
OpRegistry
::
CreateOp
(
auto
dropout_op
=
f
::
OpRegistry
::
CreateOp
(
...
@@ -73,7 +69,6 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
...
@@ -73,7 +69,6 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
std
::
vector
<
float
>
out_vec
;
std
::
vector
<
float
>
out_vec
;
TensorToVector
(
*
out_tensor
,
ctx
,
&
out_vec
);
TensorToVector
(
*
out_tensor
,
ctx
,
&
out_vec
);
ctx
.
Wait
();
std
::
vector
<
float
>
std_out
=
{
std
::
vector
<
float
>
std_out
=
{
0
,
0
,
1
,
1
,
1
,
1
,
1
,
0
,
1
,
0
,
0
,
1
,
1
,
0
,
1
,
1
,
1
,
1
,
0
,
1
,
0
,
0
,
1
,
1
,
1
,
1
,
1
,
0
,
1
,
0
,
0
,
1
,
1
,
0
,
1
,
1
,
1
,
1
,
0
,
1
,
...
@@ -88,22 +83,22 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
...
@@ -88,22 +83,22 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
}
}
}
}
// TODO(wyi): Due to
// https://github.com/PaddlePaddle/Paddle/issues/9507, I temporarily
// disable this test to remove the prevention of the merge of
// unrelated PRs.
/*
TEST(Dropout, CPUDense) {
TEST(Dropout, CPUDense) {
f::Scope scope;
f::Scope scope;
p::CPUPlace place;
p::CPUPlace place;
p::CPUDeviceContext ctx(place);
p::CPUDeviceContext ctx(place);
Compare
(
&
scope
,
ctx
);
Compare(scope, ctx);
}
}
// TODO(wyi, dzhwinter): Due to
// https://github.com/PaddlePaddle/Paddle/issues/9507, I temporarily
// disable this test to remove the prevention of the merge of
// unrelated PRs.
/*
TEST(Dropout, GPUDense) {
TEST(Dropout, GPUDense) {
f::Scope scope;
f::Scope scope;
p::CUDAPlace place;
p::CUDAPlace place;
p::CUDADeviceContext ctx(place);
p::CUDADeviceContext ctx(place);
Compare(
&
scope, ctx);
Compare(scope, ctx);
}
}
*/
*/
paddle/fluid/operators/mkldnn_activation_op.h
浏览文件 @
1d756746
...
@@ -60,7 +60,7 @@ class MKLDNNActivationGradKernel
...
@@ -60,7 +60,7 @@ class MKLDNNActivationGradKernel
}
}
};
};
namespace
{
namespace
{
// NOLINT
framework
::
OpKernelType
GetKernelType
(
framework
::
OpKernelType
GetKernelType
(
const
framework
::
ExecutionContext
&
ctx
,
const
framework
::
ExecutionContext
&
ctx
,
const
framework
::
OperatorWithKernel
&
oper
)
{
const
framework
::
OperatorWithKernel
&
oper
)
{
...
...
paddle/fluid/pybind/protobuf.cc
浏览文件 @
1d756746
...
@@ -127,6 +127,7 @@ void BindProgramDesc(pybind11::module *m) {
...
@@ -127,6 +127,7 @@ void BindProgramDesc(pybind11::module *m) {
.
def
(
"block"
,
&
pd
::
ProgramDesc
::
MutableBlock
,
.
def
(
"block"
,
&
pd
::
ProgramDesc
::
MutableBlock
,
pybind11
::
return_value_policy
::
reference
)
pybind11
::
return_value_policy
::
reference
)
.
def
(
"num_blocks"
,
&
pd
::
ProgramDesc
::
Size
)
.
def
(
"num_blocks"
,
&
pd
::
ProgramDesc
::
Size
)
.
def
(
"flush"
,
&
pd
::
ProgramDesc
::
Flush
)
.
def
(
"get_feed_target_names"
,
&
pd
::
ProgramDesc
::
GetFeedTargetNames
)
.
def
(
"get_feed_target_names"
,
&
pd
::
ProgramDesc
::
GetFeedTargetNames
)
.
def
(
"get_fetch_target_names"
,
&
pd
::
ProgramDesc
::
GetFetchTargetNames
)
.
def
(
"get_fetch_target_names"
,
&
pd
::
ProgramDesc
::
GetFetchTargetNames
)
.
def
(
"serialize_to_string"
,
SerializeMessage
<
pd
::
ProgramDesc
>
)
.
def
(
"serialize_to_string"
,
SerializeMessage
<
pd
::
ProgramDesc
>
)
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
1d756746
...
@@ -33,6 +33,7 @@ limitations under the License. */
...
@@ -33,6 +33,7 @@ limitations under the License. */
#include "paddle/fluid/framework/prune.h"
#include "paddle/fluid/framework/prune.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
...
@@ -461,6 +462,9 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -461,6 +462,9 @@ All parameter, weight, gradient are variables in Paddle.
self
.
back
().
set_lod
(
t
.
lod
());
self
.
back
().
set_lod
(
t
.
lod
());
});
});
m
.
def
(
"IsInplace"
,
[](
std
::
string
op
)
->
bool
{
return
operators
::
IsInplace
(
op
);
});
m
.
def
(
"op_support_gpu"
,
OpSupportGPU
);
m
.
def
(
"op_support_gpu"
,
OpSupportGPU
);
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
m
.
def
(
"get_cuda_device_count"
,
platform
::
GetCUDADeviceCount
);
m
.
def
(
"get_cuda_device_count"
,
platform
::
GetCUDADeviceCount
);
...
...
paddle/scripts/docker/build.sh
浏览文件 @
1d756746
...
@@ -155,7 +155,7 @@ EOF
...
@@ -155,7 +155,7 @@ EOF
function
gen_dockerfile
()
{
function
gen_dockerfile
()
{
# Set BASE_IMAGE according to env variables
# Set BASE_IMAGE according to env variables
if
[[
${
WITH_GPU
}
==
"ON"
]]
;
then
if
[[
${
WITH_GPU
}
==
"ON"
]]
;
then
BASE_IMAGE
=
"nvidia/cuda:8.0-cudnn
5
-runtime-ubuntu16.04"
BASE_IMAGE
=
"nvidia/cuda:8.0-cudnn
7
-runtime-ubuntu16.04"
else
else
BASE_IMAGE
=
"ubuntu:16.04"
BASE_IMAGE
=
"ubuntu:16.04"
fi
fi
...
@@ -164,7 +164,7 @@ function gen_dockerfile() {
...
@@ -164,7 +164,7 @@ function gen_dockerfile() {
DOCKERFILE_CUDNN_DSO
=
""
DOCKERFILE_CUDNN_DSO
=
""
if
[[
${
WITH_GPU
:-
OFF
}
==
'ON'
]]
;
then
if
[[
${
WITH_GPU
:-
OFF
}
==
'ON'
]]
;
then
DOCKERFILE_GPU_ENV
=
"ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu:
\$
{LD_LIBRARY_PATH}"
DOCKERFILE_GPU_ENV
=
"ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu:
\$
{LD_LIBRARY_PATH}"
DOCKERFILE_CUDNN_DSO
=
"RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.
5
/usr/lib/x86_64-linux-gnu/libcudnn.so"
DOCKERFILE_CUDNN_DSO
=
"RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.
7
/usr/lib/x86_64-linux-gnu/libcudnn.so"
fi
fi
cat
<<
EOF
cat
<<
EOF
...
...
python/paddle/fluid/io.py
浏览文件 @
1d756746
...
@@ -336,18 +336,20 @@ def save_inference_model(dirname,
...
@@ -336,18 +336,20 @@ def save_inference_model(dirname,
if
main_program
is
None
:
if
main_program
is
None
:
main_program
=
default_main_program
()
main_program
=
default_main_program
()
copy_program
=
main_program
if
not
os
.
path
.
isdir
(
dirname
):
if
not
os
.
path
.
isdir
(
dirname
):
os
.
makedirs
(
dirname
)
os
.
makedirs
(
dirname
)
# Clear the is_target information and remove the existed feed and fetch op
# Clear the is_target information and remove the existed feed and fetch op
global_block
=
main
_program
.
global_block
()
global_block
=
copy
_program
.
global_block
()
for
i
,
op
in
enumerate
(
global_block
.
ops
):
for
i
,
op
in
enumerate
(
global_block
.
ops
):
op
.
desc
.
set_is_target
(
False
)
op
.
desc
.
set_is_target
(
False
)
if
op
.
type
==
"feed"
or
op
.
type
==
"fetch"
:
if
op
.
type
==
"feed"
or
op
.
type
==
"fetch"
:
global_block
.
remove_op
(
i
)
global_block
.
remove_op
(
i
)
copy_program
.
desc
.
flush
()
pruned_program
=
main
_program
.
prune
(
targets
=
target_vars
)
pruned_program
=
copy
_program
.
prune
(
targets
=
target_vars
)
inference_program
=
pruned_program
.
inference_optimize
()
inference_program
=
pruned_program
.
inference_optimize
()
fetch_var_names
=
[
v
.
name
for
v
in
target_vars
]
fetch_var_names
=
[
v
.
name
for
v
in
target_vars
]
...
...
python/paddle/fluid/layer_helper.py
浏览文件 @
1d756746
...
@@ -19,6 +19,7 @@ from framework import Variable, Parameter, default_main_program, default_startup
...
@@ -19,6 +19,7 @@ from framework import Variable, Parameter, default_main_program, default_startup
import
unique_name
import
unique_name
from
paddle.fluid.initializer
import
Constant
,
Xavier
from
paddle.fluid.initializer
import
Constant
,
Xavier
from
param_attr
import
ParamAttr
,
WeightNormParamAttr
from
param_attr
import
ParamAttr
,
WeightNormParamAttr
import
core
class
LayerHelper
(
object
):
class
LayerHelper
(
object
):
...
@@ -398,13 +399,16 @@ class LayerHelper(object):
...
@@ -398,13 +399,16 @@ class LayerHelper(object):
return
input_var
return
input_var
if
isinstance
(
act
,
basestring
):
if
isinstance
(
act
,
basestring
):
act
=
{
'type'
:
act
}
act
=
{
'type'
:
act
}
tmp
=
self
.
create_tmp_variable
(
dtype
=
input_var
.
dtype
)
if
'use_mkldnn'
in
self
.
kwargs
:
if
'use_mkldnn'
in
self
.
kwargs
:
act
[
'use_mkldnn'
]
=
self
.
kwargs
.
get
(
'use_mkldnn'
)
act
[
'use_mkldnn'
]
=
self
.
kwargs
.
get
(
'use_mkldnn'
)
act_type
=
act
.
pop
(
'type'
)
act_type
=
act
.
pop
(
'type'
)
if
'use_mkldnn'
in
self
.
kwargs
:
if
'use_mkldnn'
in
self
.
kwargs
:
act
[
'use_mkldnn'
]
=
self
.
kwargs
.
get
(
'use_mkldnn'
)
act
[
'use_mkldnn'
]
=
self
.
kwargs
.
get
(
'use_mkldnn'
)
tmp
=
input_var
# NOTE(dzhwinter): some activation support inplace compution.
if
not
core
.
IsInplace
(
act_type
):
tmp
=
self
.
create_tmp_variable
(
dtype
=
input_var
.
dtype
)
self
.
append_op
(
self
.
append_op
(
type
=
act_type
,
type
=
act_type
,
inputs
=
{
"X"
:
[
input_var
]},
inputs
=
{
"X"
:
[
input_var
]},
...
...
python/paddle/fluid/layers/io.py
浏览文件 @
1d756746
...
@@ -21,7 +21,7 @@ from ..executor import global_scope
...
@@ -21,7 +21,7 @@ from ..executor import global_scope
__all__
=
[
__all__
=
[
'data'
,
'BlockGuardServ'
,
'ListenAndServ'
,
'Send'
,
'open_recordio_file'
,
'data'
,
'BlockGuardServ'
,
'ListenAndServ'
,
'Send'
,
'open_recordio_file'
,
'open_files'
,
'read_file'
,
'shuffle'
,
'double_buffer'
'open_files'
,
'read_file'
,
'shuffle'
,
'
batch'
,
'
double_buffer'
]
]
...
@@ -290,7 +290,7 @@ def open_recordio_file(filename,
...
@@ -290,7 +290,7 @@ def open_recordio_file(filename,
lod_levels
,
lod_levels
,
dtypes
,
dtypes
,
pass_num
=
1
,
pass_num
=
1
,
for_parallel
=
Fals
e
):
for_parallel
=
Tru
e
):
"""
"""
Open a RecordIO file
Open a RecordIO file
...
@@ -364,7 +364,7 @@ def open_files(filenames,
...
@@ -364,7 +364,7 @@ def open_files(filenames,
thread_num
,
thread_num
,
buffer_size
=
None
,
buffer_size
=
None
,
pass_num
=
1
,
pass_num
=
1
,
for_parallel
=
Fals
e
):
for_parallel
=
Tru
e
):
"""
"""
Open files
Open files
...
@@ -476,6 +476,11 @@ def shuffle(reader, buffer_size):
...
@@ -476,6 +476,11 @@ def shuffle(reader, buffer_size):
'create_shuffle_reader'
,
reader
,
{
'buffer_size'
:
int
(
buffer_size
)})
'create_shuffle_reader'
,
reader
,
{
'buffer_size'
:
int
(
buffer_size
)})
def
batch
(
reader
,
batch_size
):
return
__create_unshared_decorated_reader__
(
'create_batch_reader'
,
reader
,
{
'batch_size'
:
int
(
batch_size
)})
def
double_buffer
(
reader
,
place
=
None
):
def
double_buffer
(
reader
,
place
=
None
):
attrs
=
dict
()
attrs
=
dict
()
if
place
is
not
None
:
if
place
is
not
None
:
...
...
python/paddle/fluid/tests/unittests/test_activation_op.py
浏览文件 @
1d756746
...
@@ -361,10 +361,7 @@ class TestCeil(OpTest):
...
@@ -361,10 +361,7 @@ class TestCeil(OpTest):
def
test_check_output
(
self
):
def
test_check_output
(
self
):
self
.
check_output
()
self
.
check_output
()
def
test_check_grad
(
self
):
# The same reason with TestFloor
if
self
.
dtype
==
np
.
float16
:
return
self
.
check_grad
([
'X'
],
'Out'
,
max_relative_error
=
0.007
)
def
init_dtype
(
self
):
def
init_dtype
(
self
):
pass
pass
...
@@ -396,10 +393,8 @@ class TestFloor(OpTest):
...
@@ -396,10 +393,8 @@ class TestFloor(OpTest):
def
test_check_output
(
self
):
def
test_check_output
(
self
):
self
.
check_output
()
self
.
check_output
()
def
test_check_grad
(
self
):
# the gradient on floor, ceil, round is undefined.
if
self
.
dtype
==
np
.
float16
:
# we return zero as gradient, but the numpy return nan
return
self
.
check_grad
([
'X'
],
'Out'
,
max_relative_error
=
0.007
)
def
init_dtype
(
self
):
def
init_dtype
(
self
):
pass
pass
...
@@ -501,11 +496,6 @@ class TestRound(OpTest):
...
@@ -501,11 +496,6 @@ class TestRound(OpTest):
def
test_check_output
(
self
):
def
test_check_output
(
self
):
self
.
check_output
()
self
.
check_output
()
def
test_check_grad
(
self
):
if
self
.
dtype
==
np
.
float16
:
return
self
.
check_grad
([
'X'
],
'Out'
,
max_relative_error
=
0.007
)
def
init_dtype
(
self
):
def
init_dtype
(
self
):
pass
pass
...
...
python/paddle/fluid/tests/unittests/test_batch_norm_op.py
浏览文件 @
1d756746
...
@@ -100,6 +100,9 @@ def _reference_grad(x, y_grad, scale, mean, var, epsilon, data_format):
...
@@ -100,6 +100,9 @@ def _reference_grad(x, y_grad, scale, mean, var, epsilon, data_format):
# (x - mean) * sum(grad_y * (x - mean)) / (var + epsilon))
# (x - mean) * sum(grad_y * (x - mean)) / (var + epsilon))
# transfer from (N, C, H, W) to (N, H, W, C) to simplify computation
# transfer from (N, C, H, W) to (N, H, W, C) to simplify computation
if
data_format
!=
"NCHW"
and
data_format
!=
"NHWC"
:
raise
ValueError
(
"Unknown data order."
)
if
data_format
==
"NCHW"
:
if
data_format
==
"NCHW"
:
x
=
np
.
transpose
(
x
,
(
0
,
2
,
3
,
1
))
x
=
np
.
transpose
(
x
,
(
0
,
2
,
3
,
1
))
y_grad
=
np
.
transpose
(
y_grad
,
(
0
,
2
,
3
,
1
))
y_grad
=
np
.
transpose
(
y_grad
,
(
0
,
2
,
3
,
1
))
...
@@ -304,7 +307,7 @@ class TestBatchNormOpTraining(unittest.TestCase):
...
@@ -304,7 +307,7 @@ class TestBatchNormOpTraining(unittest.TestCase):
# run backward
# run backward
y_grad
=
np
.
random
.
random_sample
(
shape
).
astype
(
np
.
float32
)
y_grad
=
np
.
random
.
random_sample
(
shape
).
astype
(
np
.
float32
)
x_grad
,
scale_grad
,
bias_grad
=
_reference_grad
(
x_grad
,
scale_grad
,
bias_grad
=
_reference_grad
(
x
,
y_grad
,
scale
,
saved_mean
,
var_ref
,
epsilon
,
data_
forma
t
)
x
,
y_grad
,
scale
,
saved_mean
,
var_ref
,
epsilon
,
data_
layou
t
)
var_dict
=
locals
()
var_dict
=
locals
()
var_dict
[
'y@GRAD'
]
=
y_grad
var_dict
[
'y@GRAD'
]
=
y_grad
...
...
python/paddle/fluid/tests/unittests/test_multi_file_reader.py
浏览文件 @
1d756746
...
@@ -69,7 +69,6 @@ class TestMultipleReader(unittest.TestCase):
...
@@ -69,7 +69,6 @@ class TestMultipleReader(unittest.TestCase):
break
break
batch_count
+=
1
batch_count
+=
1
self
.
assertLessEqual
(
img_val
.
shape
[
0
],
self
.
batch_size
)
self
.
assertLessEqual
(
img_val
.
shape
[
0
],
self
.
batch_size
)
data_files
.
reset
()
self
.
assertEqual
(
batch_count
,
self
.
num_batch
*
3
)
self
.
assertEqual
(
batch_count
,
self
.
num_batch
*
3
)
def
test_main
(
self
):
def
test_main
(
self
):
...
...
python/paddle/fluid/tests/unittests/test_multi_pass_reader.py
浏览文件 @
1d756746
...
@@ -43,9 +43,8 @@ class TestMultipleReader(unittest.TestCase):
...
@@ -43,9 +43,8 @@ class TestMultipleReader(unittest.TestCase):
filename
=
'./mnist.recordio'
,
filename
=
'./mnist.recordio'
,
shapes
=
[(
-
1
,
784
),
(
-
1
,
1
)],
shapes
=
[(
-
1
,
784
),
(
-
1
,
1
)],
lod_levels
=
[
0
,
0
],
lod_levels
=
[
0
,
0
],
dtypes
=
[
'float32'
,
'int64'
])
dtypes
=
[
'float32'
,
'int64'
],
data_file
=
fluid
.
layers
.
io
.
multi_pass
(
pass_num
=
self
.
pass_num
)
reader
=
data_file
,
pass_num
=
self
.
pass_num
)
img
,
label
=
fluid
.
layers
.
read_file
(
data_file
)
img
,
label
=
fluid
.
layers
.
read_file
(
data_file
)
if
fluid
.
core
.
is_compiled_with_cuda
():
if
fluid
.
core
.
is_compiled_with_cuda
():
...
@@ -65,5 +64,4 @@ class TestMultipleReader(unittest.TestCase):
...
@@ -65,5 +64,4 @@ class TestMultipleReader(unittest.TestCase):
break
break
batch_count
+=
1
batch_count
+=
1
self
.
assertLessEqual
(
img_val
.
shape
[
0
],
self
.
batch_size
)
self
.
assertLessEqual
(
img_val
.
shape
[
0
],
self
.
batch_size
)
data_file
.
reset
()
self
.
assertEqual
(
batch_count
,
self
.
num_batch
*
self
.
pass_num
)
self
.
assertEqual
(
batch_count
,
self
.
num_batch
*
self
.
pass_num
)
python/paddle/fluid/tests/unittests/test_recordio_reader.py
浏览文件 @
1d756746
...
@@ -74,13 +74,13 @@ class TestRecordIO(unittest.TestCase):
...
@@ -74,13 +74,13 @@ class TestRecordIO(unittest.TestCase):
avg_loss_np
.
append
(
tmp
)
avg_loss_np
.
append
(
tmp
)
batch_id
+=
1
batch_id
+=
1
data_file
.
reset
()
self
.
assertEqual
(
batch_id
,
self
.
num_batches
)
self
.
assertEqual
(
batch_id
,
self
.
num_batches
)
self
.
assertLess
(
avg_loss_np
[
-
1
],
avg_loss_np
[
0
])
self
.
assertLess
(
avg_loss_np
[
-
1
],
avg_loss_np
[
0
])
def
test_shuffle_reader
(
self
):
def
test_shuffle_reader
(
self
):
self
.
test_main
(
decorator_callback
=
lambda
reader
:
fluid
.
layers
.
io
.
shuffle
(
reader
,
buffer_size
=
200
))
self
.
test_main
(
decorator_callback
=
lambda
reader
:
fluid
.
layers
.
io
.
shuffle
(
reader
,
buffer_size
=
200
))
def
test_double_buffer_reader
(
self
):
def
test_double_buffer_reader
(
self
):
self
.
test_main
(
decorator_callback
=
lambda
reader
:
fluid
.
layers
.
io
.
double_buffer
(
reader
,
self
.
test_main
(
decorator_callback
=
lambda
reader
:
fluid
.
layers
.
io
.
double_buffer
(
reader
,
place
=
'cuda:0'
if
fluid
.
core
.
is_compiled_with_cuda
()
else
'cpu'
))
place
=
'cuda:0'
if
fluid
.
core
.
is_compiled_with_cuda
()
else
'cpu'
))
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录