Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
b6dc3a59
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b6dc3a59
编写于
6月 28, 2018
作者:
F
fengjiayi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add DataBalanceOpHandle to MultiDeviceSSAGraph
上级
a1f1a5ed
变更
7
隐藏空白更改
内联
并排
Showing
7 changed files
with
229 additions
and
5 deletions
+229
-5
paddle/fluid/framework/details/CMakeLists.txt
paddle/fluid/framework/details/CMakeLists.txt
+2
-1
paddle/fluid/framework/details/data_balance_op_handle.cc
paddle/fluid/framework/details/data_balance_op_handle.cc
+138
-0
paddle/fluid/framework/details/data_balance_op_handle.h
paddle/fluid/framework/details/data_balance_op_handle.h
+50
-0
paddle/fluid/framework/details/multi_devices_graph_builder.cc
...le/fluid/framework/details/multi_devices_graph_builder.cc
+26
-1
paddle/fluid/framework/details/multi_devices_graph_builder.h
paddle/fluid/framework/details/multi_devices_graph_builder.h
+3
-0
paddle/fluid/framework/lod_tensor.cc
paddle/fluid/framework/lod_tensor.cc
+2
-1
paddle/fluid/operators/read_op.cc
paddle/fluid/operators/read_op.cc
+8
-2
未找到文件。
paddle/fluid/framework/details/CMakeLists.txt
浏览文件 @
b6dc3a59
...
@@ -25,11 +25,12 @@ else()
...
@@ -25,11 +25,12 @@ else()
cc_library
(
broadcast_op_handle SRCS broadcast_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor
)
cc_library
(
broadcast_op_handle SRCS broadcast_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor
)
endif
()
endif
()
cc_library
(
data_balance_op_handle SRCS data_balance_op_handle.cc DEPS op_handle_base scope lod_tensor
)
cc_library
(
gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor
)
cc_library
(
gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor
)
cc_library
(
fuse_vars_op_handle SRCS fuse_vars_op_handle.cc DEPS op_handle_base scope
)
cc_library
(
fuse_vars_op_handle SRCS fuse_vars_op_handle.cc DEPS op_handle_base scope
)
cc_library
(
multi_devices_graph_builder SRCS multi_devices_graph_builder.cc DEPS ssa_graph_builder computation_op_handle
cc_library
(
multi_devices_graph_builder SRCS multi_devices_graph_builder.cc DEPS ssa_graph_builder computation_op_handle
scale_loss_grad_op_handle rpc_op_handle all_reduce_op_handle reduce_op_handle broadcast_op_handle
)
scale_loss_grad_op_handle rpc_op_handle all_reduce_op_handle reduce_op_handle broadcast_op_handle
data_balance_op_handle
)
cc_library
(
ssa_graph_builder_factory SRCS ssa_graph_builder_factory.cc DEPS multi_devices_graph_builder ssa_graph_printer ssa_graph_checker
)
cc_library
(
ssa_graph_builder_factory SRCS ssa_graph_builder_factory.cc DEPS multi_devices_graph_builder ssa_graph_printer ssa_graph_checker
)
...
...
paddle/fluid/framework/details/data_balance_op_handle.cc
0 → 100644
浏览文件 @
b6dc3a59
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/details/data_balance_op_handle.h"
#include <algorithm>
#include "paddle/fluid/framework/details/container_cast.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
// Constructs a data-balance op handle for the given per-device scopes and
// places.
//
// Neither vector is copied: the members `local_scopes_` and `places_` are
// bound as references to the arguments, so both arguments must outlive this
// handle.
DataBalanceOpHandle::DataBalanceOpHandle(
    const std::vector<Scope *> &local_scopes,
    const std::vector<platform::Place> &places)
    : local_scopes_(local_scopes), places_(places) {}
// Returns the fixed display name of this op handle.
std::string DataBalanceOpHandle::Name() const {
  return std::string("data balance");
}
// Builds the list of transfers that give every empty device some data.
//
// `device_sizes[i]` is the instance count currently held by device i. Each
// entry of the result is {src_dev_id, dst_dev_id, trans_size}.
//
//  * If no device has size 0, the plan is empty (nothing to balance).
//  * If the total instance count is smaller than the number of devices,
//    PADDLE_THROW is invoked: there is not enough data to go round.
//  * Otherwise the devices are ordered by size, largest first, and the last
//    `empty_num` entries of that ordering each receive
//    `total_size / device_num` instances from the current source entry. The
//    source cursor moves forward by one whenever the current source holds no
//    more than that amount.
//
// NOTE(review): treating the last `empty_num` sorted entries as the empty
// devices presumes sizes are never negative -- confirm with the caller.
std::vector<std::array<int, 3>> DataBalanceOpHandle::GetBalancePlan(
    const std::vector<int> &device_sizes) {
  using SizeAndDevice = std::array<int, 2>;  // {instance count, device id}

  int device_num = device_sizes.size();
  std::vector<SizeAndDevice> size_device_vec;
  size_device_vec.reserve(device_num);

  int empty_num = 0;
  int total_size = 0;
  for (int dev_id = 0; dev_id < device_num; ++dev_id) {
    const int cur_size = device_sizes[dev_id];
    total_size += cur_size;
    if (cur_size == 0) {
      ++empty_num;
    }
    size_device_vec.push_back({{cur_size, dev_id}});
  }

  std::vector<std::array<int, 3>> plan;
  if (empty_num == 0) {
    // Every device already has data; no transfer is required.
    return plan;
  }
  if (total_size < device_num) {
    // Fewer instances than devices: not enough data to balance.
    PADDLE_THROW("There is no next data.");
  }

  // Largest devices first.
  std::sort(size_device_vec.begin(), size_device_vec.end(),
            [](const SizeAndDevice &lhs, const SizeAndDevice &rhs) {
              return lhs[0] > rhs[0];
            });

  const int expected_device_size = total_size / device_num;
  int src_idx = 0;
  int dst_idx = device_num - empty_num;
  while (dst_idx < device_num) {
    if (size_device_vec[src_idx][0] <= expected_device_size) {
      // The current source cannot spare a full share; move to the next one,
      // which must still be one of the non-empty entries.
      ++src_idx;
      PADDLE_ENFORCE_LT(src_idx, device_num - empty_num);
    }
    SizeAndDevice &src = size_device_vec[src_idx];
    SizeAndDevice &dst = size_device_vec[dst_idx];
    src[0] -= expected_device_size;
    dst[0] += expected_device_size;
    plan.push_back({{src[1], dst[1], expected_device_size}});
    ++dst_idx;
  }
  return plan;
}
// Moves instances from well-filled devices to empty ones so that every device
// has data to consume.
//
// Does nothing when there is exactly one place. Otherwise:
//  1. Checks that the inputs and outputs pair up one-to-one by name and that
//     the input count is a multiple of the place count. Inputs are read as
//     place-major: input i belongs to place `i / data_num` and is data
//     variable `i % data_num`.
//  2. Looks up each input tensor in the per-place local execution scope and
//     records the instance count of every place. All data variables on one
//     place must report the same count.
//  3. Asks GetBalancePlan() for the transfers, then for each transfer and each
//     data variable copies the tail of the source tensor into the destination
//     tensor and shrinks the source to the remaining head. LoD, when present,
//     is split at the same instance boundary.
//
// NOTE(review): the instance count of a tensor with LoD is taken from
// `NumElements()`; presumably that is the number of level-0 sequences --
// confirm against LoDTensor.
void DataBalanceOpHandle::RunImpl() {
  if (places_.size() == 1) {
    return;
  }
  auto in_var_handles = DynamicCast<VarHandle>(inputs_);
  auto out_var_handles = DynamicCast<VarHandle>(outputs_);
  PADDLE_ENFORCE(in_var_handles.size() % places_.size() == 0);
  PADDLE_ENFORCE_EQ(
      in_var_handles.size(), out_var_handles.size(),
      "The NoDummyInputSize and NoDummyOutputSize should be equal.");
  int data_num = in_var_handles.size() / places_.size();
  WaitInputVarGenerated();

  // lod_tensors[data_idx][place_idx] is the tensor of data variable `data_idx`
  // on place `place_idx`. The outer vector has to be sized up front: it is
  // indexed by data_idx below, and indexing an empty vector is undefined
  // behaviour.
  std::vector<std::vector<LoDTensor *>> lod_tensors(data_num);
  std::vector<int> device_sizes;
  for (int i = 0; i < static_cast<int>(in_var_handles.size()); ++i) {
    PADDLE_ENFORCE_EQ(in_var_handles[i]->name_, out_var_handles[i]->name_,
                      "The name of input and output should be equal.");
    int place_idx = i / data_num;
    int data_idx = i % data_num;
    auto *local_scope =
        local_scopes_[place_idx]->FindVar(kLocalExecScopeName)->Get<Scope *>();
    auto *tensor_var = local_scope->FindVar(in_var_handles[i]->name_);
    // Fail with a diagnostic instead of dereferencing a null variable.
    PADDLE_ENFORCE(tensor_var != nullptr,
                   "Cannot find the input variable in the local scope.");
    PADDLE_ENFORCE(tensor_var->IsType<LoDTensor>());
    auto *tensor = tensor_var->GetMutable<LoDTensor>();
    PADDLE_ENFORCE(places_[place_idx] == tensor->place());
    lod_tensors[data_idx].push_back(tensor);
    int ins_size =
        tensor->lod().empty() ? tensor->dims()[0] : tensor->NumElements();
    if (data_idx == 0) {
      // The first data variable of a place defines that place's size.
      device_sizes.emplace_back(ins_size);
    } else {
      PADDLE_ENFORCE_EQ(ins_size, device_sizes.at(place_idx));
    }
  }

  const auto &balance_plan = GetBalancePlan(device_sizes);

  // Each plan entry is {src place index, dst place index, instance count}.
  for (const auto &trans : balance_plan) {
    for (int data_idx = 0; data_idx < data_num; ++data_idx) {
      LoDTensor *src_tensor = lod_tensors[data_idx][trans[0]];
      LoDTensor *dst_tensor = lod_tensors[data_idx][trans[1]];
      int trans_ins_size = trans[2];
      // Copy the LoD: the source tensor's LoD is overwritten below while the
      // original is still needed to compute both halves.
      LoD src_lod = src_tensor->lod();
      int src_ins_size =
          src_lod.empty() ? src_tensor->dims()[0] : src_tensor->NumElements();
      int cut_point = src_ins_size - trans_ins_size;
      if (!src_lod.empty()) {
        // Map the instance index through every LoD level to obtain the row
        // offset at which the underlying tensor is split.
        for (const auto &level : src_lod) {
          cut_point = level[cut_point];
        }
      }
      // Tail rows go to the destination; the source keeps the head rows.
      TensorCopySync(src_tensor->Slice(cut_point, src_tensor->dims()[0]),
                     dst_tensor->place(), dst_tensor);
      src_tensor->ShareDataWith(src_tensor->Slice(0, cut_point));
      if (!src_lod.empty()) {
        dst_tensor->set_lod(SliceInLevel(
            src_lod, 0, src_ins_size - trans_ins_size, src_ins_size));
        src_tensor->set_lod(
            SliceInLevel(src_lod, 0, 0, src_ins_size - trans_ins_size));
      }
    }
  }
}
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/data_balance_op_handle.h
0 → 100644
浏览文件 @
b6dc3a59
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <array>
#include <string>
#include <vector>
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
// Op handle that redistributes data between the places it was built with.
//
// The handle does not own its scopes or places: `local_scopes_` and `places_`
// are references to the vectors passed to the constructor, so those vectors
// must outlive the handle.
struct DataBalanceOpHandle : public OpHandleBase {
 public:
  // `local_scopes` and `places` are bound by reference, not copied.
  DataBalanceOpHandle(const std::vector<Scope *> &local_scopes,
                      const std::vector<platform::Place> &places);

  std::string Name() const override;

  // Always reports false for this handle.
  bool IsMultiDeviceTransfer() override { return false; }

 protected:
  void RunImpl() override;

 private:
  // Returns std::vector<(src_dev_id, dst_dev_id, trans_size)> computed from
  // the per-device batch sizes.
  std::vector<std::array<int, 3>> GetBalancePlan(
      const std::vector<int> &batch_size_per_device);

  const std::vector<Scope *> &local_scopes_;
  const std::vector<platform::Place> &places_;
};
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/multi_devices_graph_builder.cc
浏览文件 @
b6dc3a59
...
@@ -20,6 +20,7 @@
...
@@ -20,6 +20,7 @@
#include "paddle/fluid/framework/details/all_reduce_op_handle.h"
#include "paddle/fluid/framework/details/all_reduce_op_handle.h"
#include "paddle/fluid/framework/details/broadcast_op_handle.h"
#include "paddle/fluid/framework/details/broadcast_op_handle.h"
#include "paddle/fluid/framework/details/computation_op_handle.h"
#include "paddle/fluid/framework/details/computation_op_handle.h"
#include "paddle/fluid/framework/details/data_balance_op_handle.h"
#include "paddle/fluid/framework/details/multi_devices_graph_builder.h"
#include "paddle/fluid/framework/details/multi_devices_graph_builder.h"
#include "paddle/fluid/framework/details/reduce_op_handle.h"
#include "paddle/fluid/framework/details/reduce_op_handle.h"
#include "paddle/fluid/framework/details/rpc_op_handle.h"
#include "paddle/fluid/framework/details/rpc_op_handle.h"
...
@@ -217,6 +218,11 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
...
@@ -217,6 +218,11 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
// gradients.
// gradients.
CreateComputationalOps
(
&
result
,
*
op
,
places_
.
size
());
CreateComputationalOps
(
&
result
,
*
op
,
places_
.
size
());
if
(
op
->
Type
()
==
"read"
)
{
const
auto
&
data_var_names
=
op
->
Output
(
"Out"
);
InsertDataBalanceOp
(
&
result
,
data_var_names
);
}
if
(
!
is_forwarding
&&
places_
.
size
()
>
1
)
{
if
(
!
is_forwarding
&&
places_
.
size
()
>
1
)
{
// Currently, we assume that once gradient is generated, it can be
// Currently, we assume that once gradient is generated, it can be
// broadcast, and each gradient is only broadcast once.
// broadcast, and each gradient is only broadcast once.
...
@@ -360,6 +366,24 @@ void MultiDevSSAGraphBuilder::InsertAllReduceOp(SSAGraph *result,
...
@@ -360,6 +366,24 @@ void MultiDevSSAGraphBuilder::InsertAllReduceOp(SSAGraph *result,
}
}
}
}
// Appends a DataBalanceOpHandle to `result` and wires its variables.
//
// For every place, and for every variable name in `datas`, the most recent
// VarHandle recorded in `result->vars_` becomes an input of the new op and a
// newly created VarHandle (version = current number of versions) is appended
// and registered as the matching output. The variable must already have at
// least one version on each place.
void MultiDevSSAGraphBuilder::InsertDataBalanceOp(
    SSAGraph *result, const std::vector<std::string> &datas) const {
  result->ops_.emplace_back(new DataBalanceOpHandle(local_scopes_, places_));
  auto *balance_op = result->ops_.back().get();

  const size_t place_count = places_.size();
  for (size_t dev_id = 0; dev_id != place_count; ++dev_id) {
    auto &place = places_[dev_id];
    SetCommunicationContext(balance_op, place);

    for (const std::string &data_name : datas) {
      auto &vars = result->vars_[dev_id][data_name];
      PADDLE_ENFORCE(!vars.empty());

      // Consume the latest existing version...
      balance_op->AddInput(vars.back().get());

      // ...and produce the next one.
      auto *next_version = new VarHandle(vars.size(), dev_id, data_name, place);
      vars.emplace_back(next_version);
      balance_op->AddOutput(next_version);
    }
  }
}
bool
MultiDevSSAGraphBuilder
::
IsParameterGradientOnce
(
bool
MultiDevSSAGraphBuilder
::
IsParameterGradientOnce
(
const
std
::
string
&
og
,
const
std
::
string
&
og
,
std
::
unordered_set
<
std
::
string
>
*
og_has_been_broadcast
)
const
{
std
::
unordered_set
<
std
::
string
>
*
og_has_been_broadcast
)
const
{
...
@@ -509,7 +533,8 @@ void MultiDevSSAGraphBuilder::CreateRPCOp(SSAGraph *result,
...
@@ -509,7 +533,8 @@ void MultiDevSSAGraphBuilder::CreateRPCOp(SSAGraph *result,
op_dev_id
=
GetVarDeviceID
(
op
.
InputArgumentNames
()[
0
]);
op_dev_id
=
GetVarDeviceID
(
op
.
InputArgumentNames
()[
0
]);
// the variable name which contains .block means it was splited by
// the variable name which contains .block means it was splited by
// split_byref op
// split_byref op
// so that we can balance the variable blocks to all the pserver instances.
// so that we can balance the variable blocks to all the pserver
// instances.
if
(
strategy_
.
reduce_
==
BuildStrategy
::
ReduceStrategy
::
kAllReduce
&&
if
(
strategy_
.
reduce_
==
BuildStrategy
::
ReduceStrategy
::
kAllReduce
&&
op
.
InputArgumentNames
()[
0
].
find
(
".block"
)
==
std
::
string
::
npos
)
{
op
.
InputArgumentNames
()[
0
].
find
(
".block"
)
==
std
::
string
::
npos
)
{
op_dev_id
=
GetAppropriateDeviceID
(
op
.
InputArgumentNames
());
op_dev_id
=
GetAppropriateDeviceID
(
op
.
InputArgumentNames
());
...
...
paddle/fluid/framework/details/multi_devices_graph_builder.h
浏览文件 @
b6dc3a59
...
@@ -101,6 +101,9 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
...
@@ -101,6 +101,9 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
void
InsertAllReduceOp
(
SSAGraph
*
result
,
const
std
::
string
&
og
)
const
;
void
InsertAllReduceOp
(
SSAGraph
*
result
,
const
std
::
string
&
og
)
const
;
void
InsertDataBalanceOp
(
SSAGraph
*
result
,
const
std
::
vector
<
std
::
string
>
&
datas
)
const
;
void
CreateBroadcastOp
(
SSAGraph
*
result
,
const
std
::
string
&
p_name
,
void
CreateBroadcastOp
(
SSAGraph
*
result
,
const
std
::
string
&
p_name
,
size_t
src_dev_id
)
const
;
size_t
src_dev_id
)
const
;
...
...
paddle/fluid/framework/lod_tensor.cc
浏览文件 @
b6dc3a59
...
@@ -68,7 +68,7 @@ std::ostream &operator<<(std::ostream &os, const LoDTensor &t) {
...
@@ -68,7 +68,7 @@ std::ostream &operator<<(std::ostream &os, const LoDTensor &t) {
// only print first ten elements
// only print first ten elements
int64_t
size
=
t
.
numel
()
<
10
?
t
.
numel
()
:
10
;
int64_t
size
=
t
.
numel
()
<
10
?
t
.
numel
()
:
10
;
for
(
int64_t
i
=
0
;
i
<
size
;
++
i
)
{
for
(
int64_t
i
=
0
;
i
<
size
;
++
i
)
{
if
(
t
.
type
().
hash_code
()
==
typeid
(
float
).
hash_code
())
{
if
(
t
.
type
().
hash_code
()
==
typeid
(
float
).
hash_code
())
{
// NOLINT
os
<<
t
.
data
<
float
>
()[
i
]
<<
" "
;
os
<<
t
.
data
<
float
>
()[
i
]
<<
" "
;
}
else
if
(
t
.
type
().
hash_code
()
==
typeid
(
int64_t
).
hash_code
())
{
}
else
if
(
t
.
type
().
hash_code
()
==
typeid
(
int64_t
).
hash_code
())
{
os
<<
t
.
data
<
int64_t
>
()[
i
]
<<
" "
;
os
<<
t
.
data
<
int64_t
>
()[
i
]
<<
" "
;
...
@@ -89,6 +89,7 @@ std::string LoDToString(const LoD &lod) {
...
@@ -89,6 +89,7 @@ std::string LoDToString(const LoD &lod) {
LoD
SliceInLevel
(
const
LoD
&
in
,
size_t
level
,
size_t
elem_begin
,
LoD
SliceInLevel
(
const
LoD
&
in
,
size_t
level
,
size_t
elem_begin
,
size_t
elem_end
)
{
size_t
elem_end
)
{
PADDLE_ENFORCE_LT
(
level
,
in
.
size
());
PADDLE_ENFORCE_LT
(
level
,
in
.
size
());
PADDLE_ENFORCE_LT
(
elem_begin
,
elem_end
);
PADDLE_ENFORCE_LT
(
elem_end
,
in
[
level
].
size
());
PADDLE_ENFORCE_LT
(
elem_end
,
in
[
level
].
size
());
LoD
res
;
LoD
res
;
...
...
paddle/fluid/operators/read_op.cc
浏览文件 @
b6dc3a59
...
@@ -66,9 +66,15 @@ class ReadOp : public framework::OperatorBase {
...
@@ -66,9 +66,15 @@ class ReadOp : public framework::OperatorBase {
std
::
vector
<
std
::
string
>
out_arg_names
=
Outputs
(
"Out"
);
std
::
vector
<
std
::
string
>
out_arg_names
=
Outputs
(
"Out"
);
std
::
vector
<
framework
::
LoDTensor
>
ins
;
std
::
vector
<
framework
::
LoDTensor
>
ins
;
reader
->
ReadNext
(
&
ins
);
reader
->
ReadNext
(
&
ins
);
PADDLE_ENFORCE
(
!
ins
.
empty
(),
"There is no next data."
);
if
(
ins
.
empty
())
{
ins
.
resize
(
out_arg_names
.
size
());
for
(
auto
&
tensor
:
ins
)
{
// data type is not important for subsequent DataBalanceOpHandle
tensor
.
mutable_data
<
float
>
(
framework
::
make_ddim
({
0
}),
dev_place
);
}
}
PADDLE_ENFORCE_EQ
(
ins
.
size
(),
out_arg_names
.
size
());
PADDLE_ENFORCE_EQ
(
ins
.
size
(),
out_arg_names
.
size
());
for
(
size_t
i
=
0
;
i
<
in
s
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
out_arg_name
s
.
size
();
++
i
)
{
auto
*
out
=
auto
*
out
=
scope
.
FindVar
(
out_arg_names
[
i
])
->
GetMutable
<
framework
::
LoDTensor
>
();
scope
.
FindVar
(
out_arg_names
[
i
])
->
GetMutable
<
framework
::
LoDTensor
>
();
out
->
ShareDataWith
(
ins
[
i
]);
out
->
ShareDataWith
(
ins
[
i
]);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录