Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
6d752baf
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
6d752baf
编写于
6月 12, 2018
作者:
Y
Yancey1989
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
use get_appropriate_dev to schedule rpc op
上级
4444e79e
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
54 addition
and
69 deletion
+54
-69
paddle/fluid/framework/details/multi_devices_graph_builder.cc
...le/fluid/framework/details/multi_devices_graph_builder.cc
+48
-51
paddle/fluid/framework/details/multi_devices_graph_builder.h
paddle/fluid/framework/details/multi_devices_graph_builder.h
+3
-12
paddle/fluid/framework/details/ssa_graph_builder.h
paddle/fluid/framework/details/ssa_graph_builder.h
+1
-3
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+2
-3
未找到文件。
paddle/fluid/framework/details/multi_devices_graph_builder.cc
浏览文件 @
6d752baf
...
...
@@ -142,7 +142,6 @@ bool MultiDevSSAGraphBuilder::IsDistTrainOp(
std
::
unique_ptr
<
SSAGraph
>
MultiDevSSAGraphBuilder
::
Build
(
const
ProgramDesc
&
program
)
const
{
VLOG
(
3
)
<<
"Building ...."
;
std
::
unordered_map
<
std
::
string
,
VarDesc
*>
all_vars
;
for
(
auto
*
var
:
program
.
Block
(
0
).
AllVars
())
{
all_vars
[
var
->
Name
()]
=
var
;
...
...
@@ -162,36 +161,32 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
auto
send_vars
=
FindDistTrainSendVars
(
program
);
auto
recv_vars
=
FindDistTrainRecvVars
(
program
);
std
::
vector
<
std
::
unordered_set
<
std
::
string
>>
var_name_on_devices
;
std
::
vector
<
std
::
unordered_set
<
std
::
string
>>
bcast_var_name_set
;
var_name_on_devices
.
resize
(
places_
.
size
());
bcast_var_name_set
.
resize
(
places_
.
size
());
size_t
cur_device_id
=
0
;
std
::
vector
<
int64_t
>
balance_grads
(
places_
.
size
(),
0
);
auto
get_appropriate_dev
=
[
&
](
std
::
string
&
g_name
)
->
size_t
{
auto
var_desc
=
all_vars
.
at
(
g_name
);
PADDLE_ENFORCE_NOT_NULL
(
var_desc
);
auto
dim
=
framework
::
make_ddim
(
var_desc
->
GetShape
());
int64_t
numel
=
framework
::
product
(
dim
);
PADDLE_ENFORCE_GE
(
numel
,
0
);
auto
get_appropriate_dev
=
[
&
](
std
::
vector
<
std
::
string
>
var_names
)
->
size_t
{
int64_t
numel_all
=
0
;
for
(
auto
var_name
:
var_names
)
{
auto
var_desc
=
all_vars
.
at
(
var_name
);
PADDLE_ENFORCE_NOT_NULL
(
var_desc
);
auto
dim
=
framework
::
make_ddim
(
var_desc
->
GetShape
());
int64_t
numel
=
framework
::
product
(
dim
);
PADDLE_ENFORCE_GT
(
numel
,
0
);
numel_all
+=
numel
;
}
auto
smallest
=
std
::
min_element
(
std
::
begin
(
balance_grads
),
std
::
end
(
balance_grads
));
size_t
dev_id
=
static_cast
<
size_t
>
(
std
::
distance
(
std
::
begin
(
balance_grads
),
smallest
));
balance_grads
[
dev_id
]
+=
numel
;
balance_grads
[
dev_id
]
+=
numel
_all
;
return
dev_id
;
};
bool
is_forwarding
=
true
;
int
rpc_op_device_id
=
0
;
auto
schedule_rpc_op
=
[
&
]()
->
void
{
rpc_op_device_id
++
;
if
(
rpc_op_device_id
>=
static_cast
<
int
>
(
places_
.
size
()))
{
rpc_op_device_id
=
0
;
}
};
for
(
auto
*
op
:
program
.
Block
(
0
).
AllOps
())
{
if
(
boost
::
get
<
int
>
(
...
...
@@ -200,37 +195,40 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
// append rpc op if program is distributed trainer main program.
// always use the first device
if
(
op
->
Type
()
==
"send_vars"
)
{
auto
got
=
remote_vars_devices_
.
find
(
op
->
InputArgumentNames
()[
0
]);
if
(
got
==
remote_vars_devices_
.
end
())
{
schedule_rpc_op
();
}
else
{
rpc_op_device_id
=
got
->
second
;
int
op_dev_id
=
GetVarDeviceID
(
op
->
InputArgumentNames
()[
0
]);
if
(
op_dev_id
==
-
1
)
{
op_dev_id
=
get_appropriate_dev
(
op
->
InputArgumentNames
());
for
(
auto
&
varname
:
op
->
InputArgumentNames
())
{
var_name_on_devices_
.
emplace
(
varname
,
op_dev_id
);
}
}
CreateRPCOp
(
&
result
,
*
op
,
rpc_op_device
_id
);
CreateRPCOp
(
&
result
,
*
op
,
op_dev
_id
);
}
else
if
(
op
->
Type
()
==
"recv"
)
{
schedule_rpc_op
(
);
int
op_dev_id
=
get_appropriate_dev
(
op
->
OutputArgumentNames
()
);
for
(
auto
&
varname
:
op
->
OutputArgumentNames
())
{
remote_vars_devices_
.
insert
({
varname
,
rpc_op_device_id
}
);
var_name_on_devices_
.
emplace
(
varname
,
op_dev_id
);
}
CreateRPCOp
(
&
result
,
*
op
,
rpc_op_device
_id
);
CreateRPCOp
(
&
result
,
*
op
,
op_dev
_id
);
}
else
{
// send_barrier and fetch_barrier op would run on device 0
CreateRPCOp
(
&
result
,
*
op
,
0
);
}
}
else
if
(
IsDistTrainOp
(
*
op
,
send_vars
,
recv_vars
))
{
if
(
op
->
Type
()
==
"split_byref"
)
{
schedule_rpc_op
(
);
int
op_dev_id
=
get_appropriate_dev
(
op
->
OutputArgumentNames
()
);
for
(
auto
&
varname
:
op
->
OutputArgumentNames
())
{
remote_vars_devices_
.
insert
({
varname
,
rpc_op_device_id
}
);
var_name_on_devices_
.
emplace
(
varname
,
op_dev_id
);
}
CreateDistTrainOp
(
&
result
,
*
op
,
rpc_op_device_id
);
}
if
(
op
->
Type
()
==
"concat"
)
{
auto
got
=
remote_vars_devices_
.
find
(
op
->
InputArgumentNames
()[
0
]);
PADDLE_ENFORCE
(
got
!=
remote_vars_devices_
.
end
(),
CreateDistTrainOp
(
&
result
,
*
op
,
op_dev_id
);
}
else
if
(
op
->
Type
()
==
"concat"
)
{
int
op_dev_id
=
GetVarDeviceID
(
op
->
InputArgumentNames
()[
0
]);
PADDLE_ENFORCE
(
op_dev_id
!=
-
1
,
"can not find right place to concatenate received var."
);
CreateDistTrainOp
(
&
result
,
*
op
,
got
->
secon
d
);
CreateDistTrainOp
(
&
result
,
*
op
,
op_dev_i
d
);
}
else
{
CreateDistTrainOp
(
&
result
,
*
op
,
0
);
PADDLE_ENFORCE
(
"the distribute training related op should be in [split_byref, "
"concat]."
);
}
}
else
if
(
IsScaleLossOp
(
*
op
))
{
// user can customize loss@grad if not use_default_grad_scale_
...
...
@@ -240,13 +238,13 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
}
is_forwarding
=
false
;
}
else
{
int
op_dev_id
=
GetOpDeviceID
(
var_name_on_devices
,
*
op
);
int
op_dev_id
=
GetOpDeviceID
(
*
op
);
if
(
op_dev_id
==
-
1
)
{
// var on all device
CreateComputationalOps
(
&
result
,
*
op
,
places_
.
size
());
}
else
{
CreateComputationalOp
(
&
result
,
*
op
,
op_dev_id
);
for
(
auto
&
var_name
:
op
->
OutputArgumentNames
())
{
var_name_on_devices
[
op_dev_id
].
emplace
(
var_name
);
var_name_on_devices
_
.
emplace
(
var_name
,
op_dev_id
);
}
}
if
(
!
is_forwarding
&&
places_
.
size
()
>
1
)
{
...
...
@@ -269,9 +267,9 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
switch
(
strategy_
.
reduce_
)
{
case
BuildStrategy
::
ReduceStrategy
::
kReduce
:
cur_device_id
=
get_appropriate_dev
(
g_name
);
cur_device_id
=
get_appropriate_dev
(
{
g_name
}
);
CreateReduceOp
(
&
result
,
g_name
,
cur_device_id
);
var_name_on_devices
[
cur_device_id
].
emplace
(
g_name
);
var_name_on_devices
_
.
emplace
(
g_name
,
cur_device_id
);
bcast_var_name_set
[
cur_device_id
].
emplace
(
p_name
);
break
;
case
BuildStrategy
::
ReduceStrategy
::
kAllReduce
:
...
...
@@ -402,24 +400,23 @@ bool MultiDevSSAGraphBuilder::IsParameterGradientOnce(
return
is_pg_once
;
}
int
MultiDevSSAGraphBuilder
::
GetOpDeviceID
(
const
std
::
vector
<
std
::
unordered_set
<
std
::
string
>>
&
var_name_on_devices
,
const
OpDesc
&
op
)
const
{
int
MultiDevSSAGraphBuilder
::
GetOpDeviceID
(
const
OpDesc
&
op
)
const
{
if
(
strategy_
.
reduce_
!=
BuildStrategy
::
ReduceStrategy
::
kReduce
)
{
return
-
1
;
}
int
var_dev_id
=
-
1
;
for
(
auto
&
var_name
:
op
.
InputArgumentNames
())
{
if
(
var_dev_id
!=
-
1
)
break
;
for
(
size_t
i
=
0
;
i
<
var_name_on_devices
.
size
();
++
i
)
{
if
(
var_name_on_devices
[
i
].
count
(
var_name
))
{
var_dev_id
=
static_cast
<
int
>
(
i
);
break
;
}
for
(
auto
&
varname
:
op
.
InputArgumentNames
())
{
int
dev_id
=
GetVarDeviceID
(
varname
);
if
(
dev_id
!=
-
1
)
{
return
dev_id
;
}
}
return
var_dev_id
;
return
-
1
;
}
int
MultiDevSSAGraphBuilder
::
GetVarDeviceID
(
const
std
::
string
&
varname
)
const
{
auto
got
=
var_name_on_devices_
.
find
(
varname
);
return
got
==
var_name_on_devices_
.
end
()
?
-
1
:
got
->
second
;
}
void
MultiDevSSAGraphBuilder
::
CreateScaleLossGradOp
(
SSAGraph
*
result
)
const
{
...
...
paddle/fluid/framework/details/multi_devices_graph_builder.h
浏览文件 @
6d752baf
...
...
@@ -47,14 +47,7 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
#endif
std
::
unique_ptr
<
SSAGraph
>
Build
(
const
ProgramDesc
&
program
)
const
override
;
int
GetRemoteVarDeviceId
(
const
std
::
string
&
var_name
)
const
override
{
auto
got
=
remote_vars_devices_
.
find
(
var_name
);
if
(
got
!=
remote_vars_devices_
.
end
())
{
return
got
->
second
;
}
return
-
1
;
}
int
GetVarDeviceID
(
const
std
::
string
&
varname
)
const
;
private:
void
CreateOpHandleIOs
(
SSAGraph
*
result
,
const
OpDesc
&
op
,
...
...
@@ -105,9 +98,7 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
const
std
::
string
&
og
,
std
::
unordered_set
<
std
::
string
>
*
og_has_been_broadcast
)
const
;
int
GetOpDeviceID
(
const
std
::
vector
<
std
::
unordered_set
<
std
::
string
>>
&
var_name_on_devices
,
const
OpDesc
&
op
)
const
;
int
GetOpDeviceID
(
const
OpDesc
&
op
)
const
;
void
InsertAllReduceOp
(
SSAGraph
*
result
,
const
std
::
string
&
og
)
const
;
...
...
@@ -120,7 +111,7 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
private:
BuildStrategy
strategy_
;
mutable
std
::
unordered_map
<
std
::
string
,
int
>
remote_vars
_devices_
;
mutable
std
::
unordered_map
<
std
::
string
,
int
>
var_name_on
_devices_
;
void
SetCommunicationContext
(
OpHandleBase
*
op_handle
,
const
platform
::
Place
&
p
)
const
;
...
...
paddle/fluid/framework/details/ssa_graph_builder.h
浏览文件 @
6d752baf
...
...
@@ -30,9 +30,7 @@ class SSAGraphBuilder {
SSAGraphBuilder
()
{}
virtual
~
SSAGraphBuilder
()
{}
virtual
std
::
unique_ptr
<
SSAGraph
>
Build
(
const
ProgramDesc
&
program
)
const
=
0
;
virtual
int
GetRemoteVarDeviceId
(
const
std
::
string
&
var_name
)
const
{
return
-
1
;
}
virtual
int
GetVarDeviceID
(
const
std
::
string
&
var_name
)
const
{
return
-
1
;
}
DISABLE_COPY_AND_ASSIGN
(
SSAGraphBuilder
);
...
...
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
6d752baf
...
...
@@ -161,9 +161,8 @@ void ParallelExecutor::BCastParamsToGPUs(
}
auto
&
nccl_ctx
=
member_
->
nccl_ctxs_
->
at
(
place
);
if
(
builder_
.
get
()
!=
nullptr
&&
builder_
->
GetRemoteVarDeviceId
(
var
)
!=
-
1
)
{
int
place_id
=
builder_
->
GetRemoteVarDeviceId
(
var
);
if
(
builder_
.
get
()
!=
nullptr
&&
builder_
->
GetVarDeviceID
(
var
)
!=
-
1
)
{
int
place_id
=
builder_
->
GetVarDeviceID
(
var
);
platform
::
dynload
::
ncclBcast
(
buffer
,
numel
,
data_type
,
place_id
,
nccl_ctx
.
comm_
,
nccl_ctx
.
stream
());
}
else
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录