Commit 2fa8df1c
Author: Xin Pan
Date: July 13, 2018
Parent: 37e51443

separate graph building pass and graph-based pe builder

Showing 15 changed files with 273 additions and 182 deletions (+273 -182)
paddle/fluid/framework/details/broadcast_op_handle_test.cc      +5    -5
paddle/fluid/framework/details/gather_op_handle_test.cc         +5    -5
paddle/fluid/framework/details/multi_devices_graph_builder.cc   +147  -113
paddle/fluid/framework/details/multi_devices_graph_builder.h    +11   -14
paddle/fluid/framework/details/reduce_op_handle_test.cc         +3    -3
paddle/fluid/framework/details/ssa_graph_builder.cc             +11   -15
paddle/fluid/framework/details/ssa_graph_builder.h              +6    -6
paddle/fluid/framework/details/ssa_graph_checker.h              +3    -3
paddle/fluid/framework/details/ssa_graph_printer.h              +3    -3
paddle/fluid/framework/ir/graph.cc                              +33   -0
paddle/fluid/framework/ir/graph.h                               +19   -2
paddle/fluid/framework/ir/node.h                                +20   -5
paddle/fluid/framework/ir/pass.h                                +4    -4
paddle/fluid/framework/parallel_executor.cc                     +1    -4
python/paddle/fluid/parallel_executor.py                        +2    -0
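The diff below turns the SSA graph builders into passes over an ir::Graph instead of direct consumers of a ProgramDesc: the program is first converted to a graph, and each builder rewrites that graph through a virtual Apply method. As orientation only, here is a minimal, self-contained C++ sketch of that pass pattern; the names mirror the Paddle classes but the bodies are simplified stand-ins, not the framework's actual implementation.

    #include <iostream>
    #include <memory>
    #include <string>
    #include <utility>
    #include <vector>

    // Simplified stand-ins for the framework's ir::Node / ir::Graph / ir::Pass.
    struct Node {
      std::string name;
      std::vector<Node *> inputs;
      std::vector<Node *> outputs;
    };

    struct Graph {
      // The graph owns every node; passes and handles only keep raw pointers.
      std::vector<std::unique_ptr<Node>> nodes;
      Node *CreateNode(const std::string &name) {
        nodes.emplace_back(new Node{name, {}, {}});
        return nodes.back().get();
      }
    };

    // A pass consumes a graph and returns the (possibly rewritten) graph.
    class Pass {
     public:
      virtual ~Pass() {}
      virtual std::unique_ptr<Graph> Apply(std::unique_ptr<Graph> graph) const = 0;
    };

    // Analogue of a graph builder pass: it adds nodes to the graph it is given.
    class DummyBuilderPass : public Pass {
     public:
      std::unique_ptr<Graph> Apply(std::unique_ptr<Graph> graph) const override {
        Node *op = graph->CreateNode("dummy_op");
        Node *out = graph->CreateNode("dummy_out");
        op->outputs.push_back(out);
        out->inputs.push_back(op);
        return graph;
      }
    };

    int main() {
      std::unique_ptr<Graph> graph(new Graph());
      DummyBuilderPass pass;
      graph = pass.Apply(std::move(graph));
      std::cout << "nodes after pass: " << graph->nodes.size() << std::endl;  // 2
      return 0;
    }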
paddle/fluid/framework/details/broadcast_op_handle_test.cc

@@ -96,7 +96,7 @@ struct TestBroadcastOpHandle {
     }
     param_scopes_[input_scope_idx]->Var("input");
-    std::unique_ptr<ir::Node> n(new ir::Node(ir::Node::Type::kOperation));
+    std::unique_ptr<ir::Node> n(new ir::Node());
     if (use_gpu_) {
 #ifdef PADDLE_WITH_CUDA
       op_handle_.reset(new BroadcastOpHandle(n.get(), local_scopes_, gpu_list_,

@@ -114,7 +114,7 @@ struct TestBroadcastOpHandle {
 #endif
     }
-    std::unique_ptr<ir::Node> v(new ir::Node(ir::Node::Type::kVariable));
+    std::unique_ptr<ir::Node> v(new ir::Node());
     auto *in_var_handle = new VarHandle(v.get(), 1, input_scope_idx, "input",
                                         gpu_list_[input_scope_idx]);
     vars_.emplace_back(in_var_handle);

@@ -122,7 +122,7 @@ struct TestBroadcastOpHandle {
     // add dummy var
-    std::unique_ptr<ir::Node> v2(new ir::Node(ir::Node::Type::kVariable));
+    std::unique_ptr<ir::Node> v2(new ir::Node());
     vars_.emplace_back(new DummyVarHandle(v2.get()));
     DummyVarHandle *dummy_var_handle =
         static_cast<DummyVarHandle *>(vars_.back().get());

@@ -133,7 +133,7 @@ struct TestBroadcastOpHandle {
       if (!use_gpu_) {
         op_handle_->SetDeviceContext(gpu_list_[j], ctxs_[j].get());
       }
-      std::unique_ptr<ir::Node> v3(new ir::Node(ir::Node::Type::kVariable));
+      std::unique_ptr<ir::Node> v3(new ir::Node());
      VarHandle *out_var_handle =
          new VarHandle(v3.get(), 2, j, "out", gpu_list_[j]);
      vars_.emplace_back(out_var_handle);

@@ -141,7 +141,7 @@ struct TestBroadcastOpHandle {
     }
     // add dummy var
-    std::unique_ptr<ir::Node> v4(new ir::Node(ir::Node::Type::kVariable));
+    std::unique_ptr<ir::Node> v4(new ir::Node());
     vars_.emplace_back(new DummyVarHandle(v4.get()));
     DummyVarHandle *out_dummy_var_handle =
         static_cast<DummyVarHandle *>(vars_.back().get());
paddle/fluid/framework/details/gather_op_handle_test.cc

@@ -82,13 +82,13 @@ struct TestGatherOpHandle {
     }
     param_scopes_[input_scope_idx]->Var("out");
-    nodes.emplace_back(new ir::Node(ir::Node::Type::kOperation));
+    nodes.emplace_back(new ir::Node());
     op_handle_.reset(
         new GatherOpHandle(nodes.back().get(), local_scopes_, gpu_list_));
     // add input
     for (size_t j = 0; j < gpu_list_.size(); ++j) {
       op_handle_->SetDeviceContext(gpu_list_[j], ctxs_[j].get());
-      nodes.emplace_back(new ir::Node(ir::Node::Type::kVariable));
+      nodes.emplace_back(new ir::Node());
       auto *in_var_handle =
           new VarHandle(nodes.back().get(), 1, j, "input", gpu_list_[j]);
       vars_.emplace_back(in_var_handle);

@@ -96,7 +96,7 @@ struct TestGatherOpHandle {
     }
     // add dummy var
-    nodes.emplace_back(new ir::Node(ir::Node::Type::kVariable));
+    nodes.emplace_back(new ir::Node());
     vars_.emplace_back(new DummyVarHandle(nodes.back().get()));
     DummyVarHandle *in_dummy_var_handle =
         static_cast<DummyVarHandle *>(vars_.back().get());

@@ -104,14 +104,14 @@ struct TestGatherOpHandle {
     op_handle_->AddInput(in_dummy_var_handle);
     // add output
-    nodes.emplace_back(new ir::Node(ir::Node::Type::kVariable));
+    nodes.emplace_back(new ir::Node());
     auto *out_var_handle = new VarHandle(nodes.back().get(), 2, input_scope_idx,
                                          "out", gpu_list_[input_scope_idx]);
     vars_.emplace_back(out_var_handle);
     op_handle_->AddOutput(out_var_handle);
     // add dummy var
-    nodes.emplace_back(new ir::Node(ir::Node::Type::kVariable));
+    nodes.emplace_back(new ir::Node());
     vars_.emplace_back(new DummyVarHandle(nodes.back().get()));
     DummyVarHandle *dummy_var_handle =
         static_cast<DummyVarHandle *>(vars_.back().get());
paddle/fluid/framework/details/multi_devices_graph_builder.cc

@@ -67,30 +67,31 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
   }
 }

-void MultiDevSSAGraphBuilder::CreateOpHandleIOs(Graph *result, const OpDesc &op,
+void MultiDevSSAGraphBuilder::CreateOpHandleIOs(Graph *result, ir::Node *node,
                                                 size_t place_id) const {
   auto p = places_[place_id];
   auto *op_handle = result->Get<GraphOps>("ops").back().get();
   op_handle->SetDeviceContext(p,
                               platform::DeviceContextPool::Instance().Get(p));

-  for (auto &each_var_name : op.InputArgumentNames()) {
-    VarHandle *var = CreateOrGetLatestVarHandle(result, each_var_name, p, place_id);
+  for (ir::Node *input : node->inputs) {
+    VarHandle *var = CreateOrGetLatestVarHandle(result, input, p, place_id);
     op_handle->AddInput(var);
   }

-  for (auto &each_var_name : op.OutputArgumentNames()) {
-    CreateOpOutput(result, op_handle, each_var_name, p, place_id);
+  for (ir::Node *output : node->outputs) {
+    CreateOpOutput(result, op_handle, output, p, place_id);
   }
 }

 std::vector<std::string> MultiDevSSAGraphBuilder::FindDistTrainSendVars(
-    const ProgramDesc &program) const {
+    const std::vector<std::unique_ptr<ir::Node>> &nodes) const {
   std::vector<std::string> send_vars;
   // since parameters are all in block 0,
   // it's enough to only scan send ops in block 0
-  for (auto *op : program.Block(0).AllOps()) {
+  for (auto &node : nodes) {
+    if (!node->Op()) continue;
+    OpDesc *op = node->Op();
     // TODO(Yancey1989): use a graceful method to find send op,
     // instead of the the hard code string
     if (op->Type() == "send") {

@@ -104,9 +105,11 @@ std::vector<std::string> MultiDevSSAGraphBuilder::FindDistTrainSendVars(
 }

 std::vector<std::string> MultiDevSSAGraphBuilder::FindDistTrainRecvVars(
-    const ProgramDesc &program) const {
+    const std::vector<std::unique_ptr<ir::Node>> &nodes) const {
   std::vector<std::string> recv_vars;
-  for (auto *op : program.Block(0).AllOps()) {
+  for (auto &node : nodes) {
+    if (!node->Op()) continue;
+    OpDesc *op = node->Op();
     // TODO(Yancey1989): use a graceful method to find recv op,
     // instead of the hard code string
     if (op->Type() == "recv") {

@@ -120,7 +123,7 @@ std::vector<std::string> MultiDevSSAGraphBuilder::FindDistTrainRecvVars(
 }

 bool MultiDevSSAGraphBuilder::IsDistTrainOp(
-    const OpDesc &op, const std::vector<std::string> &send_vars,
+    ir::Node *node, const std::vector<std::string> &send_vars,
     const std::vector<std::string> &recv_vars) const {
   if (send_vars.size() == 0 || recv_vars.size() == 0) {
     return false;

@@ -143,8 +146,17 @@ bool MultiDevSSAGraphBuilder::IsDistTrainOp(
     return false;
   };

-  return checker(op.OutputArgumentNames(), send_vars) ||
-         checker(op.InputArgumentNames(), recv_vars);
+  std::vector<std::string> input_var_names;
+  std::vector<std::string> output_var_names;
+  for (ir::Node *input : node->inputs) {
+    input_var_names.push_back(input->Var()->Name());
+  }
+  for (ir::Node *output : node->outputs) {
+    output_var_names.push_back(output->Var()->Name());
+  }
+  return checker(output_var_names, send_vars) ||
+         checker(input_var_names, recv_vars);
 }

 size_t MultiDevSSAGraphBuilder::GetAppropriateDeviceID(

@@ -167,11 +179,16 @@ size_t MultiDevSSAGraphBuilder::GetAppropriateDeviceID(
   return dev_id;
 }

-std::unique_ptr<Graph> MultiDevSSAGraphBuilder::Build(
+std::unique_ptr<Graph> MultiDevSSAGraphBuilder::Apply(
     std::unique_ptr<Graph> graph) const {
-  const ProgramDesc &program = graph->Program();
-  for (auto *var : program.Block(0).AllVars()) {
-    all_vars_.emplace(var->Name(), var);
+  auto nodes = std::move(graph->nodes);
+  graph->nodes.clear();
+  LOG(ERROR) << "origin nodes count " << nodes.size();
+  for (auto &node : nodes) {
+    if (node->Var()) {
+      all_vars_.emplace(node->Var()->Name(), node->Var());
+    }
   }

   Graph &result = *graph;

@@ -181,10 +198,11 @@ std::unique_ptr<Graph> MultiDevSSAGraphBuilder::Build(
   result.Set("vars", new GraphVars(places_.size()));
   result.Set("dep_vars", new GraphDepVars);
   result.Set("ops", new GraphOps);

   // find send/recv vars so that we can place the distributed training
   // realted op in the place 0
-  auto send_vars = FindDistTrainSendVars(program);
-  auto recv_vars = FindDistTrainRecvVars(program);
+  auto send_vars = FindDistTrainSendVars(nodes);
+  auto recv_vars = FindDistTrainRecvVars(nodes);

   std::vector<std::unordered_set<std::string>> bcast_var_name_set;
   bcast_var_name_set.resize(places_.size());

@@ -192,14 +210,16 @@ std::unique_ptr<Graph> MultiDevSSAGraphBuilder::Build(
   size_t cur_device_id = 0;
   bool is_forwarding = true;

-  for (auto *op : program.Block(0).AllOps()) {
+  // TODO(panyx0718): FIXME: nodes should be sorted by "program" order.
+  for (auto &node : nodes) {
+    if (!node->Op()) continue;
     if (boost::get<int>(
-            op->GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName())) ==
+            node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName())) ==
         static_cast<int>(OpRole::kRPC)) {
-      CreateRPCOp(&result, *op);
-    } else if (IsDistTrainOp(*op, send_vars, recv_vars)) {
-      CreateDistTrainOp(&result, *op);
-    } else if (IsScaleLossOp(*op)) {
+      CreateRPCOp(&result, node.get());
+    } else if (IsDistTrainOp(node.get(), send_vars, recv_vars)) {
+      CreateDistTrainOp(&result, node.get());
+    } else if (IsScaleLossOp(node.get())) {
       // user can customize loss@grad if not use_default_grad_scale_
       if (strategy_.gradient_scale_ !=
           BuildStrategy::GradientScaleStrategy::kCustomized) {

@@ -211,33 +231,35 @@ std::unique_ptr<Graph> MultiDevSSAGraphBuilder::Build(
       // the block.
       is_forwarding = false;
     } else {
-      int op_dev_id = GetOpDeviceID(*op);
+      int op_dev_id = GetOpDeviceID(node.get());
       if (op_dev_id != -1) {  // This op only runs on one specific device.
-        CreateComputationalOp(&result, *op, op_dev_id);
-        for (auto &var_name : op->OutputArgumentNames()) {
-          var_name_on_devices_.emplace(var_name, op_dev_id);
+        CreateComputationalOp(&result, node.get(), op_dev_id);
+        for (ir::Node *n : node->outputs) {
+          var_name_on_devices_.emplace(n->Var()->Name(), op_dev_id);
         }
       } else {
         // This op runs on all devices, and its output may have parameter's
         // gradients.
-        if (op->Type() == "read" && strategy_.enable_data_balance_) {
-          op->SetAttr("throw_eof_exp", false);
-          CreateComputationalOps(&result, *op, places_.size());
-          const auto &data_var_names = op->Output("Out");
+        if (node->Op()->Type() == "read" && strategy_.enable_data_balance_) {
+          node->Op()->SetAttr("throw_eof_exp", false);
+          CreateComputationalOps(&result, node.get(), places_.size());
+          // TODO(panyx0718): builder shouldn't depend on the out logic of
+          // a specific op.
+          const auto &data_var_names = node->Op()->Output("Out");
           InsertDataBalanceOp(&result, data_var_names);
         } else {
-          CreateComputationalOps(&result, *op, places_.size());
+          CreateComputationalOps(&result, node.get(), places_.size());
         }

         if (!is_forwarding && places_.size() > 1) {
           // Currently, we assume that once gradient is generated, it can be
           // broadcast, and each gradient is only broadcast once.
-          if (static_cast<bool>(boost::get<int>(op->GetAttr(
+          if (static_cast<bool>(boost::get<int>(node->Op()->GetAttr(
                                     OpProtoAndCheckerMaker::OpRoleAttrName())) &
                                 static_cast<int>(OpRole::kBackward))) {
             try {
-              auto backward_vars = boost::get<std::vector<std::string>>(
-                  op->GetNullableAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName()));
+              auto backward_vars = boost::get<std::vector<std::string>>(
+                  node->Op()->GetNullableAttr(
+                      OpProtoAndCheckerMaker::OpRoleVarAttrName()));

               PADDLE_ENFORCE_EQ(backward_vars.size() % 2, 0);

@@ -328,13 +350,12 @@ void MultiDevSSAGraphBuilder::SetCommunicationContext(
 void MultiDevSSAGraphBuilder::CreateBroadcastOp(Graph *result,
                                                 const std::string &p_name,
                                                 size_t src_dev_id) const {
-  result->nodes.emplace_back(new ir::Node(ir::Node::Type::kOperation));
 #ifdef PADDLE_WITH_CUDA
-  auto *op_handle = new BroadcastOpHandle(result->nodes.back().get(),
-                                          local_scopes_, places_, nccl_ctxs_);
+  auto *op_handle = new BroadcastOpHandle(result->CreateOpNode(nullptr),
+                                          local_scopes_, places_, nccl_ctxs_);
 #else
-  auto *op_handle = new BroadcastOpHandle(result->nodes.back().get(),
-                                          local_scopes_, places_);
+  auto *op_handle = new BroadcastOpHandle(result->CreateOpNode(nullptr),
+                                          local_scopes_, places_);
 #endif
   result->Get<GraphOps>("ops").emplace_back(op_handle);

@@ -345,33 +366,31 @@ void MultiDevSSAGraphBuilder::CreateBroadcastOp(Graph *result,
   for (size_t i = 0; i < places_.size(); ++i) {
     auto &p = places_[i];
     SetCommunicationContext(op_handle, p);
-    result->nodes.emplace_back(new ir::Node(ir::Node::Type::kVariable));
     auto &vars = result->Get<GraphVars>("vars").at(i).at(p_name);
     auto *out_var =
-        new VarHandle(result->nodes.back().get(), vars.size(), i, p_name, p);
+        new VarHandle(result->CreateVarNode(p_name), vars.size(), i, p_name, p);
     vars.emplace_back(out_var);
     op_handle->AddOutput(out_var);
   }
 }

 void MultiDevSSAGraphBuilder::CreateComputationalOp(Graph *result,
-                                                    const OpDesc &op,
+                                                    ir::Node *node,
                                                     int dev_id) const {
-  result->nodes.emplace_back(new ir::Node(ir::Node::Type::kOperation));
   result->Get<GraphOps>("ops").emplace_back(
-      new ComputationOpHandle(result->nodes.back().get(), op,
-                              local_scopes_[dev_id], places_[dev_id]));
-  CreateOpHandleIOs(result, op, dev_id);
+      new ComputationOpHandle(result->CreateOpNode(node->Op()), *node->Op(),
+                              local_scopes_[dev_id], places_[dev_id]));
+  CreateOpHandleIOs(result, node, dev_id);
 }

 void MultiDevSSAGraphBuilder::InsertAllReduceOp(Graph *result,
                                                 const std::string &og) const {
-  result->nodes.emplace_back(new ir::Node(ir::Node::Type::kOperation));
 #ifdef PADDLE_WITH_CUDA
   result->Get<GraphOps>("ops").emplace_back(new AllReduceOpHandle(
-      result->nodes.back().get(), local_scopes_, places_, nccl_ctxs_));
+      result->CreateOpNode(nullptr), local_scopes_, places_, nccl_ctxs_));
 #else
   result->Get<GraphOps>("ops").emplace_back(new AllReduceOpHandle(
-      result->nodes.back().get(), local_scopes_, places_));
+      result->CreateOpNode(nullptr), local_scopes_, places_));
 #endif
   auto *op_handle = result->Get<GraphOps>("ops").back().get();

@@ -383,8 +402,7 @@ void MultiDevSSAGraphBuilder::InsertAllReduceOp(Graph *result,
     auto &prev_grad = vars.back();
     op_handle->AddInput(prev_grad.get());

-    result->nodes.emplace_back(new ir::Node(ir::Node::Type::kVariable));
-    auto var = new VarHandle(result->nodes.back().get(), vars.size(), i, og, p);
+    auto var = new VarHandle(result->CreateVarNode(og), vars.size(), i, og, p);
     vars.emplace_back(var);
     op_handle->AddOutput(var);
   }

@@ -392,13 +410,12 @@ void MultiDevSSAGraphBuilder::InsertAllReduceOp(Graph *result,
 void MultiDevSSAGraphBuilder::InsertDataBalanceOp(
     Graph *result, const std::vector<std::string> &datas) const {
-  result->nodes.emplace_back(new ir::Node(ir::Node::Type::kOperation));
 #ifdef PADDLE_WITH_CUDA
   result->Get<GraphOps>("ops").emplace_back(new DataBalanceOpHandle(
-      result->nodes.back().get(), local_scopes_, places_, nccl_ctxs_));
+      result->CreateOpNode(nullptr), local_scopes_, places_, nccl_ctxs_));
 #else
   result->Get<GraphOps>("ops").emplace_back(new DataBalanceOpHandle(
-      result->nodes.back().get(), local_scopes_, places_));
+      result->CreateOpNode(nullptr), local_scopes_, places_));
 #endif
   auto *op_handle = result->Get<GraphOps>("ops").back().get();
   for (size_t i = 0; i < places_.size(); ++i) {

@@ -408,9 +425,8 @@ void MultiDevSSAGraphBuilder::InsertDataBalanceOp(
     auto &vars = result->Get<GraphVars>("vars")[i][d_name];
     PADDLE_ENFORCE(!vars.empty());
     op_handle->AddInput(vars.back().get());
-    result->nodes.emplace_back(new ir::Node(ir::Node::Type::kVariable));
-    auto var = new VarHandle(result->nodes.back().get(), vars.size(), i, d_name, p);
+    auto var = new VarHandle(result->CreateVarNode(d_name), vars.size(), i, d_name, p);
     vars.emplace_back(var);
     op_handle->AddOutput(var);
   }

@@ -429,17 +445,17 @@ bool MultiDevSSAGraphBuilder::IsParameterGradientOnce(
   return is_pg_once;
 }

-int MultiDevSSAGraphBuilder::GetOpDeviceID(const OpDesc &op) const {
+int MultiDevSSAGraphBuilder::GetOpDeviceID(ir::Node *node) const {
   if (strategy_.reduce_ != BuildStrategy::ReduceStrategy::kReduce) {
     return -1;
   }
   int op_role = boost::get<int>(
-      op.GetAttr(framework::OpProtoAndCheckerMaker::OpRoleAttrName()));
+      node->Op()->GetAttr(framework::OpProtoAndCheckerMaker::OpRoleAttrName()));
   if (op_role != static_cast<int>(framework::OpRole::kOptimize)) {
     return -1;
   }
   auto param_grad = boost::get<std::vector<std::string>>(
-      op.GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName()));
+      node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName()));

   PADDLE_ENFORCE_EQ(param_grad.size(), 2U);
   int dev_id = GetVarDeviceID(param_grad[1]);

@@ -464,9 +480,8 @@ void MultiDevSSAGraphBuilder::CreateScaleLossGradOp(Graph *result) const {
     auto *communication_dev_ctx =
         platform::DeviceContextPool::Instance().Get(platform::CPUPlace());
 #endif
-    result->nodes.emplace_back(new ir::Node(ir::Node::Type::kOperation));
     auto *op_handle = new ScaleLossGradOpHandle(
-        result->nodes.back().get(), local_scopes_.size(), local_scopes_[i],
+        result->CreateOpNode(nullptr), local_scopes_.size(), local_scopes_[i],
         places_[i], communication_dev_ctx);
     result->Get<GraphOps>("ops").emplace_back(op_handle);

@@ -476,34 +491,38 @@ void MultiDevSSAGraphBuilder::CreateScaleLossGradOp(Graph *result) const {
     // loss->pending_ops_.emplace_back(op_handle);
     // op_handle->inputs_.emplace_back(loss);

-    CreateOpOutput(result, op_handle, GradVarName(loss_var_name_), places_[i], i);
+    // TODO(panyx0718): GradVarName(loss_var_name_)
+    const std::string grad_var_name = GradVarName(loss_var_name_);
+    auto &vars = result->Get<GraphVars>("vars")[i][grad_var_name];
+    size_t version = vars.size();
+    auto var = new VarHandle(result->CreateVarNode(grad_var_name), version, i,
+                             grad_var_name, places_[i]);
+    vars.emplace_back(var);
+    op_handle->AddOutput(var);
   }
 }

 void MultiDevSSAGraphBuilder::CreateComputationalOps(Graph *result,
-                                                     const OpDesc &op,
+                                                     ir::Node *node,
                                                      size_t num_places) const {
   for (size_t scope_idx = 0; scope_idx < num_places; ++scope_idx) {
     auto p = places_[scope_idx];
     auto s = local_scopes_[scope_idx];
-    result->nodes.emplace_back(new ir::Node(ir::Node::Type::kOperation));
-    result->Get<GraphOps>("ops").emplace_back(
-        new ComputationOpHandle(result->nodes.back().get(), op, s, p));
-    CreateOpHandleIOs(result, op, scope_idx);
+    result->Get<GraphOps>("ops").emplace_back(
+        new ComputationOpHandle(result->CreateOpNode(node->Op()), *node->Op(), s, p));
+    CreateOpHandleIOs(result, node, scope_idx);
   }
 }

 VarHandle *MultiDevSSAGraphBuilder::CreateReduceOp(Graph *result,
                                                    const std::string &og,
                                                    int dst_dev_id) const {
-  result->nodes.emplace_back(new ir::Node(ir::Node::Type::kOperation));
 #ifdef PADDLE_WITH_CUDA
   result->Get<GraphOps>("ops").emplace_back(new ReduceOpHandle(
-      result->nodes.back().get(), local_scopes_, places_, nccl_ctxs_));
+      result->CreateOpNode(nullptr), local_scopes_, places_, nccl_ctxs_));
 #else
-  result->Get<GraphOps>("ops").emplace_back(new ReduceOpHandle(
-      result->nodes.back().get(), local_scopes_, places_));
+  result->Get<GraphOps>("ops").emplace_back(new ReduceOpHandle(
+      result->CreateOpNode(nullptr), local_scopes_, places_));
 #endif
   auto *op_handle = result->Get<GraphOps>("ops").back().get();

@@ -516,8 +535,7 @@ VarHandle *MultiDevSSAGraphBuilder::CreateReduceOp(Graph *result,
     op_handle->AddInput(prev_grad.get());
   }
   auto &vars = result->Get<GraphVars>("vars")[dst_dev_id][og];
-  result->nodes.emplace_back(new ir::Node(ir::Node::Type::kVariable));
-  auto var = new VarHandle(result->nodes.back().get(), vars.size(), dst_dev_id,
+  auto var = new VarHandle(result->CreateVarNode(og), vars.size(), dst_dev_id,
                            og, places_[dst_dev_id]);
   vars.emplace_back(var);
   op_handle->AddOutput(var);

@@ -530,8 +548,7 @@ void MultiDevSSAGraphBuilder::ConnectOp(Graph *result, OpHandleBase *op,
                                         const std::string &prev_op_name) const {
   for (auto &prev_op : result->Get<GraphOps>("ops")) {
     if (prev_op->Name() == prev_op_name) {
-      result->nodes.emplace_back(new ir::Node(ir::Node::Type::kVariable));
-      auto *dep_var = new DummyVarHandle(result->nodes.back().get());
+      auto *dep_var = new DummyVarHandle(result->CreateVarNode("dummy"));
       prev_op->AddOutput(dep_var);
       result->Get<GraphDepVars>("dep_vars").emplace(dep_var);
       op->AddInput(dep_var);

@@ -540,22 +557,32 @@ void MultiDevSSAGraphBuilder::ConnectOp(Graph *result, OpHandleBase *op,
 }

 void MultiDevSSAGraphBuilder::CreateDistTrainOp(Graph *result,
-                                                const OpDesc &op) const {
+                                                ir::Node *node) const {
   int op_dev_id = -1;
-  if (op.Type() == "split_byref" || op.Type() == "split_selected_rows") {
-    op_dev_id = GetVarDeviceID(op.InputArgumentNames()[0]);
+  std::vector<std::string> input_var_names;
+  std::vector<std::string> output_var_names;
+  for (ir::Node *input : node->inputs) {
+    input_var_names.push_back(input->Var()->Name());
+  }
+  for (ir::Node *output : node->outputs) {
+    output_var_names.push_back(output->Var()->Name());
+  }
+  if (node->Op()->Type() == "split_byref" ||
+      node->Op()->Type() == "split_selected_rows") {
+    op_dev_id = GetVarDeviceID(input_var_names[0]);
     if (strategy_.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce) {
-      op_dev_id = GetAppropriateDeviceID(op.InputArgumentNames());
-      for (auto &varname : op.InputArgumentNames()) {
+      op_dev_id = GetAppropriateDeviceID(input_var_names);
+      for (auto &varname : input_var_names) {
        var_name_on_devices_.emplace(varname, op_dev_id);
      }
    }
-    for (auto &varname : op.OutputArgumentNames()) {
+    for (auto &varname : output_var_names) {
      var_name_on_devices_.emplace(varname, op_dev_id);
    }
-  } else if (op.Type() == "concat") {
-    op_dev_id = GetVarDeviceID(op.InputArgumentNames()[0]);
-    for (auto &varname : op.OutputArgumentNames()) {
+  } else if (node->Op()->Type() == "concat") {
+    op_dev_id = GetVarDeviceID(input_var_names[0]);
+    for (auto &varname : output_var_names) {
      var_name_on_devices_.emplace(varname, op_dev_id);
    }
   } else {

@@ -565,35 +592,43 @@ void MultiDevSSAGraphBuilder::CreateDistTrainOp(Graph *result,
   }

   PADDLE_ENFORCE(op_dev_id != -1,
-                 "can not find right place for distributed op: %s", op.Type());
+                 "can not find right place for distributed op: %s",
+                 node->Op()->Type());

-  CreateComputationalOp(result, op, op_dev_id);
-  if (op.Type() == "concat") {
+  CreateComputationalOp(result, node, op_dev_id);
+  if (node->Op()->Type() == "concat") {
     ConnectOp(result, result->Get<GraphOps>("ops").back().get(), "fetch_barrier");
   }
 }

 // Create RPC related op handles that connects its in ops and out ops.
-void MultiDevSSAGraphBuilder::CreateRPCOp(Graph *result,
-                                          const OpDesc &op) const {
+void MultiDevSSAGraphBuilder::CreateRPCOp(Graph *result, ir::Node *node) const {
   int op_dev_id = -1;
-  if (op.Type() == "send") {
-    op_dev_id = GetVarDeviceID(op.InputArgumentNames()[0]);
+  if (node->Op()->Type() == "send") {
+    op_dev_id = GetVarDeviceID(node->inputs[0]->Var()->Name());
     // the variable name which contains .block means it was splited by
     // split_byref op
     // so that we can balance the variable blocks to all the pserver
     // instances.
     if (strategy_.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce &&
-        op.InputArgumentNames()[0].find(".block") == std::string::npos) {
-      op_dev_id = GetAppropriateDeviceID(op.InputArgumentNames());
-      for (auto &varname : op.InputArgumentNames()) {
+        node->inputs[0]->Var()->Name().find(".block") == std::string::npos) {
+      std::vector<std::string> input_var_names;
+      for (ir::Node *n : node->inputs) {
+        input_var_names.push_back(n->Var()->Name());
+      }
+      op_dev_id = GetAppropriateDeviceID(input_var_names);
+      for (auto &varname : input_var_names) {
         var_name_on_devices_.emplace(varname, op_dev_id);
       }
     }
-  } else if (op.Type() == "recv") {
-    op_dev_id = GetAppropriateDeviceID(op.OutputArgumentNames());
-    for (auto &varname : op.OutputArgumentNames()) {
+  } else if (node->Op()->Type() == "recv") {
+    std::vector<std::string> output_var_names;
+    for (ir::Node *n : node->outputs) {
+      output_var_names.push_back(n->Var()->Name());
+    }
+    op_dev_id = GetAppropriateDeviceID(output_var_names);
+    for (auto &varname : output_var_names) {
       var_name_on_devices_.emplace(varname, op_dev_id);
     }
   } else {

@@ -602,21 +637,20 @@ void MultiDevSSAGraphBuilder::CreateRPCOp(Graph *result,
   }

   PADDLE_ENFORCE(op_dev_id != -1, "can not find the right place for rpc op: %s",
-                 op.Type());
+                 node->Op()->Type());

-  result->nodes.emplace_back(new ir::Node(ir::Node::Type::kOperation));
   result->Get<GraphOps>("ops").emplace_back(new RPCOpHandle(
-      result->nodes.back().get(), op, local_scopes_[op_dev_id], op.Type(),
-      places_[op_dev_id]));
+      result->CreateOpNode(node->Op()), *node->Op(), local_scopes_[op_dev_id],
+      node->Op()->Type(), places_[op_dev_id]));

-  if (op.Type() == "send_barrier") {
+  if (node->Op()->Type() == "send_barrier") {
     ConnectOp(result, result->Get<GraphOps>("ops").back().get(), "send");
-  } else if (op.Type() == "recv") {
+  } else if (node->Op()->Type() == "recv") {
     ConnectOp(result, result->Get<GraphOps>("ops").back().get(), "send_barrier");
-  } else if (op.Type() == "fetch_barrier") {
+  } else if (node->Op()->Type() == "fetch_barrier") {
     ConnectOp(result, result->Get<GraphOps>("ops").back().get(), "recv");
-  } else if (op.Type() == "send") {
+  } else if (node->Op()->Type() == "send") {
     // do nothing
   } else {
     PADDLE_THROW(

@@ -624,12 +658,12 @@ void MultiDevSSAGraphBuilder::CreateRPCOp(Graph *result,
         "send, send_barrier. recv, fetch_barrier]");
   }

-  CreateOpHandleIOs(result, op, op_dev_id);
+  CreateOpHandleIOs(result, node, op_dev_id);
 }

-bool MultiDevSSAGraphBuilder::IsScaleLossOp(const OpDesc &op) const {
+bool MultiDevSSAGraphBuilder::IsScaleLossOp(ir::Node *node) const {
   return boost::get<int>(
-             op.GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName())) ==
+             node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName())) ==
              (static_cast<int>(OpRole::kBackward) |
               static_cast<int>(OpRole::kLoss)) &&
          !loss_var_name_.empty();  // If loss_var is empty. This is test mode
paddle/fluid/framework/details/multi_devices_graph_builder.h

@@ -46,13 +46,11 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
                           const std::vector<Scope *> &local_scopes,
                           const BuildStrategy &strategy);
 #endif

-  std::unique_ptr<Graph> Build(std::unique_ptr<Graph> graph) const override;
+  std::unique_ptr<Graph> Apply(std::unique_ptr<Graph> graph) const override;

   int GetVarDeviceID(const std::string &varname) const override;

- private:
-  void CreateOpHandleIOs(Graph *result, const OpDesc &op, size_t device_id) const;
+  void CreateOpHandleIOs(Graph *result, ir::Node *node, size_t device_id) const;

+ private:
   std::string loss_var_name_;

@@ -64,40 +62,39 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
   platform::NCCLContextMap *nccl_ctxs_;
 #endif

-  bool IsScaleLossOp(const OpDesc &op) const;
+  bool IsScaleLossOp(ir::Node *node) const;

-  void CreateRPCOp(Graph *result, const OpDesc &op) const;
-  void CreateDistTrainOp(Graph *result, const OpDesc &op) const;
+  void CreateRPCOp(Graph *result, ir::Node *node) const;
+  void CreateDistTrainOp(Graph *result, ir::Node *node) const;

   /**
    * Is this operator as the end-point operator before/after send operator.
    */
-  bool IsDistTrainOp(const OpDesc &op,
-                     const std::vector<std::string> &send_vars,
+  bool IsDistTrainOp(ir::Node *node,
+                     const std::vector<std::string> &send_vars,
                      const std::vector<std::string> &recv_vars) const;

   std::vector<std::string> FindDistTrainSendVars(
-      const ProgramDesc &program) const;
+      const std::vector<std::unique_ptr<ir::Node>> &nodes) const;

   std::vector<std::string> FindDistTrainRecvVars(
-      const ProgramDesc &program) const;
+      const std::vector<std::unique_ptr<ir::Node>> &nodes) const;

   void ConnectOp(Graph *result, OpHandleBase *op,
                  const std::string &prev_op_name) const;

-  void CreateComputationalOps(Graph *result, const OpDesc &op,
+  void CreateComputationalOps(Graph *result, ir::Node *node,
                               size_t num_places) const;

   void CreateScaleLossGradOp(Graph *result) const;
   VarHandle *CreateReduceOp(Graph *result, const std::string &og,
                             int dst_dev_id) const;

-  void CreateComputationalOp(Graph *result, const OpDesc &op, int dev_id) const;
+  void CreateComputationalOp(Graph *result, ir::Node *node, int dev_id) const;

   bool IsParameterGradientOnce(
       const std::string &og,
       std::unordered_set<std::string> *og_has_been_broadcast) const;

-  int GetOpDeviceID(const OpDesc &op) const;
+  int GetOpDeviceID(ir::Node *node) const;

   void InsertAllReduceOp(Graph *result, const std::string &og) const;
paddle/fluid/framework/details/reduce_op_handle_test.cc

@@ -97,7 +97,7 @@ struct TestReduceOpHandle {
     }
     param_scopes_[out_scope_idx]->Var("out");
-    nodes.emplace_back(new ir::Node(ir::Node::Type::kOperation));
+    nodes.emplace_back(new ir::Node());
     if (use_gpu_) {
 #ifdef PADDLE_WITH_CUDA
       op_handle_.reset(new ReduceOpHandle(nodes.back().get(), local_scopes_,

@@ -121,7 +121,7 @@ struct TestReduceOpHandle {
       if (!use_gpu_) {
         op_handle_->SetDeviceContext(gpu_list_[j], ctxs_[j].get());
       }
-      nodes.emplace_back(new ir::Node(ir::Node::Type::kVariable));
+      nodes.emplace_back(new ir::Node());
       auto *in_var_handle =
           new VarHandle(nodes.back().get(), 1, j, "input", gpu_list_[j]);
       in_var_handle->ClearGeneratedOp();

@@ -137,7 +137,7 @@ struct TestReduceOpHandle {
     op_handle_->AddInput(in_dummy_var_handle);
     // add output
-    nodes.emplace_back(new ir::Node(ir::Node::Type::kVariable));
+    nodes.emplace_back(new ir::Node());
     auto *out_var_handle = new VarHandle(nodes.back().get(), 2, out_scope_idx,
                                          "out", gpu_list_[out_scope_idx]);
     vars_.emplace_back(out_var_handle);
paddle/fluid/framework/details/ssa_graph_builder.cc

@@ -37,8 +37,7 @@ void SSAGraphBuilder::PolishGraphToSupportDataHazards(Graph *graph) {
           continue;
         }

-        graph->nodes.emplace_back(new ir::Node(ir::Node::Type::kVariable));
-        auto *dep_var = new DummyVarHandle(graph->nodes.back().get());
+        auto *dep_var = new DummyVarHandle(graph->CreateVarNode("dummy"));
         read_op->AddOutput(dep_var);
         write_op->AddInput(dep_var);
         graph->Get<GraphDepVars>("dep_vars").emplace(dep_var);

@@ -49,15 +48,14 @@ void SSAGraphBuilder::PolishGraphToSupportDataHazards(Graph *graph) {
 }

 VarHandle *SSAGraphBuilder::CreateOrGetLatestVarHandle(
-    Graph *graph, const std::string &each_var_name,
-    const platform::Place &place, size_t place_offset) {
+    Graph *graph, ir::Node *node, const platform::Place &place,
+    size_t place_offset) {
   auto &var_holders = graph->Get<GraphVars>("vars")[place_offset];
-  auto &var_holder = var_holders[each_var_name];
+  auto &var_holder = var_holders[node->Var()->Name()];
   VarHandle *var = nullptr;
   if (var_holder.empty()) {
-    graph->nodes.emplace_back(new ir::Node(ir::Node::Type::kVariable));
-    var = new VarHandle(graph->nodes.back().get(), 0, place_offset, each_var_name, place);
+    var = new VarHandle(graph->CreateVarNode(node->Var()), 0, place_offset,
+                        node->Var()->Name(), place);
     var_holder.emplace_back(var);
   } else {
     var = var_holder.rbegin()->get();

@@ -66,14 +64,13 @@ VarHandle *SSAGraphBuilder::CreateOrGetLatestVarHandle(
 }

 void SSAGraphBuilder::CreateOpOutput(Graph *graph, OpHandleBase *op_handle,
-                                     const std::string &each_var_name,
-                                     const platform::Place &place, size_t place_offset) {
-  auto &vars = graph->Get<GraphVars>("vars")[place_offset][each_var_name];
+                                     ir::Node *node, const platform::Place &place,
+                                     size_t place_offset) {
+  auto &vars = graph->Get<GraphVars>("vars")[place_offset][node->Var()->Name()];
   size_t version = vars.size();
-  graph->nodes.emplace_back(new ir::Node(ir::Node::Type::kVariable));
-  auto var = new VarHandle(graph->nodes.back().get(), version, place_offset, each_var_name, place);
+  auto var = new VarHandle(graph->CreateVarNode(node->Var()), version,
+                           place_offset, node->Var()->Name(), place);
   vars.emplace_back(var);
   op_handle->AddOutput(var);
 }

@@ -85,8 +82,7 @@ void SSAGraphBuilder::AddOutputToLeafOps(Graph *graph) {
     if (!op->Outputs().empty()) {
       continue;
     }
-    graph->nodes.emplace_back(new ir::Node(ir::Node::Type::kVariable));
-    auto *dummy_leaf = new DummyVarHandle(graph->nodes.back().get());
+    auto *dummy_leaf = new DummyVarHandle(graph->CreateVarNode("dummy"));
     graph->Get<GraphDepVars>("dep_vars").emplace(dummy_leaf);
     op->AddOutput(dummy_leaf);
   }
paddle/fluid/framework/details/ssa_graph_builder.h

@@ -23,6 +23,7 @@
 #include "paddle/fluid/platform/place.h"

 #include "paddle/fluid/framework/ir/graph.h"
+#include "paddle/fluid/framework/ir/pass.h"

 namespace paddle {
 namespace framework {

@@ -34,11 +35,11 @@ typedef std::vector<
 typedef std::unordered_set<std::unique_ptr<VarHandleBase>> GraphDepVars;
 typedef std::vector<std::unique_ptr<OpHandleBase>> GraphOps;

-class SSAGraphBuilder {
+class SSAGraphBuilder : public ir::Pass {
  public:
   SSAGraphBuilder() {}
   virtual ~SSAGraphBuilder() {}
-  virtual std::unique_ptr<Graph> Build(std::unique_ptr<Graph> graph) const = 0;
   virtual int GetVarDeviceID(const std::string &var_name) const = 0;

   DISABLE_COPY_AND_ASSIGN(SSAGraphBuilder);

@@ -53,16 +54,15 @@ class SSAGraphBuilder {
    */
   static void PolishGraphToSupportDataHazards(Graph *graph);

-  static VarHandle *CreateOrGetLatestVarHandle(Graph *graph,
-                                               const std::string &each_var_name,
+  static VarHandle *CreateOrGetLatestVarHandle(Graph *graph, ir::Node *node,
                                                const platform::Place &place,
                                                size_t place_offset);

   // Add an output variable (each_var_name, place, place_offset) to op_handle,
   // which belongs to graph
   static void CreateOpOutput(Graph *graph, OpHandleBase *op_handle,
-                             const std::string &each_var_name,
-                             const platform::Place &place, size_t place_offset);
+                             ir::Node *node, const platform::Place &place,
+                             size_t place_offset);

   static void AddOutputToLeafOps(Graph *graph);
 };
paddle/fluid/framework/details/ssa_graph_checker.h

@@ -28,10 +28,10 @@ class SSAGraghBuilderWithChecker : public SSAGraphBuilder {
       std::unique_ptr<SSAGraphBuilder> &&builder)
       : builder_(std::move(builder)) {}

-  std::unique_ptr<Graph> Build(std::unique_ptr<Graph> graph) const override {
-    auto new_graph = builder_->Build(std::move(graph));
+  std::unique_ptr<Graph> Apply(std::unique_ptr<Graph> graph) const override {
+    auto new_graph = builder_->Apply(std::move(graph));
     PADDLE_ENFORCE(IsValidGraph(new_graph.get()));
-    return new_graph;
+    return std::move(new_graph);
   }

   int GetVarDeviceID(const std::string &var_name) const override {
paddle/fluid/framework/details/ssa_graph_printer.h

@@ -50,10 +50,10 @@ class SSAGraghBuilderWithPrinter : public SSAGraphBuilder {
         stream_ptr_(std::move(sout)),
         stream_ref_(*stream_ptr_) {}

-  std::unique_ptr<Graph> Build(std::unique_ptr<Graph> graph) const override {
-    auto new_graph = builder_->Build(std::move(graph));
+  std::unique_ptr<Graph> Apply(std::unique_ptr<Graph> graph) const override {
+    auto new_graph = builder_->Apply(std::move(graph));
     printer_->Print(*new_graph, stream_ref_);
-    return new_graph;
+    return std::move(new_graph);
   }

   int GetVarDeviceID(const std::string &var_name) const override {
paddle/fluid/framework/ir/graph.cc

@@ -13,12 +13,45 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/framework/var_desc.h"

 namespace paddle {
 namespace framework {

+std::unique_ptr<Graph> ProgramToGraph(const ProgramDesc &program) {
+  std::unique_ptr<Graph> graph(new Graph(program));
+  std::unordered_map<std::string, VarDesc *> all_vars;
+  for (auto *var : program.Block(0).AllVars()) {
+    all_vars.emplace(var->Name(), var);
+  }
+  for (auto *op : program.Block(0).AllOps()) {
+    ir::Node *node = graph->CreateOpNode(op);
+    for (auto &each_var_name : op->InputArgumentNames()) {
+      ir::Node *var = nullptr;
+      if (all_vars.count(each_var_name) != 0) {
+        var = graph->CreateVarNode(all_vars.at(each_var_name));
+      } else {
+        var = graph->CreateVarNode(each_var_name);
+      }
+      node->inputs.push_back(var);
+      var->outputs.push_back(node);
+    }
+    for (auto &each_var_name : op->OutputArgumentNames()) {
+      ir::Node *var = nullptr;
+      if (all_vars.count(each_var_name) != 0) {
+        var = graph->CreateVarNode(all_vars.at(each_var_name));
+      } else {
+        var = graph->CreateVarNode(each_var_name);
+      }
+      node->outputs.push_back(var);
+      var->inputs.push_back(node);
+    }
+  }
+  return std::move(graph);
+}
paddle/fluid/framework/ir/graph.h

@@ -39,8 +39,6 @@ class Graph {
     attr_dels_.clear();
   }

-  const ProgramDesc &Program() const { return program_; }
-
   template <typename AttrType>
   AttrType &Get(const std::string &attr_name) const {
     return *boost::any_cast<AttrType *>(attrs_.at(attr_name));

@@ -63,11 +61,30 @@ class Graph {
     return attr;
   }

+  ir::Node *CreateVarNode(VarDesc *var_desc) {
+    nodes.emplace_back(new ir::Node(var_desc));
+    return nodes.back().get();
+  }
+
+  ir::Node *CreateOpNode(OpDesc *op_desc) {
+    nodes.emplace_back(new ir::Node(op_desc));
+    return nodes.back().get();
+  }
+
+  // TODO(panyx0718): Need to handle CreateOpNode(nullptr).
+  ir::Node *CreateVarNode(const std::string &var_name) {
+    var_descs_.emplace_back(new VarDesc(var_name));
+    nodes.emplace_back(new ir::Node(var_descs_.back().get()));
+    return nodes.back().get();
+  }
+
   std::vector<ir::Node *> inputs;
   std::vector<ir::Node *> outputs;
   std::vector<std::unique_ptr<ir::Node>> nodes;
+  std::vector<std::unique_ptr<VarDesc>> var_descs_;

  private:
+  // NOTE: program_ shouldn't be exposed to user.
   const ProgramDesc &program_;
   std::map<std::string, boost::any> attrs_;
   std::map<std::string, std::function<void(void)>> attr_dels_;
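The new CreateVarNode/CreateOpNode helpers centralize node construction in the Graph: callers receive a raw ir::Node* while the graph keeps the owning unique_ptr, and the string overload also keeps the freshly created VarDesc alive in var_descs_ so the node's descriptor pointer cannot dangle. A hedged, standalone sketch of that ownership pattern (simplified types, not the actual Paddle classes):

    #include <iostream>
    #include <memory>
    #include <string>
    #include <vector>

    // Simplified stand-ins for VarDesc and a variable node that refers to it.
    struct VarDesc {
      std::string name;
    };
    struct Node {
      VarDesc *var;  // non-owning; something else must keep the descriptor alive
    };

    struct Graph {
      std::vector<std::unique_ptr<Node>> nodes;
      std::vector<std::unique_ptr<VarDesc>> var_descs_;

      // Wrap a descriptor that is owned elsewhere (e.g. by the program).
      Node *CreateVarNode(VarDesc *var_desc) {
        nodes.emplace_back(new Node{var_desc});
        return nodes.back().get();
      }

      // Name-only variant: the graph also owns the descriptor it just created.
      Node *CreateVarNode(const std::string &var_name) {
        var_descs_.emplace_back(new VarDesc{var_name});
        nodes.emplace_back(new Node{var_descs_.back().get()});
        return nodes.back().get();
      }
    };

    int main() {
      Graph g;
      Node *dummy = g.CreateVarNode("dummy");
      std::cout << dummy->var->name << std::endl;  // prints "dummy"
      return 0;
    }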
paddle/fluid/framework/ir/node.h

@@ -21,6 +21,8 @@ limitations under the License. */
 #include <string>
 #include <unordered_set>
 #include <vector>

+#include "paddle/fluid/framework/op_desc.h"
+#include "paddle/fluid/framework/var_desc.h"
 #include "paddle/fluid/platform/macros.h"
 #include "paddle/fluid/platform/variant.h"

@@ -32,10 +34,12 @@ class Node {
  public:
   enum class Type { kNone = -1, kOperation, kVariable };

   Node() : type_(Type::kNone) {}
   explicit Node(Type type) : type_(type) {}
   virtual ~Node() {
     for (auto &attr : attrs_) {
       if (attr_dels_.find(attr.first) != attr_dels_.end()) {
         attr_dels_[attr.first]();
       }

@@ -47,23 +51,34 @@ class Node {
   Type NodeType() const { return type_; }

   template <typename AttrType>
   void Set(const std::string &name, AttrType attr) {
     attrs_[name] = attr;
   }

   template <typename AttrType>
   void Set(const std::string &name, AttrType *attr,
            std::function<void(void)> attr_del) {
     attrs_[name] = attr;
     attr_dels_[name] = attr_del;
   }

-  std::vector<Node *> inputs;
-  std::vector<Node *> outputs;
+  VarDesc *Var() { return var_desc_; }
+  OpDesc *Op() { return op_desc_; }
+
+  explicit Node(VarDesc *var_desc)
+      : var_desc_(var_desc), op_desc_(nullptr), type_(Type::kVariable) {}
+
+  explicit Node(OpDesc *op_desc)
+      : var_desc_(nullptr), op_desc_(op_desc), type_(Type::kOperation) {}
+
+  std::vector<Node *> inputs;
+  std::vector<Node *> outputs;

  protected:
   std::map<std::string, boost::any> attrs_;
   std::map<std::string, std::function<void(void)>> attr_dels_;
+  VarDesc *var_desc_;
+  OpDesc *op_desc_;
   Type type_;

  private:
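After this change an ir::Node is either an operation or a variable depending on which descriptor it wraps, and Var()/Op() return the corresponding descriptor (or nullptr). The following is a simplified, hypothetical restatement of that tagged-node idea, not the framework's actual header:

    #include <cassert>
    #include <string>

    // Stand-ins for the framework's descriptor types.
    struct VarDesc { std::string name; };
    struct OpDesc  { std::string type; };

    class Node {
     public:
      enum class Type { kNone = -1, kOperation, kVariable };

      // A node built from a VarDesc is a variable node ...
      explicit Node(VarDesc *var_desc)
          : var_desc_(var_desc), op_desc_(nullptr), type_(Type::kVariable) {}
      // ... and a node built from an OpDesc is an operation node.
      explicit Node(OpDesc *op_desc)
          : var_desc_(nullptr), op_desc_(op_desc), type_(Type::kOperation) {}

      Type NodeType() const { return type_; }
      VarDesc *Var() { return var_desc_; }  // nullptr for operation nodes
      OpDesc *Op() { return op_desc_; }     // nullptr for variable nodes

     private:
      VarDesc *var_desc_;
      OpDesc *op_desc_;
      Type type_;
    };

    int main() {
      OpDesc send_op{"send"};
      Node n(&send_op);
      assert(n.NodeType() == Node::Type::kOperation);
      assert(n.Var() == nullptr && n.Op()->type == "send");
      return 0;
    }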
paddle/fluid/framework/ir/pass.h

@@ -20,15 +20,15 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 namespace ir {

 class Pass {
  public:
   Pass() = default;
   virtual ~Pass() {}

-  virtual std::unique_ptr<Graph> Apply(std::unique_ptr<Graph> graph) {
-    return std::move(graph);
-  }
-};
+  virtual std::unique_ptr<Graph> Apply(std::unique_ptr<Graph> graph) const = 0;
+};

 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
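With Pass::Apply now pure virtual, every builder must implement it, and wrappers such as SSAGraghBuilderWithChecker and SSAGraghBuilderWithPrinter simply forward to the builder they decorate before validating or printing the result. A minimal standalone sketch of that decorator chain, under simplified types that are not the Paddle classes:

    #include <iostream>
    #include <memory>
    #include <stdexcept>
    #include <utility>

    struct Graph {
      int node_count = 0;
    };

    class Pass {
     public:
      virtual ~Pass() {}
      virtual std::unique_ptr<Graph> Apply(std::unique_ptr<Graph> graph) const = 0;
    };

    // The "real" builder pass: pretends to insert some op/var nodes.
    class BuilderPass : public Pass {
     public:
      std::unique_ptr<Graph> Apply(std::unique_ptr<Graph> graph) const override {
        graph->node_count += 3;
        return graph;
      }
    };

    // A decorator that forwards to the wrapped pass and then checks the result,
    // mirroring how the checker/printer builders call builder_->Apply(...).
    class CheckedPass : public Pass {
     public:
      explicit CheckedPass(std::unique_ptr<Pass> inner) : inner_(std::move(inner)) {}
      std::unique_ptr<Graph> Apply(std::unique_ptr<Graph> graph) const override {
        auto new_graph = inner_->Apply(std::move(graph));
        if (new_graph->node_count <= 0) {
          throw std::runtime_error("invalid graph");
        }
        return new_graph;
      }

     private:
      std::unique_ptr<Pass> inner_;
    };

    int main() {
      CheckedPass pass(std::unique_ptr<Pass>(new BuilderPass()));
      std::unique_ptr<Graph> graph(new Graph());
      graph = pass.Apply(std::move(graph));
      std::cout << graph->node_count << std::endl;  // prints 3
      return 0;
    }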
paddle/fluid/framework/parallel_executor.cc

@@ -131,13 +131,10 @@ ParallelExecutor::ParallelExecutor(
     PADDLE_THROW("Not compiled with CUDA.");
 #endif
   }

   builder_ = builder_factory.Create();
-  std::unique_ptr<Graph> graph = builder_->Build(ProgramToGraph(main_program));
+  std::unique_ptr<Graph> graph = builder_->Apply(ProgramToGraph(main_program));
   member_->executor_.reset(new details::ThreadedSSAGraphExecutor(
       exec_strategy, member_->local_scopes_, places, std::move(graph)));

   member_->executor_.reset(new details::ScopeBufferedSSAGraphExecutor(
       exec_strategy, member_->local_scopes_, std::move(var_infos),
       member_->places_, std::move(member_->executor_)));
python/paddle/fluid/parallel_executor.py

@@ -148,6 +148,7 @@ class ParallelExecutor(object):
                 lambda var: var.persistable and var.type != core.VarDesc.VarType.RAW,
                 main.list_vars())
         ]

+        sys.stderr.write('!!!!!!!!before\n')
         self.executor = core.ParallelExecutor(
             self._places,

@@ -158,6 +159,7 @@ class ParallelExecutor(object):
             set(self.persistable_vars), main.desc,
             loss_name if loss_name else '', scope, local_scopes, exec_strategy,
             build_strategy, num_trainers, trainer_id)
+        sys.stderr.write('!!!!!!!!after\n')
         self.scope = scope

     def run(self, fetch_list, feed=None, feed_dict=None, return_numpy=True):