Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
d0342f12
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
d0342f12
编写于
4月 16, 2018
作者:
Y
Yu Yang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Simplify DelayOps Logic
上级
494c262a
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
18 addition
and
30 deletion
+18
-30
paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
...le/fluid/framework/details/threaded_ssa_graph_executor.cc
+15
-28
python/paddle/fluid/tests/unittests/test_parallel_executor.py
...on/paddle/fluid/tests/unittests/test_parallel_executor.py
+3
-2
未找到文件。
paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
浏览文件 @
d0342f12
...
@@ -51,8 +51,6 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
...
@@ -51,8 +51,6 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
// together since we currently cannot overlap computation and memcpy streams.
// together since we currently cannot overlap computation and memcpy streams.
// Should revisit it if overlapping is available.
// Should revisit it if overlapping is available.
std
::
unordered_set
<
OpHandleBase
*>
delayed_ops
;
std
::
unordered_set
<
OpHandleBase
*>
delayed_ops
;
std
::
unordered_set
<
OpHandleBase
*>
blocked_by_delayed_ops
;
std
::
unordered_set
<
VarHandleBase
*>
delayed_vars
;
auto
InsertPendingVar
=
[
&
pending_vars
,
&
ready_vars
](
VarHandleBase
&
var
)
{
auto
InsertPendingVar
=
[
&
pending_vars
,
&
ready_vars
](
VarHandleBase
&
var
)
{
pending_vars
.
insert
(
&
var
);
pending_vars
.
insert
(
&
var
);
...
@@ -122,24 +120,26 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
...
@@ -122,24 +120,26 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
InsertPendingOp
(
*
op
);
InsertPendingOp
(
*
op
);
}
}
auto
run_all_ready_ops
=
[
&
]
{
auto
run_all_ops
=
[
&
](
std
::
unordered_set
<
OpHandleBase
*>
&
set
)
{
for
(
auto
*
op
:
ready_ops
)
{
for
(
auto
*
op
:
set
)
{
if
(
op
->
IsMultiDeviceTransfer
()
&&
allow_op_delay_
)
{
delayed_ops
.
insert
(
op
);
delayed_vars
.
insert
(
op
->
outputs_
.
begin
(),
op
->
outputs_
.
end
());
ready_vars
.
Extend
(
op
->
outputs_
);
continue
;
}
running_ops_
++
;
running_ops_
++
;
RunOp
(
&
ready_vars
,
op
);
RunOp
(
&
ready_vars
,
op
);
}
}
ready_ops
.
clear
();
set
.
clear
();
};
};
// Step 3. Execution
// Step 3. Execution
while
(
!
pending_vars
.
empty
()
||
!
ready_ops
.
empty
()
||
!
delayed_ops
.
empty
()
)
{
while
(
!
pending_vars
.
empty
())
{
// 1. Run All Ready ops
// 1. Run All Ready ops
run_all_ready_ops
();
// Keep loop until all vars are ready.
//
// NOTE: DelayedOps have a lower priority. It will be scheduled after all
// ready_ops have been performed.
if
(
ready_ops
.
empty
()
&&
allow_op_delay_
)
{
run_all_ops
(
delayed_ops
);
}
else
{
run_all_ops
(
ready_ops
);
}
// 2. Find ready variable
// 2. Find ready variable
bool
timeout
;
bool
timeout
;
...
@@ -160,29 +160,16 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
...
@@ -160,29 +160,16 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
auto
&
deps
=
pending_ops
[
op
];
auto
&
deps
=
pending_ops
[
op
];
--
deps
;
--
deps
;
if
(
deps
==
0
)
{
if
(
deps
==
0
)
{
if
(
delayed_vars
.
find
(
ready_var
)
!=
delayed_vars
.
end
()
)
{
if
(
op
->
IsMultiDeviceTransfer
()
&&
allow_op_delay_
)
{
blocked_by_
delayed_ops
.
insert
(
op
);
delayed_ops
.
insert
(
op
);
}
else
{
}
else
{
ready_ops
.
insert
(
op
);
ready_ops
.
insert
(
op
);
}
}
}
}
}
}
}
}
// When there are no other ops to schedule, schedule buffered delayed
// ops and unblock other ops.
if
(
ready_ops
.
empty
()
&&
!
delayed_ops
.
empty
()
&&
running_ops_
==
0
)
{
RunDelayedOps
(
delayed_ops
);
delayed_ops
.
clear
();
for
(
auto
*
op
:
blocked_by_delayed_ops
)
{
ready_ops
.
insert
(
op
);
}
blocked_by_delayed_ops
.
clear
();
}
// Keep loop until all vars are ready.
}
}
PADDLE_ENFORCE
(
ready_ops
.
empty
());
PADDLE_ENFORCE
(
ready_ops
.
empty
());
PADDLE_ENFORCE
(
delayed_ops
.
empty
());
PADDLE_ENFORCE
(
blocked_by_delayed_ops
.
empty
());
// Wait FetchOps.
// Wait FetchOps.
if
(
!
fetch_ops
.
empty
())
{
if
(
!
fetch_ops
.
empty
())
{
...
...
python/paddle/fluid/tests/unittests/test_parallel_executor.py
浏览文件 @
d0342f12
...
@@ -206,18 +206,19 @@ class TestParallelExecutorBase(unittest.TestCase):
...
@@ -206,18 +206,19 @@ class TestParallelExecutorBase(unittest.TestCase):
feed_dict
=
{}):
feed_dict
=
{}):
main
=
fluid
.
Program
()
main
=
fluid
.
Program
()
startup
=
fluid
.
Program
()
startup
=
fluid
.
Program
()
startup
.
random_seed
=
1
# Fix random seed
with
fluid
.
program_guard
(
main
,
startup
):
with
fluid
.
program_guard
(
main
,
startup
):
loss
=
method
(
use_feed
=
len
(
feed_dict
)
>
0
)
loss
=
method
(
use_feed
=
len
(
feed_dict
)
>
0
)
adam
=
fluid
.
optimizer
.
Adam
()
adam
=
fluid
.
optimizer
.
Adam
()
adam
.
minimize
(
loss
)
adam
.
minimize
(
loss
)
if
memory_opt
:
if
memory_opt
:
fluid
.
memory_optimize
(
main
)
fluid
.
memory_optimize
(
main
)
place
=
fluid
.
CUDAPlace
(
0
)
place
=
fluid
.
CUDAPlace
(
0
)
startup_exe
=
fluid
.
Executor
(
place
)
startup_exe
=
fluid
.
Executor
(
place
)
startup_exe
.
run
(
startup
)
startup_exe
.
run
(
startup
)
exe
=
fluid
.
ParallelExecutor
(
True
,
loss_name
=
loss
.
name
)
exe
=
fluid
.
ParallelExecutor
(
True
,
loss_name
=
loss
.
name
,
allow_op_delay
=
allow_op_delay
)
if
batch_size
is
not
None
:
if
batch_size
is
not
None
:
batch_size
*=
fluid
.
core
.
get_cuda_device_count
()
batch_size
*=
fluid
.
core
.
get_cuda_device_count
()
begin
=
time
.
time
()
begin
=
time
.
time
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录