Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
7643c2cb
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
7643c2cb
编写于
3月 20, 2018
作者:
Y
Yu Yang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add flag for use event
上级
ca4b3d25
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
17 addition
and
13 deletion
+17
-13
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+16
-13
paddle/fluid/framework/parallel_executor.h
paddle/fluid/framework/parallel_executor.h
+1
-0
未找到文件。
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
7643c2cb
...
@@ -86,8 +86,8 @@ struct OpHandle {
...
@@ -86,8 +86,8 @@ struct OpHandle {
virtual
~
OpHandle
()
{}
virtual
~
OpHandle
()
{}
void
Run
()
{
void
Run
(
bool
use_event
)
{
if
(
events_
.
empty
())
{
if
(
events_
.
empty
()
&&
use_event
)
{
for
(
auto
&
p
:
dev_ctx_
)
{
for
(
auto
&
p
:
dev_ctx_
)
{
int
dev_id
=
boost
::
get
<
platform
::
CUDAPlace
>
(
p
.
first
).
device
;
int
dev_id
=
boost
::
get
<
platform
::
CUDAPlace
>
(
p
.
first
).
device
;
cudaSetDevice
(
dev_id
);
cudaSetDevice
(
dev_id
);
...
@@ -97,6 +97,7 @@ struct OpHandle {
...
@@ -97,6 +97,7 @@ struct OpHandle {
RunImpl
();
RunImpl
();
if
(
use_event
)
{
for
(
auto
&
p
:
dev_ctx_
)
{
for
(
auto
&
p
:
dev_ctx_
)
{
int
dev_id
=
boost
::
get
<
platform
::
CUDAPlace
>
(
p
.
first
).
device
;
int
dev_id
=
boost
::
get
<
platform
::
CUDAPlace
>
(
p
.
first
).
device
;
auto
stream
=
auto
stream
=
...
@@ -104,9 +105,10 @@ struct OpHandle {
...
@@ -104,9 +105,10 @@ struct OpHandle {
cudaEventRecord
(
events_
.
at
(
dev_id
),
stream
);
cudaEventRecord
(
events_
.
at
(
dev_id
),
stream
);
}
}
}
}
}
virtual
void
Wait
(
platform
::
DeviceContext
*
waited_dev
)
{
virtual
void
Wait
(
platform
::
DeviceContext
*
waited_dev
)
{
if
(
platform
::
is_cpu_place
(
waited_dev
->
GetPlace
()))
{
if
(
platform
::
is_cpu_place
(
waited_dev
->
GetPlace
())
&&
events_
.
empty
()
)
{
for
(
auto
&
dev_ctx
:
dev_ctx_
)
{
for
(
auto
&
dev_ctx
:
dev_ctx_
)
{
dev_ctx
.
second
->
Wait
();
dev_ctx
.
second
->
Wait
();
}
}
...
@@ -677,7 +679,7 @@ void ParallelExecutor::BuildNCCLCommunicator() const {
...
@@ -677,7 +679,7 @@ void ParallelExecutor::BuildNCCLCommunicator() const {
void
ParallelExecutor
::
Run
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
,
void
ParallelExecutor
::
Run
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
,
const
std
::
string
&
fetched_var_name
)
{
const
std
::
string
&
fetched_var_name
)
{
VLOG
(
3
)
<<
"Run iter"
;
bool
use_event
=
false
;
auto
fetched_data
=
std
::
make_shared
<
FetchedData
>
(
fetch_tensors
.
size
());
auto
fetched_data
=
std
::
make_shared
<
FetchedData
>
(
fetch_tensors
.
size
());
// Version --> VarHandle
// Version --> VarHandle
member_
->
exception_
.
reset
();
member_
->
exception_
.
reset
();
...
@@ -748,7 +750,7 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
...
@@ -748,7 +750,7 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
}
}
for
(
auto
*
op
:
to_run
)
{
for
(
auto
*
op
:
to_run
)
{
RunOp
(
pending_vars
,
op
);
RunOp
(
use_event
,
pending_vars
,
op
);
}
}
while
(
!
pending_vars
.
empty
())
{
while
(
!
pending_vars
.
empty
())
{
...
@@ -776,7 +778,7 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
...
@@ -776,7 +778,7 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
}
}
for
(
auto
*
op
:
to_run
)
{
for
(
auto
*
op
:
to_run
)
{
pending_ops
.
erase
(
op
);
pending_ops
.
erase
(
op
);
RunOp
(
pending_vars
,
op
);
RunOp
(
use_event
,
pending_vars
,
op
);
}
}
}
}
...
@@ -790,6 +792,7 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
...
@@ -790,6 +792,7 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
}
}
void
ParallelExecutor
::
RunOp
(
void
ParallelExecutor
::
RunOp
(
bool
use_event
,
std
::
unordered_map
<
VarHandleBase
*
,
std
::
atomic
<
bool
>>
&
pending_vars
,
std
::
unordered_map
<
VarHandleBase
*
,
std
::
atomic
<
bool
>>
&
pending_vars
,
OpHandle
*
op
)
const
{
OpHandle
*
op
)
const
{
std
::
vector
<
std
::
atomic
<
bool
>
*>
*
ready_buffer
=
std
::
vector
<
std
::
atomic
<
bool
>
*>
*
ready_buffer
=
...
@@ -798,10 +801,10 @@ void ParallelExecutor::RunOp(
...
@@ -798,10 +801,10 @@ void ParallelExecutor::RunOp(
ready_buffer
->
emplace_back
(
&
pending_vars
[
var
]);
ready_buffer
->
emplace_back
(
&
pending_vars
[
var
]);
}
}
auto
op_run
=
[
ready_buffer
,
op
,
this
]
{
auto
op_run
=
[
ready_buffer
,
op
,
this
,
use_event
]
{
try
{
try
{
VLOG
(
10
)
<<
op
->
DebugString
();
VLOG
(
10
)
<<
op
->
DebugString
();
op
->
Run
();
op
->
Run
(
use_event
);
for
(
auto
*
ready
:
*
ready_buffer
)
{
for
(
auto
*
ready
:
*
ready_buffer
)
{
ready
->
store
(
true
,
std
::
memory_order_release
);
ready
->
store
(
true
,
std
::
memory_order_release
);
}
}
...
...
paddle/fluid/framework/parallel_executor.h
浏览文件 @
7643c2cb
...
@@ -62,6 +62,7 @@ class ParallelExecutor {
...
@@ -62,6 +62,7 @@ class ParallelExecutor {
void
BuildNCCLCommunicator
()
const
;
void
BuildNCCLCommunicator
()
const
;
void
RunOp
(
void
RunOp
(
bool
use_event
,
std
::
unordered_map
<
VarHandleBase
*
,
std
::
atomic
<
bool
>>&
pending_vars
,
std
::
unordered_map
<
VarHandleBase
*
,
std
::
atomic
<
bool
>>&
pending_vars
,
OpHandle
*
op
)
const
;
OpHandle
*
op
)
const
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录