Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
74ca89ef
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
74ca89ef
编写于
11月 29, 2021
作者:
Y
Yuang Liu
提交者:
GitHub
11月 29, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[fleet_executor] Hold the carrier while running for one micro step. (#37605)
上级
27a5f52b
变更
10
隐藏空白更改
内联
并排
Showing
10 changed files
with
42 additions
and
20 deletions
+42
-20
paddle/fluid/distributed/fleet_executor/carrier.cc
paddle/fluid/distributed/fleet_executor/carrier.cc
+4
-0
paddle/fluid/distributed/fleet_executor/carrier.h
paddle/fluid/distributed/fleet_executor/carrier.h
+6
-0
paddle/fluid/distributed/fleet_executor/compute_interceptor.cc
...e/fluid/distributed/fleet_executor/compute_interceptor.cc
+6
-5
paddle/fluid/distributed/fleet_executor/compute_interceptor.h
...le/fluid/distributed/fleet_executor/compute_interceptor.h
+0
-1
paddle/fluid/distributed/fleet_executor/interceptor.cc
paddle/fluid/distributed/fleet_executor/interceptor.cc
+12
-8
paddle/fluid/distributed/fleet_executor/interceptor.h
paddle/fluid/distributed/fleet_executor/interceptor.h
+1
-2
paddle/fluid/distributed/fleet_executor/test/compute_interceptor_test.cc
...stributed/fleet_executor/test/compute_interceptor_test.cc
+4
-0
paddle/fluid/distributed/fleet_executor/test/interceptor_ping_pong_test.cc
...ributed/fleet_executor/test/interceptor_ping_pong_test.cc
+4
-0
paddle/fluid/distributed/fleet_executor/test/interceptor_ping_pong_with_brpc_test.cc
...eet_executor/test/interceptor_ping_pong_with_brpc_test.cc
+4
-0
python/paddle/fluid/tests/unittests/test_fleet_executor.py
python/paddle/fluid/tests/unittests/test_fleet_executor.py
+1
-4
未找到文件。
paddle/fluid/distributed/fleet_executor/carrier.cc
浏览文件 @
74ca89ef
...
...
@@ -96,8 +96,12 @@ void Carrier::Start() {
"Message bus has not been initialized."
));
message_bus_instance
.
Send
(
tmp_msg
);
}
std
::
unique_lock
<
std
::
mutex
>
lock
(
running_mutex_
);
cond_var_
.
wait
(
lock
);
}
std
::
condition_variable
&
Carrier
::
GetCondVar
()
{
return
cond_var_
;
}
bool
Carrier
::
IsInit
()
const
{
return
is_init_
;
}
Interceptor
*
Carrier
::
SetInterceptor
(
int64_t
interceptor_id
,
...
...
paddle/fluid/distributed/fleet_executor/carrier.h
浏览文件 @
74ca89ef
...
...
@@ -14,6 +14,7 @@
#pragma once
#include <condition_variable>
#include <memory>
#include <mutex>
#include <string>
...
...
@@ -57,6 +58,8 @@ class Carrier final {
void
SetCreatingFlag
(
bool
flag
);
std
::
condition_variable
&
GetCondVar
();
void
Start
();
bool
IsInit
()
const
;
...
...
@@ -83,6 +86,9 @@ class Carrier final {
bool
creating_interceptors_
{
true
};
std
::
mutex
creating_flag_mutex_
;
bool
is_init_
{
false
};
std
::
mutex
running_mutex_
;
std
::
condition_variable
cond_var_
;
};
}
// namespace distributed
...
...
paddle/fluid/distributed/fleet_executor/compute_interceptor.cc
浏览文件 @
74ca89ef
...
...
@@ -221,12 +221,11 @@ void ComputeInterceptor::TryStop() {
Send
(
down_id
,
stop
);
}
stop_
=
true
;
}
void
ComputeInterceptor
::
HandleStop
(
const
InterceptorMessage
&
msg
)
{
ReceivedStop
(
msg
.
src_id
());
TryStop
();
if
(
out_buffs_
.
size
()
==
0
)
{
// TODO(fleet executor dev) need a better place to notify
StopCarrier
();
}
}
void
ComputeInterceptor
::
Compute
(
const
InterceptorMessage
&
msg
)
{
...
...
@@ -236,6 +235,8 @@ void ComputeInterceptor::Compute(const InterceptorMessage& msg) {
}
else
if
(
msg
.
message_type
()
==
DATE_IS_USELESS
)
{
DecreaseBuff
(
msg
.
src_id
());
Run
();
}
else
if
(
msg
.
message_type
()
==
STOP
)
{
ReceivedStop
(
msg
.
src_id
());
}
TryStop
();
...
...
paddle/fluid/distributed/fleet_executor/compute_interceptor.h
浏览文件 @
74ca89ef
...
...
@@ -39,7 +39,6 @@ class ComputeInterceptor : public Interceptor {
void
Run
();
void
Compute
(
const
InterceptorMessage
&
msg
);
void
HandleStop
(
const
InterceptorMessage
&
msg
)
override
;
void
ReceivedStop
(
int64_t
up_id
);
void
TryStop
();
...
...
paddle/fluid/distributed/fleet_executor/interceptor.cc
浏览文件 @
74ca89ef
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/distributed/fleet_executor/interceptor.h"
#include "paddle/fluid/distributed/fleet_executor/carrier.h"
#include "paddle/fluid/distributed/fleet_executor/message_bus.h"
#include "paddle/fluid/distributed/fleet_executor/task_node.h"
...
...
@@ -50,10 +51,20 @@ void Interceptor::Handle(const InterceptorMessage& msg) {
InterceptorMessage
msg
;
msg
.
set_message_type
(
STOP
);
Send
(
interceptor_id_
,
msg
);
}
else
if
(
msg
.
message_type
()
==
STOP
)
{
stop_
=
true
;
StopCarrier
();
}
}
}
void
Interceptor
::
StopCarrier
()
{
Carrier
&
carrier_instance
=
Carrier
::
Instance
();
std
::
condition_variable
&
cond_var
=
carrier_instance
.
GetCondVar
();
// probably double notify, but ok for ut
cond_var
.
notify_all
();
}
std
::
condition_variable
&
Interceptor
::
GetCondVar
()
{
// get the conditional var
return
cond_var_
;
...
...
@@ -80,9 +91,6 @@ bool Interceptor::Send(int64_t dst_id, InterceptorMessage& msg) {
return
MessageBus
::
Instance
().
Send
(
msg
);
}
// maybe need a better method for interceptor base
void
Interceptor
::
HandleStop
(
const
InterceptorMessage
&
msg
)
{
stop_
=
true
;
}
void
Interceptor
::
PoolTheMailbox
()
{
// pool the local mailbox, parse the Message
for
(;;)
{
...
...
@@ -101,11 +109,7 @@ void Interceptor::PoolTheMailbox() {
<<
" from interceptor "
<<
interceptor_message
.
src_id
()
<<
" with message: "
<<
message_type
<<
"."
;
if
(
message_type
==
STOP
)
{
HandleStop
(
interceptor_message
);
}
else
{
Handle
(
interceptor_message
);
}
Handle
(
interceptor_message
);
if
(
stop_
)
{
// break the pooling thread
...
...
paddle/fluid/distributed/fleet_executor/interceptor.h
浏览文件 @
74ca89ef
...
...
@@ -52,8 +52,6 @@ class Interceptor {
// register interceptor handle
void
RegisterMsgHandle
(
MsgHandle
handle
);
virtual
void
HandleStop
(
const
InterceptorMessage
&
msg
);
void
Handle
(
const
InterceptorMessage
&
msg
);
// return the interceptor id
...
...
@@ -89,6 +87,7 @@ class Interceptor {
// for stop
bool
stop_
{
false
};
void
StopCarrier
();
// for runtime
platform
::
Place
place_
;
...
...
paddle/fluid/distributed/fleet_executor/test/compute_interceptor_test.cc
浏览文件 @
74ca89ef
...
...
@@ -33,6 +33,10 @@ class StartInterceptor : public Interceptor {
}
void
NOP
(
const
InterceptorMessage
&
msg
)
{
if
(
msg
.
message_type
()
==
STOP
)
{
stop_
=
true
;
return
;
}
std
::
cout
<<
GetInterceptorId
()
<<
" recv msg from "
<<
msg
.
src_id
()
<<
std
::
endl
;
++
count_
;
...
...
paddle/fluid/distributed/fleet_executor/test/interceptor_ping_pong_test.cc
浏览文件 @
74ca89ef
...
...
@@ -32,6 +32,10 @@ class PingPongInterceptor : public Interceptor {
}
void
PingPong
(
const
InterceptorMessage
&
msg
)
{
if
(
msg
.
message_type
()
==
STOP
)
{
stop_
=
true
;
return
;
}
std
::
cout
<<
GetInterceptorId
()
<<
" recv msg, count="
<<
count_
<<
std
::
endl
;
++
count_
;
...
...
paddle/fluid/distributed/fleet_executor/test/interceptor_ping_pong_with_brpc_test.cc
浏览文件 @
74ca89ef
...
...
@@ -34,6 +34,10 @@ class PingPongInterceptor : public Interceptor {
}
void
PingPong
(
const
InterceptorMessage
&
msg
)
{
if
(
msg
.
message_type
()
==
STOP
)
{
stop_
=
true
;
return
;
}
std
::
cout
<<
GetInterceptorId
()
<<
" recv msg, count="
<<
count_
<<
std
::
endl
;
++
count_
;
...
...
python/paddle/fluid/tests/unittests/test_fleet_executor.py
浏览文件 @
74ca89ef
...
...
@@ -32,11 +32,8 @@ class TestFleetExecutor(unittest.TestCase):
exe
.
run
(
empty_program
,
feed
=
{
'x'
:
[
1
]})
def
test_executor_on_single_device
(
self
):
places
=
[
fluid
.
CPUPlace
()]
if
fluid
.
is_compiled_with_cuda
():
places
.
append
(
fluid
.
CUDAPlace
(
0
))
for
place
in
places
:
self
.
run_fleet_executor
(
place
)
self
.
run_fleet_executor
(
fluid
.
CUDAPlace
(
0
))
if
__name__
==
"__main__"
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录