Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
8cdd5564
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
8cdd5564
编写于
11月 11, 2021
作者:
W
WangXi
提交者:
GitHub
11月 11, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[fleet_executor] interceptor send message through message_bus (#37106)
上级
f5e7b02a
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
36 addition
and
25 deletion
+36
-25
paddle/fluid/distributed/fleet_executor/CMakeLists.txt
paddle/fluid/distributed/fleet_executor/CMakeLists.txt
+1
-0
paddle/fluid/distributed/fleet_executor/fleet_executor.cc
paddle/fluid/distributed/fleet_executor/fleet_executor.cc
+0
-5
paddle/fluid/distributed/fleet_executor/fleet_executor.h
paddle/fluid/distributed/fleet_executor/fleet_executor.h
+0
-2
paddle/fluid/distributed/fleet_executor/interceptor.cc
paddle/fluid/distributed/fleet_executor/interceptor.cc
+3
-4
paddle/fluid/distributed/fleet_executor/interceptor.h
paddle/fluid/distributed/fleet_executor/interceptor.h
+3
-3
paddle/fluid/distributed/fleet_executor/message_bus.cc
paddle/fluid/distributed/fleet_executor/message_bus.cc
+14
-6
paddle/fluid/distributed/fleet_executor/message_bus.h
paddle/fluid/distributed/fleet_executor/message_bus.h
+15
-5
未找到文件。
paddle/fluid/distributed/fleet_executor/CMakeLists.txt
浏览文件 @
8cdd5564
...
...
@@ -16,6 +16,7 @@ cc_library(fleet_executor SRCS fleet_executor.cc carrier.cc
if
(
WITH_DISTRIBUTE
)
set
(
DISTRIBUTE_COMPILE_FLAGS
"-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor"
)
set_source_files_properties
(
interceptor.cc PROPERTIES COMPILE_FLAGS
${
DISTRIBUTE_COMPILE_FLAGS
}
)
set_source_files_properties
(
message_bus.h PROPERTIES COMPILE_FLAGS
${
DISTRIBUTE_COMPILE_FLAGS
}
)
set_source_files_properties
(
message_bus.cc PROPERTIES COMPILE_FLAGS
${
DISTRIBUTE_COMPILE_FLAGS
}
)
set_source_files_properties
(
carrier.cc PROPERTIES COMPILE_FLAGS
${
DISTRIBUTE_COMPILE_FLAGS
}
)
...
...
paddle/fluid/distributed/fleet_executor/fleet_executor.cc
浏览文件 @
8cdd5564
...
...
@@ -46,10 +46,5 @@ std::shared_ptr<Carrier> FleetExecutor::GetCarrier() {
return
nullptr
;
}
std
::
shared_ptr
<
MessageBus
>
FleetExecutor
::
GetMessageBus
()
{
// get message bus
return
nullptr
;
}
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/fleet_executor/fleet_executor.h
浏览文件 @
8cdd5564
...
...
@@ -37,14 +37,12 @@ class FleetExecutor final {
void
Run
();
void
Release
();
static
std
::
shared_ptr
<
Carrier
>
GetCarrier
();
static
std
::
shared_ptr
<
MessageBus
>
GetMessageBus
();
private:
DISABLE_COPY_AND_ASSIGN
(
FleetExecutor
);
FleetExecutorDesc
exe_desc_
;
std
::
unique_ptr
<
RuntimeGraph
>
runtime_graph_
;
static
std
::
shared_ptr
<
Carrier
>
global_carrier_
;
static
std
::
shared_ptr
<
MessageBus
>
global_message_bus_
;
};
}
// namespace distributed
...
...
paddle/fluid/distributed/fleet_executor/interceptor.cc
浏览文件 @
8cdd5564
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/distributed/fleet_executor/interceptor.h"
#include "paddle/fluid/distributed/fleet_executor/message_bus.h"
namespace
paddle
{
namespace
distributed
{
...
...
@@ -27,9 +28,7 @@ Interceptor::Interceptor(int64_t interceptor_id, TaskNode* node)
Interceptor
::~
Interceptor
()
{
interceptor_thread_
.
join
();
}
void
Interceptor
::
RegisterInterceptorHandle
(
InterceptorHandle
handle
)
{
handle_
=
handle
;
}
void
Interceptor
::
RegisterMsgHandle
(
MsgHandle
handle
)
{
handle_
=
handle
;
}
void
Interceptor
::
Handle
(
const
InterceptorMessage
&
msg
)
{
if
(
handle_
)
{
...
...
@@ -61,7 +60,7 @@ void Interceptor::Send(int64_t dst_id,
std
::
unique_ptr
<
InterceptorMessage
>
msg
)
{
msg
->
set_src_id
(
interceptor_id_
);
msg
->
set_dst_id
(
dst_id
);
// send interceptor msg
MessageBus
::
Instance
().
Send
(
*
msg
.
get
());
}
void
Interceptor
::
PoolTheMailbox
()
{
...
...
paddle/fluid/distributed/fleet_executor/interceptor.h
浏览文件 @
8cdd5564
...
...
@@ -34,7 +34,7 @@ class TaskNode;
class
Interceptor
{
public:
using
Interceptor
Handle
=
std
::
function
<
void
(
const
InterceptorMessage
&
)
>
;
using
Msg
Handle
=
std
::
function
<
void
(
const
InterceptorMessage
&
)
>
;
public:
Interceptor
()
=
delete
;
...
...
@@ -44,7 +44,7 @@ class Interceptor {
virtual
~
Interceptor
();
// register interceptor handle
void
Register
InterceptorHandle
(
Interceptor
Handle
handle
);
void
Register
MsgHandle
(
Msg
Handle
handle
);
void
Handle
(
const
InterceptorMessage
&
msg
);
...
...
@@ -77,7 +77,7 @@ class Interceptor {
TaskNode
*
node_
;
// interceptor handle which process message
Interceptor
Handle
handle_
{
nullptr
};
Msg
Handle
handle_
{
nullptr
};
// mutex to control read/write conflict for remote mailbox
std
::
mutex
remote_mailbox_mutex_
;
...
...
paddle/fluid/distributed/fleet_executor/message_bus.cc
浏览文件 @
8cdd5564
...
...
@@ -21,20 +21,28 @@
namespace
paddle
{
namespace
distributed
{
MessageBus
::
MessageBus
(
void
MessageBus
::
Init
(
const
std
::
unordered_map
<
int64_t
,
int64_t
>&
interceptor_id_to_rank
,
const
std
::
unordered_map
<
int64_t
,
std
::
string
>&
rank_to_addr
,
const
std
::
string
&
addr
)
:
interceptor_id_to_rank_
(
interceptor_id_to_rank
),
rank_to_addr_
(
rank_to_addr
),
addr_
(
addr
)
{
const
std
::
string
&
addr
)
{
PADDLE_ENFORCE_EQ
(
is_init_
,
false
,
platform
::
errors
::
AlreadyExists
(
"MessageBus is already init."
));
is_init_
=
true
;
interceptor_id_to_rank_
=
interceptor_id_to_rank
;
rank_to_addr_
=
rank_to_addr
;
addr_
=
addr
;
listen_port_thread_
=
std
::
thread
([
this
]()
{
VLOG
(
3
)
<<
"Start listen_port_thread_ for message bus"
;
ListenPort
();
});
std
::
call_once
(
once_flag_
,
[]()
{
std
::
atexit
([]()
{
MessageBus
::
Instance
().
Release
();
});
});
}
MessageBus
::~
MessageBus
()
{
void
MessageBus
::
Release
()
{
#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE) && \
!defined(PADDLE_WITH_ASCEND_CL)
server_
.
Stop
(
1000
);
...
...
paddle/fluid/distributed/fleet_executor/message_bus.h
浏览文件 @
8cdd5564
...
...
@@ -14,6 +14,7 @@
#pragma once
#include <mutex>
#include <string>
#include <thread>
#include <unordered_map>
...
...
@@ -35,15 +36,19 @@ namespace distributed {
class
Carrier
;
// A singleton MessageBus
class
MessageBus
final
{
public:
MessageBus
()
=
delete
;
static
MessageBus
&
Instance
()
{
static
MessageBus
msg_bus
;
return
msg_bus
;
}
MessageBus
(
const
std
::
unordered_map
<
int64_t
,
int64_t
>&
interceptor_id_to_rank
,
const
std
::
unordered_map
<
int64_t
,
std
::
string
>&
rank_to_addr
,
const
std
::
string
&
addr
);
void
Init
(
const
std
::
unordered_map
<
int64_t
,
int64_t
>&
interceptor_id_to_rank
,
const
std
::
unordered_map
<
int64_t
,
std
::
string
>&
rank_to_addr
,
const
std
::
string
&
addr
);
~
MessageBus
();
void
Release
();
// called by Interceptor, send InterceptorMessage to dst
bool
Send
(
const
InterceptorMessage
&
interceptor_message
);
...
...
@@ -51,6 +56,8 @@ class MessageBus final {
DISABLE_COPY_AND_ASSIGN
(
MessageBus
);
private:
MessageBus
()
=
default
;
// function keep listen the port and handle the message
void
ListenPort
();
...
...
@@ -66,6 +73,9 @@ class MessageBus final {
// send the message intra rank (dst is the same rank with src)
bool
SendIntraRank
(
const
InterceptorMessage
&
interceptor_message
);
bool
is_init_
{
false
};
std
::
once_flag
once_flag_
;
// handed by above layer, save the info mapping interceptor id to rank id
std
::
unordered_map
<
int64_t
,
int64_t
>
interceptor_id_to_rank_
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录