Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
6bf208c3
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
6bf208c3
编写于
11月 12, 2021
作者:
Y
Yuang Liu
提交者:
GitHub
11月 12, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[fleet_executor] Parse rank_to_ip map on cpp side and start message bus. (#37126)
上级
778a3630
变更
5
显示空白变更内容
内联
并排
Showing
5 changed files
with
42 additions
and
0 deletions
+42
-0
paddle/fluid/distributed/fleet_executor/CMakeLists.txt
paddle/fluid/distributed/fleet_executor/CMakeLists.txt
+1
-0
paddle/fluid/distributed/fleet_executor/fleet_executor.cc
paddle/fluid/distributed/fleet_executor/fleet_executor.cc
+35
-0
paddle/fluid/distributed/fleet_executor/fleet_executor.h
paddle/fluid/distributed/fleet_executor/fleet_executor.h
+1
-0
paddle/fluid/distributed/fleet_executor/message_bus.cc
paddle/fluid/distributed/fleet_executor/message_bus.cc
+3
-0
paddle/fluid/distributed/fleet_executor/message_bus.h
paddle/fluid/distributed/fleet_executor/message_bus.h
+2
-0
未找到文件。
paddle/fluid/distributed/fleet_executor/CMakeLists.txt
浏览文件 @
6bf208c3
...
...
@@ -19,6 +19,7 @@ if(WITH_DISTRIBUTE)
# Attach DISTRIBUTE_COMPILE_FLAGS as the COMPILE_FLAGS property of each listed
# file. A single call with several files sets the same property on all of them.
set_source_files_properties(
  interceptor.cc
  message_bus.h
  message_bus.cc
  fleet_executor.cc
  carrier.cc
  interceptor_message_service.h
  interceptor_message_service.cc
  PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
...
...
paddle/fluid/distributed/fleet_executor/fleet_executor.cc
浏览文件 @
6bf208c3
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/distributed/fleet_executor/fleet_executor.h"
#include "paddle/fluid/distributed/fleet_executor/message_bus.h"
#include "paddle/fluid/distributed/fleet_executor/runtime_graph.h"
#include "paddle/fluid/framework/program_desc.h"
...
...
@@ -31,6 +32,40 @@ FleetExecutor::~FleetExecutor() {
/// Initializes the executor.
///
/// @param program_desc Program description supplied by the caller. The body
///        shown here does not read it; it only triggers message bus setup.
void FleetExecutor::Init(const paddle::framework::ProgramDesc& program_desc) {
  // Compile and Initialize
  this->InitMessageBus();
}
/// Builds the routing tables for the message bus from exe_desc_ and
/// initializes the MessageBus singleton if it is not initialized yet.
///
/// Each entry of exe_desc_.cluster_info() contributes one rank -> ip_port
/// mapping. The ip_port of the entry whose rank equals exe_desc_.cur_rank()
/// is used as this process's own address.
///
/// Fails through PADDLE_ENFORCE_NE with a NotFound error when no entry for
/// the current rank yields a non-empty ip_port.
void FleetExecutor::InitMessageBus() {
  // Human-readable dump of the table; only emitted at VLOG level 5.
  std::stringstream ss;
  ss << "\nThe DNS table of the message bus is:\n";

  const int64_t cur_rank = exe_desc_.cur_rank();
  std::unordered_map<int64_t, int64_t> interceptor_id_to_rank;
  std::unordered_map<int64_t, std::string> rank_to_addr;
  std::string addr;

  for (const auto& rank_info : exe_desc_.cluster_info()) {
    const int64_t rank = rank_info.rank();
    // Bind by reference so the string is copied only where a copy is stored
    // (into the map, and into addr for the current rank).
    const auto& ip_port = rank_info.ip_port();
    ss << rank << "\t->\t" << ip_port << "\n";
    // TODO(Yuang): replace the first 'rank' with real interceptor id
    // emplace keeps the first mapping for a repeated rank, same as insert.
    interceptor_id_to_rank.emplace(rank, rank);
    rank_to_addr.emplace(rank, ip_port);
    if (rank == cur_rank) {
      addr = ip_port;
    }
  }

  PADDLE_ENFORCE_NE(
      addr, "",
      platform::errors::NotFound(
          "Current rank is %s, which ip_port cannot be found in the config.",
          cur_rank));
  VLOG(3) << "Current rank is " << cur_rank << " and the ip_port is " << addr
          << ".";
  VLOG(3) << "The number of ranks are " << interceptor_id_to_rank.size()
          << ".";
  VLOG(5) << ss.str();

  // The bus is a process-wide singleton; initialize it only once.
  MessageBus& message_bus_instance = MessageBus::Instance();
  if (!message_bus_instance.IsInit()) {
    message_bus_instance.Init(interceptor_id_to_rank, rank_to_addr, addr);
  }
}
void
FleetExecutor
::
Run
()
{
...
...
paddle/fluid/distributed/fleet_executor/fleet_executor.h
浏览文件 @
6bf208c3
...
...
@@ -42,6 +42,7 @@ class FleetExecutor final {
DISABLE_COPY_AND_ASSIGN
(
FleetExecutor
);
FleetExecutorDesc
exe_desc_
;
std
::
unique_ptr
<
RuntimeGraph
>
runtime_graph_
;
void
InitMessageBus
();
static
std
::
shared_ptr
<
Carrier
>
global_carrier_
;
};
...
...
paddle/fluid/distributed/fleet_executor/message_bus.cc
浏览文件 @
6bf208c3
...
...
@@ -42,7 +42,10 @@ void MessageBus::Init(
});
}
bool
MessageBus
::
IsInit
()
const
{
return
is_init_
;
}
void
MessageBus
::
Release
()
{
VLOG
(
3
)
<<
"Message bus releases resource."
;
#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE) && \
!defined(PADDLE_WITH_ASCEND_CL)
server_
.
Stop
(
1000
);
...
...
paddle/fluid/distributed/fleet_executor/message_bus.h
浏览文件 @
6bf208c3
...
...
@@ -48,6 +48,8 @@ class MessageBus final {
const
std
::
unordered_map
<
int64_t
,
std
::
string
>&
rank_to_addr
,
const
std
::
string
&
addr
);
bool
IsInit
()
const
;
void
Release
();
// called by Interceptor, send InterceptorMessage to dst
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录