Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
be4eaba0
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
be4eaba0
编写于
11月 04, 2021
作者:
Y
Yuang Liu
提交者:
GitHub
11月 04, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[fleet_executor] Framework for message and manager part. (#36966)
上级
bf9374c1
变更
14
隐藏空白更改
内联
并排
Showing
14 changed file
with
553 addition
and
3 deletion
+553
-3
cmake/third_party.cmake
cmake/third_party.cmake
+1
-1
paddle/fluid/distributed/fleet_executor/CMakeLists.txt
paddle/fluid/distributed/fleet_executor/CMakeLists.txt
+20
-2
paddle/fluid/distributed/fleet_executor/carrier.cc
paddle/fluid/distributed/fleet_executor/carrier.cc
+43
-0
paddle/fluid/distributed/fleet_executor/carrier.h
paddle/fluid/distributed/fleet_executor/carrier.h
+60
-0
paddle/fluid/distributed/fleet_executor/fleet_executor.cc
paddle/fluid/distributed/fleet_executor/fleet_executor.cc
+10
-0
paddle/fluid/distributed/fleet_executor/fleet_executor.h
paddle/fluid/distributed/fleet_executor/fleet_executor.h
+7
-0
paddle/fluid/distributed/fleet_executor/interceptor.cc
paddle/fluid/distributed/fleet_executor/interceptor.cc
+46
-0
paddle/fluid/distributed/fleet_executor/interceptor.h
paddle/fluid/distributed/fleet_executor/interceptor.h
+83
-0
paddle/fluid/distributed/fleet_executor/interceptor_message.proto
...luid/distributed/fleet_executor/interceptor_message.proto
+40
-0
paddle/fluid/distributed/fleet_executor/interceptor_message_service.cc
...distributed/fleet_executor/interceptor_message_service.cc
+31
-0
paddle/fluid/distributed/fleet_executor/interceptor_message_service.h
.../distributed/fleet_executor/interceptor_message_service.h
+37
-0
paddle/fluid/distributed/fleet_executor/message_bus.cc
paddle/fluid/distributed/fleet_executor/message_bus.cc
+54
-0
paddle/fluid/distributed/fleet_executor/message_bus.h
paddle/fluid/distributed/fleet_executor/message_bus.h
+94
-0
paddle/fluid/distributed/fleet_executor/task_node.h
paddle/fluid/distributed/fleet_executor/task_node.h
+27
-0
未找到文件。
cmake/third_party.cmake
浏览文件 @
be4eaba0
...
@@ -331,7 +331,7 @@ if (WITH_PSCORE)
...
@@ -331,7 +331,7 @@ if (WITH_PSCORE)
include
(
external/libmct
)
# download, build, install libmct
include
(
external/libmct
)
# download, build, install libmct
list
(
APPEND third_party_deps extern_libmct
)
list
(
APPEND third_party_deps extern_libmct
)
if
(
WITH_HETERPS
)
if
(
WITH_HETERPS
)
include
(
external/rocksdb
)
# download, build, install rocksdb
include
(
external/rocksdb
)
# download, build, install rocksdb
list
(
APPEND third_party_deps extern_rocksdb
)
list
(
APPEND third_party_deps extern_rocksdb
)
...
...
paddle/fluid/distributed/fleet_executor/CMakeLists.txt
浏览文件 @
be4eaba0
proto_library
(
fleet_executor_desc_proto SRCS fleet_executor_desc.proto
)
proto_library
(
fleet_executor_desc_proto SRCS fleet_executor_desc.proto
)
cc_library
(
fleet_executor SRCS fleet_executor.cc DEPS fleet_executor_desc_proto
)
if
(
WITH_PYTHON
)
if
(
WITH_PYTHON
)
py_proto_compile
(
fleet_executor_desc_py_proto SRCS fleet_executor_desc.proto
)
py_proto_compile
(
fleet_executor_desc_py_proto SRCS fleet_executor_desc.proto
)
endif
()
endif
()
# Protobuf definitions for the messages exchanged between interceptors.
proto_library(interceptor_message_proto SRCS interceptor_message.proto)

# brpc is linked only for distributed builds that are not Ascend builds.
if(WITH_DISTRIBUTE AND NOT (WITH_ASCEND OR WITH_ASCEND_CL))
  set(BRPC_DEPS brpc)
else()
  set(BRPC_DEPS "")
endif()

cc_library(fleet_executor
  SRCS fleet_executor.cc
       carrier.cc
       interceptor.cc
       interceptor_message_service.cc
       message_bus.cc
  DEPS fleet_executor_desc_proto
       interceptor_message_proto
       ${BRPC_DEPS})

if(WITH_DISTRIBUTE)
  # Silence the non-virtual-destructor diagnostics for the sources listed below.
  # NOTE(review): presumably these come from the brpc / generated protobuf
  # headers those sources include -- confirm before removing.
  set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
  foreach(fleet_executor_src
          message_bus.h
          message_bus.cc
          carrier.cc
          interceptor_message_service.h
          interceptor_message_service.cc)
    set_source_files_properties(${fleet_executor_src} PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
  endforeach()
endif()
paddle/fluid/distributed/fleet_executor/carrier.cc
0 → 100644
浏览文件 @
be4eaba0
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/distributed/fleet_executor/carrier.h"
#include "paddle/fluid/distributed/fleet_executor/interceptor.h"
#include "paddle/fluid/distributed/fleet_executor/interceptor_message_service.h"
#include "paddle/fluid/distributed/fleet_executor/task_node.h"
namespace
paddle
{
namespace
distributed
{
// Builds a Carrier from a mapping of interceptor id -> TaskNode.
//
// The mapping is copied into interceptor_id_to_node_; previously the argument
// was discarded and the member stayed empty. Only the raw TaskNode pointers
// are stored; nothing in this file deletes them.
// NOTE(review): no interceptor is created here -- CreateInterceptors() is
// still a placeholder and is intentionally not called.
Carrier::Carrier(
    const std::unordered_map<int64_t, TaskNode*>& interceptor_id_to_node)
    : interceptor_id_to_node_(interceptor_id_to_node) {}
// Nothing to tear down explicitly yet. The destructor stays defined in this
// translation unit (rather than in the header, where Interceptor is only
// forward-declared) so the unique_ptr<Interceptor> members are destroyed here.
Carrier::~Carrier() = default;
// Intended to hand `interceptor_message` to the interceptor it is addressed
// to. Placeholder for now: the message is not inspected and success is always
// reported.
bool Carrier::EnqueueInterceptorMessage(
    const InterceptorMessage& interceptor_message) {
  const bool enqueued = true;
  return enqueued;
}
// Intended to create one Interceptor per entry known to this Carrier.
// Placeholder for now: does nothing.
void Carrier::CreateInterceptors() {}
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/fleet_executor/carrier.h
0 → 100644
浏览文件 @
be4eaba0
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include "paddle/fluid/distributed/fleet_executor/interceptor_message.pb.h"
#include "paddle/fluid/platform/macros.h"
namespace
paddle
{
namespace
distributed
{
class
Interceptor
;
class
TaskNode
;
class
InterceptorMessageServiceImpl
;
// Carrier owns the interceptors and forwards incoming InterceptorMessage
// objects to them.
class Carrier final {
 public:
  Carrier() = delete;

  // `interceptor_id_to_node` maps an interceptor id to the TaskNode that
  // interceptor is responsible for. Marked explicit so an unordered_map is
  // never converted to a Carrier implicitly.
  explicit Carrier(
      const std::unordered_map<int64_t, TaskNode*>& interceptor_id_to_node);

  ~Carrier();

  // Enqueue a message to the interceptor it is addressed to.
  bool EnqueueInterceptorMessage(const InterceptorMessage& interceptor_message);

  DISABLE_COPY_AND_ASSIGN(Carrier);

 private:
  // Create each Interceptor.
  void CreateInterceptors();

  // Look up an interceptor by its id.
  // NOTE(review): declared only -- no definition is visible in carrier.cc.
  Interceptor* GetInterceptor(int64_t interceptor_id);

  // Interceptor logical id -> the node info it works on (non-owning pointers).
  std::unordered_map<int64_t, TaskNode*> interceptor_id_to_node_;

  // Interceptor logical id -> the actual interceptor (owned here).
  std::unordered_map<int64_t, std::unique_ptr<Interceptor>>
      interceptor_idx_to_interceptor_;
};
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/fleet_executor/fleet_executor.cc
浏览文件 @
be4eaba0
...
@@ -39,5 +39,15 @@ void FleetExecutor::Release() {
...
@@ -39,5 +39,15 @@ void FleetExecutor::Release() {
// Release
// Release
}
}
std
::
shared_ptr
<
Carrier
>
FleetExecutor
::
GetCarrier
()
{
// get carrier
return
nullptr
;
}
std
::
shared_ptr
<
MessageBus
>
FleetExecutor
::
GetMessageBus
()
{
// get message bus
return
nullptr
;
}
}
// namespace distributed
}
// namespace distributed
}
// namespace paddle
}
// namespace paddle
paddle/fluid/distributed/fleet_executor/fleet_executor.h
浏览文件 @
be4eaba0
...
@@ -14,6 +14,7 @@
...
@@ -14,6 +14,7 @@
#pragma once
#pragma once
#include <memory>
#include <memory>
#include "paddle/fluid/distributed/fleet_executor/fleet_executor_desc.pb.h"
#include "paddle/fluid/distributed/fleet_executor/fleet_executor_desc.pb.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/macros.h"
...
@@ -24,6 +25,8 @@ class ProgramDesc;
...
@@ -24,6 +25,8 @@ class ProgramDesc;
namespace
distributed
{
namespace
distributed
{
class
RuntimeGraph
;
class
RuntimeGraph
;
class
Carrier
;
class
MessageBus
;
class
FleetExecutor
final
{
class
FleetExecutor
final
{
public:
public:
...
@@ -33,11 +36,15 @@ class FleetExecutor final {
...
@@ -33,11 +36,15 @@ class FleetExecutor final {
void
Init
(
const
paddle
::
framework
::
ProgramDesc
&
program_desc
);
void
Init
(
const
paddle
::
framework
::
ProgramDesc
&
program_desc
);
void
Run
();
void
Run
();
void
Release
();
void
Release
();
static
std
::
shared_ptr
<
Carrier
>
GetCarrier
();
static
std
::
shared_ptr
<
MessageBus
>
GetMessageBus
();
private:
private:
DISABLE_COPY_AND_ASSIGN
(
FleetExecutor
);
DISABLE_COPY_AND_ASSIGN
(
FleetExecutor
);
FleetExecutorDesc
exe_desc_
;
FleetExecutorDesc
exe_desc_
;
std
::
unique_ptr
<
RuntimeGraph
>
runtime_graph_
;
std
::
unique_ptr
<
RuntimeGraph
>
runtime_graph_
;
static
std
::
shared_ptr
<
Carrier
>
global_carrier_
;
static
std
::
shared_ptr
<
MessageBus
>
global_message_bus_
;
};
};
}
// namespace distributed
}
// namespace distributed
...
...
paddle/fluid/distributed/fleet_executor/interceptor.cc
0 → 100644
浏览文件 @
be4eaba0
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/distributed/fleet_executor/interceptor.h"
namespace
paddle
{
namespace
distributed
{
Interceptor
::
Interceptor
(
int64_t
interceptor_id_
,
TaskNode
*
node
)
{
// init
}
int64_t
Interceptor
::
GetInterceptorId
()
const
{
// return the interceptor id
return
0
;
}
// Called by Carrier to put an InterceptorMessage into the remote mailbox.
// Placeholder for now: the message is not stored and success is always
// reported.
bool Interceptor::EnqueueRemoteInterceptorMessage(
    const InterceptorMessage& interceptor_message) {
  const bool accepted = true;
  return accepted;
}
// Intended to poll the local mailbox and parse each message found there.
// Placeholder for now: does nothing.
void Interceptor::PoolTheMailbox() {}
// Intended to move every message from the remote mailbox into the local
// mailbox, returning true if the remote mailbox was not empty and false
// otherwise. Placeholder for now: nothing is moved and true is always
// returned.
bool Interceptor::FetchRemoteMailbox() {
  const bool had_messages = true;
  return had_messages;
}
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/fleet_executor/interceptor.h
0 → 100644
浏览文件 @
be4eaba0
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <condition_variable>
#include <map>
#include <memory>
#include <queue>
#include <thread>
#include <vector>
#include "paddle/fluid/distributed/fleet_executor/interceptor_message.pb.h"
#include "paddle/fluid/platform/macros.h"
namespace
paddle
{
namespace
distributed
{
class
TaskNode
;
// An Interceptor handles one TaskNode and receives InterceptorMessage objects
// through a pair of mailboxes (remote -> local).
class Interceptor {
 public:
  Interceptor() = delete;

  // `interceptor_id` is handed down from the layer above; `node` is the
  // TaskNode this interceptor handles (stored as a non-owning pointer).
  Interceptor(int64_t interceptor_id, TaskNode* node);

  virtual ~Interceptor() = default;

  // Return the interceptor id.
  int64_t GetInterceptorId() const;

  // Called by Carrier, enqueue an InterceptorMessage to the remote mailbox.
  bool EnqueueRemoteInterceptorMessage(
      const InterceptorMessage& interceptor_message);

  DISABLE_COPY_AND_ASSIGN(Interceptor);

 private:
  // Poll the local mailbox and parse the messages.
  void PoolTheMailbox();

  // Fetch all messages from the remote mailbox into the local mailbox.
  // Return true if the remote mailbox was not empty, otherwise false.
  bool FetchRemoteMailbox();

  // Interceptor id, handed down from the layer above. Default-initialised so
  // it never holds an indeterminate value.
  int64_t interceptor_id_{0};

  // Node handled by this interceptor (not owned).
  TaskNode* node_{nullptr};

  // Mutex guarding reads/writes of the remote mailbox.
  std::mutex remote_mailbox_mutex_;

  // Thread meant to run PoolTheMailbox() to poll the local mailbox.
  std::thread interceptor_thread_;

  // Condition variable for blocking the thread when an empty remote mailbox
  // is fetched.
  std::condition_variable cond_var_;

  // Remote mailbox: written by EnqueueRemoteInterceptorMessage(),
  // read by FetchRemoteMailbox().
  std::queue<InterceptorMessage> remote_mailbox_;

  // Local mailbox: written by FetchRemoteMailbox(),
  // read by PoolTheMailbox().
  std::queue<InterceptorMessage> local_mailbox_;
};
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/fleet_executor/interceptor_message.proto
0 → 100644
浏览文件 @
be4eaba0
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax
=
"proto2"
;
package
paddle
.
distributed
;
option
cc_generic_services
=
true
;
option
cc_enable_arenas
=
true
;
// Kinds of message that can be sent to an interceptor.
enum MessageType {
  // STOP an Interceptor.
  STOP = 1;
  // Upstream data is ready.
  DATA_IS_READY = 2;
  // Downstream has used the data.
  // NOTE(review): "DATE" looks like a typo for "DATA"; the name is kept
  // because generated code and callers refer to it.
  DATE_IS_USELESS = 3;
  // Current Interceptor encounters an error.
  ERROR = 4;
  // Reset the status.
  RESET = 5;
}
// Message passed from one interceptor (src_id) to another (dst_id).
message InterceptorMessage {
  // Id of the sending interceptor.
  optional int64 src_id = 1 [ default = 0 ];
  // Id of the receiving interceptor.
  optional int64 dst_id = 2 [ default = 0 ];
  // What the message asks for / reports; RESET when unset.
  optional MessageType message_type = 3 [ default = RESET ];
  // NOTE(review): presumably marks control (as opposed to data) messages --
  // confirm against the senders.
  optional bool ctrl_message = 4 [ default = false ];
}
// Reply returned by the InterceptorMessageService RPC.
message InterceptorResponse {
  // Result flag; false when unset.
  optional bool rst = 1 [ default = false ];
}
// RPC service that takes an InterceptorMessage and answers with an
// InterceptorResponse.
service TheInterceptorMessageService {
  rpc InterceptorMessageService(InterceptorMessage)
      returns (InterceptorResponse);
}
paddle/fluid/distributed/fleet_executor/interceptor_message_service.cc
0 → 100644
浏览文件 @
be4eaba0
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PADDLE_WITH_ASCEND_CL
#ifdef PADDLE_WITH_DISTRIBUTE
#include "paddle/fluid/distributed/fleet_executor/interceptor_message_service.h"
namespace
paddle
{
namespace
distributed
{
// RPC handler invoked when an InterceptorMessage arrives from another rank.
//
// control_base: RPC controller supplied by the framework (unused for now).
// request:      the received message (not processed yet).
// response:     reply to send back; left at its defaults for now.
// done:         completion callback. It must be run for the RPC to finish,
//               otherwise the caller never receives a reply.
void InterceptorMessageServiceImpl::InterceptorMessageService(
    google::protobuf::RpcController* control_base,
    const InterceptorMessage* request, InterceptorResponse* response,
    google::protobuf::Closure* done) {
  // receive msg
  // Placeholder: the message is not yet forwarded anywhere.

  // Complete the RPC. Previously `done` was never run, so the call could
  // never complete. Once real work is added here, a scope guard such as
  // brpc::ClosureGuard is the safer way to guarantee this on every path.
  if (done != nullptr) {
    done->Run();
  }
}
}
// namespace distributed
}
// namespace paddle
#endif
#endif
paddle/fluid/distributed/fleet_executor/interceptor_message_service.h
0 → 100644
浏览文件 @
be4eaba0
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PADDLE_WITH_ASCEND_CL
#ifdef PADDLE_WITH_DISTRIBUTE
#pragma once
#include "brpc/server.h"
#include "paddle/fluid/distributed/fleet_executor/interceptor_message.pb.h"
namespace
paddle
{
namespace
distributed
{
class
InterceptorMessageServiceImpl
:
public
TheInterceptorMessageService
{
public:
InterceptorMessageServiceImpl
()
{}
virtual
~
InterceptorMessageServiceImpl
()
{}
virtual
void
InterceptorMessageService
(
google
::
protobuf
::
RpcController
*
control_base
,
const
InterceptorMessage
*
request
,
InterceptorResponse
*
response
,
google
::
protobuf
::
Closure
*
done
);
};
}
// namespace distributed
}
// namespace paddle
#endif
#endif
paddle/fluid/distributed/fleet_executor/message_bus.cc
0 → 100644
浏览文件 @
be4eaba0
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/distributed/fleet_executor/message_bus.h"
#include "paddle/fluid/distributed/fleet_executor/carrier.h"
namespace
paddle
{
namespace
distributed
{
// Tear-down hook for the message bus.
// Placeholder for now: no explicit clean-up is performed.
MessageBus::~MessageBus() {}
// Called by an Interceptor to send an InterceptorMessage to its destination.
// Placeholder for now: nothing is sent and success is always reported.
bool MessageBus::Send(const InterceptorMessage& interceptor_message) {
  const bool sent = true;
  return sent;
}
// Intended to keep listening on the port and handle incoming messages.
// Placeholder for now: does nothing.
void MessageBus::ListenPort() {}
// Intended to tell whether the interceptors `src_id` and `dst_id` live on the
// same rank. Placeholder for now: the ids are not looked up and true is
// always returned.
bool MessageBus::IsSameRank(int64_t src_id, int64_t dst_id) {
  const bool same_rank = true;
  return same_rank;
}
#ifndef PADDLE_WITH_ASCEND_CL
#ifdef PADDLE_WITH_DISTRIBUTE
// Intended to send the message across ranks (destination is on a different
// rank from the source). Placeholder for now: nothing is sent and success is
// always reported.
bool MessageBus::SendInterRank(const InterceptorMessage& interceptor_message) {
  const bool sent = true;
  return sent;
}
#endif
#endif
// Intended to send the message within a rank (destination is on the same
// rank as the source). Placeholder for now: nothing is sent and success is
// always reported.
bool MessageBus::SendIntraRank(const InterceptorMessage& interceptor_message) {
  const bool sent = true;
  return sent;
}
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/fleet_executor/message_bus.h
0 → 100644
浏览文件 @
be4eaba0
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <thread>
#include <unordered_map>
#ifndef PADDLE_WITH_ASCEND_CL
#ifdef PADDLE_WITH_DISTRIBUTE
#include "brpc/channel.h"
#include "brpc/server.h"
#endif
#endif
#include "paddle/fluid/distributed/fleet_executor/interceptor_message.pb.h"
#include "paddle/fluid/platform/macros.h"
namespace
paddle
{
namespace
distributed
{
class
Carrier
;
// MessageBus is what interceptors use to send InterceptorMessage objects to
// one another, either within a rank or across ranks.
class MessageBus final {
 public:
  MessageBus() = delete;

  // All three arguments are copied into the corresponding members:
  //   interceptor_id_to_rank: interceptor id -> rank id
  //   rank_to_addr:           rank id -> address
  //   addr:                   address this bus is meant to listen on
  explicit MessageBus(
      const std::unordered_map<int64_t, int64_t>& interceptor_id_to_rank,
      const std::unordered_map<int64_t, std::string>& rank_to_addr,
      const std::string& addr)
      : interceptor_id_to_rank_(interceptor_id_to_rank),
        rank_to_addr_(rank_to_addr),
        addr_(addr) {}

  ~MessageBus();

  // Called by an Interceptor: send an InterceptorMessage to its destination.
  bool Send(const InterceptorMessage& interceptor_message);

  DISABLE_COPY_AND_ASSIGN(MessageBus);

 private:
  // Keep listening on the port and handle incoming messages.
  void ListenPort();

  // Check whether the destination is on the same rank as the source.
  bool IsSameRank(int64_t src_id, int64_t dst_id);

#ifndef PADDLE_WITH_ASCEND_CL
#ifdef PADDLE_WITH_DISTRIBUTE
  // Send the message across ranks (destination rank differs from source).
  bool SendInterRank(const InterceptorMessage& interceptor_message);
#endif
#endif

  // Send the message within a rank (destination rank equals source).
  bool SendIntraRank(const InterceptorMessage& interceptor_message);

  // Handed down from the layer above: interceptor id -> rank id.
  std::unordered_map<int64_t, int64_t> interceptor_id_to_rank_;

  // Handed down from the layer above: rank id -> address.
  std::unordered_map<int64_t, std::string> rank_to_addr_;

  // The address that needs to be listened on.
  std::string addr_;

#ifndef PADDLE_WITH_ASCEND_CL
#ifdef PADDLE_WITH_DISTRIBUTE
  // brpc server
  brpc::Server server_;
#endif
#endif

  // Thread meant to run ListenPort() and keep listening on the port for
  // remote messages.
  std::thread listen_port_thread_;
};
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/fleet_executor/task_node.h
0 → 100644
浏览文件 @
be4eaba0
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
namespace
paddle
{
namespace
distributed
{
// Placeholder type for a unit of work handled by an interceptor.
// It carries no state yet; construction and destruction are the
// compiler-generated defaults.
class TaskNode final {
 public:
  TaskNode() = default;
  ~TaskNode() = default;
};
}
// namespace distributed
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录