Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
81848fff
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
81848fff
编写于
3月 17, 2022
作者:
Y
Yuang Liu
提交者:
GitHub
3月 17, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[fleet executor] fleet executor for npu (#40607)
上级
06fee998
变更
7
隐藏空白更改
内联
并排
Showing
7 changed files
with
19 additions
and
37 deletions
+19
-37
paddle/fluid/distributed/fleet_executor/CMakeLists.txt
paddle/fluid/distributed/fleet_executor/CMakeLists.txt
+1
-1
paddle/fluid/distributed/fleet_executor/message_bus.cc
paddle/fluid/distributed/fleet_executor/message_bus.cc
+4
-8
paddle/fluid/distributed/fleet_executor/message_bus.h
paddle/fluid/distributed/fleet_executor/message_bus.h
+3
-6
paddle/fluid/distributed/fleet_executor/message_service.cc
paddle/fluid/distributed/fleet_executor/message_service.cc
+1
-2
paddle/fluid/distributed/fleet_executor/message_service.h
paddle/fluid/distributed/fleet_executor/message_service.h
+1
-2
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+6
-12
paddle/fluid/inference/api/analysis_predictor.h
paddle/fluid/inference/api/analysis_predictor.h
+3
-6
未找到文件。
paddle/fluid/distributed/fleet_executor/CMakeLists.txt
浏览文件 @
81848fff
...
...
@@ -4,7 +4,7 @@ if(WITH_PYTHON)
endif
()
proto_library
(
interceptor_message_proto SRCS interceptor_message.proto
)
-if(WITH_DISTRIBUTE AND WITH_PSCORE AND NOT (WITH_ASCEND OR WITH_ASCEND_CL))
+if(WITH_DISTRIBUTE AND WITH_PSCORE)
set
(
BRPC_DEPS brpc ssl crypto protobuf zlib leveldb snappy gflags glog
)
else
()
set
(
BRPC_DEPS
""
)
...
...
paddle/fluid/distributed/fleet_executor/message_bus.cc
浏览文件 @
81848fff
...
...
@@ -67,8 +67,7 @@ bool MessageBus::IsInit() const { return is_init_; }
MessageBus
::~
MessageBus
()
{
VLOG
(
3
)
<<
"Message bus releases resource."
;
-#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE) && \
-    !defined(PADDLE_WITH_ASCEND_CL)
+#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE)
server_
.
Stop
(
1000
);
server_
.
Join
();
#endif
...
...
@@ -87,8 +86,7 @@ bool MessageBus::Send(int64_t dst_rank,
IsInit
(),
true
,
platform
::
errors
::
PreconditionNotMet
(
"Using message bus since it has not been initialized."
));
-#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE) && \
-    !defined(PADDLE_WITH_ASCEND_CL)
+#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE)
int
retry_time
=
0
;
// message bus will retry sending for 10 times
while
(
retry_time
<
10
)
{
++
retry_time
;
...
...
@@ -173,8 +171,7 @@ void MessageBus::ListenPort() {
LOG
(
INFO
)
<<
"No need listen to port since training on single card."
;
return
;
}
-#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE) && \
-    !defined(PADDLE_WITH_ASCEND_CL)
+#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE)
// function keep listen the port and handle the message
PADDLE_ENFORCE_EQ
(
server_
.
AddService
(
&
message_service_
,
brpc
::
SERVER_DOESNT_OWN_SERVICE
),
0
,
...
...
@@ -203,8 +200,7 @@ void MessageBus::ListenPort() {
#endif
}
-#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE) && \
-    !defined(PADDLE_WITH_ASCEND_CL)
+#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE)
bool
MessageBus
::
SendInterRank
(
int64_t
dst_rank
,
const
InterceptorMessage
&
interceptor_message
)
{
const
auto
&
dst_addr
=
GetAddr
(
dst_rank
);
...
...
paddle/fluid/distributed/fleet_executor/message_bus.h
浏览文件 @
81848fff
...
...
@@ -20,8 +20,7 @@
#include <thread>
#include <unordered_map>
-#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE) && \
-    !defined(PADDLE_WITH_ASCEND_CL)
+#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE)
#include "brpc/channel.h"
#include "brpc/server.h"
#include "paddle/fluid/distributed/fleet_executor/message_service.h"
...
...
@@ -64,8 +63,7 @@ class MessageBus final {
const
std
::
string
&
GetAddr
(
int64_t
rank
)
const
;
-#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE) && \
-    !defined(PADDLE_WITH_ASCEND_CL)
+#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE)
// send the message inter rank (dst is different rank with src)
bool
SendInterRank
(
int64_t
dst_rank
,
const
InterceptorMessage
&
interceptor_message
);
...
...
@@ -81,8 +79,7 @@ class MessageBus final {
// the ip needs to be listened
std
::
string
addr_
;
-#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE) && \
-    !defined(PADDLE_WITH_ASCEND_CL)
+#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE)
MessageServiceImpl
message_service_
;
// brpc server
brpc
::
Server
server_
;
...
...
paddle/fluid/distributed/fleet_executor/message_service.cc
浏览文件 @
81848fff
...
...
@@ -11,8 +11,7 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
-#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE) && \
-    !defined(PADDLE_WITH_ASCEND_CL)
+#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE)
#include "paddle/fluid/distributed/fleet_executor/message_service.h"
#include "brpc/server.h"
#include "paddle/fluid/distributed/fleet_executor/global.h"
...
...
paddle/fluid/distributed/fleet_executor/message_service.h
浏览文件 @
81848fff
...
...
@@ -11,8 +11,7 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
-#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE) && \
-    !defined(PADDLE_WITH_ASCEND_CL)
+#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE)
#pragma once
#include "brpc/server.h"
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
81848fff
...
...
@@ -50,8 +50,7 @@
#include "paddle/phi/api/ext/op_meta_info.h"
#include "paddle/utils/string/split.h"
-#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE) && \
-    !defined(PADDLE_WITH_ASCEND_CL)
+#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE)
#include "paddle/fluid/distributed/fleet_executor/fleet_executor.h"
#include "paddle/fluid/distributed/fleet_executor/fleet_executor_desc.pb.h"
#include "paddle/fluid/distributed/fleet_executor/task_node.h"
...
...
@@ -374,8 +373,7 @@ static void DisablePrepareDataOpt(
}
bool
AnalysisPredictor
::
PrepareExecutor
()
{
-#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE) && \
-    !defined(PADDLE_WITH_ASCEND_CL)
+#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE)
if
(
config_
.
dist_config
().
use_dist_model
())
{
VLOG
(
3
)
<<
"use_dist_model is enabled, will init FleetExecutor."
;
return
PrepareFleetExecutor
();
...
...
@@ -393,8 +391,7 @@ bool AnalysisPredictor::PrepareExecutor() {
return
true
;
}
-#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE) && \
-    !defined(PADDLE_WITH_ASCEND_CL)
+#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE)
bool
AnalysisPredictor
::
PrepareFleetExecutor
()
{
VLOG
(
3
)
<<
"AnalysisPredictor::PrepareFleetExecutor()"
;
if
(
config_
.
dist_config
().
nranks
()
>
1
&&
!
CommInit
())
{
...
...
@@ -1194,8 +1191,7 @@ std::vector<std::string> AnalysisPredictor::GetOutputNames() {
std
::
unique_ptr
<
ZeroCopyTensor
>
AnalysisPredictor
::
GetInputTensor
(
const
std
::
string
&
name
)
{
framework
::
Scope
*
scope
;
-#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE) && \
-    !defined(PADDLE_WITH_ASCEND_CL)
+#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE)
if
(
config_
.
dist_config
().
use_dist_model
())
{
scope
=
scope_
.
get
();
}
else
{
...
...
@@ -1244,8 +1240,7 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
std
::
unique_ptr
<
ZeroCopyTensor
>
AnalysisPredictor
::
GetOutputTensor
(
const
std
::
string
&
name
)
{
framework
::
Scope
*
scope
;
-#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE) && \
-    !defined(PADDLE_WITH_ASCEND_CL)
+#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE)
if
(
config_
.
dist_config
().
use_dist_model
())
{
scope
=
scope_
.
get
();
}
else
{
...
...
@@ -1292,8 +1287,7 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
}
bool
AnalysisPredictor
::
ZeroCopyRun
()
{
-#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE) && \
-    !defined(PADDLE_WITH_ASCEND_CL)
+#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE)
if
(
config_
.
dist_config
().
use_dist_model
())
{
VLOG
(
3
)
<<
"ZeroCopyRun will use the fleet executor."
;
inference
::
Timer
timer
;
...
...
paddle/fluid/inference/api/analysis_predictor.h
浏览文件 @
81848fff
...
...
@@ -18,8 +18,7 @@
#include <memory>
#include <string>
#include <vector>
-#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE) && \
-    !defined(PADDLE_WITH_ASCEND_CL)
+#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE)
#include "paddle/fluid/distributed/fleet_executor/fleet_executor.h"
#endif
#include "paddle/fluid/framework/naive_executor.h"
...
...
@@ -395,8 +394,7 @@ class AnalysisPredictor : public PaddlePredictor {
void
StatisticShapeRangeInfo
();
void
CollectShapeRangeInfo
();
-#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE) && \
-    !defined(PADDLE_WITH_ASCEND_CL)
+#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE)
// fleet exe related
///
...
...
@@ -488,8 +486,7 @@ class AnalysisPredictor : public PaddlePredictor {
std
::
map
<
std
::
string
,
std
::
vector
<
std
::
vector
<
int32_t
>>>
shape_info_
;
static
int
clone_num_
;
-#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE) && \
-    !defined(PADDLE_WITH_ASCEND_CL)
+#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE)
// fleet executor related
distributed
::
FleetExecutorDesc
executor_desc_
;
std
::
shared_ptr
<
distributed
::
FleetExecutor
>
fleet_exe_
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录