Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
31673a92
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
31673a92
编写于
11月 11, 2021
作者:
L
LiYuRio
提交者:
GitHub
11月 11, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Get global cluster information (#37084)
上级
6c183a8e
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
33 addition
and
2 deletion
+33
-2
paddle/fluid/distributed/fleet_executor/fleet_executor.cc
paddle/fluid/distributed/fleet_executor/fleet_executor.cc
+3
-1
paddle/fluid/distributed/fleet_executor/fleet_executor_desc.proto
...luid/distributed/fleet_executor/fleet_executor_desc.proto
+7
-1
python/paddle/fluid/executor.py
python/paddle/fluid/executor.py
+12
-0
python/paddle/fluid/tests/unittests/test_fleet_executor.py
python/paddle/fluid/tests/unittests/test_fleet_executor.py
+11
-0
未找到文件。
paddle/fluid/distributed/fleet_executor/fleet_executor.cc
浏览文件 @
31673a92
...
...
@@ -20,7 +20,9 @@ namespace paddle {
namespace
distributed
{
// Constructs the executor from a serialized descriptor string.
// The string is parsed into exe_desc_; a parse failure is reported through
// PADDLE_ENFORCE as a PreconditionNotMet error.
FleetExecutor::FleetExecutor(const std::string& exe_desc_str) {
  const bool desc_parsed = exe_desc_.ParseFromString(exe_desc_str);
  PADDLE_ENFORCE(
      desc_parsed,
      platform::errors::PreconditionNotMet(
          "Error occurs while parsing string to proto"));
}
FleetExecutor
::~
FleetExecutor
()
{
...
...
paddle/fluid/distributed/fleet_executor/fleet_executor_desc.proto
浏览文件 @
31673a92
...
...
@@ -15,7 +15,13 @@
syntax
=
"proto2"
;
package
paddle
.
distributed
;
// Pairs a rank id with the endpoint string of that rank.
message RankInfo {
  // Rank id.
  required int64 rank = 1;
  // Endpoint of the rank; the Python caller fills this from the
  // comma-separated PADDLE_TRAINER_ENDPOINTS entries.
  required string ip_port = 2;
}
// Descriptor that is serialized and passed to the FleetExecutor constructor.
// Every field number must be unique within the message: `cur_rank` owns
// number 2, and the per-rank endpoints are carried by `cluster_info`.
message FleetExecutorDesc {
  optional string grain = 1 [ default = "coarse" ];
  // Rank id of current processor
  optional int64 cur_rank = 2 [ default = 0 ];
  // One RankInfo (rank id + "ip:port") entry per rank.
  repeated RankInfo cluster_info = 3;
}
python/paddle/fluid/executor.py
浏览文件 @
31673a92
...
...
@@ -1851,7 +1851,19 @@ class Executor(object):
use_program_cache
)
from
..distributed.fleet.proto
import
fleet_executor_desc_pb2
from
google.protobuf
import
text_format
cur_rank
=
os
.
getenv
(
"PADDLE_TRAINER_ID"
)
trainer_endpoints_str
=
os
.
getenv
(
"PADDLE_TRAINER_ENDPOINTS"
)
fleet_exe_desc
=
fleet_executor_desc_pb2
.
FleetExecutorDesc
()
if
cur_rank
and
trainer_endpoints_str
:
fleet_exe_desc
.
cur_rank
=
int
(
cur_rank
)
trainer_endpoints
=
trainer_endpoints_str
.
split
(
','
)
for
rank
,
endpoint
in
enumerate
(
trainer_endpoints
):
rank_info
=
fleet_executor_desc_pb2
.
RankInfo
()
rank_info
.
rank
=
rank
rank_info
.
ip_port
=
endpoint
fleet_exe_desc
.
cluster_info
.
append
(
rank_info
)
else
:
logging
.
warning
(
"Fleet Executor will run on single device only."
)
fleet_exe
=
core
.
FleetExecutor
(
fleet_exe_desc
.
SerializeToString
())
fleet_exe
.
init
(
program
.
_pipeline_opt
[
"section_program"
].
desc
)
fleet_exe
.
run
()
...
...
python/paddle/fluid/tests/unittests/test_fleet_executor.py
浏览文件 @
31673a92
...
...
@@ -13,6 +13,7 @@
# limitations under the License.
import
unittest
import
os
import
paddle
import
paddle.fluid
as
fluid
...
...
@@ -38,6 +39,16 @@ class TestFleetExecutor(unittest.TestCase):
for
place
in
places
:
self
.
run_fleet_executor
(
place
)
def
test_dist_executor_on_multi_devices
(
self
):
os
.
environ
[
"PADDLE_TRAINER_ID"
]
=
"0"
os
.
environ
[
"PADDLE_TRAINER_ENDPOINTS"
]
=
"127.0.0.1:7000,127.0.0.1:7001,127.0.0.1:7002"
places
=
[
fluid
.
CPUPlace
()]
if
fluid
.
is_compiled_with_cuda
():
places
.
append
(
fluid
.
CUDAPlace
(
0
))
for
place
in
places
:
self
.
run_fleet_executor
(
place
)
if
__name__
==
"__main__"
:
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录