Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
821c2f4e
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
821c2f4e
编写于
2月 26, 2021
作者:
X
xiayanming
提交者:
GitHub
2月 26, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add ascend unittest (#31249)
add ascend unittest
上级
d45f5d78
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
71 addition
and
6 deletion
+71
-6
python/paddle/distributed/fleet/launch_utils.py
python/paddle/distributed/fleet/launch_utils.py
+1
-2
python/paddle/fluid/tests/unittests/ascend_multi_process_collective.py
.../fluid/tests/unittests/ascend_multi_process_collective.py
+3
-2
python/paddle/fluid/tests/unittests/test_fleet_ascend_utils.py
...n/paddle/fluid/tests/unittests/test_fleet_ascend_utils.py
+65
-0
python/paddle/fluid/tests/unittests/test_fleet_launch_ascend.sh
.../paddle/fluid/tests/unittests/test_fleet_launch_ascend.sh
+2
-2
未找到文件。
python/paddle/distributed/fleet/launch_utils.py
浏览文件 @
821c2f4e
...
...
@@ -476,8 +476,7 @@ def start_local_trainers(cluster,
if
len
(
t
.
accelerators
)
>
0
and
pod
.
device_mode
==
DeviceMode
.
GPU
:
proc_env
[
"FLAGS_selected_gpus"
]
=
"%s"
%
","
.
join
(
[
str
(
g
)
for
g
in
t
.
accelerators
])
if
len
(
t
.
accelerators
)
>
0
and
pod
.
device_mode
==
DeviceMode
.
ASCEND_NPU
:
elif
len
(
t
.
accelerators
)
>
0
and
pod
.
device_mode
==
DeviceMode
.
ASCEND_NPU
:
proc_env
[
"FLAGS_selected_npus"
]
=
"%s"
%
","
.
join
(
[
str
(
g
)
for
g
in
t
.
accelerators
])
...
...
python/paddle/fluid/tests/unittests/ascend_multi_process_collective.py
浏览文件 @
821c2f4e
...
...
@@ -18,6 +18,7 @@ import time
def
train
(
prefix
):
selected_accelerators
=
os
.
getenv
(
"FLAGS_selected_accelerators"
)
selected_npus
=
os
.
getenv
(
"FLAGS_selected_npus"
)
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
))
worker_endpoints_env
=
os
.
getenv
(
"PADDLE_TRAINER_ENDPOINTS"
)
current_endpoint
=
os
.
getenv
(
"PADDLE_CURRENT_ENDPOINT"
)
...
...
@@ -26,8 +27,8 @@ def train(prefix):
device_ids
=
os
.
getenv
(
"PADDLE_WORLD_DEVICE_IDS"
)
current_device_id
=
os
.
getenv
(
"PADDLE_LOCAL_DEVICE_IDS"
)
details
=
"selected_accelerators:{} worker_endpoints:{} trainers_num:{} current_endpoint:{} trainer_id:{} device_ids:{} device_id:{}"
\
.
format
(
selected_accelerators
,
worker_endpoints
,
trainers_num
,
current_endpoint
,
trainer_id
,
device_ids
,
current_device_id
)
details
=
"selected_accelerators:{}
selected_npus:{}
worker_endpoints:{} trainers_num:{} current_endpoint:{} trainer_id:{} device_ids:{} device_id:{}"
\
.
format
(
selected_accelerators
,
selected_npus
,
worker_endpoints
,
trainers_num
,
current_endpoint
,
trainer_id
,
device_ids
,
current_device_id
)
print
(
details
)
with
open
(
"multi_process_{}.check_{}.log"
.
format
(
prefix
,
trainer_id
),
"w"
)
as
f
:
...
...
python/paddle/fluid/tests/unittests/test_fleet_ascend_utils.py
0 → 100644
浏览文件 @
821c2f4e
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
sys
import
os
import
time
import
six
import
copy
import
json
import
unittest
import
paddle.fluid
as
fluid
import
paddle.distributed.fleet.ascend_utils
as
ascend_utils
RANK_TABLE_JSON
=
{
"status"
:
"completed"
,
"version"
:
"1.0"
,
"server_count"
:
"1"
,
"server_list"
:
[
{
"server_id"
:
"127.0.0.1"
,
"device"
:
[
{
"device_id"
:
"0"
,
"device_ip"
:
"192.1.184.23"
,
"rank_id"
:
"0"
},
{
"device_id"
:
"1"
,
"device_ip"
:
"192.2.21.93"
,
"rank_id"
:
"1"
}
]
}
]
}
class
TestAscendUtil
(
unittest
.
TestCase
):
def
test_get_cloud_cluster
(
self
):
cluster
,
pod
=
ascend_utils
.
get_cloud_cluster
()
self
.
assertTrue
(
cluster
)
self
.
assertTrue
(
pod
)
with
open
(
'rank_table_file.json'
,
'w'
)
as
f
:
json
.
dump
(
RANK_TABLE_JSON
,
f
)
rank_table_file
=
"./rank_table_file.json"
cluster
,
pod
=
ascend_utils
.
get_cloud_cluster
(
rank_table_file
=
rank_table_file
)
self
.
assertTrue
(
cluster
)
self
.
assertTrue
(
pod
)
if
__name__
==
'__main__'
:
unittest
.
main
()
\ No newline at end of file
python/paddle/fluid/tests/unittests/test_fleet_launch_ascend.sh
浏览文件 @
821c2f4e
...
...
@@ -51,8 +51,8 @@ echo "begin test use ascend npu"
distributed_args
=
"--run_mode=collective --log_dir=testlog"
python
-m
paddle.distributed.fleet.launch
${
distributed_args
}
ascend_multi_process_collective.py fleetlaunchascend
str1
=
"selected_accelerators:0 worker_endpoints:127.0.0.1:6170,127.0.0.1:6171 trainers_num:2 current_endpoint:127.0.0.1:6170 trainer_id:0 device_ids:0,1 device_id:0"
str2
=
"selected_accelerators:1 worker_endpoints:127.0.0.1:6170,127.0.0.1:6171 trainers_num:2 current_endpoint:127.0.0.1:6171 trainer_id:1 device_ids:0,1 device_id:1"
str1
=
"selected_accelerators:0
selected_npus:0
worker_endpoints:127.0.0.1:6170,127.0.0.1:6171 trainers_num:2 current_endpoint:127.0.0.1:6170 trainer_id:0 device_ids:0,1 device_id:0"
str2
=
"selected_accelerators:1
selected_npus:1
worker_endpoints:127.0.0.1:6170,127.0.0.1:6171 trainers_num:2 current_endpoint:127.0.0.1:6171 trainer_id:1 device_ids:0,1 device_id:1"
file_0
=
"multi_process_fleetlaunchascend.check_0.log"
file_1
=
"multi_process_fleetlaunchascend.check_1.log"
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录