Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
fc5acdd0
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
fc5acdd0
编写于
8月 27, 2020
作者:
G
gongweibao
提交者:
GitHub
8月 27, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix fleet ut timeout issue. (#26694)
上级
32ae8e81
变更
4
显示空白变更内容
内联
并排
Showing
4 changed files
with
59 additions
and
28 deletions
+59
-28
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+3
-3
python/paddle/fluid/tests/unittests/launch_function_helper.py
...on/paddle/fluid/tests/unittests/launch_function_helper.py
+16
-0
python/paddle/fluid/tests/unittests/test_fleet_graph_execution_meta_optimizer.py
...ts/unittests/test_fleet_graph_execution_meta_optimizer.py
+39
-25
python/requirements.txt
python/requirements.txt
+1
-0
未找到文件。
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
fc5acdd0
...
...
@@ -13,6 +13,7 @@ list(APPEND DIST_TEST_OPS test_parallel_dygraph_se_resnext)
list
(
APPEND DIST_TEST_OPS test_parallel_dygraph_sparse_embedding
)
list
(
APPEND DIST_TEST_OPS test_parallel_dygraph_transformer
)
list
(
APPEND DIST_TEST_OPS test_listen_and_serv_op
)
list
(
APPEND DIST_TEST_OPS test_fleet_graph_execution_meta_optimizer
)
set
(
MIXED_DIST_TEST_OPS
${
DIST_TEST_OPS
}
)
#remove distribute unittests.
list
(
APPEND MIXED_DIST_TEST_OPS test_dgc_op
)
...
...
@@ -36,7 +37,6 @@ list(APPEND MIXED_DIST_TEST_OPS test_fleet_base)
list
(
APPEND MIXED_DIST_TEST_OPS test_fleet_base_2
)
list
(
APPEND MIXED_DIST_TEST_OPS test_fleet_base_3
)
list
(
APPEND MIXED_DIST_TEST_OPS test_fleet_recompute_meta_optimizer
)
list
(
APPEND MIXED_DIST_TEST_OPS test_fleet_graph_execution_meta_optimizer
)
list
(
APPEND MIXED_DIST_TEST_OPS test_fleet_pipeline_meta_optimizer
)
list
(
APPEND MIXED_DIST_TEST_OPS test_fleet_amp_meta_optimizer
)
list
(
APPEND MIXED_DIST_TEST_OPS test_fleet_gradient_merge_meta_optimizer
)
...
...
@@ -454,7 +454,6 @@ if(WITH_DISTRIBUTE)
py_test_modules
(
test_fleet_base_2 MODULES test_fleet_base_2 ENVS
${
dist_ENVS
}
)
py_test_modules
(
test_fleet_base_3 MODULES test_fleet_base_3 ENVS
${
dist_ENVS
}
)
py_test_modules
(
test_fleet_recompute_meta_optimizer MODULES test_fleet_recompute_meta_optimizer ENVS
${
dist_ENVS
}
)
py_test_modules
(
test_fleet_graph_execution_meta_optimizer MODULES test_fleet_graph_execution_meta_optimizer ENVS
${
dist_ENVS
}
)
py_test_modules
(
test_fleet_graph_executor MODULES test_fleet_graph_executor ENVS
${
dist_ENVS
}
)
py_test_modules
(
test_fleet_gradient_merge_meta_optimizer MODULES test_fleet_gradient_merge_meta_optimizer ENVS
${
dist_ENVS
}
)
py_test_modules
(
test_fleet_amp_meta_optimizer MODULES test_fleet_amp_meta_optimizer ENVS
${
dist_ENVS
}
)
...
...
@@ -490,6 +489,7 @@ if(WITH_DISTRIBUTE)
bash_test_modules
(
test_launch_ps START_BASH test_launch_ps.sh ENVS PADDLE_BINARY_DIR=
${
PADDLE_BINARY_DIR
}
)
bash_test_modules
(
test_fleet_launch START_BASH test_fleet_launch.sh ENVS PADDLE_BINARY_DIR=
${
PADDLE_BINARY_DIR
}
)
# port range (20000, 23000) is reserved for dist-ops
set
(
dist_ut_port 20001
)
foreach
(
TEST_OP
${
DIST_TEST_OPS
}
)
bash_test_modules
(
${
TEST_OP
}
START_BASH dist_test.sh SERIAL LABELS
"RUN_TYPE=EXCLUSIVE"
ENVS
"PADDLE_DIST_UT_PORT=
${
dist_ut_port
}
"
)
...
...
python/paddle/fluid/tests/unittests/launch_function_helper.py
浏览文件 @
fc5acdd0
...
...
@@ -15,6 +15,7 @@ from multiprocessing import Pool, Process
import
os
import
socket
from
contextlib
import
closing
import
psutil
def
launch_func
(
func
,
env_dict
):
...
...
@@ -24,6 +25,21 @@ def launch_func(func, env_dict):
return
proc
def wait(procs, timeout=None):
    """Wait for the descendants of ``procs`` to exit, killing any stragglers.

    Only the (recursive) child processes of each entry in ``procs`` are
    collected and waited on; the entries of ``procs`` themselves are not
    passed to ``psutil.wait_procs``.

    Args:
        procs: Iterable of process objects exposing a ``pid`` attribute.
        timeout: Forwarded unchanged to ``psutil.wait_procs``.

    Raises:
        SystemExit: With status 1 as soon as a finished descendant reports
            a ``returncode`` other than 0.
    """
    # Imported locally because the body calls ``sys.exit`` and ``sys`` is not
    # among the module-level imports visible for this file.
    import sys

    # Gather every descendant of every launched process into one flat list.
    descendants = []
    for p in procs:
        descendants.extend(psutil.Process(p.pid).children(recursive=True))

    gone, alive = psutil.wait_procs(descendants, timeout=timeout)

    # Anything still running after the wait is forcibly terminated.
    for p in alive:
        p.kill()

    # NOTE(review): psutil documents a ``None`` return code for processes that
    # are not direct children of the caller; ``None != 0`` is true, so such a
    # process is treated as a failure here -- confirm this is intended.
    for p in gone:
        if p.returncode != 0:
            sys.exit(1)
def
_find_free_port
(
port_set
):
def
__free_port
():
with
closing
(
socket
.
socket
(
socket
.
AF_INET
,
socket
.
SOCK_STREAM
))
as
s
:
...
...
python/paddle/fluid/tests/unittests/test_fleet_graph_execution_meta_optimizer.py
浏览文件 @
fc5acdd0
...
...
@@ -15,25 +15,37 @@
import
unittest
import
paddle
import
os
from
launch_function_helper
import
launch_func
,
_find_free_port
from
launch_function_helper
import
launch_func
,
wait
,
_find_free_port
class
TestFleetGraphExecutionMetaOptimizer
(
unittest
.
TestCase
):
def
setUp
(
self
):
try
:
self
.
_dist_ut_port_0
=
int
(
os
.
environ
[
"PADDLE_DIST_UT_PORT"
])
self
.
_dist_ut_port_1
=
self
.
_dist_ut_port_0
+
1
except
Exception
as
e
:
self
.
_dist_ut_port_0
=
_find_free_port
(
set
())
self
.
_dist_ut_port_1
=
_find_free_port
(
set
())
def
test_graph_execution_optimizer_not_apply
(
self
):
port_a
=
self
.
_dist_ut_port_0
port_b
=
self
.
_dist_ut_port_1
node_a
=
{
"PADDLE_TRAINER_ID"
:
"0"
,
"PADDLE_CURRENT_ENDPOINT"
:
"127.0.0.1:
36003"
,
"PADDLE_CURRENT_ENDPOINT"
:
"127.0.0.1:
{}"
.
format
(
port_a
)
,
"PADDLE_TRAINERS_NUM"
:
"2"
,
"PADDLE_TRAINER_ENDPOINTS"
:
"127.0.0.1:36003,127.0.0.1:36004"
,
"PADDLE_TRAINER_ENDPOINTS"
:
"127.0.0.1:{},127.0.0.1:{}"
.
format
(
port_a
,
port_b
),
"http_proxy"
:
""
,
"https_proxy"
:
""
}
node_b
=
{
"PADDLE_TRAINER_ID"
:
"1"
,
"PADDLE_CURRENT_ENDPOINT"
:
"127.0.0.1:
36004"
,
"PADDLE_CURRENT_ENDPOINT"
:
"127.0.0.1:
{}"
.
format
(
port_b
)
,
"PADDLE_TRAINERS_NUM"
:
"2"
,
"PADDLE_TRAINER_ENDPOINTS"
:
"127.0.0.1:36003,127.0.0.1:36004"
,
"PADDLE_TRAINER_ENDPOINTS"
:
"127.0.0.1:{},127.0.0.1:{}"
.
format
(
port_a
,
port_b
),
"http_proxy"
:
""
,
"https_proxy"
:
""
}
...
...
@@ -65,14 +77,11 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase):
proc_a
.
start
()
proc_b
=
launch_func
(
node_func
,
node_b
)
proc_b
.
start
()
proc_a
.
join
()
proc_b
.
join
()
wait
([
proc_a
,
proc_b
])
def
test_graph_execution_optimizer
(
self
):
port_set
=
set
()
port_a
=
_find_free_port
(
port_set
)
port_b
=
_find_free_port
(
port_set
)
port_a
=
self
.
_dist_ut_port_0
+
2
port_b
=
self
.
_dist_ut_port_1
+
2
node_a
=
{
"PADDLE_TRAINER_ID"
:
"0"
,
...
...
@@ -138,24 +147,27 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase):
proc_a
.
start
()
proc_b
=
launch_func
(
node_func
,
node_b
)
proc_b
.
start
()
proc_a
.
join
()
proc_b
.
join
()
wait
([
proc_a
,
proc_b
])
def
test_graph_execution_optimizer_not_apply_v2
(
self
):
port_a
=
self
.
_dist_ut_port_0
+
4
port_b
=
self
.
_dist_ut_port_1
+
4
node_a
=
{
"PADDLE_TRAINER_ID"
:
"0"
,
"PADDLE_CURRENT_ENDPOINT"
:
"127.0.0.1:
36003"
,
"PADDLE_CURRENT_ENDPOINT"
:
"127.0.0.1:
{}"
.
format
(
port_a
)
,
"PADDLE_TRAINERS_NUM"
:
"2"
,
"PADDLE_TRAINER_ENDPOINTS"
:
"127.0.0.1:36003,127.0.0.1:36004"
,
"PADDLE_TRAINER_ENDPOINTS"
:
"127.0.0.1:{},127.0.0.1:{}"
.
format
(
port_a
,
port_b
),
"http_proxy"
:
""
,
"https_proxy"
:
""
}
node_b
=
{
"PADDLE_TRAINER_ID"
:
"1"
,
"PADDLE_CURRENT_ENDPOINT"
:
"127.0.0.1:
36004"
,
"PADDLE_CURRENT_ENDPOINT"
:
"127.0.0.1:
{}"
.
format
(
port_b
)
,
"PADDLE_TRAINERS_NUM"
:
"2"
,
"PADDLE_TRAINER_ENDPOINTS"
:
"127.0.0.1:36003,127.0.0.1:36004"
,
"PADDLE_TRAINER_ENDPOINTS"
:
"127.0.0.1:{},127.0.0.1:{}"
.
format
(
port_a
,
port_b
),
"http_proxy"
:
""
,
"https_proxy"
:
""
}
...
...
@@ -187,24 +199,27 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase):
proc_a
.
start
()
proc_b
=
launch_func
(
node_func
,
node_b
)
proc_b
.
start
()
proc_a
.
join
()
proc_b
.
join
()
wait
([
proc_a
,
proc_b
])
def
test_graph_execution_optimizer
(
self
):
port_a
=
self
.
_dist_ut_port_0
+
6
port_b
=
self
.
_dist_ut_port_1
+
6
node_a
=
{
"PADDLE_TRAINER_ID"
:
"0"
,
"PADDLE_CURRENT_ENDPOINT"
:
"127.0.0.1:
36001"
,
"PADDLE_CURRENT_ENDPOINT"
:
"127.0.0.1:
{}"
.
format
(
port_a
)
,
"PADDLE_TRAINERS_NUM"
:
"2"
,
"PADDLE_TRAINER_ENDPOINTS"
:
"127.0.0.1:36001,127.0.0.1:36002"
,
"PADDLE_TRAINER_ENDPOINTS"
:
"127.0.0.1:{},127.0.0.1:{}"
.
format
(
port_a
,
port_b
),
"http_proxy"
:
""
,
"https_proxy"
:
""
}
node_b
=
{
"PADDLE_TRAINER_ID"
:
"1"
,
"PADDLE_CURRENT_ENDPOINT"
:
"127.0.0.1:
36002"
,
"PADDLE_CURRENT_ENDPOINT"
:
"127.0.0.1:
{}"
.
format
(
port_b
)
,
"PADDLE_TRAINERS_NUM"
:
"2"
,
"PADDLE_TRAINER_ENDPOINTS"
:
"127.0.0.1:36001,127.0.0.1:36002"
,
"PADDLE_TRAINER_ENDPOINTS"
:
"127.0.0.1:{},127.0.0.1:{}"
.
format
(
port_a
,
port_b
),
"http_proxy"
:
""
,
"https_proxy"
:
""
}
...
...
@@ -253,8 +268,7 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase):
proc_a
.
start
()
proc_b
=
launch_func
(
node_func
,
node_b
)
proc_b
.
start
()
proc_a
.
join
()
proc_b
.
join
()
wait
([
proc_a
,
proc_b
])
if
__name__
==
"__main__"
:
...
...
python/requirements.txt
浏览文件 @
fc5acdd0
...
...
@@ -23,3 +23,4 @@ objgraph
astor
pathlib
netifaces
psutil
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录