Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
bf6470c7
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
bf6470c7
编写于
10月 14, 2019
作者:
G
gongweibao
提交者:
GitHub
10月 14, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add detail logs on resnet unit test (#20558)
Add detail logs on resnet unit test
上级
36c85ef4
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
25 addition
and
9 deletion
+25
-9
python/paddle/fluid/tests/unittests/dist_test.sh
python/paddle/fluid/tests/unittests/dist_test.sh
+8
-2
python/paddle/fluid/tests/unittests/test_dist_base.py
python/paddle/fluid/tests/unittests/test_dist_base.py
+10
-6
python/paddle/fluid/tests/unittests/test_dist_se_resnext_async.py
...addle/fluid/tests/unittests/test_dist_se_resnext_async.py
+7
-1
未找到文件。
python/paddle/fluid/tests/unittests/dist_test.sh
浏览文件 @
bf6470c7
...
@@ -15,7 +15,7 @@ if [[ ${TEST_TIMEOUT}"x" == "x" ]]; then
...
@@ -15,7 +15,7 @@ if [[ ${TEST_TIMEOUT}"x" == "x" ]]; then
fi
fi
# rm flag file
# rm flag file
rm
-f
${
name
}*
.log
rm
-f
${
name
}
_
*
.log
# start the unit test
# start the unit test
run_time
=
$((
$TEST_TIMEOUT
-
10
))
run_time
=
$((
$TEST_TIMEOUT
-
10
))
...
@@ -28,9 +28,15 @@ fi
...
@@ -28,9 +28,15 @@ fi
echo
"
${
name
}
faild with
${
exit_code
}
"
echo
"
${
name
}
faild with
${
exit_code
}
"
netstat
-an
# paddle log
# paddle log
echo
"
${
name
}
log"
echo
"
${
name
}
log"
cat
-n
${
name
}*
.log
for
log
in
`
ls
${
name
}
_
*
.log
`
do
printf
"
\n
cat
${
log
}
\n
"
cat
-n
${
log
}
done
#display system context
#display system context
for
i
in
{
1..2
}
;
do
for
i
in
{
1..2
}
;
do
...
...
python/paddle/fluid/tests/unittests/test_dist_base.py
浏览文件 @
bf6470c7
...
@@ -525,7 +525,11 @@ class TestDistBase(unittest.TestCase):
...
@@ -525,7 +525,11 @@ class TestDistBase(unittest.TestCase):
self
.
_port_set
.
add
(
port
)
self
.
_port_set
.
add
(
port
)
return
port
return
port
def
start_pserver
(
self
,
model_file
,
check_error_log
,
required_envs
):
def
start_pserver
(
self
,
model_file
,
check_error_log
,
required_envs
,
log_name
=
""
):
ps0_ep
,
ps1_ep
=
self
.
_ps_endpoints
.
split
(
","
)
ps0_ep
,
ps1_ep
=
self
.
_ps_endpoints
.
split
(
","
)
ps_cmd
=
"%s"
ps_cmd
=
"%s"
...
@@ -548,8 +552,8 @@ class TestDistBase(unittest.TestCase):
...
@@ -548,8 +552,8 @@ class TestDistBase(unittest.TestCase):
print
(
ps0_cmd
)
print
(
ps0_cmd
)
print
(
ps1_cmd
)
print
(
ps1_cmd
)
ps0_pipe
=
open
(
"/tmp/
ps0_err.log"
,
"wb"
)
ps0_pipe
=
open
(
log_name
+
"_
ps0_err.log"
,
"wb"
)
ps1_pipe
=
open
(
"/tmp/
ps1_err.log"
,
"wb"
)
ps1_pipe
=
open
(
log_name
+
"_
ps1_err.log"
,
"wb"
)
print_to_err
(
type
(
self
).
__name__
,
"going to start pserver process 0"
)
print_to_err
(
type
(
self
).
__name__
,
"going to start pserver process 0"
)
ps0_proc
=
subprocess
.
Popen
(
ps0_proc
=
subprocess
.
Popen
(
...
@@ -628,8 +632,8 @@ class TestDistBase(unittest.TestCase):
...
@@ -628,8 +632,8 @@ class TestDistBase(unittest.TestCase):
def
_run_cluster
(
self
,
model
,
envs
,
check_error_log
,
log_name
):
def
_run_cluster
(
self
,
model
,
envs
,
check_error_log
,
log_name
):
# Run dist train to compare with local results
# Run dist train to compare with local results
ps0
,
ps1
,
ps0_pipe
,
ps1_pipe
=
self
.
start_pserver
(
model
,
ps0
,
ps1
,
ps0_pipe
,
ps1_pipe
=
self
.
start_pserver
(
check_error_log
,
envs
)
model
,
check_error_log
,
envs
,
log_name
=
log_name
)
ps0_ep
,
ps1_ep
=
self
.
_ps_endpoints
.
split
(
","
)
ps0_ep
,
ps1_ep
=
self
.
_ps_endpoints
.
split
(
","
)
...
@@ -848,7 +852,7 @@ class TestDistBase(unittest.TestCase):
...
@@ -848,7 +852,7 @@ class TestDistBase(unittest.TestCase):
if
check_error_log
:
if
check_error_log
:
required_envs
[
"GLOG_vmodule"
]
=
\
required_envs
[
"GLOG_vmodule"
]
=
\
"fused_all_reduce_op_handle=10,all_reduce_op_handle=10,alloc_continuous_space_op=10,fuse_all_reduce_op_pass=10,alloc_continuous_space_for_grad_pass=10,fast_threaded_ssa_graph_executor=10"
"fused_all_reduce_op_handle=10,all_reduce_op_handle=10,alloc_continuous_space_op=10,fuse_all_reduce_op_pass=10,alloc_continuous_space_for_grad_pass=10,fast_threaded_ssa_graph_executor=10
,executor=10,operator=10
"
required_envs
[
"GLOG_logtostderr"
]
=
"1"
required_envs
[
"GLOG_logtostderr"
]
=
"1"
local_losses
\
local_losses
\
...
...
python/paddle/fluid/tests/unittests/test_dist_se_resnext_async.py
浏览文件 @
bf6470c7
...
@@ -15,7 +15,9 @@
...
@@ -15,7 +15,9 @@
from
__future__
import
print_function
from
__future__
import
print_function
import
unittest
import
unittest
from
test_dist_base
import
TestDistBase
from
test_dist_base
import
TestDistBase
import
os
import
os
flag_name
=
os
.
path
.
splitext
(
__file__
)[
0
]
def
skip_ci
(
func
):
def
skip_ci
(
func
):
...
@@ -36,7 +38,11 @@ class TestDistSeResneXt2x2Async(TestDistBase):
...
@@ -36,7 +38,11 @@ class TestDistSeResneXt2x2Async(TestDistBase):
@
skip_ci
@
skip_ci
def
test_dist_train
(
self
):
def
test_dist_train
(
self
):
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
100
)
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
100
,
check_error_log
=
True
,
log_name
=
flag_name
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录