提交 0fe16539 编写于 作者: W WangXi 提交者: gongweibao

Fix dgc & launch tests in cpu ci (#21759)

上级 94d72752
......@@ -257,14 +257,23 @@ if(WITH_DISTRIBUTE)
py_test_modules(test_recv_save_op MODULES test_recv_save_op ENVS ${dist_ENVS})
py_test_modules(test_transpiler_ops MODULES test_transpiler_ops ENVS ${dist_ENVS})
if(WITH_DGC)
# DGC is enabled: register every standalone DGC unit test.
# NOTE: the distributed DGC tests are already listed in DIST_TEST_OPS.
py_test_modules(test_dgc_op MODULES test_dgc_op)
py_test_modules(test_dgc_momentum_op MODULES test_dgc_momentum_op)
py_test_modules(test_dgc_optimizer MODULES test_dgc_optimizer)
else()
# DGC is disabled: drop the distributed DGC tests from DIST_TEST_OPS so that
# none of the DGC tests are run.
list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_dgc_nccl")
list(REMOVE_ITEM DIST_TEST_OPS "test_dist_se_resnext_dgc")
endif()
if(NOT APPLE)
bash_test_modules(test_listen_and_serv_op MODULES test_listen_and_serv.sh)
bash_test_modules(test_launch MODULES test_launch.sh)
bash_test_modules(test_launch_ps MODULES test_launch_ps.sh)
if(WITH_GPU)
# NOTE: test_launch only works in GPU collective mode.
bash_test_modules(test_launch MODULES test_launch.sh)
endif()
bash_test_modules(test_launch_ps MODULES test_launch_ps.sh)
set(dist_ut_port 1000)
foreach(TEST_OP ${DIST_TEST_OPS})
......
......@@ -49,13 +49,15 @@ class TestDistMnistNCCL2DGC(TestDistBase):
log_name=flag_name)
def tearDown(self):
result = count_of_sparse_all_reduce_calls(
'test_dist_mnist_dgc_nccl_tr0_err.log')
# Only 1 layer uses DGC now; with run_step=5 and rampup_begin_step=2 the expected count is 1 * (5 - 2) = 3.
import paddle.fluid as fluid
if fluid.core.is_compiled_with_cuda():
result = count_of_sparse_all_reduce_calls(
'test_dist_mnist_dgc_nccl_tr0_err.log')
# Only 1 layer uses DGC now; with run_step=5 and rampup_begin_step=2 the expected count is 1 * (5 - 2) = 3.
# Temporarily disable this check. In the Python 3 CI the log is correct, but the
# result is wrong; possibly, in multi-process mode, the log is not written in time.
# self.assertEqual(result, 3)
# Temporarily disable this check. In the Python 3 CI the log is correct, but the
# result is wrong; possibly, in multi-process mode, the log is not written in time.
# self.assertEqual(result, 3)
class TestDistMnistNCCL2DGCMultiCards(TestDistBase):
......@@ -76,10 +78,12 @@ class TestDistMnistNCCL2DGCMultiCards(TestDistBase):
log_name=flag_name)
def tearDown(self):
result = count_of_sparse_all_reduce_calls(
'test_dist_mnist_dgc_nccl_dgc_2cards_local.log')
# Same as above, but using two cards.
self.assertEqual(result, 6)
import paddle.fluid as fluid
if fluid.core.is_compiled_with_cuda():
result = count_of_sparse_all_reduce_calls(
'test_dist_mnist_dgc_nccl_dgc_2cards_local.log')
# Same as above, but using two cards.
self.assertEqual(result, 6)
if __name__ == "__main__":
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册