提交 0fe16539 编写于 作者: W WangXi 提交者: gongweibao

Fix dgc & launch tests in cpu ci (#21759)

上级 94d72752
...@@ -257,14 +257,23 @@ if(WITH_DISTRIBUTE) ...@@ -257,14 +257,23 @@ if(WITH_DISTRIBUTE)
py_test_modules(test_recv_save_op MODULES test_recv_save_op ENVS ${dist_ENVS}) py_test_modules(test_recv_save_op MODULES test_recv_save_op ENVS ${dist_ENVS})
py_test_modules(test_transpiler_ops MODULES test_transpiler_ops ENVS ${dist_ENVS}) py_test_modules(test_transpiler_ops MODULES test_transpiler_ops ENVS ${dist_ENVS})
if(WITH_DGC) if(WITH_DGC)
# if with dgc, test all dgc tests.
# NOTE. dist dgc tests are already in DIST_TEST_OPS
py_test_modules(test_dgc_op MODULES test_dgc_op) py_test_modules(test_dgc_op MODULES test_dgc_op)
py_test_modules(test_dgc_momentum_op MODULES test_dgc_momentum_op) py_test_modules(test_dgc_momentum_op MODULES test_dgc_momentum_op)
py_test_modules(test_dgc_optimizer MODULES test_dgc_optimizer) py_test_modules(test_dgc_optimizer MODULES test_dgc_optimizer)
else()
# if not with dgc, must disable all dgc tests
list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_dgc_nccl")
list(REMOVE_ITEM DIST_TEST_OPS "test_dist_se_resnext_dgc")
endif() endif()
if(NOT APPLE) if(NOT APPLE)
bash_test_modules(test_listen_and_serv_op MODULES test_listen_and_serv.sh) bash_test_modules(test_listen_and_serv_op MODULES test_listen_and_serv.sh)
bash_test_modules(test_launch MODULES test_launch.sh) if(WITH_GPU)
bash_test_modules(test_launch_ps MODULES test_launch_ps.sh) # NOTE. test_launch only works in gpu collective mode
bash_test_modules(test_launch MODULES test_launch.sh)
endif()
bash_test_modules(test_launch_ps MODULES test_launch_ps.sh)
set(dist_ut_port 1000) set(dist_ut_port 1000)
foreach(TEST_OP ${DIST_TEST_OPS}) foreach(TEST_OP ${DIST_TEST_OPS})
......
...@@ -49,13 +49,15 @@ class TestDistMnistNCCL2DGC(TestDistBase): ...@@ -49,13 +49,15 @@ class TestDistMnistNCCL2DGC(TestDistBase):
log_name=flag_name) log_name=flag_name)
def tearDown(self): def tearDown(self):
result = count_of_sparse_all_reduce_calls( import paddle.fluid as fluid
'test_dist_mnist_dgc_nccl_tr0_err.log') if fluid.core.is_compiled_with_cuda():
# only 1 layer use dgc now, run_step=5, rampup_begin_step=2, so 1 * (5 - 2) = 3 result = count_of_sparse_all_reduce_calls(
'test_dist_mnist_dgc_nccl_tr0_err.log')
# only 1 layer use dgc now, run_step=5, rampup_begin_step=2, so 1 * (5 - 2) = 3
# temp close this test. In python3 CI, the log is right, but the result # temp close this test. In python3 CI, the log is right, but the result
# has a problem; maybe in multi-process mode the log is not written in time. # has a problem; maybe in multi-process mode the log is not written in time.
# self.assertEqual(result, 3) # self.assertEqual(result, 3)
class TestDistMnistNCCL2DGCMultiCards(TestDistBase): class TestDistMnistNCCL2DGCMultiCards(TestDistBase):
...@@ -76,10 +78,12 @@ class TestDistMnistNCCL2DGCMultiCards(TestDistBase): ...@@ -76,10 +78,12 @@ class TestDistMnistNCCL2DGCMultiCards(TestDistBase):
log_name=flag_name) log_name=flag_name)
def tearDown(self): def tearDown(self):
result = count_of_sparse_all_reduce_calls( import paddle.fluid as fluid
'test_dist_mnist_dgc_nccl_dgc_2cards_local.log') if fluid.core.is_compiled_with_cuda():
# same as above, but use two cards result = count_of_sparse_all_reduce_calls(
self.assertEqual(result, 6) 'test_dist_mnist_dgc_nccl_dgc_2cards_local.log')
# same as above, but use two cards
self.assertEqual(result, 6)
if __name__ == "__main__": if __name__ == "__main__":
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册