From b5882c6e66ff013d538373915cdcca7a889a1bdc Mon Sep 17 00:00:00 2001
From: Aurelius84
Date: Tue, 18 May 2021 13:59:18 +0800
Subject: [PATCH] [UnitTest]Enhance grep syntax to avoid random failed of test_dist_mnist_dgc_nccl (#32946)

* Enhance grep syntax to avoid random failed

* Enhance grep syntax to avoid random failed
---
 .../fluid/tests/unittests/test_dist_mnist_dgc_nccl.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_dgc_nccl.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_dgc_nccl.py
index 9bc48ac0a1b..eae19afb2ef 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_mnist_dgc_nccl.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_dgc_nccl.py
@@ -25,12 +25,15 @@ flag_name = os.path.splitext(__file__)[0]
 
 
 def count_of_sparse_all_reduce_calls(file_name):
-    cmd = 'grep sparse_all_reduce_op_handle ' + file_name + ' | grep in_numel | wc -l'
+    # NOTE(Aurelius84): The log file contains some binary contents that causes error
+    # while `grep`. So we add `-a` to fix it.
+    # -a, --text equivalent to --binary-files=text, make binaries equivalent to text.
+    cmd = 'grep -a sparse_all_reduce_op_handle ' + file_name + ' | grep in_numel | wc -l'
     child = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
     result = child.communicate()[0]
     print('test_info: result = ' + str(result))
 
-    # note. in python3, result is b'num', != 'num'
+    # NOTE: in python3, result is b'num', != 'num'
     return int(result)
 
 
@@ -59,7 +62,7 @@ class TestDistMnistNCCL2DGC(TestDistBase):
             # only 1 layer use dgc now, run_step=5, rampup_begin_step=2, so 1 * (5 - 2) = 3
 
             # temp close this test. In python3 CI, the log is right, but the result
-            # has a problem, may be in multi process mode, log is not writed in time.
+            # has a problem, may be in multi process mode, log is not written in time.
             # self.assertEqual(result, 3)
 
 
-- 
GitLab