From 273f58a3c5cbd805ba4c2e4524d950cd34bb6674 Mon Sep 17 00:00:00 2001 From: Huihuang Zheng Date: Thu, 24 Sep 2020 16:11:10 +0800 Subject: [PATCH] Decrease Random Failure Probability for test_parallel_executor_mnist, test=develop (#27498) As the title says, this decreases the random failure probability of test_parallel_executor_mnist. The old code set a larger delta when comparing reduce and all-reduce, but not for every comparison; this patch sets it for all of them. On my Linux machine, I ran the test 100 times and no failure occurred. In addition, we have only seen this random failure on CI twice since I started working, so I considered it rare and simply increased the delta. --- .../fluid/tests/unittests/test_parallel_executor_mnist.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py index a2f1d774608..da7e30ff106 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py @@ -124,8 +124,10 @@ class TestMNIST(TestParallelExecutorBase): def test_simple_fc_with_new_strategy(self): # use_cuda, use_reduce - self._compare_reduce_and_allreduce(simple_fc_net, True) - self._compare_reduce_and_allreduce(simple_fc_net, False) + # NOTE: the computation result of nccl_reduce is non-deterministic, + # related issue: https://github.com/NVIDIA/nccl/issues/157 + self._compare_reduce_and_allreduce(simple_fc_net, True, 1e-5, 1e-2) + self._compare_reduce_and_allreduce(simple_fc_net, False, 1e-5, 1e-2) def check_simple_fc_parallel_accuracy(self, use_cuda): if use_cuda and not core.is_compiled_with_cuda(): @@ -179,7 +181,7 @@ class TestMNIST(TestParallelExecutorBase): # NOTE: the computation result of nccl_reduce is non-deterministic, # related issue: https://github.com/NVIDIA/nccl/issues/157 self._compare_reduce_and_allreduce(fc_with_batchnorm, True, 1e-5, 1e-2) - 
self._compare_reduce_and_allreduce(fc_with_batchnorm, False) + self._compare_reduce_and_allreduce(fc_with_batchnorm, False, 1e-5, 1e-2) if __name__ == '__main__': -- GitLab