From 9d66385f8efc21c16fb9bd2cc6b59ebdc62f25b0 Mon Sep 17 00:00:00 2001 From: Huihuang Zheng Date: Mon, 1 Jun 2020 20:29:13 +0800 Subject: [PATCH] [FixUnitTest] Fix a Batch Size Random Unit Test Failure (#24718) The PR https://github.com/PaddlePaddle/Paddle/pull/24651 seems to cause a new random failure of the unit test test_parallel_executor_seresnext_base_cpu. The reason is that a smaller batch size causes random optimization of the neural network. I distinguished the CPU and GPU batch sizes to fix the unit test. --- python/paddle/fluid/tests/unittests/seresnext_net.py | 11 +++++++---- .../fluid/tests/unittests/seresnext_test_base.py | 4 ++-- ...est_parallel_executor_seresnext_with_reduce_cpu.py | 8 ++++---- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/seresnext_net.py b/python/paddle/fluid/tests/unittests/seresnext_net.py index c31471efbd..252ffe2c22 100644 --- a/python/paddle/fluid/tests/unittests/seresnext_net.py +++ b/python/paddle/fluid/tests/unittests/seresnext_net.py @@ -172,8 +172,11 @@ def optimizer(learning_rate=0.01): model = SE_ResNeXt50Small -def batch_size(): - return 8 +def batch_size(use_cuda): + if use_cuda: + # Paddle uses 8GB P4 GPU for unittest so we decreased the batch size. 
+ return 8 + return 12 def iter(use_cuda): @@ -183,9 +186,9 @@ def iter(use_cuda): gpu_img, gpu_label = init_data( - batch_size=batch_size(), img_shape=img_shape, label_range=999) + batch_size=batch_size(use_cuda=True), img_shape=img_shape, label_range=999) cpu_img, cpu_label = init_data( - batch_size=batch_size(), img_shape=img_shape, label_range=999) + batch_size=batch_size(use_cuda=False), img_shape=img_shape, label_range=999) feed_dict_gpu = {"image": gpu_img, "label": gpu_label} feed_dict_cpu = {"image": cpu_img, "label": cpu_label} diff --git a/python/paddle/fluid/tests/unittests/seresnext_test_base.py b/python/paddle/fluid/tests/unittests/seresnext_test_base.py index 65879d39d9..9f055191b1 100644 --- a/python/paddle/fluid/tests/unittests/seresnext_test_base.py +++ b/python/paddle/fluid/tests/unittests/seresnext_test_base.py @@ -32,7 +32,7 @@ class TestResnetBase(TestParallelExecutorBase): seresnext_net.model, feed_dict=seresnext_net.feed_dict(use_cuda), iter=seresnext_net.iter(use_cuda), - batch_size=seresnext_net.batch_size(), + batch_size=seresnext_net.batch_size(use_cuda), use_cuda=use_cuda, use_reduce=False, optimizer=seresnext_net.optimizer) @@ -41,7 +41,7 @@ class TestResnetBase(TestParallelExecutorBase): seresnext_net.model, feed_dict=seresnext_net.feed_dict(use_cuda), iter=seresnext_net.iter(use_cuda), - batch_size=seresnext_net.batch_size(), + batch_size=seresnext_net.batch_size(use_cuda), use_cuda=use_cuda) if compare_seperately: diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_cpu.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_cpu.py index 74c5999c4f..62eb7e1155 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_cpu.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_cpu.py @@ -28,7 +28,7 @@ class TestResnetWithReduceBase(TestParallelExecutorBase): seresnext_net.model, 
feed_dict=seresnext_net.feed_dict(use_cuda), iter=seresnext_net.iter(use_cuda), - batch_size=seresnext_net.batch_size(), + batch_size=seresnext_net.batch_size(use_cuda), use_cuda=use_cuda, use_reduce=False, optimizer=seresnext_net.optimizer) @@ -36,7 +36,7 @@ class TestResnetWithReduceBase(TestParallelExecutorBase): seresnext_net.model, feed_dict=seresnext_net.feed_dict(use_cuda), iter=seresnext_net.iter(use_cuda), - batch_size=seresnext_net.batch_size(), + batch_size=seresnext_net.batch_size(use_cuda), use_cuda=use_cuda, use_reduce=True, optimizer=seresnext_net.optimizer) @@ -53,7 +53,7 @@ class TestResnetWithReduceBase(TestParallelExecutorBase): seresnext_net.model, feed_dict=seresnext_net.feed_dict(use_cuda), iter=seresnext_net.iter(use_cuda), - batch_size=seresnext_net.batch_size(), + batch_size=seresnext_net.batch_size(use_cuda), use_cuda=use_cuda, use_reduce=False, optimizer=seresnext_net.optimizer, @@ -63,7 +63,7 @@ class TestResnetWithReduceBase(TestParallelExecutorBase): seresnext_net.model, feed_dict=seresnext_net.feed_dict(use_cuda), iter=seresnext_net.iter(use_cuda), - batch_size=seresnext_net.batch_size(), + batch_size=seresnext_net.batch_size(use_cuda), use_cuda=use_cuda, use_reduce=True, optimizer=seresnext_net.optimizer, -- GitLab