From e3a67782a7bb27505c9dd67515f4dcdfc16c513a Mon Sep 17 00:00:00 2001 From: Haohongxiang <86215757+haohongxiang@users.noreply.github.com> Date: Thu, 17 Mar 2022 11:00:29 +0800 Subject: [PATCH] add time of unittests for dataparallel in dygraph mode (#40639) --- .../fluid/tests/unittests/CMakeLists.txt | 4 ++-- ...llel_dygraph_dataparallel_in_eager_mode.py | 20 ++++++++++++++++--- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index cbe360f556..c82172780b 100755 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -1118,9 +1118,9 @@ set_tests_properties(test_cumprod_op PROPERTIES TIMEOUT 120) set_tests_properties(test_split_program PROPERTIES TIMEOUT 120) if(WITH_DISTRIBUTE AND WITH_GPU AND WITH_NCCL) set_tests_properties(test_parallel_dygraph_dataparallel PROPERTIES TIMEOUT 120) - set_tests_properties(test_parallel_dygraph_unused_variables PROPERTIES TIMEOUT 120) + set_tests_properties(test_parallel_dygraph_unused_variables PROPERTIES TIMEOUT 150) set_tests_properties(test_parallel_dygraph_control_flow PROPERTIES TIMEOUT 200) - set_tests_properties(test_parallel_dygraph_no_sync PROPERTIES TIMEOUT 120) + set_tests_properties(test_parallel_dygraph_no_sync PROPERTIES TIMEOUT 150) set_tests_properties(test_parallel_dygraph_no_sync_gradient_check PROPERTIES TIMEOUT 30) set_tests_properties(test_parallel_dygraph_pipeline_parallel PROPERTIES TIMEOUT 200) set_tests_properties(test_parallel_dygraph_tensor_parallel PROPERTIES TIMEOUT 200) diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_dataparallel_in_eager_mode.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_dataparallel_in_eager_mode.py index 8ff68a1ce0..91c340c35d 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_dataparallel_in_eager_mode.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_dataparallel_in_eager_mode.py @@ -19,6 +19,7 @@ import unittest import os import numpy as np import random +import socket import paddle import paddle.nn as nn @@ -31,13 +32,26 @@ from paddle.optimizer import SGD from paddle.fluid.initializer import NumpyArrayInitializer +def net_is_used(port, ip='127.0.0.1'): + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + s.connect((ip, port)) + s.shutdown(2) + return True + except Exception as e: + return False + + def init_process_group(strategy=None): nranks = ParallelEnv().nranks rank = ParallelEnv().local_rank is_master = True if rank == 0 else False - store = paddle.fluid.core.TCPStore("127.0.0.1", 6172, is_master, nranks) - group = core.ProcessGroupNCCL(store, rank, nranks) - return group + for port in range(20000, 21000): + if not net_is_used(port): + store = paddle.fluid.core.TCPStore("127.0.0.1", port, is_master, + nranks) + group = core.ProcessGroupNCCL(store, rank, nranks) + return group class LinearModel(nn.Layer): -- GitLab