未验证 提交 e3a67782 编写于 作者: H Haohongxiang 提交者: GitHub

add time of unittests for dataparallel in dygraph mode (#40639)

上级 ade72108
...@@ -1118,9 +1118,9 @@ set_tests_properties(test_cumprod_op PROPERTIES TIMEOUT 120) ...@@ -1118,9 +1118,9 @@ set_tests_properties(test_cumprod_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_split_program PROPERTIES TIMEOUT 120) set_tests_properties(test_split_program PROPERTIES TIMEOUT 120)
if(WITH_DISTRIBUTE AND WITH_GPU AND WITH_NCCL) if(WITH_DISTRIBUTE AND WITH_GPU AND WITH_NCCL)
set_tests_properties(test_parallel_dygraph_dataparallel PROPERTIES TIMEOUT 120) set_tests_properties(test_parallel_dygraph_dataparallel PROPERTIES TIMEOUT 120)
set_tests_properties(test_parallel_dygraph_unused_variables PROPERTIES TIMEOUT 120) set_tests_properties(test_parallel_dygraph_unused_variables PROPERTIES TIMEOUT 150)
set_tests_properties(test_parallel_dygraph_control_flow PROPERTIES TIMEOUT 200) set_tests_properties(test_parallel_dygraph_control_flow PROPERTIES TIMEOUT 200)
set_tests_properties(test_parallel_dygraph_no_sync PROPERTIES TIMEOUT 120) set_tests_properties(test_parallel_dygraph_no_sync PROPERTIES TIMEOUT 150)
set_tests_properties(test_parallel_dygraph_no_sync_gradient_check PROPERTIES TIMEOUT 30) set_tests_properties(test_parallel_dygraph_no_sync_gradient_check PROPERTIES TIMEOUT 30)
set_tests_properties(test_parallel_dygraph_pipeline_parallel PROPERTIES TIMEOUT 200) set_tests_properties(test_parallel_dygraph_pipeline_parallel PROPERTIES TIMEOUT 200)
set_tests_properties(test_parallel_dygraph_tensor_parallel PROPERTIES TIMEOUT 200) set_tests_properties(test_parallel_dygraph_tensor_parallel PROPERTIES TIMEOUT 200)
......
...@@ -19,6 +19,7 @@ import unittest ...@@ -19,6 +19,7 @@ import unittest
import os import os
import numpy as np import numpy as np
import random import random
import socket
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
...@@ -31,13 +32,26 @@ from paddle.optimizer import SGD ...@@ -31,13 +32,26 @@ from paddle.optimizer import SGD
from paddle.fluid.initializer import NumpyArrayInitializer from paddle.fluid.initializer import NumpyArrayInitializer
def net_is_used(port, ip='127.0.0.1'):
    """Return True if a TCP listener is already accepting on (ip, port).

    Args:
        port (int): TCP port to probe.
        ip (str): Host to probe; defaults to localhost.

    Returns:
        bool: True when a connection to (ip, port) succeeds (port in use),
        False when the connection is refused or otherwise fails (port free).
    """
    # Context manager guarantees the socket is closed on every path;
    # the original leaked the file descriptor when connect() raised.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        try:
            s.connect((ip, port))
            # SHUT_RDWR (== 2, the original magic constant): stop both
            # directions before the socket is closed by the context manager.
            s.shutdown(socket.SHUT_RDWR)
            return True
        except OSError:
            # All socket-level failures (refused, unreachable, timeout)
            # derive from OSError -- narrower than bare Exception.
            return False
def init_process_group(strategy=None):
    """Create a NCCL process group backed by a TCPStore on a free local port.

    Scans ports 20000-20999 for one that is not in use, starts/joins a
    ``TCPStore`` there (rank 0 acts as the master), and builds the NCCL
    process group on top of it.

    Args:
        strategy: Unused; kept for backward-compatible call sites.

    Returns:
        core.ProcessGroupNCCL: the initialized process group.

    Raises:
        RuntimeError: if every port in [20000, 21000) is already in use
            (the original silently returned None here, which would only
            fail later at the caller).
    """
    nranks = ParallelEnv().nranks
    rank = ParallelEnv().local_rank
    is_master = rank == 0  # rank 0 hosts the TCPStore
    for port in range(20000, 21000):
        # NOTE(review): check-then-use is inherently racy -- another
        # process may grab the port between net_is_used() and TCPStore
        # creation; acceptable for tests, but worth knowing.
        if not net_is_used(port):
            store = paddle.fluid.core.TCPStore("127.0.0.1", port, is_master,
                                               nranks)
            return core.ProcessGroupNCCL(store, rank, nranks)
    raise RuntimeError("no free port found in range [20000, 21000)")
class LinearModel(nn.Layer): class LinearModel(nn.Layer):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册