未验证 提交 26c3077a 编写于 作者: R Roc 提交者: GitHub

Fix timeout v2 (#53514)

上级 2aedd9db
......@@ -44,6 +44,8 @@ if((WITH_ROCM OR WITH_GPU) AND (LINUX))
test_collective_split_embedding
START_BASH
../dist_test.sh
TIMEOUT
"300"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -85,6 +87,8 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX))
test_collective_alltoall_single
START_BASH
../dist_test.sh
TIMEOUT
"350"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -112,6 +116,8 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX))
test_collective_batch_isend_irecv
START_BASH
../dist_test.sh
TIMEOUT
"350"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -176,6 +182,8 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX))
test_collective_process_group
START_BASH
../dist_test.sh
TIMEOUT
"350"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -202,6 +210,8 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX))
test_collective_reduce_scatter
START_BASH
../dist_test.sh
TIMEOUT
"350"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -391,6 +401,8 @@ if((WITH_ROCM OR WITH_GPU) AND (LINUX))
test_world_size_and_rank
START_BASH
test_world_size_and_rank.sh
TIMEOUT
"120"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -414,6 +426,8 @@ if((WITH_ROCM OR WITH_GPU) AND (LINUX))
test_strategy_group
START_BASH
test_strategy_group.sh
TIMEOUT
"120"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -425,6 +439,8 @@ if((WITH_ROCM OR WITH_GPU) AND (LINUX))
test_orthogonal_strategy
START_BASH
test_orthogonal_strategy.sh
TIMEOUT
"120"
LABELS
"RUN_TYPE=DIST"
ENVS
......
......@@ -37,6 +37,8 @@ if(WITH_NCCL)
test_parallel_margin_cross_entropy
START_BASH
../../dist_test.sh
TIMEOUT
"120"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -95,6 +97,8 @@ if(WITH_NCCL)
test_parallel_dygraph_mp_layers
START_BASH
../../dist_test.sh
TIMEOUT
"120"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -120,6 +124,8 @@ if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
test_dygraph_sharding_stage3_for_eager
START_BASH
../../dist_test.sh
TIMEOUT
"350"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -146,6 +152,8 @@ if((WITH_GPU) AND LOCAL_ALL_PLAT)
test_parallel_dygraph_pipeline_parallel
START_BASH
../../dist_test.sh
TIMEOUT
"500"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -159,6 +167,8 @@ if((WITH_GPU) AND LOCAL_ALL_PLAT)
test_parallel_dygraph_pipeline_parallel_with_virtual_stage
START_BASH
../../dist_test.sh
TIMEOUT
"500"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -173,6 +183,8 @@ if((WITH_GPU) AND LOCAL_ALL_PLAT)
test_parallel_dygraph_pp_adaptor
START_BASH
../../dist_test.sh
TIMEOUT
"500"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -193,6 +205,8 @@ if(WITH_NCCL)
test_parallel_class_center_sample
START_BASH
../../dist_test.sh
TIMEOUT
"120"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -207,6 +221,8 @@ if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
test_pipeline
START_BASH
../../dist_test.sh
TIMEOUT
"120"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -226,6 +242,8 @@ if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
test_static_model_parallel
START_BASH
../../dist_test.sh
TIMEOUT
"240"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -239,6 +257,8 @@ if(WITH_NCCL)
test_parallel_dygraph_no_sync
START_BASH
../../dist_test.sh
TIMEOUT
"300"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -252,6 +272,8 @@ if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
test_dygraph_sharding_stage2
START_BASH
../../dist_test.sh
TIMEOUT
"200"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -264,6 +286,8 @@ if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
test_parallel_dygraph_control_flow
START_BASH
../../dist_test.sh
TIMEOUT
"350"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -288,6 +312,8 @@ if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
test_hybrid_parallel_inference_helper
START_BASH
../../dist_test.sh
TIMEOUT
"120"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -365,6 +391,8 @@ if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
test_parallel_dygraph_sharding_parallel
START_BASH
../../dist_test.sh
TIMEOUT
"120"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -378,6 +406,8 @@ if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
test_parallel_dygraph_tensor_parallel
START_BASH
../../dist_test.sh
TIMEOUT
"200"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -391,6 +421,8 @@ if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
test_dygraph_group_sharded_api_for_eager
START_BASH
../../dist_test.sh
TIMEOUT
"120"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -417,6 +449,8 @@ if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
test_parallel_dygraph_unused_variables
START_BASH
../../dist_test.sh
TIMEOUT
"350"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -442,6 +476,8 @@ if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
test_parallel_dygraph_no_sync_gradient_check
START_BASH
../../dist_test.sh
TIMEOUT
"60"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -472,6 +508,8 @@ if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
test_parallel_dygraph_qat
START_BASH
../../dist_test.sh
TIMEOUT
"120"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -486,6 +524,8 @@ if(WITH_NCCL)
test_parallel_dygraph_sparse_embedding
START_BASH
../../dist_test.sh
TIMEOUT
"200"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -501,6 +541,8 @@ if((WITH_ROCM) AND LOCAL_ALL_PLAT)
test_parallel_dygraph_sparse_embedding
START_BASH
../../dist_test.sh
TIMEOUT
"200"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -521,6 +563,8 @@ if(WITH_NCCL)
test_parallel_dygraph_sparse_embedding_over_height
START_BASH
../../dist_test.sh
TIMEOUT
"150"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -536,6 +580,8 @@ if((WITH_ROCM) AND LOCAL_ALL_PLAT)
test_parallel_dygraph_sparse_embedding_over_height
START_BASH
../../dist_test.sh
TIMEOUT
"350"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -554,6 +600,8 @@ if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
test_auto_parallel_parallelizer
START_BASH
../../dist_test.sh
TIMEOUT
"120"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -587,6 +635,8 @@ if((WITH_GPU OR WITH_XPU) AND (LINUX))
test_c_comm_init_op
START_BASH
test_c_comm_init_op.sh
TIMEOUT
"120"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -598,6 +648,8 @@ if((WITH_GPU) AND (LINUX))
test_fused_attention_pass_with_mp
START_BASH
test_fused_attention_pass_with_mp.sh
TIMEOUT
"120"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -610,6 +662,8 @@ if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
test_ir_pass_pipeline
START_BASH
../../dist_test.sh
TIMEOUT
"120"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -622,6 +676,8 @@ if((WITH_GPU OR WITH_ROCM) AND LOCAL_ALL_PLAT)
test_parallel_dygraph_mnist
START_BASH
../../dist_test.sh
TIMEOUT
"200"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -634,6 +690,8 @@ if((WITH_GPU OR WITH_ROCM) AND LOCAL_ALL_PLAT)
test_parallel_dygraph_se_resnext
START_BASH
../../dist_test.sh
TIMEOUT
"200"
LABELS
"RUN_TYPE=DIST"
ENVS
......@@ -704,6 +762,8 @@ if(LOCAL_ALL_ARCH AND (LINUX))
test_auto_checkpoint
START_BASH
../../dist_test.sh
TIMEOUT
"200"
LABELS
"RUN_TYPE=EXCLUSIVE:NIGHTLY"
ENVS
......@@ -716,6 +776,8 @@ if(LOCAL_ALL_ARCH AND (LINUX))
test_auto_checkpoint1
START_BASH
../../dist_test.sh
TIMEOUT
"200"
LABELS
"RUN_TYPE=EXCLUSIVE:NIGHTLY"
ENVS
......@@ -728,6 +790,8 @@ if(LOCAL_ALL_ARCH AND (LINUX))
test_auto_checkpoint2
START_BASH
../../dist_test.sh
TIMEOUT
"200"
LABELS
"RUN_TYPE=EXCLUSIVE:NIGHTLY"
ENVS
......@@ -740,6 +804,8 @@ if(LOCAL_ALL_ARCH AND (LINUX))
test_auto_checkpoint3
START_BASH
../../dist_test.sh
TIMEOUT
"200"
LABELS
"RUN_TYPE=EXCLUSIVE:NIGHTLY"
ENVS
......@@ -752,6 +818,8 @@ if(LOCAL_ALL_ARCH AND (LINUX))
test_auto_checkpoint_multiple
START_BASH
../../dist_test.sh
TIMEOUT
"200"
LABELS
"RUN_TYPE=EXCLUSIVE:NIGHTLY"
ENVS
......@@ -764,6 +832,8 @@ if(LOCAL_ALL_ARCH AND (LINUX))
test_auto_checkpoint_dist_basic
START_BASH
../../dist_test.sh
TIMEOUT
"200"
LABELS
"RUN_TYPE=EXCLUSIVE:NIGHTLY"
ENVS
......@@ -776,6 +846,8 @@ if(LOCAL_ALL_ARCH AND (LINUX))
test_hdfs1
START_BASH
../../dist_test.sh
TIMEOUT
"200"
LABELS
"RUN_TYPE=EXCLUSIVE:NIGHTLY"
ENVS
......@@ -788,6 +860,8 @@ if(LOCAL_ALL_ARCH AND (LINUX))
test_hdfs2
START_BASH
../../dist_test.sh
TIMEOUT
"200"
LABELS
"RUN_TYPE=EXCLUSIVE:NIGHTLY"
ENVS
......@@ -800,6 +874,8 @@ if(LOCAL_ALL_ARCH AND (LINUX))
test_hdfs3
START_BASH
../../dist_test.sh
TIMEOUT
"200"
LABELS
"RUN_TYPE=EXCLUSIVE:NIGHTLY"
ENVS
......
......@@ -97,6 +97,8 @@ def _proccess_archs(arch):
arch = arch.upper().strip()
if len(arch) > 0:
for a in arch.split(";"):
if '' == a:
continue
assert a in [
"GPU",
"ROCM",
......@@ -372,11 +374,12 @@ class DistUTPortManager:
class CMakeGenerator:
def __init__(self, current_dirs, ignore_dirs):
def __init__(self, current_dirs, only_check, ignore_dirs):
self.processed_dirs = set()
self.port_manager = DistUTPortManager(ignore_dirs)
self.current_dirs = _norm_dirs(current_dirs)
self.modified_or_created_files = []
self._only_check = only_check
def prepare_dist_ut_port(self):
for c in self._find_root_dirs():
......@@ -457,6 +460,10 @@ class CMakeGenerator:
for c in conditions:
cmd += f"if ({c})\n"
time_out_str = (
f' TIMEOUT "{timeout}"' if len(timeout.strip()) > 0 else ''
)
if launcher[-3:] == ".sh":
run_type = _process_run_type(run_type)
dist_ut_port = self.port_manager.process_dist_port_num(num_port)
......@@ -466,6 +473,7 @@ class CMakeGenerator:
{name}
START_BASH
{launcher}
{time_out_str}
LABELS
"RUN_TYPE={run_type}"
ENVS
......@@ -492,9 +500,6 @@ class CMakeGenerator:
run_type_str = (
"" if len(run_type) == 0 else f' LABELS "RUN_TYPE={run_type}"'
)
time_out_str = (
f' TIMEOUT "{timeout}"' if len(timeout.strip()) > 0 else ''
)
run_serial_str = (
f' RUN_SERIAL {run_serial}' if len(run_serial) > 0 else ''
)
......@@ -562,8 +567,13 @@ class CMakeGenerator:
# check whether the generated file is the same as the existing file, ignoring whitespace characters
# if they are the same, skip the writing process
with open(f"{current_work_dir}/CMakeLists.txt", "r") as old_cmake_file:
char_seq = old_cmake_file.read().split()
if os.path.isfile(f"{current_work_dir}/CMakeLists.txt"):
with open(
f"{current_work_dir}/CMakeLists.txt", "r"
) as old_cmake_file:
char_seq = old_cmake_file.read().split()
else:
char_seq = []
char_seq = "".join(char_seq)
if char_seq != "".join(cmds.split()):
......@@ -574,8 +584,11 @@ class CMakeGenerator:
self.modified_or_created_files.append(
f"{current_work_dir}/CMakeLists.txt"
)
with open(f"{current_work_dir}/CMakeLists.txt", "w") as cmake_file:
print(cmds, end="", file=cmake_file)
if not self._only_check:
with open(
f"{current_work_dir}/CMakeLists.txt", "w"
) as cmake_file:
print(cmds, end="", file=cmake_file)
if __name__ == "__main__":
......@@ -607,6 +620,14 @@ if __name__ == "__main__":
nargs='*',
help="To keep dist ports the same with old version cmake, old cmakelists.txt files are needed to parse dist_ports. If a directories are newly created and there is no cmakelists.txt file, the directory path must be specified by this option. The dirs are not recursive.",
)
parser.add_argument(
"--only-check-changed",
'-o',
type=lambda x: x.lower() not in ["false", "0", "off"],
required=False,
default=False,
help="Only check wheather the CMake files should be rewriten, do not write it enven if it should be write",
)
args = parser.parse_args()
assert not (
......@@ -624,7 +645,9 @@ if __name__ == "__main__":
if len(args.dirpaths) >= 1:
current_work_dirs = current_work_dirs + list(args.dirpaths)
cmake_generator = CMakeGenerator(current_work_dirs, args.ignore_cmake_dirs)
cmake_generator = CMakeGenerator(
current_work_dirs, args.only_check_changed, args.ignore_cmake_dirs
)
cmake_generator.prepare_dist_ut_port()
created = cmake_generator.parse_csvs()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册