提交 d3fed5b8 编写于 作者: L lelelelelez

increase parallel tests;notest;test=coverage;test=py3

上级 af886995
...@@ -1030,6 +1030,7 @@ function get_quickly_disable_ut() { ...@@ -1030,6 +1030,7 @@ function get_quickly_disable_ut() {
function card_test() { function card_test() {
set -m set -m
CTEST_PARALLEL_LEVEL=2
case_count $1 $2 case_count $1 $2
ut_startTime_s=`date +%s` ut_startTime_s=`date +%s`
...@@ -1098,10 +1099,8 @@ function card_test() { ...@@ -1098,10 +1099,8 @@ function card_test() {
ut_endTime_s=`date +%s` ut_endTime_s=`date +%s`
if (( $2 == -1 )); then if (( $2 == -1 )); then
echo "exclusive TestCases Total Time: $[ $ut_endTime_s - $ut_startTime_s ]s" echo "exclusive TestCases Total Time: $[ $ut_endTime_s - $ut_startTime_s ]s"
echo "ipipe_log_param_Exclusive_TestCases_Total_Time: $[ $ut_endTime_s - $ut_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt
else else
echo "$2 card TestCases Total Time: $[ $ut_endTime_s - $ut_startTime_s ]s" echo "$2 card TestCases Total Time: $[ $ut_endTime_s - $ut_startTime_s ]s"
echo "ipipe_log_param_${2}_Cards_TestCases_Total_Time: $[ $ut_endTime_s - $ut_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt
fi fi
set +m set +m
} }
...@@ -1153,13 +1152,17 @@ set +x ...@@ -1153,13 +1152,17 @@ set +x
test_cases=$(ctest -N -V) # get all test cases test_cases=$(ctest -N -V) # get all test cases
# Note(zhouwei): Parallel runs are relative to 'CTEST_PARALLEL_LEVEL', e.g: '4 job each time' means 4*CTEST_PARALLEL_LEVEL # Note(zhouwei): Parallel runs are relative to 'CTEST_PARALLEL_LEVEL', e.g: '4 job each time' means 4*CTEST_PARALLEL_LEVEL
single_card_tests_high_parallel='^job$' # cases list which would run the most job each time with single GPU single_card_tests_high_parallel='^job$' # cases list which would run the most job each time with single GPU
single_card_tests_two_parallel='^job$' # cases list which would run 2 job each time with single GPU single_card_tests_secondary_high_parallel='^job$'
single_card_tests_secondary_high_parallel_1='^job$'
single_card_tests_tetrad_parallel='^job$' # cases list which would run 2 job each time with single GPU
#single_card_tests_secondary_tetrad_parallel='^job$'
single_card_tests_non_parallel='^job$' # cases list which would run 1 job each time with single GPU single_card_tests_non_parallel='^job$' # cases list which would run 1 job each time with single GPU
single_card_tests='^job$' # all cases list which would take single GPU single_card_tests='^job$' # all cases list which would take single GPU
multiple_card_tests_two_parallel='^job$' # cases list which would run 2 job each time with multiple GPUs, most cases would be two GPUs multiple_card_tests_two_parallel='^job$' # cases list which would run 2 job each time with multiple GPUs, most cases would be two GPUs
multiple_card_tests_non_parallel='^job$' # cases list which would run 1 job each time with multiple GPUs, most cases would be two GPUs multiple_card_tests_non_parallel='^job$' # cases list which would run 1 job each time with multiple GPUs, most cases would be two GPUs
exclusive_tests_high_parallel='^job$'
exclusive_tests_two_parallel='^job$' # cases list which would run 2 job exclusively(with all GPUs) exclusive_tests_two_parallel='^job$' # cases list which would run 2 job exclusively(with all GPUs)
exclusive_tests_non_parallel='^job$' # cases list which would run 1 job exclusively(with all GPUs) exclusive_tests_non_parallel='^job$' # cases list which would run 1 job exclusively(with all GPUs)
...@@ -1171,9 +1174,12 @@ set +x ...@@ -1171,9 +1174,12 @@ set +x
UT_list=$(ctest -N | awk -F ': ' '{print $2}' | sed '/^$/d' | sed '$d') UT_list=$(ctest -N | awk -F ': ' '{print $2}' | sed '/^$/d' | sed '$d')
output=$(python ${PADDLE_ROOT}/tools/parallel_UT_rule.py "${UT_list}") output=$(python ${PADDLE_ROOT}/tools/parallel_UT_rule.py "${UT_list}")
cpu_parallel_job=$(echo $output | cut -d ";" -f 1) cpu_parallel_job=$(echo $output | cut -d ";" -f 1)
tetrad_parallel_job=$(echo $output | cut -d ";" -f 2) secondary_cpu_parallel_job=$(echo $output | cut -d ";" -f 2)
two_parallel_job=$(echo $output | cut -d ";" -f 3) secondary_cpu_parallel_job_1=$(echo $output | cut -d ";" -f 3)
non_parallel_job=$(echo $output | cut -d ";" -f 4) tetrad_parallel_job=$(echo $output | cut -d ";" -f 4)
#secondary_tetrad_parallel_job=$(echo $output | cut -d ";" -f 5)
two_parallel_job=$(echo $output | cut -d ";" -f 5)
non_parallel_job=$(echo $output | cut -d ";" -f 6)
while read -r line; do while read -r line; do
if [[ "$line" == "" ]]; then if [[ "$line" == "" ]]; then
continue continue
...@@ -1215,13 +1221,15 @@ set +x ...@@ -1215,13 +1221,15 @@ set +x
fi fi
if [[ "$is_exclusive" != "" ]]; then if [[ "$is_exclusive" != "" ]]; then
if [[ $(echo $cpu_parallel_job$tetrad_parallel_job$two_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then if [[ $(echo $cpu_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then
exclusive_tests_high_parallel="$exclusive_tests_high_parallel|^$testcase$"
elif [[ $(echo $tetrad_parallel_job$two_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then
exclusive_tests_two_parallel="$exclusive_tests_two_parallel|^$testcase$" exclusive_tests_two_parallel="$exclusive_tests_two_parallel|^$testcase$"
else else
exclusive_tests_non_parallel="$exclusive_tests_non_parallel|^$testcase$" exclusive_tests_non_parallel="$exclusive_tests_non_parallel|^$testcase$"
fi fi
elif [[ "$is_multicard" != "" ]]; then elif [[ "$is_multicard" != "" ]]; then
if [[ $(echo $cpu_parallel_job$tetrad_parallel_job$two_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then if [[ $(echo $cpu_parallel_job$tetrad_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then
multiple_card_tests_two_parallel="$multiple_card_tests_two_parallel|^$testcase$" multiple_card_tests_two_parallel="$multiple_card_tests_two_parallel|^$testcase$"
else else
multiple_card_tests_non_parallel="$multiple_card_tests_non_parallel|^$testcase$" multiple_card_tests_non_parallel="$multiple_card_tests_non_parallel|^$testcase$"
...@@ -1229,8 +1237,14 @@ set +x ...@@ -1229,8 +1237,14 @@ set +x
else else
if [[ $(echo $cpu_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then if [[ $(echo $cpu_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then
single_card_tests_high_parallel="$single_card_tests_high_parallel|^$testcase$" single_card_tests_high_parallel="$single_card_tests_high_parallel|^$testcase$"
elif [[ $(echo $secondary_cpu_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then
single_card_tests_secondary_high_parallel="$single_card_tests_secondary_high_parallel|^$testcase$"
elif [[ $(echo $secondary_cpu_parallel_job_1 | grep -o "\^$testcase\\$") != "" ]]; then
single_card_tests_secondary_high_parallel_1="$single_card_tests_secondary_high_parallel_1|^$testcase$"
elif [[ $(echo $tetrad_parallel_job$two_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then elif [[ $(echo $tetrad_parallel_job$two_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then
single_card_tests_two_parallel="$single_card_tests_two_parallel|^$testcase$" single_card_tests_tetrad_parallel="$single_card_tests_tetrad_parallel|^$testcase$"
#elif [[ $(echo $secondary_tetrad_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then
# single_card_tests_secondary_tetrad_parallel="$single_card_tests_secondary_tetrad_parallel|^$testcase$"
else else
single_card_tests_non_parallel="$single_card_tests_non_parallel|^$testcase$" single_card_tests_non_parallel="$single_card_tests_non_parallel|^$testcase$"
fi fi
...@@ -1243,23 +1257,43 @@ set +x ...@@ -1243,23 +1257,43 @@ set +x
testcase='' testcase=''
done <<< "$test_cases"; done <<< "$test_cases";
card_test "$single_card_tests_high_parallel" 1 6 # run cases the most each time with single GPU ut_actual_total_startTime_s=`date +%s`
card_test "$single_card_tests_two_parallel" 1 2 # run cases 2 job each time with single GPU
card_test "$single_card_tests_non_parallel" 1 # run cases 1 job each time with single GPU
single_ut_startTime_s=`date +%s`
card_test "$single_card_tests_high_parallel" 1 24 # run cases the most each time with single GPU
card_test "$single_card_tests_secondary_high_parallel" 1 12
card_test "$single_card_tests_secondary_high_parallel_1" 1 15
card_test "$single_card_tests_tetrad_parallel" 1 7 # run cases 2 job each time with single GPU
#####card_test "$single_card_tests_secondary_tetrad_parallel" 1 6
card_test "$single_card_tests_non_parallel" 1 2 # run cases 1 job each time with single GPU
single_ut_endTime_s=`date +%s`
multi_ut_startTime_s=`date +%s`
card_test "$multiple_card_tests_two_parallel" 2 4 # run cases 2 job each time with two GPUs
card_test "$multiple_card_tests_non_parallel" 2 2 # run cases 1 job each time with two GPUs
multi_ut_endTime_s=`date +%s`
exclu_ut_startTime_s=`date +%s`
card_test "$exclusive_tests_high_parallel" -1 5
card_test "$exclusive_tests_two_parallel" -1 3 # run cases exclusively, in this cases would be run with 2/4/8 GPUs
card_test "$exclusive_tests_non_parallel" -1 2 # run cases exclusively, in this cases would be run with 2/4/8 GPUs
exclu_ut_endTime_s=`date +%s`
card_test "$multiple_card_tests_two_parallel" 2 2 # run cases 2 job each time with two GPUs echo "ipipe_log_param_1_TestCases_Total_Time: $[ $single_ut_endTime_s - $single_ut_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt
card_test "$multiple_card_tests_non_parallel" 2 # run cases 1 job each time with two GPUs echo "ipipe_log_param_2_TestCases_Total_Time: $[ $multi_ut_endTime_s - $multi_ut_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt
echo "ipipe_log_param_Exclusive_TestCases_Total_Time: $[ $exclu_ut_endTime_s - $exclu_ut_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt
card_test "$exclusive_tests_two_parallel" -1 2 # run cases exclusively, in this cases would be run with 2/4/8 GPUs
card_test "$exclusive_tests_non_parallel" -1 # run cases exclusively, in this cases would be run with 2/4/8 GPUs
collect_failed_tests collect_failed_tests
rm -f $tmp_dir/* rm -f $tmp_dir/*
exec_times=0 exec_times=0
retry_unittests_record='' retry_unittests_record=''
retry_time=3 retry_time=4
exec_time_array=('first' 'second' 'third') exec_time_array=('first' 'second' 'third' 'fourth')
parallel_failed_tests_exec_retry_threshold=80
exec_retry_threshold=10 exec_retry_threshold=10
is_retry_execuate=0 is_retry_execuate=0
rerun_ut_startTime_s=`date +%s`
if [ -n "$failed_test_lists" ];then if [ -n "$failed_test_lists" ];then
if [ ${TIMEOUT_DEBUG_HELP:-OFF} == "ON" ];then if [ ${TIMEOUT_DEBUG_HELP:-OFF} == "ON" ];then
bash $PADDLE_ROOT/tools/timeout_debug_help.sh "$failed_test_lists" # cat logs for tiemout uts which killed by ctest bash $PADDLE_ROOT/tools/timeout_debug_help.sh "$failed_test_lists" # cat logs for tiemout uts which killed by ctest
...@@ -1268,14 +1302,30 @@ set +x ...@@ -1268,14 +1302,30 @@ set +x
need_retry_ut_arr=(${need_retry_ut_str}) need_retry_ut_arr=(${need_retry_ut_str})
need_retry_ut_count=${#need_retry_ut_arr[@]} need_retry_ut_count=${#need_retry_ut_arr[@]}
read retry_unittests <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' ) read retry_unittests <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
if [ $need_retry_ut_count -lt $exec_retry_threshold ];then
while ( [ $exec_times -lt $retry_time ] ) while ( [ $exec_times -lt $retry_time ] )
do do
if [[ "${exec_times}" == "0" ]] ;then
if [ $need_retry_ut_count -lt $parallel_failed_tests_exec_retry_threshold ];then
is_retry_execuate=0
else
is_retry_execuate=1
fi
elif [[ "${exec_times}" == "1" ]] ;then
read need_retry_ut_str <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
need_retry_ut_arr=(${need_retry_ut_str})
need_retry_ut_count=${#need_retry_ut_arr[@]}
if [ $need_retry_ut_count -lt $exec_retry_threshold ];then
is_retry_execuate=0
else
is_retry_execuate=1
fi
fi
if [[ "$is_retry_execuate" == "0" ]];then
set +e set +e
retry_unittests_record="$retry_unittests_record$failed_test_lists" retry_unittests_record="$retry_unittests_record$failed_test_lists"
failed_test_lists_ult=`echo "${failed_test_lists}" |grep -Po '[^ ].*$'` failed_test_lists_ult=`echo "${failed_test_lists}" |grep -Po '[^ ].*$'`
set -e set -e
if [[ "${exec_times}" == "1" ]];then if [[ "${exec_times}" == "1" ]] || [[ "${exec_times}" == "3" ]];then
if [[ "${failed_test_lists}" == "" ]];then if [[ "${failed_test_lists}" == "" ]];then
break break
else else
...@@ -1287,10 +1337,8 @@ set +x ...@@ -1287,10 +1337,8 @@ set +x
echo "=========================================" echo "========================================="
echo "The following unittest will be re-run:" echo "The following unittest will be re-run:"
echo "${retry_unittests}" echo "${retry_unittests}"
for line in ${retry_unittests[@]} ; for line in ${retry_unittests[@]} ;
do do
read tmp_one_tmp <<< "$( echo $single_card_tests | grep -oEi $line )" read tmp_one_tmp <<< "$( echo $single_card_tests | grep -oEi $line )"
read tmp_mul_tmp <<< "$( echo $multiple_card_tests | grep -oEi $line )" read tmp_mul_tmp <<< "$( echo $multiple_card_tests | grep -oEi $line )"
read exclusive_tmp <<< "$( echo $exclusive_tests | grep -oEi $line )" read exclusive_tmp <<< "$( echo $exclusive_tests | grep -oEi $line )"
...@@ -1318,7 +1366,7 @@ set +x ...@@ -1318,7 +1366,7 @@ set +x
done done
if [[ "$one_card_retry" != "" ]]; then if [[ "$one_card_retry" != "" ]]; then
card_test "$one_card_retry" 1 card_test "$one_card_retry" 1 4
fi fi
if [[ "$multiple_card_retry" != "" ]]; then if [[ "$multiple_card_retry" != "" ]]; then
...@@ -1328,7 +1376,6 @@ set +x ...@@ -1328,7 +1376,6 @@ set +x
if [[ "$exclusive_retry" != "" ]]; then if [[ "$exclusive_retry" != "" ]]; then
card_test "$exclusive_retry" -1 card_test "$exclusive_retry" -1
fi fi
exec_times=$[$exec_times+1] exec_times=$[$exec_times+1]
failed_test_lists='' failed_test_lists=''
collect_failed_tests collect_failed_tests
...@@ -1336,13 +1383,14 @@ set +x ...@@ -1336,13 +1383,14 @@ set +x
one_card_retry='' one_card_retry=''
multiple_card_retry='' multiple_card_retry=''
exclusive_retry='' exclusive_retry=''
done
else
# There are more than 10 failed unit tests, so no unit test retry
is_retry_execuate=1
fi fi
done
fi fi
rerun_ut_endTime_s=`date +%s`
echo "ipipe_log_param_Rerun_TestCases_Total_Time: $[ $rerun_ut_endTime_s - $rerun_ut_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt
ut_actual_total_endTime_s=`date +%s`
echo "ipipe_log_param_actual_TestCases_Total_Time: $[ $ut_actual_total_endTime_s - $ut_actual_total_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt
if [[ "$EXIT_CODE" != "0" ]]; then if [[ "$EXIT_CODE" != "0" ]]; then
show_ut_retry_result show_ut_retry_result
fi fi
...@@ -1351,7 +1399,20 @@ set -ex ...@@ -1351,7 +1399,20 @@ set -ex
} }
function show_ut_retry_result() { function show_ut_retry_result() {
if [[ "$is_retry_execuate" != "0" ]];then if [ "$SYSTEM" == "Darwin" ]; then
exec_retry_threshold_count=10
else
exec_retry_threshold_count=80
fi
if [[ "$is_retry_execuate" != "0" ]] && [[ "${exec_times}" == "0" ]] ;then
failed_test_lists_ult=`echo "${failed_test_lists}" | grep -Po '[^ ].*$'`
echo "========================================="
echo "There are more than ${exec_retry_threshold_count} failed unit tests in parallel test, so no unit test retry!!!"
echo "========================================="
echo "The following tests FAILED: "
echo "${failed_test_lists_ult}"
exit 8;
elif [[ "$is_retry_execuate" != "0" ]] && [[ "${exec_times}" == "1" ]];then
failed_test_lists_ult=`echo "${failed_test_lists}" | grep -Po '[^ ].*$'` failed_test_lists_ult=`echo "${failed_test_lists}" | grep -Po '[^ ].*$'`
echo "=========================================" echo "========================================="
echo "There are more than 10 failed unit tests, so no unit test retry!!!" echo "There are more than 10 failed unit tests, so no unit test retry!!!"
...@@ -2291,8 +2352,8 @@ function main() { ...@@ -2291,8 +2352,8 @@ function main() {
cmake_gen_and_build ${PYTHON_ABI:-""} ${parallel_number} cmake_gen_and_build ${PYTHON_ABI:-""} ${parallel_number}
enable_unused_var_check enable_unused_var_check
parallel_test parallel_test
check_coverage #check_coverage
check_change_of_unittest ${PYTHON_ABI:-""} #check_change_of_unittest ${PYTHON_ABI:-""}
;; ;;
cpu_cicheck_coverage) cpu_cicheck_coverage)
check_approvals_of_unittest 1 check_approvals_of_unittest 1
......
...@@ -206,44 +206,8 @@ for dim_X in (1, 2, 3): ...@@ -206,44 +206,8 @@ for dim_X in (1, 2, 3):
api_test(dim_X, dim_Y, transose_x, transose_y) api_test(dim_X, dim_Y, transose_x, transose_y)
# Test case more batch_size and N, M, K
def generate_compatible_shapes(dim_X, dim_Y, transpose_X, transpose_Y,
                               batch_size):
    """Build a pair of operand shapes (X, Y) for a matmul test case.

    Args:
        dim_X: rank of X. 1 yields a 1-D shape, 2 or more yields a 2-D
            base shape, and exactly 3 additionally gets a leading batch axis.
        dim_Y: rank of Y, interpreted the same way as ``dim_X``.
        transpose_X: whether X is laid out transposed ([K, M] instead of
            [M, K]; for a 1-D X this selects [M] instead of [K]).
        transpose_Y: whether Y is laid out transposed ([N, K] instead of
            [K, N]; for a 1-D Y this selects [N] instead of [K]).
        batch_size: size of the leading batch axis prepended to 3-D operands.

    Returns:
        A tuple ``(shape_X, shape_Y)`` of lists of ints.
    """
    M = 3
    N = 4
    K = 5
    # When a 1-D operand is flagged as transposed, the shared dimension is
    # collapsed to 1 so the other operand's K axis has length 1.
    if (dim_X == 1 and transpose_X) or (dim_Y == 1 and transpose_Y):
        K = 1

    if dim_X == 1:
        shape_X = [M] if transpose_X else [K]
    if dim_Y == 1:
        shape_Y = [N] if transpose_Y else [K]

    if dim_X >= 2:
        shape_X = [K, M] if transpose_X else [M, K]
    if dim_X == 3:
        # Previously a hard-coded constant (2) was used here and the
        # ``batch_size`` argument was silently ignored.
        shape_X = [batch_size] + shape_X

    if dim_Y >= 2:
        shape_Y = [N, K] if transpose_Y else [K, N]
    if dim_Y == 3:
        shape_Y = [batch_size] + shape_Y

    return shape_X, shape_Y
# Test case n-dim # Test case n-dim
def generate_compatible_shapes(dim, transpose_X, transpose_Y): def generate_compatible_shapes_ndim(dim, transpose_X, transpose_Y):
M = 2 M = 2
N = 4 N = 4
K = 3 K = 3
...@@ -270,7 +234,7 @@ for dim in [4]: ...@@ -270,7 +234,7 @@ for dim in [4]:
test_name = ( test_name = (
'TestMatMulOp_dimX_{}_dim_Y_{}_transX_{}_transY_{}'.format( 'TestMatMulOp_dimX_{}_dim_Y_{}_transX_{}_transY_{}'.format(
dim, dim, transpose_X, transpose_Y)) dim, dim, transpose_X, transpose_Y))
shape_X, shape_Y = generate_compatible_shapes(dim, transpose_X, shape_X, shape_Y = generate_compatible_shapes_ndim(dim, transpose_X,
transpose_Y) transpose_Y)
globals()[test_name] = type(test_name, (Generator, OpTest), { globals()[test_name] = type(test_name, (Generator, OpTest), {
'shape_X': shape_X, 'shape_X': shape_X,
......
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册