未验证 提交 9d985ca1 编写于 作者: Q Qi Li 提交者: GitHub

[NPU] add unit test retry for NPU UT, test=develop (#34443)

上级 b9d6c987
...@@ -1627,7 +1627,6 @@ function parallel_test_base_npu() { ...@@ -1627,7 +1627,6 @@ function parallel_test_base_npu() {
EOF EOF
set +x set +x
ut_startTime_s=`date +%s`
test_cases=$(ctest -N -V) # get all test cases test_cases=$(ctest -N -V) # get all test cases
get_quickly_disable_ut||disable_ut_quickly='' # indicate whether the case was in quickly disable list get_quickly_disable_ut||disable_ut_quickly='' # indicate whether the case was in quickly disable list
while read -r line; do while read -r line; do
...@@ -1643,12 +1642,73 @@ set +x ...@@ -1643,12 +1642,73 @@ set +x
done <<< "$test_cases"; done <<< "$test_cases";
card_test "$single_card_tests" 1 card_test "$single_card_tests" 1
collect_failed_tests collect_failed_tests
set -x # add unit test retry for NPU
ut_endTime_s=`date +%s` rm -f $tmp_dir/*
echo "NPU testCase Time: $[ $ut_endTime_s - $ut_startTime_s ]s" exec_times=0
retry_unittests_record=''
retry_time=3
exec_time_array=('first' 'second' 'third')
exec_retry_threshold=10
is_retry_execuate=0
if [ -n "$failed_test_lists" ];then
if [ ${TIMEOUT_DEBUG_HELP:-OFF} == "ON" ];then
bash $PADDLE_ROOT/tools/timeout_debug_help.sh "$failed_test_lists" # cat logs for tiemout uts which killed by ctest
fi
read need_retry_ut_str <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
need_retry_ut_arr=(${need_retry_ut_str})
need_retry_ut_count=${#need_retry_ut_arr[@]}
read retry_unittests <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
if [ $need_retry_ut_count -lt $exec_retry_threshold ];then
while ( [ $exec_times -lt $retry_time ] )
do
set +e
retry_unittests_record="$retry_unittests_record$failed_test_lists"
failed_test_lists_ult=`echo "${failed_test_lists}" |grep -Po '[^ ].*$'`
set -e
if [[ "${exec_times}" == "1" ]];then
if [[ "${failed_test_lists}" == "" ]];then
break
else
read retry_unittests <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
fi
fi
echo "========================================="
echo "This is the ${exec_time_array[$exec_times]} time to re-run"
echo "========================================="
echo "The following unittest will be re-run:"
echo "${retry_unittests}"
for line in ${retry_unittests[@]} ;
do
read tmp_one_tmp <<< "$( echo $single_card_tests | grep -oEi $line )"
if [[ "$tmp_one_tmp" != "" ]]; then
if [[ "$one_card_retry" == "" ]]; then
one_card_retry="^$line$"
else
one_card_retry="$one_card_retry|^$line$"
fi
fi
done
if [[ "$one_card_retry" != "" ]]; then
card_test "$one_card_retry" 1
fi
exec_times=$[$exec_times+1]
failed_test_lists=''
collect_failed_tests
rm -f $tmp_dir/*
one_card_retry=''
done
else
# There are more than 10 failed unit tests, so no unit test retry
is_retry_execuate=1
fi
fi
if [[ "$EXIT_CODE" != "0" ]]; then if [[ "$EXIT_CODE" != "0" ]]; then
exit 8; show_ut_retry_result
fi fi
set -ex
fi fi
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册