未验证 提交 a332352a 编写于 作者: Q Qi Li 提交者: GitHub

[NPU] fix NPU ci scripts, test=develop (#35095)

上级 de645153
......@@ -1059,7 +1059,7 @@ function get_quickly_disable_ut() {
function card_test() {
set -m
CTEST_PARALLEL_LEVEL=2
case_count $1 $2
ut_startTime_s=`date +%s`
......@@ -1725,16 +1725,22 @@ set +x
single_card_tests="$single_card_tests|^$testcase$"
fi
done <<< "$test_cases";
card_test "$single_card_tests" 1
ut_actual_total_startTime_s=`date +%s`
card_test "$single_card_tests" 1 # run cases 1 job each time with single GPU
collect_failed_tests
# add unit test retry for NPU
rm -f $tmp_dir/*
exec_times=0
retry_unittests_record=''
retry_time=3
exec_time_array=('first' 'second' 'third')
retry_time=4
exec_time_array=('first' 'second' 'third' 'fourth')
parallel_failed_tests_exec_retry_threshold=80
exec_retry_threshold=10
is_retry_execuate=0
rerun_ut_startTime_s=`date +%s`
if [ -n "$failed_test_lists" ];then
if [ ${TIMEOUT_DEBUG_HELP:-OFF} == "ON" ];then
bash $PADDLE_ROOT/tools/timeout_debug_help.sh "$failed_test_lists" # cat logs for tiemout uts which killed by ctest
......@@ -1743,14 +1749,30 @@ set +x
need_retry_ut_arr=(${need_retry_ut_str})
need_retry_ut_count=${#need_retry_ut_arr[@]}
read retry_unittests <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
if [ $need_retry_ut_count -lt $exec_retry_threshold ];then
while ( [ $exec_times -lt $retry_time ] )
do
if [[ "${exec_times}" == "0" ]] ;then
if [ $need_retry_ut_count -lt $parallel_failed_tests_exec_retry_threshold ];then
is_retry_execuate=0
else
is_retry_execuate=1
fi
elif [[ "${exec_times}" == "1" ]] ;then
read need_retry_ut_str <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
need_retry_ut_arr=(${need_retry_ut_str})
need_retry_ut_count=${#need_retry_ut_arr[@]}
if [ $need_retry_ut_count -lt $exec_retry_threshold ];then
is_retry_execuate=0
else
is_retry_execuate=1
fi
fi
if [[ "$is_retry_execuate" == "0" ]];then
set +e
retry_unittests_record="$retry_unittests_record$failed_test_lists"
failed_test_lists_ult=`echo "${failed_test_lists}" |grep -Po '[^ ].*$'`
set -e
if [[ "${exec_times}" == "1" ]];then
if [[ "${exec_times}" == "1" ]] || [[ "${exec_times}" == "3" ]];then
if [[ "${failed_test_lists}" == "" ]];then
break
else
......@@ -1762,10 +1784,10 @@ set +x
echo "========================================="
echo "The following unittest will be re-run:"
echo "${retry_unittests}"
for line in ${retry_unittests[@]} ;
do
read tmp_one_tmp <<< "$( echo $single_card_tests | grep -oEi $line )"
if [[ "$tmp_one_tmp" != "" ]]; then
if [[ "$one_card_retry" == "" ]]; then
one_card_retry="^$line$"
......@@ -1773,23 +1795,26 @@ set +x
one_card_retry="$one_card_retry|^$line$"
fi
fi
done
if [[ "$one_card_retry" != "" ]]; then
card_test "$one_card_retry" 1
card_test "$one_card_retry" 1 # run cases 1 job each time with single GPU
fi
exec_times=$[$exec_times+1]
failed_test_lists=''
collect_failed_tests
rm -f $tmp_dir/*
one_card_retry=''
done
else
# There are more than 10 failed unit tests, so no unit test retry
is_retry_execuate=1
fi
done
fi
rerun_ut_endTime_s=`date +%s`
echo "ipipe_log_param_Rerun_TestCases_Total_Time: $[ $rerun_ut_endTime_s - $rerun_ut_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt
ut_actual_total_endTime_s=`date +%s`
echo "ipipe_log_param_actual_TestCases_Total_Time: $[ $ut_actual_total_endTime_s - $ut_actual_total_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt
if [[ "$EXIT_CODE" != "0" ]]; then
show_ut_retry_result
fi
......
......@@ -85,8 +85,30 @@ function gen_full_html_report_xpu() {
mv -f coverage-full.tmp coverage-full.info
}
function gen_full_html_report_npu() {
lcov --extract coverage.info \
'/paddle/paddle/fluid/operators/*npu*' \
-o coverage-full.tmp \
--rc lcov_branch_coverage=0
mv -f coverage-full.tmp coverage-full.info
lcov --remove coverage-full.info \
'/paddle/paddle/fluid/framework/*_test*' \
'/paddle/paddle/fluid/*/*test*' \
'/paddle/paddle/fluid/*/*/*test*' \
'/paddle/paddle/fluid/inference/tests/*' \
'/paddle/paddle/fluid/inference/api/demo_ci/*' \
-o coverage-full.tmp \
--rc lcov_branch_coverage=0
mv -f coverage-full.tmp coverage-full.info
}
if [ ${WITH_XPU:-OFF} == "ON" ]; then
gen_full_html_report_xpu || true
elif [ ${WITH_ASCEND_CL:-OFF} == "ON" ]; then
gen_full_html_report_npu || true
else
gen_full_html_report || true
fi
......@@ -183,6 +205,8 @@ echo "Assert Python Diff Coverage"
if [ ${WITH_XPU:-OFF} == "ON" ]; then
echo "XPU has no python coverage!"
elif [ ${WITH_ASCEND_CL:-OFF} == "ON" ]; then
echo "NPU has no python coverage!"
else
if [[ "${NO_PYTHON_COVERAGE_DATA}" != "1" ]];then
python3.7 ${PADDLE_ROOT}/tools/coverage/coverage_lines.py python-coverage-diff.info 0.9 || PYTHON_COVERAGE_LINES_ASSERT=1
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册