未验证 提交 a332352a 编写于 作者: Q Qi Li 提交者: GitHub

[NPU] fix NPU ci scripts, test=develop (#35095)

上级 de645153
...@@ -1059,7 +1059,7 @@ function get_quickly_disable_ut() { ...@@ -1059,7 +1059,7 @@ function get_quickly_disable_ut() {
function card_test() { function card_test() {
set -m set -m
CTEST_PARALLEL_LEVEL=2
case_count $1 $2 case_count $1 $2
ut_startTime_s=`date +%s` ut_startTime_s=`date +%s`
...@@ -1725,16 +1725,22 @@ set +x ...@@ -1725,16 +1725,22 @@ set +x
single_card_tests="$single_card_tests|^$testcase$" single_card_tests="$single_card_tests|^$testcase$"
fi fi
done <<< "$test_cases"; done <<< "$test_cases";
card_test "$single_card_tests" 1
ut_actual_total_startTime_s=`date +%s`
card_test "$single_card_tests" 1 # run cases 1 job each time with single GPU
collect_failed_tests collect_failed_tests
# add unit test retry for NPU # add unit test retry for NPU
rm -f $tmp_dir/* rm -f $tmp_dir/*
exec_times=0 exec_times=0
retry_unittests_record='' retry_unittests_record=''
retry_time=3 retry_time=4
exec_time_array=('first' 'second' 'third') exec_time_array=('first' 'second' 'third' 'fourth')
parallel_failed_tests_exec_retry_threshold=80
exec_retry_threshold=10 exec_retry_threshold=10
is_retry_execuate=0 is_retry_execuate=0
rerun_ut_startTime_s=`date +%s`
if [ -n "$failed_test_lists" ];then if [ -n "$failed_test_lists" ];then
if [ ${TIMEOUT_DEBUG_HELP:-OFF} == "ON" ];then if [ ${TIMEOUT_DEBUG_HELP:-OFF} == "ON" ];then
bash $PADDLE_ROOT/tools/timeout_debug_help.sh "$failed_test_lists" # cat logs for tiemout uts which killed by ctest bash $PADDLE_ROOT/tools/timeout_debug_help.sh "$failed_test_lists" # cat logs for tiemout uts which killed by ctest
...@@ -1743,14 +1749,30 @@ set +x ...@@ -1743,14 +1749,30 @@ set +x
need_retry_ut_arr=(${need_retry_ut_str}) need_retry_ut_arr=(${need_retry_ut_str})
need_retry_ut_count=${#need_retry_ut_arr[@]} need_retry_ut_count=${#need_retry_ut_arr[@]}
read retry_unittests <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' ) read retry_unittests <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
if [ $need_retry_ut_count -lt $exec_retry_threshold ];then while ( [ $exec_times -lt $retry_time ] )
while ( [ $exec_times -lt $retry_time ] ) do
do if [[ "${exec_times}" == "0" ]] ;then
if [ $need_retry_ut_count -lt $parallel_failed_tests_exec_retry_threshold ];then
is_retry_execuate=0
else
is_retry_execuate=1
fi
elif [[ "${exec_times}" == "1" ]] ;then
read need_retry_ut_str <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
need_retry_ut_arr=(${need_retry_ut_str})
need_retry_ut_count=${#need_retry_ut_arr[@]}
if [ $need_retry_ut_count -lt $exec_retry_threshold ];then
is_retry_execuate=0
else
is_retry_execuate=1
fi
fi
if [[ "$is_retry_execuate" == "0" ]];then
set +e set +e
retry_unittests_record="$retry_unittests_record$failed_test_lists" retry_unittests_record="$retry_unittests_record$failed_test_lists"
failed_test_lists_ult=`echo "${failed_test_lists}" |grep -Po '[^ ].*$'` failed_test_lists_ult=`echo "${failed_test_lists}" |grep -Po '[^ ].*$'`
set -e set -e
if [[ "${exec_times}" == "1" ]];then if [[ "${exec_times}" == "1" ]] || [[ "${exec_times}" == "3" ]];then
if [[ "${failed_test_lists}" == "" ]];then if [[ "${failed_test_lists}" == "" ]];then
break break
else else
...@@ -1761,11 +1783,11 @@ set +x ...@@ -1761,11 +1783,11 @@ set +x
echo "This is the ${exec_time_array[$exec_times]} time to re-run" echo "This is the ${exec_time_array[$exec_times]} time to re-run"
echo "=========================================" echo "========================================="
echo "The following unittest will be re-run:" echo "The following unittest will be re-run:"
echo "${retry_unittests}" echo "${retry_unittests}"
for line in ${retry_unittests[@]} ; for line in ${retry_unittests[@]} ;
do do
read tmp_one_tmp <<< "$( echo $single_card_tests | grep -oEi $line )" read tmp_one_tmp <<< "$( echo $single_card_tests | grep -oEi $line )"
if [[ "$tmp_one_tmp" != "" ]]; then if [[ "$tmp_one_tmp" != "" ]]; then
if [[ "$one_card_retry" == "" ]]; then if [[ "$one_card_retry" == "" ]]; then
one_card_retry="^$line$" one_card_retry="^$line$"
...@@ -1773,23 +1795,26 @@ set +x ...@@ -1773,23 +1795,26 @@ set +x
one_card_retry="$one_card_retry|^$line$" one_card_retry="$one_card_retry|^$line$"
fi fi
fi fi
done done
if [[ "$one_card_retry" != "" ]]; then if [[ "$one_card_retry" != "" ]]; then
card_test "$one_card_retry" 1 card_test "$one_card_retry" 1 # run cases 1 job each time with single GPU
fi fi
exec_times=$[$exec_times+1] exec_times=$[$exec_times+1]
failed_test_lists='' failed_test_lists=''
collect_failed_tests collect_failed_tests
rm -f $tmp_dir/* rm -f $tmp_dir/*
one_card_retry='' one_card_retry=''
done fi
else done
# There are more than 10 failed unit tests, so no unit test retry
is_retry_execuate=1
fi
fi fi
rerun_ut_endTime_s=`date +%s`
echo "ipipe_log_param_Rerun_TestCases_Total_Time: $[ $rerun_ut_endTime_s - $rerun_ut_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt
ut_actual_total_endTime_s=`date +%s`
echo "ipipe_log_param_actual_TestCases_Total_Time: $[ $ut_actual_total_endTime_s - $ut_actual_total_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt
if [[ "$EXIT_CODE" != "0" ]]; then if [[ "$EXIT_CODE" != "0" ]]; then
show_ut_retry_result show_ut_retry_result
fi fi
......
...@@ -85,8 +85,30 @@ function gen_full_html_report_xpu() { ...@@ -85,8 +85,30 @@ function gen_full_html_report_xpu() {
mv -f coverage-full.tmp coverage-full.info mv -f coverage-full.tmp coverage-full.info
} }
# Build coverage-full.info for the NPU job from coverage.info in two lcov
# passes: first extract the NPU operator sources, then remove test and demo
# files from that result.
# Reads:   coverage.info (relative to the current directory)
# Writes:  coverage-full.info; coverage-full.tmp is the scratch output of
#          each lcov pass and is renamed over coverage-full.info afterwards
# Returns: exit status of the final mv
function gen_full_html_report_npu() {
    local tracefile_in='coverage.info'
    local tracefile_tmp='coverage-full.tmp'
    local tracefile_out='coverage-full.info'

    # Options shared by both lcov passes (output file + rc override).
    local -a lcov_opts=(-o "$tracefile_tmp" --rc lcov_branch_coverage=0)

    # Patterns are quoted so that lcov, not the shell, expands the wildcards.
    local -a npu_sources=(
        '/paddle/paddle/fluid/operators/*npu*'
    )
    local -a excluded=(
        '/paddle/paddle/fluid/framework/*_test*'
        '/paddle/paddle/fluid/*/*test*'
        '/paddle/paddle/fluid/*/*/*test*'
        '/paddle/paddle/fluid/inference/tests/*'
        '/paddle/paddle/fluid/inference/api/demo_ci/*'
    )

    # Pass 1: keep only the NPU operator files.
    lcov --extract "$tracefile_in" "${npu_sources[@]}" "${lcov_opts[@]}"
    mv -f "$tracefile_tmp" "$tracefile_out"

    # Pass 2: drop unit-test and demo sources from the extracted data.
    lcov --remove "$tracefile_out" "${excluded[@]}" "${lcov_opts[@]}"
    mv -f "$tracefile_tmp" "$tracefile_out"
}
if [ ${WITH_XPU:-OFF} == "ON" ]; then if [ ${WITH_XPU:-OFF} == "ON" ]; then
gen_full_html_report_xpu || true gen_full_html_report_xpu || true
elif [ ${WITH_ASCEND_CL:-OFF} == "ON" ]; then
gen_full_html_report_npu || true
else else
gen_full_html_report || true gen_full_html_report || true
fi fi
...@@ -183,6 +205,8 @@ echo "Assert Python Diff Coverage" ...@@ -183,6 +205,8 @@ echo "Assert Python Diff Coverage"
if [ ${WITH_XPU:-OFF} == "ON" ]; then if [ ${WITH_XPU:-OFF} == "ON" ]; then
echo "XPU has no python coverage!" echo "XPU has no python coverage!"
elif [ ${WITH_ASCEND_CL:-OFF} == "ON" ]; then
echo "NPU has no python coverage!"
else else
if [[ "${NO_PYTHON_COVERAGE_DATA}" != "1" ]];then if [[ "${NO_PYTHON_COVERAGE_DATA}" != "1" ]];then
python3.7 ${PADDLE_ROOT}/tools/coverage/coverage_lines.py python-coverage-diff.info 0.9 || PYTHON_COVERAGE_LINES_ASSERT=1 python3.7 ${PADDLE_ROOT}/tools/coverage/coverage_lines.py python-coverage-diff.info 0.9 || PYTHON_COVERAGE_LINES_ASSERT=1
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册