未验证 提交 b05f6142 编写于 作者: Zhou Wei 提交者: GitHub

[Parallel UT] Improve Parallel UT level on Windows/Linux (#31377)

* [Parallel UT]improve Parallel UT level on Windows/Linux

* [Parallel UT]improve Parallel UT level on Windows/Linux

* [Parallel UT]Improve Parallel UT level on Windows/Linux

* [Parallel UT]Improve Parallel UT level on Windows/Linux

* fix CI
上级 695dd371
...@@ -57,11 +57,9 @@ if(WITH_TESTING) ...@@ -57,11 +57,9 @@ if(WITH_TESTING)
if (NOT APPLE AND NOT WIN32) if (NOT APPLE AND NOT WIN32)
inference_base_test(test_api_impl SRCS api_impl_tester.cc DEPS paddle_inference_shared inference_base_test(test_api_impl SRCS api_impl_tester.cc DEPS paddle_inference_shared
ARGS --word2vec_dirname=${WORD2VEC_MODEL_DIR} --book_dirname=${IMG_CLS_RESNET_INSTALL_DIR}) ARGS --word2vec_dirname=${WORD2VEC_MODEL_DIR} --book_dirname=${IMG_CLS_RESNET_INSTALL_DIR})
set_tests_properties(test_api_impl PROPERTIES DEPENDS test_image_classification)
elseif(WIN32) elseif(WIN32)
inference_base_test(test_api_impl SRCS api_impl_tester.cc DEPS ${inference_deps} inference_base_test(test_api_impl SRCS api_impl_tester.cc DEPS ${inference_deps}
ARGS --word2vec_dirname=${WORD2VEC_MODEL_DIR} --book_dirname=${IMG_CLS_RESNET_INSTALL_DIR}) ARGS --word2vec_dirname=${WORD2VEC_MODEL_DIR} --book_dirname=${IMG_CLS_RESNET_INSTALL_DIR})
set_tests_properties(test_api_impl PROPERTIES DEPENDS test_image_classification)
endif() endif()
endif() endif()
......
...@@ -105,7 +105,7 @@ if(WITH_PYTHON) ...@@ -105,7 +105,7 @@ if(WITH_PYTHON)
set(tmp_impl_file ${impl_file}.tmp) set(tmp_impl_file ${impl_file}.tmp)
if(WIN32) if(WIN32)
if("${CMAKE_GENERATOR}" STREQUAL "Ninja") if("${CMAKE_GENERATOR}" STREQUAL "Ninja")
set(op_function_generator_path "${CMAKE_CURRENT_BINARY_DIR}") set(op_function_generator_path "${CMAKE_CURRENT_BINARY_DIR}")
else() else()
set(op_function_generator_path "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}") set(op_function_generator_path "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}")
......
...@@ -499,6 +499,7 @@ setlocal enabledelayedexpansion ...@@ -499,6 +499,7 @@ setlocal enabledelayedexpansion
:: if %errorlevel% NEQ 0 exit /b 8 :: if %errorlevel% NEQ 0 exit /b 8
:: for /F %%# in ('cmd /C nvidia-smi -L ^|find "GPU" /C') do set CUDA_DEVICE_COUNT=%%# :: for /F %%# in ('cmd /C nvidia-smi -L ^|find "GPU" /C') do set CUDA_DEVICE_COUNT=%%#
set CUDA_DEVICE_COUNT=1 set CUDA_DEVICE_COUNT=1
set FLAGS_fraction_of_gpu_memory_to_use=0.92
%cache_dir%\tools\busybox64.exe bash %work_dir%\tools\windows\run_unittests.sh %NIGHTLY_MODE% %PRECISION_TEST% %cache_dir%\tools\busybox64.exe bash %work_dir%\tools\windows\run_unittests.sh %NIGHTLY_MODE% %PRECISION_TEST%
......
...@@ -991,7 +991,7 @@ function case_count(){ ...@@ -991,7 +991,7 @@ function case_count(){
EOF EOF
testcases=$1 testcases=$1
num=$(echo $testcases|grep -o '\^'|wc -l) num=$(echo $testcases|grep -o '\^'|wc -l)
if [ "$2" == "" ]; then if (( $2 == -1 )); then
echo "exclusive TestCases count is $num" echo "exclusive TestCases count is $num"
echo "ipipe_log_param_Exclusive_TestCases_Count: $num" echo "ipipe_log_param_Exclusive_TestCases_Count: $num"
else else
...@@ -1034,6 +1034,11 @@ function card_test() { ...@@ -1034,6 +1034,11 @@ function card_test() {
set -m set -m
case_count $1 $2 case_count $1 $2
ut_startTime_s=`date +%s` ut_startTime_s=`date +%s`
testcases=$1
cardnumber=$2
parallel_level_base=${CTEST_PARALLEL_LEVEL:-1}
# get the CUDA device count, XPU device count is one # get the CUDA device count, XPU device count is one
if [ "${WITH_XPU}" == "ON" ];then if [ "${WITH_XPU}" == "ON" ];then
CUDA_DEVICE_COUNT=1 CUDA_DEVICE_COUNT=1
...@@ -1043,20 +1048,13 @@ function card_test() { ...@@ -1043,20 +1048,13 @@ function card_test() {
CUDA_DEVICE_COUNT=$(nvidia-smi -L | wc -l) CUDA_DEVICE_COUNT=$(nvidia-smi -L | wc -l)
fi fi
testcases=$1 if (( $cardnumber == -1 ));then
parallel_level_base=${CTEST_PARALLEL_LEVEL:-1}
if (( $# > 1 )); then
cardnumber=$2
if (( $cardnumber > $CUDA_DEVICE_COUNT )); then
cardnumber=$CUDA_DEVICE_COUNT
fi
if (( $# > 2 )); then
parallel_job=`expr $3 \* $parallel_level_base`
else
parallel_job=$parallel_level_base
fi
else
cardnumber=$CUDA_DEVICE_COUNT cardnumber=$CUDA_DEVICE_COUNT
fi
if (( $# > 2 )); then
parallel_job=`expr $3 \* $parallel_level_base`
else
parallel_job=$parallel_level_base parallel_job=$parallel_level_base
fi fi
...@@ -1098,7 +1096,7 @@ function card_test() { ...@@ -1098,7 +1096,7 @@ function card_test() {
done done
wait; # wait for all subshells to finish wait; # wait for all subshells to finish
ut_endTime_s=`date +%s` ut_endTime_s=`date +%s`
if [ "$2" == "" ]; then if (( $2 == -1 )); then
echo "exclusive TestCases Total Time: $[ $ut_endTime_s - $ut_startTime_s ]s" echo "exclusive TestCases Total Time: $[ $ut_endTime_s - $ut_startTime_s ]s"
echo "ipipe_log_param_Exclusive_TestCases_Total_Time: $[ $ut_endTime_s - $ut_startTime_s ]s" echo "ipipe_log_param_Exclusive_TestCases_Total_Time: $[ $ut_endTime_s - $ut_startTime_s ]s"
else else
...@@ -1153,13 +1151,18 @@ set -x ...@@ -1153,13 +1151,18 @@ set -x
set +x set +x
EXIT_CODE=0; EXIT_CODE=0;
test_cases=$(ctest -N -V) # get all test cases test_cases=$(ctest -N -V) # get all test cases
single_card_tests_eight_parallel='^job$' # cases list which would run 8 job each time with single GPU # Note(zhouwei): Parallel runs are relative to 'CTEST_PARALLEL_LEVEL', e.g: '4 job each time' means 4*CTEST_PARALLEL_LEVEL
single_card_tests_tetrad_parallel='^job$' # cases list which would run 4 job each time with single GPU single_card_tests_high_parallel='^job$' # cases list which would run the most job each time with single GPU
single_card_tests_non_parallel_1='^job$' # cases list which would run 1 job each time with single GPU single_card_tests_two_parallel='^job$' # cases list which would run 2 job each time with single GPU
single_card_tests_non_parallel_2='^job$' # cases list which would run 1 job each time with single GPU single_card_tests_non_parallel='^job$' # cases list which would run 1 job each time with single GPU
single_card_tests='^job$' # all cases list which would take one graph card single_card_tests='^job$' # all cases list which would take single GPU
exclusive_tests='' # cases list which would be run exclusively
multiple_card_tests='' # cases list which would take multiple GPUs, most cases would be two GPUs multiple_card_tests_two_parallel='^job$' # cases list which would run 2 job each time with multiple GPUs, most cases would be two GPUs
multiple_card_tests_non_parallel='^job$' # cases list which would run 1 job each time with multiple GPUs, most cases would be two GPUs
exclusive_tests_two_parallel='^job$' # cases list which would run 2 job exclusively(with all GPUs)
exclusive_tests_non_parallel='^job$' # cases list which would run 1 job exclusively(with all GPUs)
is_exclusive='' # indicate whether the case is exclusive type is_exclusive='' # indicate whether the case is exclusive type
is_multicard='' # indicate whether the case is multiple GPUs type is_multicard='' # indicate whether the case is multiple GPUs type
is_nightly='' # indicate whether the case will only run at night is_nightly='' # indicate whether the case will only run at night
...@@ -1167,9 +1170,10 @@ set +x ...@@ -1167,9 +1170,10 @@ set +x
UT_list=$(ctest -N | awk -F ': ' '{print $2}' | sed '/^$/d' | sed '$d') UT_list=$(ctest -N | awk -F ': ' '{print $2}' | sed '/^$/d' | sed '$d')
output=$(python ${PADDLE_ROOT}/tools/parallel_UT_rule.py "${UT_list}") output=$(python ${PADDLE_ROOT}/tools/parallel_UT_rule.py "${UT_list}")
eight_parallel_job=$(echo $output | cut -d ";" -f 1) cpu_parallel_job=$(echo $output | cut -d ";" -f 1)
tetrad_parallel_jog=$(echo $output | cut -d ";" -f 2) tetrad_parallel_job=$(echo $output | cut -d ";" -f 2)
non_parallel_job=$(echo $output | cut -d ";" -f 3) two_parallel_job=$(echo $output | cut -d ";" -f 3)
non_parallel_job=$(echo $output | cut -d ";" -f 4)
while read -r line; do while read -r line; do
if [[ "$line" == "" ]]; then if [[ "$line" == "" ]]; then
continue continue
...@@ -1211,26 +1215,24 @@ set +x ...@@ -1211,26 +1215,24 @@ set +x
fi fi
if [[ "$is_exclusive" != "" ]]; then if [[ "$is_exclusive" != "" ]]; then
if [[ "$exclusive_tests" == "" ]]; then if [[ $(echo $cpu_parallel_job$tetrad_parallel_job$two_parallel_job | grep -o $testcase) != "" ]]; then
exclusive_tests="^$testcase$" exclusive_tests_two_parallel="$exclusive_tests_two_parallel|^$testcase$"
else else
exclusive_tests="$exclusive_tests|^$testcase$" exclusive_tests_non_parallel="$exclusive_tests_non_parallel|^$testcase$"
fi fi
elif [[ "$is_multicard" != "" ]]; then elif [[ "$is_multicard" != "" ]]; then
if [[ "$multiple_card_tests" == "" ]]; then if [[ $(echo $cpu_parallel_job$tetrad_parallel_job$two_parallel_job | grep -o $testcase) != "" ]]; then
multiple_card_tests="^$testcase$" multiple_card_tests_two_parallel="$multiple_card_tests_two_parallel|^$testcase$"
else else
multiple_card_tests="$multiple_card_tests|^$testcase$" multiple_card_tests_non_parallel="$multiple_card_tests_non_parallel|^$testcase$"
fi fi
else else
if [[ $(echo $eight_parallel_job | grep $testcase) != "" ]]; then if [[ $(echo $cpu_parallel_job | grep -o $testcase) != "" ]]; then
single_card_tests_eight_parallel="$single_card_tests_eight_parallel|^$testcase$" single_card_tests_high_parallel="$single_card_tests_high_parallel|^$testcase$"
elif [[ $(echo $tetrad_parallel_jog | grep $testcase) != "" ]]; then elif [[ $(echo $tetrad_parallel_job$two_parallel_job | grep -o $testcase) != "" ]]; then
single_card_tests_tetrad_parallel="$single_card_tests_tetrad_parallel|^$testcase$" single_card_tests_two_parallel="$single_card_tests_two_parallel|^$testcase$"
elif [[ "${#single_card_tests_non_parallel_1}" -gt 10000 ]];then
single_card_tests_non_parallel_2="$single_card_tests_non_parallel_2|^$testcase$"
else else
single_card_tests_non_parallel_1="$single_card_tests_non_parallel_1|^$testcase$" single_card_tests_non_parallel="$single_card_tests_non_parallel|^$testcase$"
fi fi
single_card_tests="$single_card_tests|^$testcase$" single_card_tests="$single_card_tests|^$testcase$"
fi fi
...@@ -1241,12 +1243,13 @@ set +x ...@@ -1241,12 +1243,13 @@ set +x
testcase='' testcase=''
done <<< "$test_cases"; done <<< "$test_cases";
card_test "$single_card_tests_eight_parallel" 1 8 # run cases 8 job each time with single GPU card_test "$single_card_tests_high_parallel" 1 8 # run cases the most each time with single GPU
card_test "$single_card_tests_tetrad_parallel" 1 4 # run cases 4 job each time with single GPU card_test "$single_card_tests_two_parallel" 1 2 # run cases 2 job each time with single GPU
card_test "$single_card_tests_non_parallel_1" 1 # run cases 1 job each time with single GPU card_test "$single_card_tests_non_parallel" 1 # run cases 1 job each time with single GPU
card_test "$single_card_tests_non_parallel_2" 1 # run cases 1 job each time with single GPU card_test "$multiple_card_tests_two_parallel" 2 2 # run cases 2 job each time with two GPUs
card_test "$multiple_card_tests" 2 # run cases with two GPUs card_test "$multiple_card_tests_non_parallel" 2 # run cases 1 job each time with two GPUs
card_test "$exclusive_tests" # run cases exclusively, in this cases would be run with 4/8 GPUs card_test "$exclusive_tests_two_parallel" -1 2 # run cases exclusively, in this cases would be run with 2/4/8 GPUs
card_test "$exclusive_tests_non_parallel" -1 # run cases exclusively, in this cases would be run with 2/4/8 GPUs
collect_failed_tests collect_failed_tests
rm -f $tmp_dir/* rm -f $tmp_dir/*
exec_times=0 exec_times=0
...@@ -1319,7 +1322,7 @@ set +x ...@@ -1319,7 +1322,7 @@ set +x
fi fi
if [[ "$exclusive_retry" != "" ]]; then if [[ "$exclusive_retry" != "" ]]; then
card_test "$exclusive_retry" card_test "$exclusive_retry" -1
fi fi
exec_times=$[$exec_times+1] exec_times=$[$exec_times+1]
......
...@@ -18,10 +18,10 @@ set(FLUID_CORE_NAME "core") ...@@ -18,10 +18,10 @@ set(FLUID_CORE_NAME "core")
if(WITH_AVX AND AVX_FOUND) if(WITH_AVX AND AVX_FOUND)
set(FLUID_CORE_NAME "${FLUID_CORE_NAME}_avx") set(FLUID_CORE_NAME "${FLUID_CORE_NAME}_avx")
if(NOT DEFINED NOAVX_CORE_FILE OR NOAVX_CORE_FILE STREQUAL "") if(NOT DEFINED NOAVX_CORE_FILE OR NOAVX_CORE_FILE STREQUAL "")
message(STATUS "WARNING: This is just a warning for publishing release. message(STATUS "MESSAGE: This is just a message for publishing release.
You are building AVX version without NOAVX core. You are building AVX version without NOAVX core.
So the wheel package may fail on NOAVX machine. So the wheel package may fail on NOAVX machine.
You can add -DFLUID_CORE_NAME=/path/to/your/core_noavx.* in cmake command You can add -DNOAVX_CORE_FILE=/path/to/your/core_noavx.* in cmake command
to get a full wheel package to resolve this warning. to get a full wheel package to resolve this warning.
While, this version will still work on local machine.") While, this version will still work on local machine.")
endif() endif()
......
此差异已折叠。
...@@ -214,10 +214,8 @@ echo "Windows 1 card TestCases count is $num" ...@@ -214,10 +214,8 @@ echo "Windows 1 card TestCases count is $num"
if [ ${PRECISION_TEST:-OFF} == "ON" ]; then if [ ${PRECISION_TEST:-OFF} == "ON" ]; then
python ${PADDLE_ROOT}/tools/get_pr_ut.py python ${PADDLE_ROOT}/tools/get_pr_ut.py
if [[ -f "ut_list" ]]; then if [[ -f "ut_list" ]]; then
set +x
echo "PREC length: "`wc -l ut_list` echo "PREC length: "`wc -l ut_list`
precision_cases=`cat ut_list` precision_cases=`cat ut_list`
set -x
fi fi
fi fi
...@@ -242,12 +240,11 @@ fi ...@@ -242,12 +240,11 @@ fi
set -e set -e
output=$(python ${PADDLE_ROOT}/tools/parallel_UT_rule.py "${UT_list}") output=$(python ${PADDLE_ROOT}/tools/parallel_UT_rule.py "${UT_list}")
eight_parallel_job=$(echo $output | cut -d ";" -f 1) cpu_parallel_job=$(echo $output | cut -d ";" -f 1)
tetrad_parallel_jog=$(echo $output | cut -d ";" -f 2) tetrad_parallel_job=$(echo $output | cut -d ";" -f 2)
non_parallel_job=$(echo $output | cut -d ";" -f 3) two_parallel_job=$(echo $output | cut -d ";" -f 3)
non_parallel_job=$(echo $output | cut -d ";" -f 4)
non_parallel_job_1=$(echo $non_parallel_job | cut -d "," -f 1)
non_parallel_job_2=$(echo $non_parallel_job | cut -d "," -f 2)
failed_test_lists='' failed_test_lists=''
tmp_dir=`mktemp -d` tmp_dir=`mktemp -d`
...@@ -270,10 +267,11 @@ function collect_failed_tests() { ...@@ -270,10 +267,11 @@ function collect_failed_tests() {
function run_unittest() { function run_unittest() {
test_case=$1 test_case=$1
parallel_job=$2 parallel_job=$2
parallel_level_base=${CTEST_PARALLEL_LEVEL:-1}
if [ "$2" == "" ]; then if [ "$2" == "" ]; then
parallel_job=1 parallel_job=$parallel_level_base
else else
parallel_job=$2 parallel_job=`expr $2 \* $parallel_level_base`
fi fi
echo "************************************************************************" echo "************************************************************************"
echo "********These unittests run $parallel_job job each time with 1 GPU**********" echo "********These unittests run $parallel_job job each time with 1 GPU**********"
...@@ -336,7 +334,7 @@ function unittests_retry(){ ...@@ -336,7 +334,7 @@ function unittests_retry(){
function show_ut_retry_result() { function show_ut_retry_result() {
if [[ "$is_retry_execuate" != "0" ]];then if [[ "$is_retry_execuate" != "0" ]];then
failed_test_lists_ult=`echo "${failed_test_lists}" | grep -Po '[^ ].*$'` failed_test_lists_ult=`echo "${failed_test_lists}" | grep -o '[^ ].*$'`
echo "=========================================" echo "========================================="
echo "There are more than 10 failed unit tests, so no unit test retry!!!" echo "There are more than 10 failed unit tests, so no unit test retry!!!"
echo "=========================================" echo "========================================="
...@@ -349,7 +347,7 @@ function show_ut_retry_result() { ...@@ -349,7 +347,7 @@ function show_ut_retry_result() {
echo "========================================" echo "========================================"
echo "There are failed tests, which have been successful after re-run:" echo "There are failed tests, which have been successful after re-run:"
echo "========================================" echo "========================================"
echo "The following tests have been re-ran:" echo "The following tests have been re-run:"
echo "${retry_unittests_record}" echo "${retry_unittests_record}"
else else
failed_ut_re=$(echo "${retry_unittests_record_judge}" | awk 'BEGIN{ all_str=""}{if (all_str==""){all_str=$1}else{all_str=all_str"|"$1}} END{print all_str}') failed_ut_re=$(echo "${retry_unittests_record_judge}" | awk 'BEGIN{ all_str=""}{if (all_str==""){all_str=$1}else{all_str=all_str"|"$1}} END{print all_str}')
...@@ -365,10 +363,10 @@ function show_ut_retry_result() { ...@@ -365,10 +363,10 @@ function show_ut_retry_result() {
} }
set +e set +e
run_unittest $eight_parallel_job 8 run_unittest $cpu_parallel_job 12
run_unittest $tetrad_parallel_jog 4 run_unittest $tetrad_parallel_job 4
run_unittest $non_parallel_job_1 run_unittest $two_parallel_job 2
run_unittest $non_parallel_job_2 run_unittest $non_parallel_job
collect_failed_tests collect_failed_tests
set -e set -e
rm -f $tmp_dir/* rm -f $tmp_dir/*
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册