[Parallel UT]Improve Parallel UT level on Windows/Linux (#31377)

* [Parallel UT]improve Parallel UT level on Windows/Linux
* [Parallel UT]improve Parallel UT level on Windows/Linux
* [Parallel UT]Improve Parallel UT level on Windows/Linux
* [Parallel UT]Improve Parallel UT level on Windows/Linux
* fix CI

[Parallel UT]Improve Parallel UT level on Windows/Linux (#31377)
* [Parallel UT]improve Parallel UT level on Windows/Linux
* [Parallel UT]improve Parallel UT level on Windows/Linux
* [Parallel UT]Improve Parallel UT level on Windows/Linux
* [Parallel UT]Improve Parallel UT level on Windows/Linux
* fix CI
b05f6142 · Zhou Wei · GitHub · 695dd371 · b05f6142 · b05f6142
7 changed files
--- a/paddle/fluid/inference/api/CMakeLists.txt
+++ b/paddle/fluid/inference/api/CMakeLists.txt
@@ -57,11 +57,9 @@ if(WITH_TESTING)
  if (NOT APPLE AND NOT WIN32)
    inference_base_test(test_api_impl SRCS api_impl_tester.cc DEPS paddle_inference_shared
      ARGS --word2vec_dirname=${WORD2VEC_MODEL_DIR} --book_dirname=${IMG_CLS_RESNET_INSTALL_DIR})
-    set_tests_properties(test_api_impl PROPERTIES DEPENDS test_image_classification)
  elseif(WIN32)
    inference_base_test(test_api_impl SRCS api_impl_tester.cc DEPS ${inference_deps}
      ARGS --word2vec_dirname=${WORD2VEC_MODEL_DIR} --book_dirname=${IMG_CLS_RESNET_INSTALL_DIR})
-    set_tests_properties(test_api_impl PROPERTIES DEPENDS test_image_classification)
  endif()
 endif()

--- a/paddle/fluid/pybind/CMakeLists.txt
+++ b/paddle/fluid/pybind/CMakeLists.txt
@@ -105,7 +105,7 @@ if(WITH_PYTHON)
  set(tmp_impl_file ${impl_file}.tmp)
  if(WIN32)
-      if("${CMAKE_GENERATOR}" STREQUAL "Ninja")
+    if("${CMAKE_GENERATOR}" STREQUAL "Ninja")
      set(op_function_generator_path "${CMAKE_CURRENT_BINARY_DIR}")
    else()
      set(op_function_generator_path "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}")

--- a/paddle/scripts/paddle_build.bat
+++ b/paddle/scripts/paddle_build.bat
@@ -499,6 +499,7 @@ setlocal enabledelayedexpansion
 :: if %errorlevel% NEQ 0 exit /b 8
 :: for /F %%# in ('cmd /C nvidia-smi -L ^|find "GPU" /C') do set CUDA_DEVICE_COUNT=%%#
 set CUDA_DEVICE_COUNT=1
+set FLAGS_fraction_of_gpu_memory_to_use=0.92
 %cache_dir%\tools\busybox64.exe bash %work_dir%\tools\windows\run_unittests.sh %NIGHTLY_MODE% %PRECISION_TEST%

--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -991,7 +991,7 @@ function case_count(){
 EOF
    testcases=$1
    num=$(echo $testcases|grep -o '\^'|wc -l)
-    if [ "$2" == "" ]; then
+    if (( $2 == -1 )); then
        echo "exclusive TestCases count is $num"
        echo "ipipe_log_param_Exclusive_TestCases_Count: $num"
    else
@@ -1034,6 +1034,11 @@ function card_test() {
    set -m
    case_count $1 $2
    ut_startTime_s=`date +%s` 
+    testcases=$1
+    cardnumber=$2
+    parallel_level_base=${CTEST_PARALLEL_LEVEL:-1}
    # get the CUDA device count, XPU device count is one
    if [ "${WITH_XPU}" == "ON" ];then
        CUDA_DEVICE_COUNT=1
@@ -1043,20 +1048,13 @@ function card_test() {
        CUDA_DEVICE_COUNT=$(nvidia-smi -L | wc -l)
    fi
-    testcases=$1
+    if (( $cardnumber == -1 ));then
-    parallel_level_base=${CTEST_PARALLEL_LEVEL:-1}
-    if (( $# > 1 )); then
-        cardnumber=$2
-        if (( $cardnumber > $CUDA_DEVICE_COUNT )); then
-            cardnumber=$CUDA_DEVICE_COUNT
-        fi
-        if (( $# > 2 )); then
-            parallel_job=`expr $3 \* $parallel_level_base`
-        else
-            parallel_job=$parallel_level_base
-        fi
-    else
        cardnumber=$CUDA_DEVICE_COUNT
+    fi
+    if (( $# > 2 )); then
+        parallel_job=`expr $3 \* $parallel_level_base`
+    else
        parallel_job=$parallel_level_base
    fi
@@ -1098,7 +1096,7 @@ function card_test() {
    done
    wait; # wait for all subshells to finish
    ut_endTime_s=`date +%s`
-    if [ "$2" == "" ]; then
+    if (( $2 == -1 )); then
        echo "exclusive TestCases Total Time: $[ $ut_endTime_s - $ut_startTime_s ]s"
        echo "ipipe_log_param_Exclusive_TestCases_Total_Time: $[ $ut_endTime_s - $ut_startTime_s ]s"
    else
@@ -1153,13 +1151,18 @@ set -x
 set +x
        EXIT_CODE=0;
        test_cases=$(ctest -N -V) # get all test cases
-        single_card_tests_eight_parallel='^job$'    # cases list which would run 8 job each time with single GPU
+        # Note(zhouwei): Parallel runs are relative to 'CTEST_PARALLEL_LEVEL', e.g: '4 job each time' means 4*CTEST_PARALLEL_LEVEL
-        single_card_tests_tetrad_parallel='^job$'   # cases list which would run 4 job each time with single GPU
+        single_card_tests_high_parallel='^job$'     # cases list which would run the most job each time with single GPU
-        single_card_tests_non_parallel_1='^job$'    # cases list which would run 1 job each time with single GPU
+        single_card_tests_two_parallel='^job$'      # cases list which would run 2 job each time with single GPU
-        single_card_tests_non_parallel_2='^job$'    # cases list which would run 1 job each time with single GPU
+        single_card_tests_non_parallel='^job$'      # cases list which would run 1 job each time with single GPU
-        single_card_tests='^job$' # all cases list which would take one graph card
+        single_card_tests='^job$'                   # all cases list which would take single GPU
-        exclusive_tests=''        # cases list which would be run exclusively
-        multiple_card_tests=''    # cases list which would take multiple GPUs, most cases would be two GPUs
+        multiple_card_tests_two_parallel='^job$'    # cases list which would run 2 job each time with multiple GPUs, most cases would be two GPUs
+        multiple_card_tests_non_parallel='^job$'    # cases list which would run 1 job each time with multiple GPUs, most cases would be two GPUs
+        exclusive_tests_two_parallel='^job$'        # cases list which would run 2 job exclusively(with all GPUs)
+        exclusive_tests_non_parallel='^job$'        # cases list which would run 1 job exclusively(with all GPUs)
        is_exclusive=''           # indicate whether the case is exclusive type
        is_multicard=''           # indicate whether the case is multiple GPUs type
        is_nightly=''             # indicate whether the case will only run at night
@@ -1167,9 +1170,10 @@ set +x
        UT_list=$(ctest -N | awk -F ': ' '{print $2}' | sed '/^$/d' | sed '$d')
        output=$(python ${PADDLE_ROOT}/tools/parallel_UT_rule.py "${UT_list}")
-        eight_parallel_job=$(echo $output | cut -d ";" -f 1)
+        cpu_parallel_job=$(echo $output | cut -d ";" -f 1)
-        tetrad_parallel_jog=$(echo $output | cut -d ";" -f 2)
+        tetrad_parallel_job=$(echo $output | cut -d ";" -f 2)
-        non_parallel_job=$(echo $output | cut -d ";" -f 3)
+        two_parallel_job=$(echo $output | cut -d ";" -f 3)
+        non_parallel_job=$(echo $output | cut -d ";" -f 4)
        while read -r line; do
            if [[ "$line" == "" ]]; then
                continue
@@ -1211,26 +1215,24 @@ set +x
                fi
                if [[ "$is_exclusive" != "" ]]; then
-                    if [[ "$exclusive_tests" == "" ]]; then
+                    if [[ $(echo $cpu_parallel_job$tetrad_parallel_job$two_parallel_job | grep -o $testcase) != "" ]]; then
-                        exclusive_tests="^$testcase$"
+                        exclusive_tests_two_parallel="$exclusive_tests_two_parallel|^$testcase$"
                    else
-                        exclusive_tests="$exclusive_tests|^$testcase$"
+                        exclusive_tests_non_parallel="$exclusive_tests_non_parallel|^$testcase$"
                    fi
                elif [[ "$is_multicard" != "" ]]; then
-                    if [[ "$multiple_card_tests" == "" ]]; then
+                    if [[ $(echo $cpu_parallel_job$tetrad_parallel_job$two_parallel_job | grep -o $testcase) != "" ]]; then
-                        multiple_card_tests="^$testcase$"
+                        multiple_card_tests_two_parallel="$multiple_card_tests_two_parallel|^$testcase$"
                    else
-                        multiple_card_tests="$multiple_card_tests|^$testcase$"
+                        multiple_card_tests_non_parallel="$multiple_card_tests_non_parallel|^$testcase$"
                    fi
                else
-                    if [[ $(echo $eight_parallel_job | grep $testcase) != "" ]]; then
+                    if [[ $(echo $cpu_parallel_job | grep -o $testcase) != "" ]]; then
-                        single_card_tests_eight_parallel="$single_card_tests_eight_parallel|^$testcase$"
+                        single_card_tests_high_parallel="$single_card_tests_high_parallel|^$testcase$"
-                    elif [[ $(echo $tetrad_parallel_jog | grep $testcase) != "" ]]; then
+                    elif [[ $(echo $tetrad_parallel_job$two_parallel_job | grep -o $testcase) != "" ]]; then
-                        single_card_tests_tetrad_parallel="$single_card_tests_tetrad_parallel|^$testcase$"
+                        single_card_tests_two_parallel="$single_card_tests_two_parallel|^$testcase$"
-                    elif [[ "${#single_card_tests_non_parallel_1}" -gt 10000 ]];then
-                        single_card_tests_non_parallel_2="$single_card_tests_non_parallel_2|^$testcase$"
                    else
-                        single_card_tests_non_parallel_1="$single_card_tests_non_parallel_1|^$testcase$"
+                        single_card_tests_non_parallel="$single_card_tests_non_parallel|^$testcase$"
                    fi
                    single_card_tests="$single_card_tests|^$testcase$"
                fi
@@ -1241,12 +1243,13 @@ set +x
                testcase=''
        done <<< "$test_cases";
-        card_test "$single_card_tests_eight_parallel" 1 8     # run cases 8 job each time with single GPU
+        card_test "$single_card_tests_high_parallel" 1 8        # run cases the most each time with single GPU
-        card_test "$single_card_tests_tetrad_parallel" 1 4    # run cases 4 job each time with single GPU
+        card_test "$single_card_tests_two_parallel" 1 2         # run cases 2 job each time with single GPU
-        card_test "$single_card_tests_non_parallel_1" 1       # run cases 1 job each time with single GPU
+        card_test "$single_card_tests_non_parallel" 1           # run cases 1 job each time with single GPU
-        card_test "$single_card_tests_non_parallel_2" 1       # run cases 1 job each time with single GPU
+        card_test "$multiple_card_tests_two_parallel" 2 2       # run cases 2 job each time with two GPUs
-        card_test "$multiple_card_tests" 2    # run cases with two GPUs
+        card_test "$multiple_card_tests_non_parallel" 2         # run cases 1 job each time with two GPUs
-        card_test "$exclusive_tests"          # run cases exclusively, in this cases would be run with 4/8 GPUs
+        card_test "$exclusive_tests_two_parallel" -1 2          # run cases exclusively, in this cases would be run with 2/4/8 GPUs
+        card_test "$exclusive_tests_non_parallel" -1            # run cases exclusively, in this cases would be run with 2/4/8 GPUs
        collect_failed_tests
        rm -f $tmp_dir/*
        exec_times=0
@@ -1319,7 +1322,7 @@ set +x
                        fi
                        if [[ "$exclusive_retry" != "" ]]; then
-                            card_test "$exclusive_retry"
+                            card_test "$exclusive_retry" -1
                        fi
                        exec_times=$[$exec_times+1]

--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -18,10 +18,10 @@ set(FLUID_CORE_NAME "core")
 if(WITH_AVX AND AVX_FOUND)
  set(FLUID_CORE_NAME "${FLUID_CORE_NAME}_avx")
  if(NOT DEFINED NOAVX_CORE_FILE OR NOAVX_CORE_FILE STREQUAL "")
-    message(STATUS "WARNING: This is just a warning for publishing release.
+    message(STATUS "MESSAGE: This is just a message for publishing release.
      You are building AVX version without NOAVX core.
      So the wheel package may fail on NOAVX machine.
-      You can add -DFLUID_CORE_NAME=/path/to/your/core_noavx.* in cmake command
+      You can add -DNOAVX_CORE_FILE=/path/to/your/core_noavx.* in cmake command
      to get a full wheel package to resolve this warning.
      While, this version will still work on local machine.")
  endif()

--- a/tools/parallel_UT_rule.py
+++ b/tools/parallel_UT_rule.py
--- a/tools/windows/run_unittests.sh
+++ b/tools/windows/run_unittests.sh
@@ -214,10 +214,8 @@ echo "Windows 1 card TestCases count is $num"
 if [ ${PRECISION_TEST:-OFF} == "ON" ]; then
    python ${PADDLE_ROOT}/tools/get_pr_ut.py
    if [[ -f "ut_list" ]]; then
-        set +x
        echo "PREC length: "`wc -l ut_list`
        precision_cases=`cat ut_list`
-        set -x
    fi
 fi
@@ -242,12 +240,11 @@ fi
 set -e
 output=$(python ${PADDLE_ROOT}/tools/parallel_UT_rule.py "${UT_list}")
-eight_parallel_job=$(echo $output | cut -d ";" -f 1)
+cpu_parallel_job=$(echo $output | cut -d ";" -f 1)
-tetrad_parallel_jog=$(echo $output | cut -d ";" -f 2)
+tetrad_parallel_job=$(echo $output | cut -d ";" -f 2)
-non_parallel_job=$(echo $output | cut -d ";" -f 3)
+two_parallel_job=$(echo $output | cut -d ";" -f 3)
+non_parallel_job=$(echo $output | cut -d ";" -f 4)
-non_parallel_job_1=$(echo $non_parallel_job | cut -d "," -f 1)
-non_parallel_job_2=$(echo $non_parallel_job | cut -d "," -f 2)
 failed_test_lists=''
 tmp_dir=`mktemp -d`
@@ -270,10 +267,11 @@ function collect_failed_tests() {
 function run_unittest() {
    test_case=$1
    parallel_job=$2
+    parallel_level_base=${CTEST_PARALLEL_LEVEL:-1}
    if [ "$2" == "" ]; then
-        parallel_job=1
+        parallel_job=$parallel_level_base
    else
-        parallel_job=$2
+        parallel_job=`expr $2 \* $parallel_level_base`
    fi
    echo "************************************************************************"
    echo "********These unittests run $parallel_job job each time with 1 GPU**********"
@@ -336,7 +334,7 @@ function unittests_retry(){
 function show_ut_retry_result() {
    if [[ "$is_retry_execuate" != "0" ]];then
-        failed_test_lists_ult=`echo "${failed_test_lists}" | grep -Po '[^ ].*$'`
+        failed_test_lists_ult=`echo "${failed_test_lists}" | grep -o '[^ ].*$'`
        echo "========================================="
        echo "There are more than 10 failed unit tests, so no unit test retry!!!"
        echo "========================================="
@@ -349,7 +347,7 @@ function show_ut_retry_result() {
            echo "========================================"
            echo "There are failed tests, which have been successful after re-run:"
            echo "========================================"
-            echo "The following tests have been re-ran:"
+            echo "The following tests have been re-run:"
            echo "${retry_unittests_record}"
        else
            failed_ut_re=$(echo "${retry_unittests_record_judge}" | awk 'BEGIN{ all_str=""}{if (all_str==""){all_str=$1}else{all_str=all_str"|"$1}} END{print all_str}')
@@ -365,10 +363,10 @@ function show_ut_retry_result() {
 }
 set +e
-run_unittest $eight_parallel_job 8
+run_unittest $cpu_parallel_job 12
-run_unittest $tetrad_parallel_jog 4
+run_unittest $tetrad_parallel_job 4
-run_unittest $non_parallel_job_1
+run_unittest $two_parallel_job 2
-run_unittest $non_parallel_job_2
+run_unittest $non_parallel_job
 collect_failed_tests
 set -e
 rm -f $tmp_dir/*