Unverified commit f5d6937f authored by wopeizl, committed by GitHub

specify the cuda arch name and bin to decrease the compile time for i… (#17020)

1. specify the cuda arch name and bin to decrease the compile time for inference test=develop
2. simplify the script and add comments
3. remove the fluid process from cicheck
Parent f7caf7d4
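
For context, here is a minimal usage sketch of the two knobs this patch wires through; it is not part of the commit itself, and it assumes the patched script is paddle/scripts/paddle_build.sh, that it is driven by environment variables as the hunks below suggest, and that `build` is a valid subcommand. The SM values are illustrative only.

    # Hedged usage sketch: how the new CUDA_ARCH_NAME / CUDA_ARCH_BIN variables
    # could be used to compile only for the GPU architectures actually needed,
    # which is where the compile-time saving for the inference CI comes from.

    # Let CMake detect the local GPU instead of building device code for "All" archs.
    export CUDA_ARCH_NAME=Auto

    # Optionally forward explicit SM versions; the value is passed through verbatim
    # as -DCUDA_ARCH_BIN ("70 75" below is an illustrative example, not a default).
    export CUDA_ARCH_BIN="70 75"

    bash paddle/scripts/paddle_build.sh build
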
@@ -194,6 +194,7 @@ function cmake_gen() {
-DWITH_AVX=${WITH_AVX:-OFF}
-DWITH_GOLANG=${WITH_GOLANG:-OFF}
-DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All}
-DCUDA_ARCH_BIN=${CUDA_ARCH_BIN} \
-DWITH_PYTHON=${WITH_PYTHON:-ON}
-DCUDNN_ROOT=/usr/
-DWITH_TESTING=${WITH_TESTING:-ON}
@@ -228,6 +229,7 @@ EOF
-DWITH_AVX=${WITH_AVX:-OFF} \
-DWITH_GOLANG=${WITH_GOLANG:-OFF} \
-DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} \
-DCUDA_ARCH_BIN=${CUDA_ARCH_BIN} \
-DWITH_PYTHON=${WITH_PYTHON:-ON} \
-DCUDNN_ROOT=/usr/ \
-DWITH_TESTING=${WITH_TESTING:-ON} \
@@ -561,57 +563,6 @@ function bind_test() {
wait
}
function parallel_test() {
mkdir -p ${PADDLE_ROOT}/build
cd ${PADDLE_ROOT}/build
if [ ${WITH_TESTING:-ON} == "ON" ] ; then
cat <<EOF
========================================
Running unit tests ...
========================================
EOF
# calculate and set the memory usage for each process
# MEM_USAGE=$(printf "%.2f" `echo "scale=5; 1.0 / $NUM_PROC" | bc`)
# export FLAGS_fraction_of_gpu_memory_to_use=$MEM_USAGE
EXIT_CODE=0;
pids=()
# get the CUDA device count
CUDA_DEVICE_COUNT=$(nvidia-smi -L | wc -l)
# each test case would occupy two graph cards
NUM_PROC=$[CUDA_DEVICE_COUNT/2]
for (( i = 0; i < $NUM_PROC; i++ )); do
# CUDA_VISIBLE_DEVICES http://acceleware.com/blog/cudavisibledevices-masking-gpus
# ctest -I https://cmake.org/cmake/help/v3.0/manual/ctest.1.html?highlight=ctest
if [ ${TESTING_DEBUG_MODE:-OFF} == "ON" ] ; then
env CUDA_VISIBLE_DEVICES=$[i*2],$[i*2+1] ctest -I $i,,$NUM_PROC -V &
pids+=($!)
else
env CUDA_VISIBLE_DEVICES=$[i*2],$[i*2+1] ctest -I $i,,$NUM_PROC --output-on-failure &
pids+=($!)
fi
done
clen=`expr "${#pids[@]}" - 1` # get length of commands - 1
for i in `seq 0 "$clen"`; do
wait ${pids[$i]}
CODE=$?
if [[ "${CODE}" != "0" ]]; then
echo "At least one test failed with exit code => ${CODE}" ;
EXIT_CODE=1;
fi
done
wait; # wait for all subshells to finish
echo "EXIT_CODE => $EXIT_CODE"
if [[ "${EXIT_CODE}" != "0" ]]; then
exit "$EXIT_CODE"
fi
fi
}
EXIT_CODE=0;
function caught_error() {
for job in `jobs -p`; do
@@ -657,7 +608,6 @@ function card_test() {
cuda_list="$cuda_list,$[i*cardnumber+j]"
fi
done
# echo $cuda_list
if [ ${TESTING_DEBUG_MODE:-OFF} == "ON" ] ; then
if [[ $cardnumber == $CUDA_DEVICE_COUNT ]]; then
ctest -I $i,,$NUM_PROC -R "($testcases)" -V &
@@ -675,26 +625,27 @@ function card_test() {
done
wait; # wait for all subshells to finish
set +m
}
function aggresive_test() {
function parallel_test() {
mkdir -p ${PADDLE_ROOT}/build
cd ${PADDLE_ROOT}/build
if [ ${WITH_TESTING:-ON} == "ON" ] ; then
cat <<EOF
========================================
Running unit tests ...
Running unit tests in parallel ...
========================================
EOF
set +x
EXIT_CODE=0;
test_cases=$(ctest -N -V)
exclusive_tests=''
single_card_tests=''
multiple_card_tests=''
is_exclusive=''
is_multicard=''
test_cases=$(ctest -N -V) # get all test cases
exclusive_tests='' # cases that must be run exclusively
single_card_tests='' # cases that take a single GPU
multiple_card_tests='' # cases that take multiple GPUs, usually two
is_exclusive='' # whether the current case must run exclusively
is_multicard='' # whether the current case needs multiple GPUs
while read -r line; do
if [[ "$line" == "" ]]; then
continue
@@ -703,7 +654,7 @@ set +x
if [[ "$matchstr" == "" ]]; then
# Any test case with a LABELS property is parsed here
# RUN_TYPE=EXCLUSIVE means the case must run exclusively
# RUN_TYPE=DIST mean the case would take two graph cards during runtime
# RUN_TYPE=DIST means the case takes two GPUs at runtime
read is_exclusive <<< $(echo "$line"|grep -oEi "RUN_TYPE=EXCLUSIVE")
read is_multicard <<< $(echo "$line"|grep -oEi "RUN_TYPE=DIST")
continue
@@ -711,7 +662,7 @@ set +x
read testcase <<< $(echo "$line"|grep -oEi "\w+$")
if [[ "$is_multicard" == "" ]]; then
# trick: treat all test case with prefix "test_dist" as dist case, and would run on 2 cards
# trick: treat every test case prefixed with "test_dist" as a dist case; such cases run on 2 GPUs
read is_multicard <<< $(echo "$testcase"|grep -oEi "test_dist")
fi
@@ -740,9 +691,9 @@ set +x
testcase=''
done <<< "$test_cases";
card_test "$single_card_tests" 1
card_test "$multiple_card_tests" 2
card_test "$exclusive_tests"
card_test "$single_card_tests" 1 # run cases with single GPU
card_test "$multiple_card_tests" 2 # run cases with two GPUs
card_test "$exclusive_tests" # run cases exclusively, in this cases would be run with 4/8 GPUs
if [[ "$EXIT_CODE" != "0" ]]; then
exit 1;
fi
@@ -942,7 +893,8 @@ EOF
if [[ "$1" != "" ]]; then
parallel_number=$1
fi
cmake .. -DWITH_DISTRIBUTE=OFF -DON_INFER=ON
cmake .. -DWITH_DISTRIBUTE=OFF -DON_INFER=ON -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-Auto} -DCUDA_ARCH_BIN=${CUDA_ARCH_BIN}
make -j ${parallel_number} fluid_lib_dist
make -j ${parallel_number} inference_lib_dist
}
@@ -994,7 +946,7 @@ function main() {
gen_dockerfile ${PYTHON_ABI:-""}
;;
test)
aggresive_test
parallel_test
;;
single_test)
single_test $2
@@ -1024,9 +976,7 @@ function main() {
cmake_gen ${PYTHON_ABI:-""}
build ${parallel_number}
assert_api_not_changed ${PYTHON_ABI:-""}
aggresive_test
gen_fluid_lib ${parallel_number}
test_fluid_lib
parallel_test
assert_api_spec_approvals
;;
cicheck_brpc)
@@ -1057,7 +1007,7 @@ function main() {
cicheck_py35)
cmake_gen ${PYTHON_ABI:-""}
build ${parallel_number}
aggresive_test
parallel_test
assert_api_not_changed ${PYTHON_ABI:-""}
;;
cmake_gen)
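
As a closing note on the scheduling used by card_test() and parallel_test() in the hunks above: the standalone sketch below re-creates the idea under stated assumptions (nvidia-smi and ctest on PATH, a two-GPU-per-case split; variable names mirror the script, everything else is illustrative). GPUs are partitioned into fixed-size groups via CUDA_VISIBLE_DEVICES, and each group runs every NUM_PROC-th test through ctest's -I Start,End,Stride selector, so the workers interleave over the test list without overlapping.

    #!/bin/bash
    # Standalone sketch of the GPU-sharding trick used by card_test() above.
    # Assumptions: nvidia-smi and ctest are available; cardnumber is the number
    # of GPUs each test case needs (1, 2, or all of them for exclusive cases).
    cardnumber=2                                  # GPUs per test case (illustrative)
    CUDA_DEVICE_COUNT=$(nvidia-smi -L | wc -l)    # total GPUs on this machine
    NUM_PROC=$((CUDA_DEVICE_COUNT / cardnumber))  # how many ctest workers fit

    EXIT_CODE=0
    pids=()
    for ((i = 0; i < NUM_PROC; i++)); do
        # Build the comma-separated GPU list for this worker, e.g. "0,1" or "2,3".
        cuda_list=$((i * cardnumber))
        for ((j = 1; j < cardnumber; j++)); do
            cuda_list="$cuda_list,$((i * cardnumber + j))"
        done
        # ctest -I Start,End,Stride runs every Stride-th case starting at Start,
        # so worker i sees a disjoint slice of the full test list.
        env CUDA_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC --output-on-failure &
        pids+=($!)
    done

    # Propagate a failure from any worker, mirroring the EXIT_CODE handling above.
    for pid in "${pids[@]}"; do
        wait "$pid" || EXIT_CODE=1
    done
    exit $EXIT_CODE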