未验证 提交 f5d6937f 编写于 作者: W wopeizl 提交者: GitHub

Specify the CUDA arch name and bin to decrease the compile time for inference (#17020)

1. specify the cuda arch name and bin to decrease the compile time for inference test=develop
2. simplify the script and add comments
3. remove the fluid process from cicheck
上级 f7caf7d4
...@@ -194,6 +194,7 @@ function cmake_gen() { ...@@ -194,6 +194,7 @@ function cmake_gen() {
-DWITH_AVX=${WITH_AVX:-OFF} -DWITH_AVX=${WITH_AVX:-OFF}
-DWITH_GOLANG=${WITH_GOLANG:-OFF} -DWITH_GOLANG=${WITH_GOLANG:-OFF}
-DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All}
-DCUDA_ARCH_BIN=${CUDA_ARCH_BIN} \
-DWITH_PYTHON=${WITH_PYTHON:-ON} -DWITH_PYTHON=${WITH_PYTHON:-ON}
-DCUDNN_ROOT=/usr/ -DCUDNN_ROOT=/usr/
-DWITH_TESTING=${WITH_TESTING:-ON} -DWITH_TESTING=${WITH_TESTING:-ON}
...@@ -228,6 +229,7 @@ EOF ...@@ -228,6 +229,7 @@ EOF
-DWITH_AVX=${WITH_AVX:-OFF} \ -DWITH_AVX=${WITH_AVX:-OFF} \
-DWITH_GOLANG=${WITH_GOLANG:-OFF} \ -DWITH_GOLANG=${WITH_GOLANG:-OFF} \
-DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} \ -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} \
-DCUDA_ARCH_BIN=${CUDA_ARCH_BIN} \
-DWITH_PYTHON=${WITH_PYTHON:-ON} \ -DWITH_PYTHON=${WITH_PYTHON:-ON} \
-DCUDNN_ROOT=/usr/ \ -DCUDNN_ROOT=/usr/ \
-DWITH_TESTING=${WITH_TESTING:-ON} \ -DWITH_TESTING=${WITH_TESTING:-ON} \
...@@ -561,57 +563,6 @@ function bind_test() { ...@@ -561,57 +563,6 @@ function bind_test() {
wait wait
} }
#######################################
# Run the ctest suite in parallel, one ctest process per pair of GPUs.
#
# The GPU count is taken from the number of lines printed by `nvidia-smi -L`.
# Process i is started with CUDA_VISIBLE_DEVICES=2i,2i+1 and is handed its
# share of the test cases through `ctest -I i,,NUM_PROC`.
#
# Globals:
#   PADDLE_ROOT         (read)    tests run inside ${PADDLE_ROOT}/build
#   WITH_TESTING        (read)    any value other than "ON" skips the tests
#                                 (defaults to ON)
#   TESTING_DEBUG_MODE  (read)    "ON" runs ctest with -V instead of
#                                 --output-on-failure (defaults to OFF)
#   CUDA_DEVICE_COUNT   (written) number of GPUs reported by nvidia-smi
#   NUM_PROC            (written) number of concurrent ctest processes
#   EXIT_CODE           (written) 0 when every process passed, 1 otherwise
# Outputs:
#   Banner, per-failure messages and the final EXIT_CODE on stdout;
#   a warning on stderr when no ctest process could be started.
# Returns:
#   0 on success or when testing is disabled. Exits the shell with status 1
#   when the build directory cannot be entered or any ctest process fails.
#######################################
function parallel_test() {
  mkdir -p "${PADDLE_ROOT}/build"
  # Never run ctest from an unexpected directory.
  cd "${PADDLE_ROOT}/build" || exit 1

  if [[ "${WITH_TESTING:-ON}" != "ON" ]]; then
    return 0
  fi

  cat <<EOF
========================================
Running unit tests ...
========================================
EOF

  # calculate and set the memory usage for each process
  # MEM_USAGE=$(printf "%.2f" `echo "scale=5; 1.0 / $NUM_PROC" | bc`)
  # export FLAGS_fraction_of_gpu_memory_to_use=$MEM_USAGE

  # The two ctest invocations only ever differed in this one flag.
  local ctest_output_flag="--output-on-failure"
  if [[ "${TESTING_DEBUG_MODE:-OFF}" == "ON" ]]; then
    ctest_output_flag="-V"
  fi

  EXIT_CODE=0
  local -a pids=()
  local i pid code

  # get the CUDA device count
  CUDA_DEVICE_COUNT=$(nvidia-smi -L | wc -l)
  # each test case would occupy two graph cards
  NUM_PROC=$(( CUDA_DEVICE_COUNT / 2 ))

  if (( NUM_PROC == 0 )); then
    # Keep the historical exit status, but do not let "nothing ran" pass
    # unnoticed.
    echo "WARNING: found $(( CUDA_DEVICE_COUNT )) CUDA device(s); at least 2 are needed, no unit tests were run" >&2
  fi

  for (( i = 0; i < NUM_PROC; i++ )); do
    # CUDA_VISIBLE_DEVICES http://acceleware.com/blog/cudavisibledevices-masking-gpus
    # ctest -I https://cmake.org/cmake/help/v3.0/manual/ctest.1.html?highlight=ctest
    env CUDA_VISIBLE_DEVICES="$(( i * 2 )),$(( i * 2 + 1 ))" \
      ctest -I "${i},,${NUM_PROC}" "${ctest_output_flag}" &
    pids+=("$!")
  done

  for pid in "${pids[@]}"; do
    # Capture the status with `||` so an active errexit cannot abort the
    # script before the remaining jobs are reaped and the summary is printed.
    code=0
    wait "${pid}" || code=$?
    if (( code != 0 )); then
      echo "At least one test failed with exit code => ${code}"
      EXIT_CODE=1
    fi
  done
  wait # wait for all subshells to finish

  echo "EXIT_CODE => $EXIT_CODE"
  if [[ "${EXIT_CODE}" != "0" ]]; then
    exit "$EXIT_CODE"
  fi
}
EXIT_CODE=0; EXIT_CODE=0;
function caught_error() { function caught_error() {
for job in `jobs -p`; do for job in `jobs -p`; do
...@@ -657,7 +608,6 @@ function card_test() { ...@@ -657,7 +608,6 @@ function card_test() {
cuda_list="$cuda_list,$[i*cardnumber+j]" cuda_list="$cuda_list,$[i*cardnumber+j]"
fi fi
done done
# echo $cuda_list
if [ ${TESTING_DEBUG_MODE:-OFF} == "ON" ] ; then if [ ${TESTING_DEBUG_MODE:-OFF} == "ON" ] ; then
if [[ $cardnumber == $CUDA_DEVICE_COUNT ]]; then if [[ $cardnumber == $CUDA_DEVICE_COUNT ]]; then
ctest -I $i,,$NUM_PROC -R "($testcases)" -V & ctest -I $i,,$NUM_PROC -R "($testcases)" -V &
...@@ -675,26 +625,27 @@ function card_test() { ...@@ -675,26 +625,27 @@ function card_test() {
done done
wait; # wait for all subshells to finish wait; # wait for all subshells to finish
set +m
} }
function aggresive_test() { function parallel_test() {
mkdir -p ${PADDLE_ROOT}/build mkdir -p ${PADDLE_ROOT}/build
cd ${PADDLE_ROOT}/build cd ${PADDLE_ROOT}/build
if [ ${WITH_TESTING:-ON} == "ON" ] ; then if [ ${WITH_TESTING:-ON} == "ON" ] ; then
cat <<EOF cat <<EOF
======================================== ========================================
Running unit tests ... Running unit tests in parallel way ...
======================================== ========================================
EOF EOF
set +x set +x
EXIT_CODE=0; EXIT_CODE=0;
test_cases=$(ctest -N -V) test_cases=$(ctest -N -V) # get all test cases
exclusive_tests='' exclusive_tests='' # cases list which would be run exclusively
single_card_tests='' single_card_tests='' # cases list which would take one graph card
multiple_card_tests='' multiple_card_tests='' # cases list which would take multiple GPUs, most cases would be two GPUs
is_exclusive='' is_exclusive='' # indicate whether the case is exclusive type
is_multicard='' is_multicard='' # indicate whether the case is multiple GPUs type
while read -r line; do while read -r line; do
if [[ "$line" == "" ]]; then if [[ "$line" == "" ]]; then
continue continue
...@@ -703,7 +654,7 @@ set +x ...@@ -703,7 +654,7 @@ set +x
if [[ "$matchstr" == "" ]]; then if [[ "$matchstr" == "" ]]; then
# Any test case with LABELS property would be parse here # Any test case with LABELS property would be parse here
# RUN_TYPE=EXCLUSIVE mean the case would run exclusively # RUN_TYPE=EXCLUSIVE mean the case would run exclusively
# RUN_TYPE=DIST mean the case would take two graph cards during runtime # RUN_TYPE=DIST mean the case would take two graph GPUs during runtime
read is_exclusive <<< $(echo "$line"|grep -oEi "RUN_TYPE=EXCLUSIVE") read is_exclusive <<< $(echo "$line"|grep -oEi "RUN_TYPE=EXCLUSIVE")
read is_multicard <<< $(echo "$line"|grep -oEi "RUN_TYPE=DIST") read is_multicard <<< $(echo "$line"|grep -oEi "RUN_TYPE=DIST")
continue continue
...@@ -711,7 +662,7 @@ set +x ...@@ -711,7 +662,7 @@ set +x
read testcase <<< $(echo "$line"|grep -oEi "\w+$") read testcase <<< $(echo "$line"|grep -oEi "\w+$")
if [[ "$is_multicard" == "" ]]; then if [[ "$is_multicard" == "" ]]; then
# trick: treat all test case with prefix "test_dist" as dist case, and would run on 2 cards # trick: treat all test case with prefix "test_dist" as dist case, and would run on 2 GPUs
read is_multicard <<< $(echo "$testcase"|grep -oEi "test_dist") read is_multicard <<< $(echo "$testcase"|grep -oEi "test_dist")
fi fi
...@@ -740,9 +691,9 @@ set +x ...@@ -740,9 +691,9 @@ set +x
testcase='' testcase=''
done <<< "$test_cases"; done <<< "$test_cases";
card_test "$single_card_tests" 1 card_test "$single_card_tests" 1 # run cases with single GPU
card_test "$multiple_card_tests" 2 card_test "$multiple_card_tests" 2 # run cases with two GPUs
card_test "$exclusive_tests" card_test "$exclusive_tests" # run cases exclusively, in this cases would be run with 4/8 GPUs
if [[ "$EXIT_CODE" != "0" ]]; then if [[ "$EXIT_CODE" != "0" ]]; then
exit 1; exit 1;
fi fi
...@@ -942,7 +893,8 @@ EOF ...@@ -942,7 +893,8 @@ EOF
if [[ "$1" != "" ]]; then if [[ "$1" != "" ]]; then
parallel_number=$1 parallel_number=$1
fi fi
cmake .. -DWITH_DISTRIBUTE=OFF -DON_INFER=ON cmake .. -DWITH_DISTRIBUTE=OFF -DON_INFER=ON -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-Auto} -DCUDA_ARCH_BIN=${CUDA_ARCH_BIN}
make -j ${parallel_number} fluid_lib_dist make -j ${parallel_number} fluid_lib_dist
make -j ${parallel_number} inference_lib_dist make -j ${parallel_number} inference_lib_dist
} }
...@@ -994,7 +946,7 @@ function main() { ...@@ -994,7 +946,7 @@ function main() {
gen_dockerfile ${PYTHON_ABI:-""} gen_dockerfile ${PYTHON_ABI:-""}
;; ;;
test) test)
aggresive_test parallel_test
;; ;;
single_test) single_test)
single_test $2 single_test $2
...@@ -1024,9 +976,7 @@ function main() { ...@@ -1024,9 +976,7 @@ function main() {
cmake_gen ${PYTHON_ABI:-""} cmake_gen ${PYTHON_ABI:-""}
build ${parallel_number} build ${parallel_number}
assert_api_not_changed ${PYTHON_ABI:-""} assert_api_not_changed ${PYTHON_ABI:-""}
aggresive_test parallel_test
gen_fluid_lib ${parallel_number}
test_fluid_lib
assert_api_spec_approvals assert_api_spec_approvals
;; ;;
cicheck_brpc) cicheck_brpc)
...@@ -1057,7 +1007,7 @@ function main() { ...@@ -1057,7 +1007,7 @@ function main() {
cicheck_py35) cicheck_py35)
cmake_gen ${PYTHON_ABI:-""} cmake_gen ${PYTHON_ABI:-""}
build ${parallel_number} build ${parallel_number}
aggresive_test parallel_test
assert_api_not_changed ${PYTHON_ABI:-""} assert_api_not_changed ${PYTHON_ABI:-""}
;; ;;
cmake_gen) cmake_gen)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册