Unverified commit 71f247b1, authored by YUNSHEN XIE, committed by GitHub

run infer ut in A10 (#48535)

* run infer ut in A10

* add a cuda11.2-cudnn8-trt8.4 image

* add paddle_coverage_new.sh
Parent 6f0ae156
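For context, the card_test() change below only turns UT_RUN_TYPE_SETTING into plain ctest label filters, so a machine pool such as the A10 one can run just its slice of the unit tests. A minimal sketch of the four modes, assuming the tests carry RUN_TYPE=INFER / RUN_TYPE=DIST / RUN_TYPE=EXCLUSIVE labels as the regexes imply:

    # inference UTs only (UT_RUN_TYPE_SETTING=INFER)
    ctest -L "(RUN_TYPE=INFER)" --output-on-failure --timeout 120
    # distributed and exclusive UTs (UT_RUN_TYPE_SETTING=DIST)
    ctest -L "(RUN_TYPE=DIST|RUN_TYPE=EXCLUSIVE)" --output-on-failure --timeout 120
    # everything except inference UTs (UT_RUN_TYPE_SETTING=WITHOUT_INFER)
    ctest -LE "(RUN_TYPE=INFER)" --output-on-failure --timeout 120
    # everything except inference, distributed and exclusive UTs (UT_RUN_TYPE_SETTING=OTHER)
    ctest -LE "(RUN_TYPE=INFER|RUN_TYPE=DIST|RUN_TYPE=EXCLUSIVE)" --output-on-failure --timeout 120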
@@ -1326,6 +1326,17 @@ function card_test() {
cardnumber=$2
parallel_level_base=${CTEST_PARALLEL_LEVEL:-1}
# run ut based on the label
if [[ "${UT_RUN_TYPE_SETTING}" == "INFER" ]];then
run_label_mode="-L (RUN_TYPE=INFER)"
elif [[ "${UT_RUN_TYPE_SETTING}" == "DIST" ]];then
run_label_mode="-L (RUN_TYPE=DIST|RUN_TYPE=EXCLUSIVE)"
elif [[ "${UT_RUN_TYPE_SETTING}" == "WITHOUT_INFER" ]];then
run_label_mode="-LE (RUN_TYPE=INFER)"
elif [[ "${UT_RUN_TYPE_SETTING}" == "OTHER" ]];then
run_label_mode="-LE (RUN_TYPE=INFER|RUN_TYPE=DIST|RUN_TYPE=EXCLUSIVE)"
fi
# get the CUDA device count, XPU device count is one
if [ "${WITH_XPU}" == "ON" ];then
CUDA_DEVICE_COUNT=1
@@ -1375,15 +1386,15 @@ function card_test() {
tmpfile=$tmp_dir/$tmpfile_rand"_"$i
if [ ${TESTING_DEBUG_MODE:-OFF} == "ON" ] ; then
if [[ $cardnumber == $CUDA_DEVICE_COUNT ]]; then
(ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" -V --timeout 120 -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) &
(ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" ${run_label_mode} -V --timeout 120 -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) &
else
(env CUDA_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" --timeout 120 -V -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) &
(env CUDA_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" ${run_label_mode} --timeout 120 -V -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) &
fi
else
if [[ $cardnumber == $CUDA_DEVICE_COUNT ]]; then
(ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" --timeout 120 --output-on-failure -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) &
(ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" ${run_label_mode} --timeout 120 --output-on-failure -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) &
else
(env CUDA_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" --timeout 120 --output-on-failure -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) &
(env CUDA_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" ${run_label_mode} --timeout 120 --output-on-failure -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) &
fi
fi
done
@@ -2364,7 +2375,7 @@ set +x
if [[ "${failed_test_lists}" == "" ]];then
break
else
retry_unittests=$(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
retry_unittests=$( echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
fi
fi
echo "========================================="
@@ -2687,10 +2698,10 @@ set +x
if [ ${TIMEOUT_DEBUG_HELP:-OFF} == "ON" ];then
bash $PADDLE_ROOT/tools/timeout_debug_help.sh "$failed_test_lists" # cat logs for timeout uts which were killed by ctest
fi
read need_retry_ut_str <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
need_retry_ut_str=$(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
need_retry_ut_arr=(${need_retry_ut_str})
need_retry_ut_count=${#need_retry_ut_arr[@]}
read retry_unittests <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
retry_unittests=$(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
while ( [ $exec_times -lt $retry_time ] )
do
if [[ "${exec_times}" == "0" ]] ;then
@@ -2700,7 +2711,7 @@ set +x
is_retry_execuate=1
fi
elif [[ "${exec_times}" == "1" ]] ;then
read need_retry_ut_str <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
need_retry_ut_str=$(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
need_retry_ut_arr=(${need_retry_ut_str})
need_retry_ut_count=${#need_retry_ut_arr[@]}
if [ $need_retry_ut_count -lt $exec_retry_threshold ];then
@@ -2718,7 +2729,7 @@ set +x
if [[ "${failed_test_lists}" == "" ]];then
break
else
read retry_unittests <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
retry_unittests=$(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
fi
fi
echo "========================================="
paddle_coverage_new.sh (new file)
#!/usr/bin/env bash
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -xe
PADDLE_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}")/../../" && pwd )"
function lcov_init(){
# install lcov
if [ ! -f "/root/.cache/lcov-1.14.tar.gz" ];then
wget -P /home https://paddle-ci.gz.bcebos.com/coverage/lcov-1.14.tar.gz --no-proxy --no-check-certificate || exit 101
cp /home/lcov-1.14.tar.gz /root/.cache/lcov-1.14.tar.gz
else
cp /root/.cache/lcov-1.14.tar.gz /home/lcov-1.14.tar.gz
fi
tar -xf /home/lcov-1.14.tar.gz -C /
cd /lcov-1.14
make install
}
function gen_cpp_covinfo(){
# run paddle coverage
cd /paddle/build
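# gcda_clean.py is assumed to drop stale *.gcda counter files that do not
# belong to the current PR build (GIT_PR_ID) before lcov captures the
# remaining C++ counters into coverage.info below.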
python3.7 ${PADDLE_ROOT}/tools/coverage/gcda_clean.py ${GIT_PR_ID} || exit 101
lcov --capture -d ./ -o coverage.info --rc lcov_branch_coverage=0
}
# full html report
function gen_full_html_report() {
lcov --extract coverage.info \
'/paddle/paddle/fluid/framework/*' \
'/paddle/paddle/fluid/imperative/*' \
'/paddle/paddle/fluid/inference/*' \
'/paddle/paddle/fluid/memory/*' \
'/paddle/paddle/fluid/operators/*' \
'/paddle/paddle/fluid/recordio/*' \
'/paddle/paddle/fluid/string/*' \
'/paddle/paddle/fluid/eager/*' \
'/paddle/paddle/phi/*' \
'/paddle/paddle/utils/*' \
-o coverage-full.tmp \
--rc lcov_branch_coverage=0
mv -f coverage-full.tmp coverage-full.info
lcov --remove coverage-full.info \
'/paddle/paddle/fluid/framework/*_test*' \
'/paddle/paddle/fluid/*/*test*' \
'/paddle/paddle/fluid/*/*/*test*' \
'/paddle/paddle/fluid/inference/tests/*' \
'/paddle/paddle/fluid/inference/api/demo_ci/*' \
'/paddle/paddle/fluid/eager/tests/*' \
'/paddle/paddle/phi/tests/*' \
-o coverage-full.tmp \
--rc lcov_branch_coverage=0
mv -f coverage-full.tmp coverage-full.info
}
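# The XPU and NPU variants below reuse the same extract-then-remove pattern:
# first keep only the source trees of interest, then strip test sources so
# they do not inflate the coverage denominator.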
function gen_full_html_report_xpu() {
lcov --extract coverage.info \
'/paddle/paddle/fluid/operators/*xpu*' \
'/paddle/paddle/phi/kernels/xpu/*' \
-o coverage-full.tmp \
--rc lcov_branch_coverage=0
mv -f coverage-full.tmp coverage-full.info
lcov --remove coverage-full.info \
'/paddle/paddle/fluid/framework/*_test*' \
'/paddle/paddle/fluid/*/*test*' \
'/paddle/paddle/fluid/*/*/*test*' \
'/paddle/paddle/fluid/inference/tests/*' \
'/paddle/paddle/fluid/inference/api/demo_ci/*' \
-o coverage-full.tmp \
--rc lcov_branch_coverage=0
mv -f coverage-full.tmp coverage-full.info
}
function gen_full_html_report_npu() {
lcov --extract coverage.info \
'/paddle/paddle/fluid/operators/*npu*' \
-o coverage-full.tmp \
--rc lcov_branch_coverage=0
mv -f coverage-full.tmp coverage-full.info
lcov --remove coverage-full.info \
'/paddle/paddle/fluid/framework/*_test*' \
'/paddle/paddle/fluid/*/*test*' \
'/paddle/paddle/fluid/*/*/*test*' \
'/paddle/paddle/fluid/inference/tests/*' \
'/paddle/paddle/fluid/inference/api/demo_ci/*' \
-o coverage-full.tmp \
--rc lcov_branch_coverage=0
mv -f coverage-full.tmp coverage-full.info
}
# if [ ${WITH_XPU:-OFF} == "ON" ]; then
# gen_full_html_report_xpu || true
# elif [ ${WITH_ASCEND_CL:-OFF} == "ON" ]; then
# gen_full_html_report_npu || true
# else
# gen_full_html_report || true
# fi
# diff html report
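# pull_request.py is assumed to print the files touched by the PR ("files")
# and its unified diff ("diff"); coverage_diff.py then restricts the lcov data
# to the changed lines before genhtml renders the HTML report.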
function gen_diff_html_report() {
if [ "${GIT_PR_ID}" != "" ]; then
COVERAGE_DIFF_PATTERN="`python3.7 ${PADDLE_ROOT}/tools/coverage/pull_request.py files ${GIT_PR_ID}`"
python3.7 ${PADDLE_ROOT}/tools/coverage/pull_request.py diff ${GIT_PR_ID} > git-diff.out
fi
lcov --extract coverage-full.info \
${COVERAGE_DIFF_PATTERN} \
-o coverage-diff.info \
--rc lcov_branch_coverage=0
python3.7 ${PADDLE_ROOT}/tools/coverage/coverage_diff.py coverage-diff.info git-diff.out > coverage-diff.tmp
mv -f coverage-diff.tmp coverage-diff.info
genhtml -o coverage-diff -t 'Diff Coverage' --no-function-coverage --no-branch-coverage coverage-diff.info
}
# gen_diff_html_report || true
function gen_py_covinfo(){
# python coverage
export COVERAGE_FILE=/paddle/build/python-coverage.data
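# This assumes the working directory is still /paddle/build (set in
# gen_cpp_covinfo): merge the per-process python-coverage.data.* fragments,
# convert them to Cobertura XML and then to lcov format; NO_PYTHON_COVERAGE_DATA
# lets the later steps pass when no Python coverage data was produced.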
coverage combine $(ls python-coverage.data.*) || NO_PYTHON_COVERAGE_DATA=1
coverage xml -i -o python-coverage.xml || [[ "${NO_PYTHON_COVERAGE_DATA}" == "1" ]]
sed -i 's/mnt\/paddle/paddle/g' python-coverage.xml
python ${PADDLE_ROOT}/tools/coverage/python_coverage.py > python-coverage.info || [[ "${NO_PYTHON_COVERAGE_DATA}" == "1" ]]
}
# python full html report
#
function gen_python_full_html_report() {
lcov --extract python-coverage.info \
'/paddle/python/*' \
-o python-coverage-full.tmp \
--rc lcov_branch_coverage=0
mv -f python-coverage-full.tmp python-coverage-full.info
lcov --remove python-coverage-full.info \
'/*/tests/*' \
-o python-coverage-full.tmp \
--rc lcov_branch_coverage=0
mv -f python-coverage-full.tmp python-coverage-full.info
}
# gen_python_full_html_report || true
# python diff html report
function gen_python_diff_html_report() {
if [ "${GIT_PR_ID}" != "" ]; then
COVERAGE_DIFF_PATTERN="`python3.7 ${PADDLE_ROOT}/tools/coverage/pull_request.py files ${GIT_PR_ID}`"
python3.7 ${PADDLE_ROOT}/tools/coverage/pull_request.py diff ${GIT_PR_ID} > python-git-diff.out
fi
lcov --extract python-coverage-full.info \
${COVERAGE_DIFF_PATTERN} \
-o python-coverage-diff.info \
--rc lcov_branch_coverage=0
python3.7 ${PADDLE_ROOT}/tools/coverage/coverage_diff.py python-coverage-diff.info python-git-diff.out > python-coverage-diff.tmp
mv -f python-coverage-diff.tmp python-coverage-diff.info
genhtml -o python-coverage-diff \
-t 'Python Diff Coverage' \
--no-function-coverage \
--no-branch-coverage \
--ignore-errors source \
python-coverage-diff.info
}
# gen_python_diff_html_report || true
# combine the per-stage coverage info files
function covinfo_combine_full(){
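# The "other-*" and "infer-*" tracefiles are expected to be produced by the
# separate test stages introduced with UT_RUN_TYPE_SETTING in paddle_build.sh;
# lcov -a appends them into a single coverage.info / python-coverage-full.info.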
if [ -f "other-coverage.info" ];then
if [ -f "infer-coverage.info" ];then
lcov -a other-coverage.info -a infer-coverage.info -o coverage.info
else
mv other-coverage.info coverage.info
fi
elif [ -f "infer-coverage.info" ];then
mv infer-coverage.info coverage.info
else
echo "Cannot found coverage.info"
fi
if [ -f "other-python-coverage-full.info" ];then
if [ -f "infer-python-coverage-full.info" ];then
lcov -a other-python-coverage-full.info -a infer-python-coverage-full.info -o python-coverage-full.info
else
mv other-python-coverage-full.info python-coverage-full.info
fi
elif [ -f "infer-coverage.info" ];then
mv infer-python-coverage-full.info python-coverage-full.info
else
echo "Cannot found python coverage.info"
fi
}
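# Enforces the diff-coverage gate: coverage_lines.py asserts that at least 90%
# (the 0.9 argument) of the lines changed by the PR are covered. On failure the
# sentinel "exit 9" is written to /tmp/paddle_coverage.result, presumably so the
# caller can distinguish a coverage failure from other errors, and the script
# exits with code 9.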
function cov_rate_judge(){
echo "Assert CPP Diff Coverage"
python3.7 ${PADDLE_ROOT}/tools/coverage/coverage_lines.py coverage-diff.info 0.9 || COVERAGE_LINES_ASSERT=1
echo "Assert Python Diff Coverage"
if [ ${WITH_XPU:-OFF} == "ON" ]; then
echo "XPU has no python coverage!"
elif [ ${WITH_ASCEND_CL:-OFF} == "ON" ]; then
echo "NPU has no python coverage!"
else
if [[ -f python-coverage-diff.info ]];then
python3.7 ${PADDLE_ROOT}/tools/coverage/coverage_lines.py python-coverage-diff.info 0.9 || PYTHON_COVERAGE_LINES_ASSERT=1
fi
fi
if [ "$COVERAGE_LINES_ASSERT" = "1" ] || [ "$PYTHON_COVERAGE_LINES_ASSERT" = "1" ]; then
echo "exit 9" > /tmp/paddle_coverage.result
exit 9
fi
}
function print_usage() {
echo -e "\n${RED}Usage${NONE}:
${BOLD}${SCRIPT_NAME}${NONE} [OPTION]"
echo -e "\n${RED}Options${NONE}:
${BLUE}gen_cov_info${NONE}: generate coverage info
${BLUE}combine_cov_info${NONE}: combine coverage info and generate the diff report
"
}
function main () {
local CMD=$1
lcov_init
case $CMD in
gen_cov_info)
gen_cpp_covinfo
gen_py_covinfo
;;
combine_cov_info)
covinfo_combine_full
gen_diff_html_report
gen_python_diff_html_report
cov_rate_judge
;;
*)
print_usage
exit 1
;;
esac
}
main "$@"
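# Typical invocation, sketched from main() above (the CI wiring itself is not
# part of this patch): each test stage runs
#   bash paddle_coverage_new.sh gen_cov_info
# and a final stage merges the per-stage tracefiles and applies the gate with
#   bash paddle_coverage_new.sh combine_cov_info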
@@ -36,6 +36,11 @@ elif [[ "$1" == "trt8406" ]];then
tar -zxf TensorRT-8.4.0.6.Linux.x86_64-gnu.cuda-11.6.cudnn8.3.tar.gz -C /usr/local
cp -rf /usr/local/TensorRT-8.4.0.6/include/* /usr/include/ && cp -rf /usr/local/TensorRT-8.4.0.6/lib/* /usr/lib/
rm -f TensorRT-8.4.0.6.Linux.x86_64-gnu.cuda-11.6.cudnn8.3.tar.gz
elif [[ "$1" == "trt8431" ]];then
wget -q https://paddle-ci.gz.bcebos.com/TRT/TensorRT-8.4.3.1.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz --no-check-certificate --no-proxy
tar -zxf TensorRT-8.4.3.1.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz -C /usr/local
cp -rf /usr/local/TensorRT-8.4.3.1/include/* /usr/include/ && cp -rf /usr/local/TensorRT-8.4.3.1/lib/* /usr/lib/
rm -f TensorRT-8.4.3.1.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz
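# (trt8431 is the TensorRT 8.4.3.1 package requested by the new CUDA 11.2
# dockerfile added below via "install_trt.sh trt8431".)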
elif [[ "$VERSION" == "11.2" ]];then
wget -q --no-proxy https://paddle-ci.gz.bcebos.com/TRT/TensorRT7-cuda11.1-cudnn8.1.tar.gz --no-check-certificate
tar -zxf TensorRT7-cuda11.1-cudnn8.1.tar.gz -C /usr/local
@@ -166,6 +166,33 @@ function make_unbuntu18_cu117_dockerfile(){
sed -i 's# && rm /etc/apt/sources.list.d/nvidia-ml.list##g' ${dockerfile_name}
}
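# New in this commit: builds the cuda11.2-cudnn8-trt8.4 image named in the
# commit message by rewriting Dockerfile.ubuntu18 with sed, swapping in the
# nvidia/cuda:11.2.0-cudnn8 base image, switching install_trt.sh to the
# trt8431 package added above, and pinning GCC to 8.2.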
function make_ubuntu18_cu112_dockerfile(){
dockerfile_name="Dockerfile.cuda11.2_cudnn8.1_trt8.4_gcc8.2_ubuntu18"
sed "s#<baseimg>#nvidia/cuda:11.2.0-cudnn8-devel-ubuntu18.04#g" ./Dockerfile.ubuntu18 >${dockerfile_name}
sed -i "s#<setcuda>#ENV LD_LIBRARY_PATH=/usr/local/cuda-11.2/targets/x86_64-linux/lib:\$LD_LIBRARY_PATH #g" ${dockerfile_name}
sed -i "s#liblzma-dev#liblzma-dev openmpi-bin openmpi-doc libopenmpi-dev#g" ${dockerfile_name}
dockerfile_line=$(wc -l ${dockerfile_name}|awk '{print $1}')
sed -i 's#RUN bash /build_scripts/install_trt.sh#RUN bash /build_scripts/install_trt.sh trt8431#g' ${dockerfile_name}
sed -i "${dockerfile_line}i RUN wget --no-check-certificate -q https://paddle-edl.bj.bcebos.com/hadoop-2.7.7.tar.gz \&\& \
tar -xzf hadoop-2.7.7.tar.gz && mv hadoop-2.7.7 /usr/local/" ${dockerfile_name}
sed -i "${dockerfile_line}i RUN apt remove git -y \&\& apt install -y libsndfile1 zstd pigz libcurl4-openssl-dev gettext zstd ninja-build \&\& wget -q https://paddle-ci.gz.bcebos.com/git-2.17.1.tar.gz \&\& \
tar -xvf git-2.17.1.tar.gz \&\& \
cd git-2.17.1 \&\& \
./configure --with-openssl --with-curl --prefix=/usr/local \&\& \
make -j8 \&\& make install " ${dockerfile_name}
sed -i "${dockerfile_line}i RUN pip install wheel \&\& pip3 install PyGithub wheel \&\& pip3.7 install PyGithub distro \&\& pip3.8 install PyGithub distro" ${dockerfile_name}
sed -i 's#<install_cpu_package>##g' ${dockerfile_name}
sed -i "s#<install_gcc>#WORKDIR /usr/bin \\
COPY tools/dockerfile/build_scripts /build_scripts \\
RUN bash /build_scripts/install_gcc.sh gcc82 \&\& rm -rf /build_scripts \\
RUN cp gcc gcc.bak \&\& cp g++ g++.bak \&\& rm gcc \&\& rm g++ \\
RUN ln -s /usr/local/gcc-8.2/bin/gcc /usr/local/bin/gcc \\
RUN ln -s /usr/local/gcc-8.2/bin/g++ /usr/local/bin/g++ \\
RUN ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/gcc \\
RUN ln -s /usr/local/gcc-8.2/bin/g++ /usr/bin/g++ \\
ENV PATH=/usr/local/gcc-8.2/bin:\$PATH #g" ${dockerfile_name}
}
function main() {
make_ubuntu_dockerfile
make_ubuntu_trt7_dockerfile
@@ -173,6 +200,7 @@ function main() {
make_cinn_dockerfile
make_ce_framework_dockcerfile
make_unbuntu18_cu117_dockerfile
make_ubuntu18_cu112_dockerfile
}
main "$@"