From 8fb17fc72b228b7916c0aaab43a0537f3d4e20f0 Mon Sep 17 00:00:00 2001 From: tianshuo78520a <707759223@qq.com> Date: Mon, 16 Aug 2021 10:41:29 +0800 Subject: [PATCH] Op-benchmark CI cpu and gpu (#34631) * notest;pm-op-benchmark * notest;pm-op-benchmark * notest;pm-op-benchmark * notest;pm-op-benchmark * notest;pm-op-benchmark * notest;pm-op-benchmark * notest;test=op_benchmark * notest;test=op_benchmark * notest;op_benchmark * notest;op_benchmark * notest;op_benchmark * notest;op_benchmark * notest;op_benchmark * notest;op_benchmark * notest;test=op_benchmark * notest;op_benchmark * notest;op_benchmark * notest;op_benchmark * fix * fix --- tools/test_ci_op_benchmark.sh | 314 ++++++++++++++++++++++++++++++++++ 1 file changed, 314 insertions(+) create mode 100644 tools/test_ci_op_benchmark.sh diff --git a/tools/test_ci_op_benchmark.sh b/tools/test_ci_op_benchmark.sh new file mode 100644 index 00000000000..ff1024ba948 --- /dev/null +++ b/tools/test_ci_op_benchmark.sh @@ -0,0 +1,314 @@ +#!/bin/bash + +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set +ex + +[ -z "$PADDLE_ROOT" ] && PADDLE_ROOT=$(cd $(dirname ${BASH_SOURCE[0]})/.. && pwd) + +# PR modify op source files +CHANGE_OP_FILES=() + +# ops that will run benchmark test +declare -A CHANGE_OP_MAP + +# ops that benchmark repo has +declare -A BENCHMARK_OP_MAP + +# searched header files +declare -A INCLUDE_SEARCH_MAP + +function LOG { + echo "[$0:${BASH_LINENO[0]}] $*" >&2 +} + +# Limit cu file directory +function match_cu_file_directory { + local sub_dir cu_file_dir + cu_file_dir=$(dirname ${1}) + for sub_dir in "" "/elementwise" "/reduce_ops" + do + [ "${cu_file_dir}" == "paddle/fluid/operators${sub_dir}" ] && return 0 + done + return 1 +} + +# Load op files by header file +function load_CHANGE_OP_FILES_by_header_file { + local change_file + for change_file in $(grep -rl "${1}" paddle/fluid/operators) + do + if [[ "$change_file" =~ "_op.cu" ]] + then + # match cu file directory limit + match_cu_file_directory $change_file || continue + LOG "[INFO] Found \"${1}\" include by \"${change_file}\"." + CHANGE_OP_FILES[${#CHANGE_OP_FILES[@]}]="$change_file" + elif [[ "$change_file" =~ ".h" ]] + then + [ -n "${INCLUDE_SEARCH_MAP[$change_file]}" ] && continue + LOG "[INFO] Found \"${1}\" include by \"${change_file}\", keep searching." + INCLUDE_SEARCH_MAP[$change_file]="searched" + load_CHANGE_OP_FILES_by_header_file $change_file + fi + done +} + +# Load op files that PR changes +function load_CHANGE_OP_FILES { + local sub_dir change_file + # TODO(Avin0323): Need to filter the files added by the new OP. + for change_file in $(git diff --name-only origin/develop) + do + # match directory limit + [[ "$change_file" =~ "paddle/fluid/operators/" ]] || continue + # match file name limit + if [[ "$change_file" =~ "_op.cu" ]] + then + # match cu file directory limit + match_cu_file_directory $change_file || continue + LOG "[INFO] Found \"${change_file}\" changed." + CHANGE_OP_FILES[${#CHANGE_OP_FILES[@]}]="$change_file" + elif [[ "$change_file" =~ ".h" ]] + then + LOG "[INFO] Found \"${change_file}\" changed, keep searching." + INCLUDE_SEARCH_MAP[${change_file}]="searched" + load_CHANGE_OP_FILES_by_header_file $change_file + fi + done + [ ${#CHANGE_OP_FILES[@]} -eq 0 ] && LOG "[INFO] No op to test, skip this ci." && \ + echo "cpu_benchmark=ON" >${cfs_dir}/op_benchmark/${AGILE_PULL_ID}/${AGILE_REVISION}/pass.txt && \ + exit 0 +} + +# Clone benchmark repo +function prepare_benchmark_environment { + LOG "[INFO] Clone benchmark repo ..." + git clone https://github.com/PaddlePaddle/benchmark.git + [ $? -ne 0 ] && LOG "[FATAL] Clone benchmark repo fail." && exit -1 + LOG "[INFO] Collect api info ..." + python benchmark/api/deploy/collect_api_info.py \ + --test_module_name tests_v2 \ + --info_file api_info.txt >& 2 + [ $? -ne 0 ] && LOG "[FATAL] Collect api info fail." && exit -1 + [ ! -f benchmark/ci/scripts/op_benchmark.config ] && LOG "[FATAL] Missing op_benchmark.config!" && exit -1 +} + +# Load unique op name from CHANGE_OP_FILES +function load_CHANGE_OP_MAP { + local op_name change_file change_file_name + source benchmark/ci/scripts/op_benchmark.config + for change_file in ${CHANGE_OP_FILES[@]} + do + change_file_name=${change_file#*paddle/fluid/operators/} + if [ -n "${PADDLE_FILENAME_OP_MAP[$change_file_name]}" ] + then + for op_name in ${PADDLE_FILENAME_OP_MAP[$change_file_name]} + do + LOG "[INFO] Load op: \"${op_name}\"." + CHANGE_OP_MAP[${op_name}]="$change_file" + done + else + op_name=${change_file_name##*/} + op_name=${op_name%_cudnn_op*} + op_name=${op_name%_op*} + [ -n "${SKIP_OP_MAP[$op_name]}" ] && continue + LOG "[INFO] Load op: \"${op_name}\"." + CHANGE_OP_MAP[${op_name}]="$change_file" + fi + done +} + +# Load ops that will run benchmark test +function load_BENCHMARK_OP_MAP { + local line op_name api_name + source benchmark/ci/scripts/op_benchmark.config + for line in $(cat api_info.txt) + do + api_name=${line%%,*} + if [ -n "${BENCHMARK_APINAME_OP_MAP[$api_name]}" ] + then + op_name=${BENCHMARK_APINAME_OP_MAP[$api_name]} + else + op_name=$api_name + fi + if [ -n "${CHANGE_OP_MAP[$op_name]}" ] + then + LOG "[INFO] Load benchmark settings with op \"${op_name}\"." + BENCHMARK_OP_MAP[$op_name]=$line + fi + done +} + +# compile and install paddlepaddle +function compile_install_paddlepaddle { + LOG "[INFO] Compiling install package ..." + export WITH_GPU=ON + export WITH_AVX=ON + export WITH_MKL=ON + export RUN_TEST=OFF + export WITH_PYTHON=ON + export WITH_TESTING=OFF + export BUILD_TYPE=Release + export CUDA_ARCH_NAME=${CUDA_ARCH_NAME:-Auto} + export WITH_DISTRIBUTE=OFF + export CMAKE_BUILD_TYPE=Release + [ -d build ] && rm -rf build + bash paddle/scripts/paddle_build.sh build_only $(nproc) + [ $? -ne 0 ] && LOG "[FATAL] compile fail." && exit 7 + LOG "[INFO] Build fineshed" + mkdir -p build_whl/${branch_name} && cp build/python/dist/paddlepaddle_gpu-0.0.0-cp37-cp37m-linux_x86_64.whl build_whl/${branch_name}/ +} + +function build_whl { + for branch_name in "develop" "test" + do + git checkout ${branch_name} + [ $? -ne 0 ] && LOG "[FATAL] Missing branch ${branch_name}." && exit 7 + LOG "[INFO] Now branch name is ${branch_name}." + compile_install_paddlepaddle + done +} + +# run op benchmark test +function run_op_benchmark_test { + [ ${#BENCHMARK_OP_MAP[*]} -eq 0 ] && return + local logs_dir op_name branch_name api_info_file + [ -z "$VISIBLE_DEVICES" ] && export VISIBLE_DEVICES=0 + [ "$BENCHMARK_PRINT_FAIL_LOG" != "1" ] && export BENCHMARK_PRINT_FAIL_LOG=1 + api_info_file="$(pwd)/api_info.txt" + [ -f "$api_info_file" ] && rm -f $api_info_file + for api_info in ${BENCHMARK_OP_MAP[*]} + do + echo "$api_info" >> $api_info_file + done + # install tensorflow for testing accuary + pip install tensorflow==2.3.0 tensorflow-probability + for branch_name in "develop" "test" + do + LOG "[INFO] Uninstall Paddle ..." + pip uninstall -y paddlepaddle paddlepaddle_gpu + LOG "[INFO] Install Paddle ..." + pip install build_whl/${branch_name}/paddlepaddle_gpu-0.0.0-cp37-cp37m-linux_x86_64.whl + logs_dir="$(pwd)/logs-${branch_name}" + [ -d $logs_dir ] && rm -rf $logs_dir/* || mkdir -p $logs_dir + pushd benchmark/api > /dev/null + bash deploy/main_control.sh tests_v2 \ + tests_v2/configs \ + $logs_dir \ + $VISIBLE_DEVICES \ + "gpu" \ + "both" \ + $api_info_file \ + "paddle" + popd > /dev/null + done +} + +# check benchmark result +function check_op_benchmark_result { + local logs_dir api_info_file check_status_code + # default 3 times + [ -z "${RETRY_TIMES}" ] && RETRY_TIMES=3 + logs_dir=$(pwd)/logs-test_pr + api_info_file=$(pwd)/api_info.txt + for retry_time in $(seq 0 ${RETRY_TIMES}) + do + if [ $retry_time -gt 0 ]; then + # run op benchmark speed test + # there is no need to recompile and install paddle + LOG "[INFO] retry ${retry_time} times ..." + pushd benchmark/api > /dev/null + bash deploy/main_control.sh tests_v2 \ + tests_v2/configs \ + ${logs_dir} \ + $VISIBLE_DEVICES \ + "gpu" \ + "speed" \ + ${api_info_file} \ + "paddle" + popd > /dev/null + fi + # check current result and update the file to benchmark test + python ${PADDLE_ROOT}/tools/check_op_benchmark_result.py \ + --develop_logs_dir $(pwd)/logs-develop \ + --pr_logs_dir $(pwd)/logs-test_pr \ + --api_info_file ${api_info_file} + check_status_code=$? + # TODO(Avin0323): retry only if the performance check fails + [ $check_status_code -eq 0 ] && break + done + return $check_status_code +} + +# diff benchmakr result and miss op +function summary_problems { + local op_name exit_code + exit_code=0 + if [ ${#BENCHMARK_OP_MAP[*]} -ne 0 ] + then + check_op_benchmark_result + exit_code=$? + fi + for op_name in ${!CHANGE_OP_MAP[@]} + do + if [ -z "${BENCHMARK_OP_MAP[$op_name]}" ] + then + exit_code=8 + LOG "[ERROR] Missing test script of \"${op_name}\"(${CHANGE_OP_MAP[$op_name]}) in benchmark." + fi + done + if [ $exit_code -ne 0 ]; then + LOG "[INFO] See https://github.com/PaddlePaddle/Paddle/wiki/PR-CI-OP-benchmark-Manual for details." + LOG "[INFO] Or you can apply for one RD (Avin0323(Recommend), Xreki, luotao1) approval to pass this PR." + exit $exit_code + fi +} + + +function cpu_op_benchmark { + LOG "[INFO] Start run op benchmark cpu test ..." + load_CHANGE_OP_FILES + build_whl + LOG "[INFO] Op benchmark run success and no error!" + exit 0 +} + + +function gpu_op_benchmark { + LOG "[INFO] Start run op benchmark gpu test ..." + load_CHANGE_OP_FILES + prepare_benchmark_environment + load_CHANGE_OP_MAP + load_BENCHMARK_OP_MAP + run_op_benchmark_test + summary_problems + LOG "[INFO] Op benchmark run success and no error!" + exit 0 +} + + +case $1 in + cpu_op_benchmark) + cpu_op_benchmark + ;; + gpu_op_benchmark) + gpu_op_benchmark + ;; + *) + cpu_op_benchmark + gpu_op_benchmark + ;; +esac -- GitLab