Op-benchmark CI cpu and gpu (#34631)

* notest;pm-op-benchmark * notest;pm-op-benchmark * notest;pm-op-benchmark * notest;pm-op-benchmark * notest;pm-op-benchmark * notest;pm-op-benchmark * notest;test=op_benchmark * notest;test=op_benchmark * notest;op_benchmark * notest;op_benchmark * notest;op_benchmark * notest;op_benchmark * notest;op_benchmark * notest;op_benchmark * notest;test=op_benchmark * notest;op_benchmark * notest;op_benchmark * notest;op_benchmark * fix * fix

Op-benchmark CI cpu and gpu (#34631)
* notest;pm-op-benchmark * notest;pm-op-benchmark * notest;pm-op-benchmark * notest;pm-op-benchmark * notest;pm-op-benchmark * notest;pm-op-benchmark * notest;test=op_benchmark * notest;test=op_benchmark * notest;op_benchmark * notest;op_benchmark * notest;op_benchmark * notest;op_benchmark * notest;op_benchmark * notest;op_benchmark * notest;test=op_benchmark * notest;op_benchmark * notest;op_benchmark * notest;op_benchmark * fix * fix
8fb17fc7 · tianshuo78520a · GitHub · 7316018d · 8fb17fc7
隐藏空白更改
内联并排

Showing with 314 addition and 0 deletion

tools/test_ci_op_benchmark.sh tools/test_ci_op_benchmark.sh +314 -0

未找到文件。
--- a/tools/test_ci_op_benchmark.sh
+++ b/tools/test_ci_op_benchmark.sh
+#!/bin/bash
+
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set +ex
+
+[ -z "$PADDLE_ROOT" ] && PADDLE_ROOT=$(cd $(dirname ${BASH_SOURCE[0]})/.. && pwd)
+
+# PR modify op source files
+CHANGE_OP_FILES=()
+
+# ops that will run benchmark test
+declare -A CHANGE_OP_MAP
+
+# ops that benchmark repo has
+declare -A BENCHMARK_OP_MAP
+
+# searched header files
+declare -A INCLUDE_SEARCH_MAP
+
+function LOG {
+  echo "[$0:${BASH_LINENO[0]}] $*" >&2
+}
+
+# Limit cu file directory
+function match_cu_file_directory {
+  local sub_dir cu_file_dir
+  cu_file_dir=$(dirname ${1})
+  for sub_dir in "" "/elementwise" "/reduce_ops"
+  do
+    [ "${cu_file_dir}" == "paddle/fluid/operators${sub_dir}" ] && return 0
+  done
+  return 1
+}
+
+# Load op files by header file
+function load_CHANGE_OP_FILES_by_header_file {
+  local change_file
+  for change_file in $(grep -rl "${1}" paddle/fluid/operators)
+  do
+    if [[ "$change_file" =~ "_op.cu" ]]
+    then
+      # match cu file directory limit
+      match_cu_file_directory $change_file || continue
+      LOG "[INFO] Found \"${1}\" include by \"${change_file}\"."
+      CHANGE_OP_FILES[${#CHANGE_OP_FILES[@]}]="$change_file"
+    elif [[ "$change_file" =~ ".h" ]]
+    then
+      [ -n "${INCLUDE_SEARCH_MAP[$change_file]}" ] && continue
+      LOG "[INFO] Found \"${1}\" include by \"${change_file}\", keep searching."
+      INCLUDE_SEARCH_MAP[$change_file]="searched"
+      load_CHANGE_OP_FILES_by_header_file $change_file
+    fi
+  done
+}
+
+# Load op files that PR changes
+function load_CHANGE_OP_FILES {
+  local sub_dir change_file
+  # TODO(Avin0323): Need to filter the files added by the new OP.
+  for change_file in $(git diff --name-only origin/develop)
+  do
+    # match directory limit
+    [[ "$change_file" =~ "paddle/fluid/operators/" ]] || continue
+    # match file name limit
+    if [[ "$change_file" =~ "_op.cu" ]]
+    then
+      # match cu file directory limit
+      match_cu_file_directory $change_file || continue
+      LOG "[INFO] Found \"${change_file}\" changed."
+      CHANGE_OP_FILES[${#CHANGE_OP_FILES[@]}]="$change_file"
+    elif [[ "$change_file" =~ ".h" ]]
+    then
+      LOG "[INFO] Found \"${change_file}\" changed, keep searching."
+      INCLUDE_SEARCH_MAP[${change_file}]="searched"
+      load_CHANGE_OP_FILES_by_header_file $change_file
+    fi
+  done
+  [ ${#CHANGE_OP_FILES[@]} -eq 0 ] && LOG "[INFO] No op to test, skip this ci." && \
+  echo "cpu_benchmark=ON" >${cfs_dir}/op_benchmark/${AGILE_PULL_ID}/${AGILE_REVISION}/pass.txt && \
+  exit 0
+}
+
+# Clone benchmark repo
+function prepare_benchmark_environment {
+  LOG "[INFO] Clone benchmark repo ..."
+  git clone https://github.com/PaddlePaddle/benchmark.git
+  [ $? -ne 0 ] && LOG "[FATAL] Clone benchmark repo fail." && exit -1
+  LOG "[INFO] Collect api info ..."
+  python benchmark/api/deploy/collect_api_info.py \
+      --test_module_name tests_v2                 \
+      --info_file api_info.txt >& 2
+  [ $? -ne 0 ] && LOG "[FATAL] Collect api info fail." && exit -1
+  [ ! -f benchmark/ci/scripts/op_benchmark.config ] && LOG "[FATAL] Missing op_benchmark.config!" && exit -1
+}
+
+# Load unique op name from CHANGE_OP_FILES
+function load_CHANGE_OP_MAP {
+  local op_name change_file change_file_name
+  source benchmark/ci/scripts/op_benchmark.config
+  for change_file in ${CHANGE_OP_FILES[@]}
+  do
+    change_file_name=${change_file#*paddle/fluid/operators/}
+    if [ -n "${PADDLE_FILENAME_OP_MAP[$change_file_name]}" ]
+    then
+      for op_name in ${PADDLE_FILENAME_OP_MAP[$change_file_name]}
+      do
+        LOG "[INFO] Load op: \"${op_name}\"."
+        CHANGE_OP_MAP[${op_name}]="$change_file"
+      done
+    else
+      op_name=${change_file_name##*/}
+      op_name=${op_name%_cudnn_op*}
+      op_name=${op_name%_op*}
+      [ -n "${SKIP_OP_MAP[$op_name]}" ] && continue
+      LOG "[INFO] Load op: \"${op_name}\"."
+      CHANGE_OP_MAP[${op_name}]="$change_file"
+    fi
+  done
+}
+
+# Load ops that will run benchmark test
+function load_BENCHMARK_OP_MAP {
+  local line op_name api_name
+  source benchmark/ci/scripts/op_benchmark.config
+  for line in $(cat api_info.txt)
+  do
+    api_name=${line%%,*}
+    if [ -n "${BENCHMARK_APINAME_OP_MAP[$api_name]}" ]
+    then
+      op_name=${BENCHMARK_APINAME_OP_MAP[$api_name]}
+    else
+      op_name=$api_name
+    fi
+    if [ -n "${CHANGE_OP_MAP[$op_name]}" ]
+    then
+      LOG "[INFO] Load benchmark settings with op \"${op_name}\"."
+      BENCHMARK_OP_MAP[$op_name]=$line
+    fi
+  done
+}
+
+# compile and install paddlepaddle
+function compile_install_paddlepaddle {
+  LOG "[INFO] Compiling install package ..."
+  export WITH_GPU=ON
+  export WITH_AVX=ON
+  export WITH_MKL=ON
+  export RUN_TEST=OFF
+  export WITH_PYTHON=ON
+  export WITH_TESTING=OFF
+  export BUILD_TYPE=Release
+  export CUDA_ARCH_NAME=${CUDA_ARCH_NAME:-Auto}
+  export WITH_DISTRIBUTE=OFF
+  export CMAKE_BUILD_TYPE=Release
+  [ -d build ] && rm -rf build
+  bash paddle/scripts/paddle_build.sh build_only $(nproc)
+  [ $? -ne 0 ] && LOG "[FATAL] compile fail." && exit 7
+  LOG "[INFO] Build fineshed"
+  mkdir -p build_whl/${branch_name} && cp build/python/dist/paddlepaddle_gpu-0.0.0-cp37-cp37m-linux_x86_64.whl build_whl/${branch_name}/
+}
+
+function build_whl {
+  for branch_name in "develop" "test"
+  do
+    git checkout ${branch_name}
+    [ $? -ne 0 ] && LOG "[FATAL] Missing branch ${branch_name}." && exit 7
+    LOG "[INFO] Now branch name is ${branch_name}."
+    compile_install_paddlepaddle
+  done
+}
+
+# run op benchmark test
+function run_op_benchmark_test {
+  [ ${#BENCHMARK_OP_MAP[*]} -eq 0 ] && return
+  local logs_dir op_name branch_name api_info_file
+  [ -z "$VISIBLE_DEVICES" ] && export VISIBLE_DEVICES=0
+  [ "$BENCHMARK_PRINT_FAIL_LOG" != "1" ] && export BENCHMARK_PRINT_FAIL_LOG=1
+  api_info_file="$(pwd)/api_info.txt"
+  [ -f "$api_info_file" ] && rm -f $api_info_file
+  for api_info in ${BENCHMARK_OP_MAP[*]}
+  do
+    echo "$api_info" >> $api_info_file
+  done
+  # install tensorflow for testing accuary
+  pip install tensorflow==2.3.0 tensorflow-probability
+  for branch_name in "develop" "test"
+  do
+    LOG "[INFO] Uninstall Paddle ..."
+    pip uninstall -y paddlepaddle paddlepaddle_gpu
+    LOG "[INFO] Install Paddle ..."
+    pip install build_whl/${branch_name}/paddlepaddle_gpu-0.0.0-cp37-cp37m-linux_x86_64.whl
+    logs_dir="$(pwd)/logs-${branch_name}"
+    [ -d $logs_dir ] && rm -rf $logs_dir/* || mkdir -p $logs_dir
+    pushd benchmark/api > /dev/null
+    bash deploy/main_control.sh tests_v2 \
+                                tests_v2/configs \
+                                $logs_dir \
+                                $VISIBLE_DEVICES \
+                                "gpu" \
+                                "both" \
+                                $api_info_file \
+                                "paddle"
+    popd > /dev/null
+  done
+}
+
+# check benchmark result
+function check_op_benchmark_result {
+  local logs_dir api_info_file check_status_code
+  # default 3 times
+  [ -z "${RETRY_TIMES}" ] && RETRY_TIMES=3
+  logs_dir=$(pwd)/logs-test_pr
+  api_info_file=$(pwd)/api_info.txt
+  for retry_time in $(seq 0 ${RETRY_TIMES})
+  do
+    if [ $retry_time -gt 0 ]; then
+      # run op benchmark speed test
+      # there is no need to recompile and install paddle
+      LOG "[INFO] retry ${retry_time} times ..."
+      pushd benchmark/api > /dev/null
+      bash deploy/main_control.sh tests_v2 \
+                                  tests_v2/configs \
+                                  ${logs_dir} \
+                                  $VISIBLE_DEVICES \
+                                  "gpu" \
+                                  "speed" \
+                                  ${api_info_file} \
+                                  "paddle"
+      popd > /dev/null
+    fi
+    # check current result and update the file to benchmark test
+    python ${PADDLE_ROOT}/tools/check_op_benchmark_result.py \
+        --develop_logs_dir $(pwd)/logs-develop \
+        --pr_logs_dir $(pwd)/logs-test_pr \
+        --api_info_file ${api_info_file}
+    check_status_code=$?
+    # TODO(Avin0323): retry only if the performance check fails
+    [ $check_status_code -eq 0 ] && break
+  done
+  return $check_status_code
+}
+
+# diff benchmakr result and miss op
+function summary_problems {
+  local op_name exit_code
+  exit_code=0
+  if [ ${#BENCHMARK_OP_MAP[*]} -ne 0 ]
+  then
+    check_op_benchmark_result
+    exit_code=$?
+  fi
+  for op_name in ${!CHANGE_OP_MAP[@]}
+  do
+    if [ -z "${BENCHMARK_OP_MAP[$op_name]}" ]
+    then
+      exit_code=8
+      LOG "[ERROR] Missing test script of \"${op_name}\"(${CHANGE_OP_MAP[$op_name]}) in benchmark."
+    fi
+  done
+  if [ $exit_code -ne 0 ]; then
+    LOG "[INFO] See https://github.com/PaddlePaddle/Paddle/wiki/PR-CI-OP-benchmark-Manual for details."
+    LOG "[INFO] Or you can apply for one RD (Avin0323(Recommend), Xreki, luotao1) approval to pass this PR."
+    exit $exit_code
+  fi
+}
+
+
+function cpu_op_benchmark {
+  LOG "[INFO] Start run op benchmark cpu test ..."
+  load_CHANGE_OP_FILES
+  build_whl
+  LOG "[INFO] Op benchmark run success and no error!"
+  exit 0
+}
+
+
+function gpu_op_benchmark {
+  LOG "[INFO] Start run op benchmark gpu test ..."
+  load_CHANGE_OP_FILES
+  prepare_benchmark_environment
+  load_CHANGE_OP_MAP
+  load_BENCHMARK_OP_MAP
+  run_op_benchmark_test
+  summary_problems
+  LOG "[INFO] Op benchmark run success and no error!"
+  exit 0
+}
+
+
+case $1 in
+  cpu_op_benchmark)
+    cpu_op_benchmark
+  ;;
+  gpu_op_benchmark)
+    gpu_op_benchmark
+  ;;
+  *)
+    cpu_op_benchmark
+    gpu_op_benchmark
+  ;;
+esac