未验证 提交 c91bb084 编写于 作者: L LoveAn 提交者: GitHub

Add op benchmark ci pipeline in Paddle repo (#28692)

上级 4b05a8be
...@@ -1609,6 +1609,10 @@ function example() { ...@@ -1609,6 +1609,10 @@ function example() {
fi fi
} }
# Entry point of the op benchmark CI: delegate to the standalone script
# shipped in tools/ so the pipeline logic lives outside paddle_build.sh.
function test_op_benchmark() {
    bash ${PADDLE_ROOT}/tools/test_op_benchmark.sh
}
function summary_check_problems() { function summary_check_problems() {
set +x set +x
local check_style_code=$1 local check_style_code=$1
...@@ -1784,6 +1788,9 @@ function main() { ...@@ -1784,6 +1788,9 @@ function main() {
api_example) api_example)
example example
;; ;;
test_op_benchmark)
test_op_benchmark
;;
*) *)
print_usage print_usage
exit 1 exit 1
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import json
import logging
import argparse
def check_path_exists(path):
    """Assert that a file or directory exists.

    Args:
        path (str): filesystem path to check.

    Raises:
        AssertionError: if the path does not exist.
    """
    # Raise explicitly instead of using the `assert` statement so the check
    # still runs when Python is started with -O (which strips asserts).
    if not os.path.exists(path):
        raise AssertionError("%s does not exist." % path)
def parse_log_file(log_file):
    """Load one case result from a log file.

    The benchmark driver emits its JSON summary near the end of the log,
    so scan the lines bottom-up and return the first one that parses.

    Args:
        log_file (str): path to a single benchmark-case log file.

    Returns:
        dict: the decoded JSON result of the case.

    Raises:
        AssertionError: if the file is missing or contains no JSON line.
    """
    check_path_exists(log_file)
    result = None
    with open(log_file) as f:
        for line in reversed(f.read().strip().split('\n')):
            try:
                result = json.loads(line)
                return result
            except ValueError:
                pass  # ordinary log line, keep scanning upward
    # Reaching here means no line parsed as JSON; fail loudly.
    # (PEP 8: compare to None with `is`/`is not`, never `!=`.)
    assert result is not None, "Parse log file fail!"
def load_benchmark_result_from_logs_dir(logs_dir):
    """Load every case result found in a logs directory.

    Args:
        logs_dir (str): directory whose files are per-case benchmark logs.

    Returns:
        dict: mapping of log file name -> parsed result dict.
    """
    check_path_exists(logs_dir)
    # A dict comprehension replaces the original name-bound lambdas
    # (PEP 8 E731: do not assign a lambda to a name).
    return {
        log_file: parse_log_file(os.path.join(logs_dir, log_file))
        for log_file in os.listdir(logs_dir)
    }
def compare_benchmark_result(develop_result, pr_result):
    """Compare the differences between develop and PR branch results.

    For speed-type results, log the relative GPU-time and total-time
    change. Accuracy-type results are not compared yet (TODO below).

    Args:
        develop_result (dict): case result parsed from the develop log.
        pr_result (dict): case result parsed from the PR log.

    Returns:
        bool: always True for now; no pass/fail threshold is applied yet.

    Raises:
        AssertionError: if the two results are of different types.
    """
    develop_speed = develop_result.get("speed")
    pr_speed = pr_result.get("speed")
    assert type(develop_speed) == type(
        pr_speed), "The types of comparison results need to be consistent."

    if isinstance(develop_speed, dict) and isinstance(pr_speed, dict):
        pr_gpu_time = pr_speed.get("gpu_time")
        develop_gpu_time = develop_speed.get("gpu_time")
        # NOTE(review): raises ZeroDivisionError when the develop-branch
        # time is 0 — presumably the driver never reports 0; verify.
        gpu_time_diff = (pr_gpu_time - develop_gpu_time) / develop_gpu_time

        pr_total_time = pr_speed.get("total")
        develop_total_time = develop_speed.get("total")
        total_time_diff = (
            pr_total_time - develop_total_time) / develop_total_time

        # TODO(Avin0323): Print all info for making rule of alert.
        # Use lazy %-style logging args so formatting is skipped when the
        # INFO level is disabled.
        logging.info("------ OP: %s ------", pr_result.get("name"))
        logging.info("GPU time change: %.5f%% (develop: %.7f -> PR: %.7f)",
                     gpu_time_diff * 100, develop_gpu_time, pr_gpu_time)
        logging.info("Total time change: %.5f%% (develop: %.7f -> PR: %.7f)",
                     total_time_diff * 100, develop_total_time, pr_total_time)
        logging.info("backward: %s", pr_result.get("backward"))
        logging.info("parameters:")
        for line in pr_result.get("parameters").strip().split("\n"):
            logging.info("\t%s", line)
    else:
        # TODO(Avin0323): Accuracy need to add.
        pass

    return True
if __name__ == "__main__":
    """Load result from log directories and compare the differences.
    """
    # Log with file/line prefixes so CI console output can be traced
    # back to this script.
    logging.basicConfig(
        level=logging.INFO,
        format="[%(pathname)s:%(lineno)d] [%(levelname)s] %(message)s")
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--develop_logs_dir",
        type=str,
        required=True,
        help="Specify the benchmark result directory of develop branch.")
    parser.add_argument(
        "--pr_logs_dir",
        type=str,
        required=True,
        help="Specify the benchmark result directory of PR branch.")
    args = parser.parse_args()
    # Preload all develop-branch results keyed by log file name so each
    # PR log can be matched by its identical file name.
    develop_result_dict = load_benchmark_result_from_logs_dir(
        args.develop_logs_dir)
    check_path_exists(args.pr_logs_dir)
    for log_file in os.listdir(args.pr_logs_dir):
        develop_result = develop_result_dict.get(log_file)
        pr_result = parse_log_file(os.path.join(args.pr_logs_dir, log_file))
        # Skip cases missing on either branch — nothing to compare.
        if develop_result is None or pr_result is None:
            continue
        compare_benchmark_result(develop_result, pr_result)
#!/bin/bash
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set +ex

# Resolve the Paddle repo root when the caller did not export it;
# this script lives in <repo>/tools/, so the root is one level up.
[ -z "$PADDLE_ROOT" ] && PADDLE_ROOT=$(cd $(dirname ${BASH_SOURCE[0]})/.. && pwd)

# Paddle repo file name -> op name
# Files that implement several ops are listed explicitly; any other file
# falls back to the "<op>_op.<ext>" naming rule in load_CHANGE_OP_MAP.
declare -A PADDLE_FILENAME_OP_MAP
PADDLE_FILENAME_OP_MAP=(
  ["arg_min_max_op_base.h"]="arg_min arg_max"
  ["arg_min_max_op_base.cu.h"]="arg_min arg_max"
  ["activation_op.cu"]="leaky_relu elu sqrt square pow exp abs log"
  ["activation_op.h"]="relu leaky_relu elu sqrt square pow exp abs log"
  ["activation_op.cc"]="relu leaky_relu elu sqrt square pow exp abs log"
)

# Benchmark repo name -> op name
# Translates benchmark api names that differ from paddle op names.
declare -A BENCHMARK_APINAME_OP_MAP
BENCHMARK_APINAME_OP_MAP=(
  ["argmin"]="arg_min"
  ["argmax"]="arg_max"
)

# ops that will run benchmark test (filled by load_CHANGE_OP_MAP)
declare -A CHANGE_OP_MAP
# ops that benchmark repo has (filled by load_BENCHMARK_OP_MAP)
declare -A BENCHMARK_OP_MAP
# ops that benchmark repo missing
declare -A BENCHMARK_MISS_OP_MAP
# Write a log line to stderr, prefixed with the script name and the
# caller's line number so every message can be traced to its origin.
function LOG {
    printf '[%s:%s] %s\n' "$0" "${BASH_LINENO[0]}" "$*" >&2
}
# Load ops that will run benchmark test
# Derives the set of changed ops from the files this PR touches relative
# to origin/develop, and exits successfully when nothing relevant changed.
function load_CHANGE_OP_MAP {
    local op_name change_file change_file_name
    # NOTE(review): word-splitting of $(git diff ...) assumes paths
    # contain no whitespace — true for this repo's layout.
    for change_file in $(git diff --name-only origin/develop)
    do
        # match directory limit: only operator implementation files count
        [[ "$change_file" =~ "paddle/fluid/operators/" ]] || continue
        LOG "[INFO] Found \"${change_file}\" changed."
        change_file_name=${change_file#*paddle/fluid/operators/}
        if [ -n "${PADDLE_FILENAME_OP_MAP[$change_file_name]}" ]
        then
            # Files known to implement several ops register each of them.
            for op_name in ${PADDLE_FILENAME_OP_MAP[$change_file_name]}
            do
                LOG "[INFO] Load op: \"${op_name}\"."
                CHANGE_OP_MAP[${op_name}]="dummy"
            done
        else
            # Otherwise assume the "<op>_op.<ext>" naming convention.
            LOG "[INFO] Load op: \"${change_file_name%_op*}\"."
            CHANGE_OP_MAP[${change_file_name%_op*}]="dummy"
        fi
    done
    # No op changed: the whole CI run is a successful no-op.
    [ ${#CHANGE_OP_MAP[*]} -eq 0 ] && LOG "[INFO] No op to test, skip this ci." && exit 0
}
# Clone benchmark repo
# Clones PaddlePaddle/benchmark into the working directory and generates
# api_info.txt, the api -> test-settings list consumed later.
function prepare_benchmark_environment {
    LOG "[INFO] Clone benchmark repo ..."
    git clone https://github.com/PaddlePaddle/benchmark.git
    [ $? -ne 0 ] && LOG "[FATAL] Clone benchmark repo fail." && exit -1
    LOG "[INFO] Collect api info ..."
    # stdout is redirected to stderr so only log output reaches the console.
    python benchmark/api/deploy/collect_api_info.py \
        --test_module_name tests_v2 \
        --info_file api_info.txt >& 2
    [ $? -ne 0 ] && LOG "[FATAL] Collect api info fail." && exit -1
}
# Load ops that will run the benchmark: intersect the benchmark repo's
# api list with the ops changed in this PR (fills BENCHMARK_OP_MAP).
function load_BENCHMARK_OP_MAP {
    local line op_name api_name
    prepare_benchmark_environment
    # Each line of api_info.txt looks like "api_name,<settings...>".
    # NOTE(review): $(cat ...) word-splits, so lines must contain no
    # whitespace — presumably guaranteed by collect_api_info.py; verify.
    for line in $(cat api_info.txt)
    do
        api_name=${line%%,*}
        # Translate benchmark api names to paddle op names where they differ.
        if [ -n "${BENCHMARK_APINAME_OP_MAP[$api_name]}" ]
        then
            op_name=${BENCHMARK_APINAME_OP_MAP[$api_name]}
        else
            op_name=$api_name
        fi
        # Keep only ops that this PR actually changed.
        if [ -n "${CHANGE_OP_MAP[$op_name]}" ]
        then
            LOG "[INFO] Load benchmark settings with op \"${op_name}\"."
            BENCHMARK_OP_MAP[$op_name]=$line
        fi
    done
}
# compile and install paddlepaddle
# Builds the currently checked-out branch with a fixed configuration
# (GPU + MKL release build, python 3.7 ABI, tests off) and installs the
# resulting wheel, replacing any previously installed paddle.
function compile_install_paddlepaddle {
    LOG "[DEBUG] Compiling install package ..."
    export WITH_GPU=ON
    export WITH_AVX=ON
    export WITH_MKL=ON
    export RUN_TEST=OFF
    export WITH_PYTHON=ON
    export WITH_TESTING=OFF
    export BUILD_TYPE=Release
    export WITH_DISTRIBUTE=OFF
    export PYTHON_ABI=cp37-cp37m
    export CMAKE_BUILD_TYPE=Release
    # Always build from a clean tree so both branches compile identically.
    [ -d build ] && rm -rf build
    bash paddle/scripts/paddle_build.sh build
    [ $? -ne 0 ] && LOG "[FATAL] compile fail." && exit 7
    LOG "[DEBUG] Uninstall Paddle ..."
    pip uninstall -y paddlepaddle paddlepaddle_gpu
    LOG "[DEBUG] Install Paddle ..."
    pip install build/python/dist/paddlepaddle_gpu-0.0.0-cp37-cp37m-linux_x86_64.whl
}
# run op benchmark test
# Writes the selected ops' benchmark settings to api_info.txt, then for
# each of the develop and PR branches: checks it out, builds + installs
# paddle, and runs the benchmark driver into logs-<branch>.
function run_op_benchmark_test {
    local logs_dir op_name branch_name api_info api_info_file
    api_info_file="$(pwd)/api_info.txt"
    [ -f "$api_info_file" ] && rm -f "$api_info_file"
    # Emit only the benchmark settings of ops changed in this PR.
    for api_info in ${BENCHMARK_OP_MAP[*]}
    do
        echo "$api_info" >> "$api_info_file"
    done
    for branch_name in "develop" "test_pr"
    do
        git checkout $branch_name
        # Typo fix: error message previously read "Missing branh".
        [ $? -ne 0 ] && LOG "[FATAL] Missing branch ${branch_name}." && exit 7
        LOG "[INFO] Now branch name is ${branch_name}."
        compile_install_paddlepaddle
        logs_dir="$(pwd)/logs-${branch_name}"
        [ -d $logs_dir ] && rm -rf $logs_dir/* || mkdir -p $logs_dir
        [ -z "$VISIBLE_DEVICES" ] && export VISIBLE_DEVICES=0
        pushd benchmark/api > /dev/null
        bash deploy/main_control.sh tests_v2 \
                                    tests_v2/configs \
                                    $logs_dir \
                                    $VISIBLE_DEVICES \
                                    "gpu" \
                                    "speed" \
                                    $api_info_file \
                                    "paddle"
        popd > /dev/null
    done
}
# Diff the benchmark results of the two branches and warn about changed
# ops that have no benchmark coverage.
function summary_problems {
    local op_name
    # The python checker logs per-op GPU/total time changes itself.
    python ${PADDLE_ROOT}/tools/check_op_benchmark_result.py \
        --develop_logs_dir $(pwd)/logs-develop \
        --pr_logs_dir $(pwd)/logs-test_pr
    # Every changed op absent from BENCHMARK_OP_MAP has no test script.
    for op_name in ${!CHANGE_OP_MAP[@]}
    do
        if [ -z "${BENCHMARK_OP_MAP[$op_name]}" ]
        then
            LOG "[WARNING] Missing test script of \"${op_name}\" in benchmark."
        fi
    done
}
# Entry point: discover changed ops, load benchmark coverage, run the
# benchmark on both branches, then summarize diffs and missing coverage.
function main {
    LOG "[INFO] Start run op benchmark test ..."
    load_CHANGE_OP_MAP
    load_BENCHMARK_OP_MAP
    run_op_benchmark_test
    summary_problems
    LOG "[INFO] Op benchmark run success and no error!"
    exit 0
}

main
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册