diff --git a/tools/check_op_benchmark_result.py b/tools/check_op_benchmark_result.py
index 413424bedf4d3d4e5962eac63b428b2bf989daba..43ba2fc097b0b1872b8134ced6d8b1b5fd3ff4a1 100644
--- a/tools/check_op_benchmark_result.py
+++ b/tools/check_op_benchmark_result.py
@@ -55,10 +55,47 @@ def load_benchmark_result_from_logs_dir(logs_dir):
     return dict(map(result_lambda, os.listdir(logs_dir)))
 
 
-def compare_benchmark_result(develop_result, pr_result):
-    """Compare the differences between devlop and pr.
+def check_speed_result(case_name, develop_data, pr_data, pr_result):
+    """Check speed differences between develop and pr.
+    """
+    pr_gpu_time = pr_data.get("gpu_time")
+    develop_gpu_time = develop_data.get("gpu_time")
+    gpu_time_diff = (pr_gpu_time - develop_gpu_time) / develop_gpu_time
+
+    pr_total_time = pr_data.get("total")
+    develop_total_time = develop_data.get("total")
+    total_time_diff = (pr_total_time - develop_total_time) / develop_total_time
+
+    logging.info("------ OP: %s ------" % case_name)
+    logging.info("GPU time change: %.5f%% (develop: %.7f -> PR: %.7f)" %
+                 (gpu_time_diff * 100, develop_gpu_time, pr_gpu_time))
+    logging.info("Total time change: %.5f%% (develop: %.7f -> PR: %.7f)" %
+                 (total_time_diff * 100, develop_total_time, pr_total_time))
+    logging.info("backward: %s" % pr_result.get("backward"))
+    logging.info("parameters:")
+    for line in pr_result.get("parameters").strip().split("\n"):
+        logging.info("\t%s" % line)
+
+    return gpu_time_diff > 0.05
+
+
+def check_accuracy_result(case_name, pr_result):
+    """Check accuracy result.
+    """
+    logging.info("------ OP: %s ------" % case_name)
+    logging.info("Accuracy diff: %s" % pr_result.get("diff"))
+    logging.info("backward: %s" % pr_result.get("backward"))
+    logging.info("parameters:")
+    for line in pr_result.get("parameters").strip().split("\n"):
+        logging.info("\t%s" % line)
+
+    return not pr_result.get("consistent")
+
+
+def compare_benchmark_result(case_name, develop_result, pr_result,
+                             check_results):
+    """Compare the differences between develop and pr.
     """
-    status = True
 
     develop_speed = develop_result.get("speed")
     pr_speed = pr_result.get("speed")
@@ -66,39 +103,27 @@ def compare_benchmark_result(develop_result, pr_result):
         pr_speed), "The types of comparison results need to be consistent."
 
     if isinstance(develop_speed, dict) and isinstance(pr_speed, dict):
-        pr_gpu_time = pr_speed.get("gpu_time")
-        develop_gpu_time = develop_speed.get("gpu_time")
-        gpu_time_diff = (pr_gpu_time - develop_gpu_time) / develop_gpu_time
-
-        pr_total_time = pr_speed.get("total")
-        develop_total_time = develop_speed.get("total")
-        total_time_diff = (
-            pr_total_time - develop_total_time) / develop_total_time
-
-        if gpu_time_diff > 0.05:
-            status = False
-
-        # TODO(Avin0323): Print all info for making relu of alart.
- logging.info("------ OP: %s ------" % pr_result.get("name")) - logging.info("GPU time change: %.5f%% (develop: %.7f -> PR: %.7f)" % - (gpu_time_diff * 100, develop_gpu_time, pr_gpu_time)) - logging.info("Total time change: %.5f%% (develop: %.7f -> PR: %.7f)" % - (total_time_diff * 100, develop_total_time, pr_total_time)) - logging.info("backward: %s" % pr_result.get("backward")) - logging.info("parameters:") - for line in pr_result.get("parameters").strip().split("\n"): - logging.info("\t%s" % line) + if check_speed_result(case_name, develop_speed, pr_speed, pr_result): + check_results["speed"].append(case_name) else: - if not pr_result.get("consistent"): - status = False - logging.info("------ OP: %s ------" % pr_result.get("name")) - logging.info("Accaury diff: %s" % pr_result.get("diff")) - logging.info("backward: %s" % pr_result.get("backward")) - logging.info("parameters:") - for line in pr_result.get("parameters").strip().split("\n"): - logging.info("\t%s" % line) + if check_accuracy_result(case_name, pr_result): + check_results["accuracy"].append(case_name) - return status + +def summary_results(check_results): + """Summary results and return exit code. + """ + for case_name in check_results["speed"]: + logging.error("Check speed result with case \"%s\" failed." % case_name) + + for case_name in check_results["accuracy"]: + logging.error("Check accuracy result with case \"%s\" failed." % + case_name) + + if len(check_results["speed"]) or len(check_results["accuracy"]): + return 8 + else: + return 0 if __name__ == "__main__": @@ -121,7 +146,7 @@ if __name__ == "__main__": help="Specify the benchmark result directory of PR branch.") args = parser.parse_args() - exit_code = 0 + check_results = dict(accuracy=list(), speed=list()) develop_result_dict = load_benchmark_result_from_logs_dir( args.develop_logs_dir) @@ -132,7 +157,8 @@ if __name__ == "__main__": pr_result = parse_log_file(os.path.join(args.pr_logs_dir, log_file)) if develop_result is None or pr_result is None: continue - if not compare_benchmark_result(develop_result, pr_result): - exit_code = 8 + case_name = log_file.split("-")[0] + compare_benchmark_result(case_name, develop_result, pr_result, + check_results) - exit(exit_code) + exit(summary_results(check_results)) diff --git a/tools/test_op_benchmark.sh b/tools/test_op_benchmark.sh index afe697ba98db919b687ff0bd6ae518f8200f4c2b..0932e37879db8d70adb25151befd26b8e74a3944 100644 --- a/tools/test_op_benchmark.sh +++ b/tools/test_op_benchmark.sh @@ -27,6 +27,9 @@ declare -A CHANGE_OP_MAP # ops that benchmark repo has declare -A BENCHMARK_OP_MAP +# searched header files +declare -A INCLUDE_SEARCH_MAP + function LOG { echo "[$0:${BASH_LINENO[0]}] $*" >&2 } @@ -55,7 +58,9 @@ function load_CHANGE_OP_FILES_by_header_file { CHANGE_OP_FILES[${#CHANGE_OP_FILES[@]}]="$change_file" elif [[ "$change_file" =~ ".h" ]] then + [ -n "${INCLUDE_SEARCH_MAP[$change_file]}" ] && continue LOG "[INFO] Found \"${1}\" include by \"${change_file}\", keep searching." + INCLUDE_SEARCH_MAP[$change_file]="searched" load_CHANGE_OP_FILES_by_header_file $change_file fi done @@ -79,6 +84,7 @@ function load_CHANGE_OP_FILES { elif [[ "$change_file" =~ ".h" ]] then LOG "[INFO] Found \"${change_file}\" changed, keep searching." 
+      INCLUDE_SEARCH_MAP[${change_file}]="searched"
       load_CHANGE_OP_FILES_by_header_file $change_file
     fi
   done
@@ -218,10 +224,14 @@ function summary_problems {
     if [ -z "${BENCHMARK_OP_MAP[$op_name]}" ]
     then
       exit_code=8
-      LOG "[WARNING] Missing test script of \"${op_name}\"(${CHANGE_OP_MAP[$op_name]}) in benchmark."
+      LOG "[ERROR] Missing test script of \"${op_name}\"(${CHANGE_OP_MAP[$op_name]}) in benchmark."
     fi
   done
-  [ $exit_code -ne 0 ] && exit $exit_code
+  if [ $exit_code -ne 0 ]; then
+    LOG "[INFO] See https://github.com/PaddlePaddle/Paddle/wiki/PR-CI-OP-benchmark-Manual for details."
+    LOG "[INFO] Or you can apply for approval from one RD (GaoWei8 (recommended), Xreki, luotao1) to pass this PR."
+    exit $exit_code
+  fi
 }
 
 function main {
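In the Python change, the per-case boolean return is replaced by a check_results dict that accumulates the names of failing cases; summary_results then turns any accumulated failure into exit code 8 for the CI. Below is a minimal sketch of that contract, assuming hypothetical case names ("abs", "conv2d") and simulating one failure of each kind:

    # Sketch of the failure-accumulation contract; the case names are
    # hypothetical, and only the check_results / summary_results shape
    # mirrors the patch.
    import logging
    import sys

    logging.basicConfig(level=logging.INFO)

    check_results = dict(accuracy=list(), speed=list())

    def summary_results(check_results):
        """Log every failing case and return the CI exit code."""
        for case_name in check_results["speed"]:
            logging.error('Check speed result with case "%s" failed.' % case_name)
        for case_name in check_results["accuracy"]:
            logging.error('Check accuracy result with case "%s" failed.' % case_name)
        # Exit code 8 tells the CI that at least one case regressed.
        return 8 if check_results["speed"] or check_results["accuracy"] else 0

    # Simulate a >5% GPU-time regression and an accuracy mismatch.
    check_results["speed"].append("abs")
    check_results["accuracy"].append("conv2d")
    sys.exit(summary_results(check_results))

Collecting names instead of flipping a single flag lets the final summary list every failing case of the run in one place.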
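On the shell side, INCLUDE_SEARCH_MAP memoizes header files that have already been searched, so the recursive include scan never revisits a header and cannot recurse forever on circular includes. A standalone sketch of the same memoization pattern, with hypothetical header names and the include lookup stubbed out:

    #!/bin/bash
    # Memoize visited headers in an associative array, as the patch does
    # with INCLUDE_SEARCH_MAP; the header names here are hypothetical.
    declare -A INCLUDE_SEARCH_MAP

    function search_header {
      local header="$1"
      # Already searched: skip to avoid rescans and infinite recursion.
      [ -n "${INCLUDE_SEARCH_MAP[$header]}" ] && return
      INCLUDE_SEARCH_MAP[$header]="searched"
      echo "searching $header"
      # A real implementation would recurse into every file that
      # includes this header (e.g. found via grep -rl).
    }

    search_header "activation_op.h"
    search_header "activation_op.h"  # no-op: already memoized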