From 7ba7acd197e62f75e1a6a944a866e6b81e7bce6c Mon Sep 17 00:00:00 2001 From: Liu Xudong <45041955+chalsliu@users.noreply.github.com> Date: Wed, 8 Jan 2020 11:27:19 +0800 Subject: [PATCH] Add coverage tools (#21975) Add coverage data processing tools. --- paddle/scripts/paddle_build.sh | 6 ++ tools/coverage/coverage_diff.py | 112 ++++++++++++++++++++ tools/coverage/coverage_diff_list.py | 53 ++++++++++ tools/coverage/coverage_lines.py | 63 ++++++++++++ tools/coverage/cuda_clean.py | 81 +++++++++++++++ tools/coverage/gcda_clean.py | 83 +++++++++++++++ tools/coverage/paddle_coverage.sh | 147 +++++++++++++++++++++++++++ tools/coverage/pull_request.py | 76 ++++++++++++++ tools/coverage/python_coverage.py | 65 ++++++++++++ 9 files changed, 686 insertions(+) create mode 100644 tools/coverage/coverage_diff.py create mode 100644 tools/coverage/coverage_diff_list.py create mode 100644 tools/coverage/coverage_lines.py create mode 100644 tools/coverage/cuda_clean.py create mode 100644 tools/coverage/gcda_clean.py create mode 100644 tools/coverage/paddle_coverage.sh create mode 100644 tools/coverage/pull_request.py create mode 100644 tools/coverage/python_coverage.py diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index ccb138ec2d..a4b07eb5ad 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -596,6 +596,11 @@ function assert_api_spec_approvals() { } +function check_coverage() { + /bin/bash ${PADDLE_ROOT}/tools/coverage/paddle_coverage.sh +} + + function single_test() { TEST_NAME=$1 if [ -z "${TEST_NAME}" ]; then @@ -1172,6 +1177,7 @@ function main() { build ${parallel_number} enable_unused_var_check parallel_test + check_coverage check_change_of_unittest ${PYTHON_ABI:-""} ;; cicheck_brpc) diff --git a/tools/coverage/coverage_diff.py b/tools/coverage/coverage_diff.py new file mode 100644 index 0000000000..051348d358 --- /dev/null +++ b/tools/coverage/coverage_diff.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python +# -*- 
coding: utf-8 -*-
+"""
+usage: coverage_diff.py info_file diff_file > coverage-diff.info
+"""
+
+import sys
+
+
+def get_diff_file_lines(diff_file):
+    """Collect the added line numbers of every file in a diff dump.
+
+    Args:
+        diff_file (str): Dump in the layout printed by pull_request.py diff:
+            a '+++ FILE' line followed by that file's unified-diff hunks.
+
+    Returns:
+        dict: File name -> list of added line numbers (new-file side).
+    """
+    diff_file_lines = {}
+    current_file = None
+    current_line = -1
+    with open(diff_file) as diff_file:
+        for line in diff_file:
+            # Keep the leading column: ' -x' is context, not a removed line.
+            line = line.rstrip('\r\n')
+            if line.startswith('+++ '):
+                # Slice the prefix; lstrip('+++ ') strips a character set.
+                current_file = line[len('+++ '):]
+                diff_file_lines[current_file] = []
+
+                continue
+
+            elif line.startswith('@@ '):
+                # '@@ -a,b +c,d @@': take c, the first new-file line number.
+                current_line = line.split()[2]
+                current_line = current_line.lstrip('+').split(',')[0]
+                current_line = int(current_line)
+                continue
+            # Removed lines and '\ No newline' markers use no new-file line.
+            elif line.startswith(('-', '\\')):
+                continue
+
+            elif line.startswith('+'):
+                diff_file_lines[current_file].append(current_line)
+            # Added and context lines both advance the new-file line counter.
+            current_line += 1
+
+    return diff_file_lines
+
+
+def get_info_file_lines(info_file, diff_file):
+    """Print an lcov tracefile restricted to the lines added by a diff.
+
+    Args:
+        info_file (str): File generated by lcov.
+        diff_file (str): File to get modified lines.
+
+    Returns:
+        None. The filtered tracefile is written to stdout.
+    """
+    diff_file_lines = get_diff_file_lines(diff_file)
+    current_lines = []
+    current_lf = 0
+    current_lh = 0
+
+    with open(info_file) as info_file:
+        for line in info_file:
+            line = line.strip()
+
+            if line.startswith('SF:'):
+                current_file = line[len('SF:'):]  # slice: lstrip strips chars
+
+                if current_file.startswith('/paddle/'):
+                    current_file = current_file[len('/paddle/'):]
+                # New record: pick its added lines, restart the LF/LH counters.
+                current_lines = set(diff_file_lines.get(current_file, []))
+                current_lf = current_lh = 0
+            elif line.startswith('DA:'):
+                da = line[len('DA:'):].split(',')
+                # Keep a DA record only when its line was added by the diff.
+                if int(da[0]) in current_lines:
+                    current_lf += 1
+
+                    if not line.endswith(',0'):
+                        current_lh += 1
+
+                    print(line)
+
+                continue
+
+            elif line.startswith('LF:'):
+                print('LF:{}'.format(current_lf))
+
+                continue
+
+            elif line.startswith('LH:'):
+                print('LH:{}'.format(current_lh))
+
+                continue
+
+            print(line)
+
+
+if __name__ == '__main__':
+    if len(sys.argv) < 3:
+        sys.exit()
+
+    info_file = sys.argv[1]
+    diff_file = sys.argv[2]
+
+    get_info_file_lines(info_file, diff_file)
diff --git a/tools/coverage/coverage_diff_list.py b/tools/coverage/coverage_diff_list.py
new file mode 100644
index 0000000000..57222da4d9
--- /dev/null
+++ b/tools/coverage/coverage_diff_list.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+usage: coverage_diff_list.py list_file max_rate > coverage-diff-list-90.out
+"""
+
+import sys
+
+
+def filter_by(list_file, max_rate):
+    """Print the rows of a coverage listing whose rate is below max_rate.
+
+    Args:
+        list_file (str): File of list, rows shaped like 'name | rate% ...'.
+        max_rate (float): Rows with a rate >= this value are dropped.
+
+    Returns:
+        None. Each kept row is printed as 'name rate'.
+    """
+    with open(list_file) as list_file:
+        for line in list_file:
+            line = line.strip()
+            split = line.split('|')
+
+            # name
+
+            name = split[0].strip()
+
+            if name.startswith('/paddle/'):
+                name = name[len('/paddle/'):]
+
+            # rate: rows without a numeric rate (headers, rulers) are skipped
+
+            try:
+                rate = split[1].split()[0].strip('%')
+                rate = float(rate)
+
+                if rate >= max_rate:
+                    continue
+            except (IndexError, ValueError):
+                continue
+
+            print('{} {}'.format(name, rate))
+
+
+if __name__ == '__main__':
+    if len(sys.argv) < 3:
+        sys.exit()
+
+    list_file = sys.argv[1]
+    max_rate = float(sys.argv[2])
+
+    filter_by(list_file, max_rate)
diff --git a/tools/coverage/coverage_lines.py b/tools/coverage/coverage_lines.py
new file mode 100644
index 0000000000..eb846cc9f2
--- /dev/null
+++ b/tools/coverage/coverage_lines.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+usage: coverage_lines.py info_file expected
+"""
+import os
+import sys
+
+
+def get_lines(info_file):
+    """Return the fraction of DA (line) records with a positive hit count.
+
+    Args:
+        info_file (str): File generated by lcov.
+
+    Returns:
+        float: Coverage rate in [0, 1]; exits when no DA record is found.
+    """
+    hits = .0
+    total = .0
+    with open(info_file) as info_file:
+        for line in info_file:
+            line = line.strip()
+
+            if not line.startswith('DA:'):
+                continue
+
+            line = line[3:]
+
+            total += 1
+            # The second field of a DA record is the execution count.
+            if int(line.split(',')[1]) > 0:
+                hits += 1
+
+    if total == 0:
+        print('no data found')
+        sys.exit()
+
+    return hits / total
+
+
+if __name__ == '__main__':
+    if len(sys.argv) < 3:
+        sys.exit()
+
+    info_file = sys.argv[1]
+    expected = float(sys.argv[2])
+
+    if not os.path.isfile(info_file):
+        print('info file {} is not exists, ignored'.format(info_file))
+        sys.exit()
+
+    actual = get_lines(info_file)
+    actual = round(actual, 3)
+
+    if actual < expected:
+        print('expected >= {} %, actual {} %, failed'.format(
+            round(expected * 100, 1), round(actual * 100, 1)))
+
+        sys.exit(1)
+
+    print('expected >= {} %, actual {} %, passed'.format(
+        round(expected * 100, 1), round(actual * 100, 1)))
diff --git a/tools/coverage/cuda_clean.py b/tools/coverage/cuda_clean.py
new file mode 100644
index 0000000000..daaf9e694f
--- 
/dev/null
+++ b/tools/coverage/cuda_clean.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+""" usage: cuda_clean.py pull_id. """
+
+import os
+import sys
+
+from github import Github
+
+
+def get_pull(pull_id):
+    """Fetch a PaddlePaddle/Paddle pull request through the GitHub API.
+
+    Args:
+        pull_id (int): Pull id.
+
+    Returns:
+        github.PullRequest.PullRequest: The pull request.
+    """
+    token = os.getenv('GITHUB_API_TOKEN')  # no hard-coded fallback secret
+    github = Github(token, timeout=60)
+    repo = github.get_repo('PaddlePaddle/Paddle')
+    pull = repo.get_pull(pull_id)
+
+    return pull
+
+
+def get_files(pull_id):
+    """Yield the name of every file listed by the pull request.
+
+    Args:
+        pull_id (int): Pull id.
+
+    Returns:
+        iterable: The generator will yield every filename.
+    """
+    pull = get_pull(pull_id)
+
+    for file in pull.get_files():
+        yield file.filename
+
+
+def clean(pull_id):
+    """Delete the .gcda files under /paddle/build of unchanged sources.
+
+    Args:
+        pull_id (int): Pull id.
+
+    Returns:
+        None. The source name of every kept .gcda file is printed.
+    """
+    changed = set()
+
+    for file in get_files(pull_id):
+        # Names are compared exactly as the pull request's file list has them.
+        changed.add(file)
+
+    for parent, dirs, files in os.walk('/paddle/build/'):
+        for gcda in files:
+            if gcda.endswith('.gcda'):
+                file_name = gcda.replace('.gcda', '')
+                dir_name_list = parent.replace('/paddle/build/', '').split('/')
+                dir_name_list = dir_name_list[:-2]
+                dir_name = '/'.join(dir_name_list)
+                src_name = dir_name + '/' + file_name
+                # NOTE: [:-2] presumably trims CMakeFiles/TARGET.dir - confirm.
+                # Remove the .gcda of every source the pull request left alone.
+
+                if src_name not in changed:
+                    unused_file = parent + '/' + gcda
+                    # gcda is a bare name from os.walk; remove the full path.
+                    os.remove(unused_file)
+                else:
+                    print(src_name)
+
+
+if __name__ == '__main__':
+    pull_id = sys.argv[1]
+    pull_id = int(pull_id)
+
+    clean(pull_id)
diff --git a/tools/coverage/gcda_clean.py b/tools/coverage/gcda_clean.py
new file mode 100644
index 0000000000..c222c448a3
--- /dev/null
+++ b/tools/coverage/gcda_clean.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+""" usage: gcda_clean.py pull_id. """
+
+import os
+import sys
+
+from github import Github
+
+
+def get_pull(pull_id):
+    """Fetch a PaddlePaddle/Paddle pull request through the GitHub API.
+
+    Args:
+        pull_id (int): Pull id.
+
+    Returns:
+        github.PullRequest.PullRequest
+    """
+    # The token comes from the environment only; no hard-coded fallback.
+    token = os.getenv('GITHUB_API_TOKEN')
+    github = Github(token, timeout=60)
+    repo = github.get_repo('PaddlePaddle/Paddle')
+    pull = repo.get_pull(pull_id)
+
+    return pull
+
+
+def get_files(pull_id):
+    """Yield the name of every file listed by the pull request.
+
+    Args:
+        pull_id (int): Pull id.
+
+    Returns:
+        iterable: The generator will yield every filename.
+    """
+    pull = get_pull(pull_id)
+
+    for file in pull.get_files():
+        yield file.filename
+
+
+def clean(pull_id):
+    """Delete every .gcda under /paddle/build not matching a changed file.
+
+    Args:
+        pull_id (int): Pull id.
+
+    Returns:
+        None.
+    """
+    changed = set()
+
+    for file in get_files(pull_id):
+        changed.add('/paddle/build/{}.gcda'.format(file))
+
+    for parent, dirs, files in os.walk('/paddle/build/'):
+        for gcda in files:
+            if gcda.endswith('.gcda'):
+                trimmed = parent
+
+                # convert paddle/fluid/imperative/CMakeFiles/layer.dir/layer.cc.gcda
+                # to paddle/fluid/imperative/layer.cc.gcda
+
+                if trimmed.endswith('.dir'):
+                    trimmed = os.path.dirname(trimmed)
+
+                if trimmed.endswith('CMakeFiles'):
+                    trimmed = os.path.dirname(trimmed)
+
+                # Remove the .gcda of every source the pull request left alone.
+
+                if os.path.join(trimmed, gcda) not in changed:
+                    gcda = os.path.join(parent, gcda)
+                    os.remove(gcda)
+
+
+if __name__ == '__main__':
+    pull_id = sys.argv[1]
+    pull_id = int(pull_id)
+
+    clean(pull_id)
diff --git a/tools/coverage/paddle_coverage.sh b/tools/coverage/paddle_coverage.sh
new file mode 100644
index 0000000000..f18ed27b14
--- /dev/null
+++ b/tools/coverage/paddle_coverage.sh
@@ -0,0 +1,147 @@
+#!/usr/bin/env bash
+
+set -xe
+
+PADDLE_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}")/../../" && pwd )"
+
+# Install lcov 1.14 from the downloaded tarball.
+curl -o /lcov-1.14.tar.gz -s https://paddle-ci.gz.bcebos.com/coverage%2Flcov-1.14.tar.gz
+tar -xf /lcov-1.14.tar.gz -C /
+cd /lcov-1.14
+make install
+
+# Capture C++ coverage after gcda_clean.py has pruned the .gcda files.
+
+cd /paddle/build
+
+python ${PADDLE_ROOT}/tools/coverage/gcda_clean.py ${GIT_PR_ID}
+
+lcov --capture -d ./ -o coverage.info --gcov-tool /usr/bin/gcov-4.8 --rc lcov_branch_coverage=0
+
+# Full C++ tracefile (coverage-full.info): selected fluid dirs minus tests.
+
+function gen_full_html_report() {
+    lcov --extract coverage.info \
+        '/paddle/paddle/fluid/framework/*' \
+        '/paddle/paddle/fluid/imperative/*' \
+        '/paddle/paddle/fluid/inference/*' \
+        '/paddle/paddle/fluid/memory/*' \
+        '/paddle/paddle/fluid/operators/*' \
+        '/paddle/paddle/fluid/recordio/*' \
+        '/paddle/paddle/fluid/string/*' \
+        -o coverage-full.tmp \
+        --rc lcov_branch_coverage=0
+
+    mv -f coverage-full.tmp coverage-full.info
+
+    lcov --remove coverage-full.info \
+        '/paddle/paddle/fluid/framework/*_test*' \
+        '/paddle/paddle/fluid/*/*test*' \
+        '/paddle/paddle/fluid/*/*/*test*' \
+        '/paddle/paddle/fluid/inference/tests/*' \
+        '/paddle/paddle/fluid/inference/api/demo_ci/*' \
+        -o coverage-full.tmp \
+        --rc lcov_branch_coverage=0
+
+    mv -f coverage-full.tmp coverage-full.info
+}
+# Best effort: '|| true' keeps a failure here from aborting under 'set -e'.
+gen_full_html_report || true
+
+# C++ diff report: restrict coverage-full.info to the lines the PR added.
+
+function gen_diff_html_report() {
+    if [ "${GIT_PR_ID}" != "" ]; then
+        # File patterns and the diff dump both come from pull_request.py.
+        COVERAGE_DIFF_PATTERN="`python ${PADDLE_ROOT}/tools/coverage/pull_request.py files ${GIT_PR_ID}`"
+
+        python ${PADDLE_ROOT}/tools/coverage/pull_request.py diff ${GIT_PR_ID} > git-diff.out
+    fi
+
+    lcov --extract coverage-full.info \
+        ${COVERAGE_DIFF_PATTERN} \
+        -o coverage-diff.info \
+        --rc lcov_branch_coverage=0
+
+    python ${PADDLE_ROOT}/tools/coverage/coverage_diff.py coverage-diff.info git-diff.out > coverage-diff.tmp
+
+    mv -f coverage-diff.tmp coverage-diff.info
+
+    genhtml -o coverage-diff -t 'Diff Coverage' --no-function-coverage --no-branch-coverage coverage-diff.info
+}
+
+gen_diff_html_report || true
+
+# Python coverage: merge the data files, export XML, convert it to lcov.
+
+export COVERAGE_FILE=/paddle/build/python-coverage.data
+# Tracing is paused here, presumably to keep the file list out of the log.
+set +x
+coverage combine `ls python-coverage.data.*`
+set -x
+
+coverage xml -i -o python-coverage.xml
+
+python ${PADDLE_ROOT}/tools/coverage/python_coverage.py > python-coverage.info
+
+# Full Python tracefile (python-coverage-full.info): /paddle/python/*
+# with every tests directory removed.
+function gen_python_full_html_report() {
+    lcov --extract python-coverage.info \
+        '/paddle/python/*' \
+        -o python-coverage-full.tmp \
+        --rc lcov_branch_coverage=0
+
+    mv -f python-coverage-full.tmp python-coverage-full.info
+
+    lcov --remove python-coverage-full.info \
+        '/*/tests/*' \
+        -o python-coverage-full.tmp \
+        --rc lcov_branch_coverage=0
+
+    mv -f python-coverage-full.tmp python-coverage-full.info
+}
+
+gen_python_full_html_report || true
+
+# Python diff report: same steps as the C++ diff report, on Python data.
+
+function gen_python_diff_html_report() {
+    if [ "${GIT_PR_ID}" != "" ]; then
+        COVERAGE_DIFF_PATTERN="`python ${PADDLE_ROOT}/tools/coverage/pull_request.py files ${GIT_PR_ID}`"
+
+        python ${PADDLE_ROOT}/tools/coverage/pull_request.py diff ${GIT_PR_ID} > python-git-diff.out
+    fi
+
+    lcov --extract python-coverage-full.info \
+        ${COVERAGE_DIFF_PATTERN} \
+        -o python-coverage-diff.info \
+        --rc lcov_branch_coverage=0
+
+    python ${PADDLE_ROOT}/tools/coverage/coverage_diff.py python-coverage-diff.info python-git-diff.out > python-coverage-diff.tmp
+
+    mv -f python-coverage-diff.tmp python-coverage-diff.info
+
+    genhtml -o python-coverage-diff \
+        -t 'Python Diff Coverage' \
+        --no-function-coverage \
+        --no-branch-coverage \
+        --ignore-errors source \
+        python-coverage-diff.info
+}
+
+gen_python_diff_html_report || true
+
+# Exit 9 when either diff tracefile fails the 0.9 line-coverage threshold.
+
+echo "Assert Diff Coverage"
+
+python ${PADDLE_ROOT}/tools/coverage/coverage_lines.py coverage-diff.info 0.9 || COVERAGE_LINES_ASSERT=1
+
+echo "Assert Python Diff Coverage"
+
+python ${PADDLE_ROOT}/tools/coverage/coverage_lines.py python-coverage-diff.info 0.9 || PYTHON_COVERAGE_LINES_ASSERT=1
+
+if [ "$COVERAGE_LINES_ASSERT" = "1" ] || [ "$PYTHON_COVERAGE_LINES_ASSERT" = "1" ]; then
+    exit 9
+fi
diff --git a/tools/coverage/pull_request.py b/tools/coverage/pull_request.py
new file mode 100644
index 0000000000..7bd3515b86
--- /dev/null
+++ b/tools/coverage/pull_request.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+usage: pull_request.py files pull_id
+       
pull_request.py diff pull_id
+"""
+
+import argparse
+import os
+
+from github import Github
+
+# Read the token from the environment only: no hard-coded fallback secret.
+token = os.getenv('GITHUB_API_TOKEN')
+
+
+def get_pull(pull_id):
+    """Fetch a PaddlePaddle/Paddle pull request through the GitHub API.
+
+    Args:
+        pull_id (int): Pull id.
+
+    Returns:
+        github.PullRequest.PullRequest
+    """
+    github = Github(token, timeout=60)
+    repo = github.get_repo('PaddlePaddle/Paddle')
+    pull = repo.get_pull(pull_id)
+    return pull
+
+
+def get_files(args):
+    """Print '/paddle/NAME' for every file listed by the pull request.
+
+    Args:
+        args (argparse.Namespace): Parsed arguments; pull_id is read.
+
+    Returns:
+        None.
+    """
+    pull = get_pull(args.pull_id)
+
+    for file in pull.get_files():
+        print('/paddle/{}'.format(file.filename))
+
+
+def diff(args):
+    """Print a '+++ NAME' line and then the patch of every listed file.
+
+    Args:
+        args (argparse.Namespace): Parsed arguments; pull_id is read.
+
+    Returns:
+        None.
+    """
+    pull = get_pull(args.pull_id)
+
+    for file in pull.get_files():
+        print('+++ {}'.format(file.filename))
+        print(file.patch)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    subparsers = parser.add_subparsers()
+
+    files_parser = subparsers.add_parser('files')
+    files_parser.add_argument('pull_id', type=int)
+    files_parser.set_defaults(func=get_files)
+
+    diff_parser = subparsers.add_parser('diff')
+    diff_parser.add_argument('pull_id', type=int)
+    diff_parser.set_defaults(func=diff)
+
+    args = parser.parse_args()
+    args.func(args)
diff --git a/tools/coverage/python_coverage.py b/tools/coverage/python_coverage.py
new file mode 100644
index 0000000000..ba67e12249
--- /dev/null
+++ b/tools/coverage/python_coverage.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+usage: python_coverage.py > python-coverage.info
+"""
+
+from os import path
+from xml.etree import ElementTree
+
+tree = ElementTree.parse('python-coverage.xml')
+root = tree.getroot()
+
+sources = root.findall('sources/source')
+# Exactly one source root is supported; an empty list used to hit IndexError.
+if len(sources) != 1:
+    raise SystemExit(1)
+
+source = sources[0].text
+
+for clazz in root.findall('packages/package/classes/class'):
+    clazz_filename = clazz.attrib.get('filename')
+    clazz_filename = path.join(source, clazz_filename)
+    # Files under /paddle/build/python/ are reported under /paddle/python/.
+    if clazz_filename.startswith('/paddle/build/python/'):
+        clazz_filename = '/paddle/python/' + clazz_filename[len(
+            '/paddle/build/python/'):]
+
+    if not path.exists(clazz_filename):
+        continue
+
+    print('TN:')
+    print('SF:{}'.format(clazz_filename))
+
+    branch_index = 0
+
+    for line in clazz.findall('lines/line'):
+        line_hits = line.attrib.get('hits')
+        line_number = line.attrib.get('number')
+
+        line_branch = line.attrib.get('branch')
+        line_condition_coverage = line.attrib.get('condition-coverage')
+        line_missing_branches = line.attrib.get('missing-branches')
+
+        if line_branch == 'true':
+            # The value is parsed as 'RATE (TAKEN/TOTAL)', e.g. '50% (1/2)'.
+            line_condition_coverage = line_condition_coverage.split()
+            line_condition_coverage = line_condition_coverage[1].strip('()')
+            line_condition_coverage = line_condition_coverage.split('/')
+            taken = line_condition_coverage[0]
+            taken = int(taken)
+
+            for _ in range(taken):
+                print('BRDA:{},{},{},{}'.format(line_number, 0, branch_index,
+                                                line_hits))
+                branch_index += 1
+
+            if line_missing_branches:
+                for missing_branch in line_missing_branches.split(','):
+                    print('BRDA:{},{},{},{}'.format(line_number, 0,
+                                                    branch_index, 0))
+                    branch_index += 1
+
+        print('DA:{},{}'.format(line_number, line_hits))
+
+    print('end_of_record')
-- 
GitLab