Add coverage tools (#21975)

Add coverage data processing tools.

Add coverage tools (#21975)
Add coverage data processing tools.
7ba7acd1 · Liu Xudong · Tao Luo · 6ea38091 · 7ba7acd1 · 7ba7acd1
9 changed files
--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -596,6 +596,11 @@ function assert_api_spec_approvals() {
 }
+# Run the coverage pipeline (tools/coverage/paddle_coverage.sh) in a new bash.
+# The path is quoted so a PADDLE_ROOT containing spaces is not word-split.
+function check_coverage() {
+    /bin/bash "${PADDLE_ROOT}/tools/coverage/paddle_coverage.sh"
+}
 function single_test() {
    TEST_NAME=$1
    if [ -z "${TEST_NAME}" ]; then
@@ -1172,6 +1177,7 @@ function main() {
        build ${parallel_number}
        enable_unused_var_check
        parallel_test
+        check_coverage
        check_change_of_unittest ${PYTHON_ABI:-""}
        ;;
      cicheck_brpc)

--- a/tools/coverage/coverage_diff.py
+++ b/tools/coverage/coverage_diff.py
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+usage: coverage_diff.py info_file diff_file > coverage-diff.info
+"""
+import sys
+def get_diff_file_lines(diff_file):
+    """Collect, per file, the new-side line numbers that a diff adds.
+    The expected input is a ``+++ <file>`` line for every file, followed by
+    that file's unified-diff hunks (``@@ -a,b +c,d @@`` headers, then lines
+    whose first column is ' ', '+' or '-').
+    Args:
+        diff_file (str): Path of the diff text to parse.
+    Returns:
+        dict: Maps each file name to the list of added line numbers.
+    """
+    file_marker = '+++ '
+    diff_file_lines = {}
+    current_file = None
+    current_line = -1
+    with open(diff_file) as diff_stream:
+        for line in diff_stream:
+            # Only trailing whitespace is removed: the first column is the
+            # diff marker, so a context line such as " - item" must keep its
+            # leading space or it would be mistaken for a removed line.
+            line = line.rstrip()
+            if line.startswith(file_marker):
+                # Slice the prefix off; str.lstrip() strips a character set
+                # and would also eat a leading '+' of the file name.
+                current_file = line[len(file_marker):].strip()
+                diff_file_lines[current_file] = []
+            elif line.startswith('@@ '):
+                # "@@ -old[,n] +new[,n] @@": keep the new-side start line.
+                new_range = line.split()[2]
+                current_line = int(new_range.lstrip('+').split(',')[0])
+            elif line.startswith('-') or line.startswith('\\'):
+                # Removed lines and "\ No newline at end of file" notes do
+                # not occupy a line on the new side.
+                continue
+            else:
+                # Added and context lines both advance the new-side counter;
+                # only added lines are recorded.
+                if line.startswith('+') and current_file is not None:
+                    diff_file_lines[current_file].append(current_line)
+                current_line += 1
+    return diff_file_lines
+def get_info_file_lines(info_file, diff_file):
+    """Print an lcov tracefile reduced to the lines added by a diff.
+    DA records are kept only for line numbers that the diff adds to the
+    current source file, LF/LH are rewritten to match the kept records, and
+    every other line is echoed unchanged.
+    Args:
+        info_file (str): File generated by lcov.
+        diff_file (str): File to get modified lines.
+    Returns:
+        None
+    """
+    diff_file_lines = get_diff_file_lines(diff_file)
+    source_root = '/paddle/'
+    current_lines = set()
+    current_lf = 0
+    current_lh = 0
+    with open(info_file) as info_stream:
+        for line in info_stream:
+            line = line.strip()
+            if line.startswith('SF:'):
+                # Slice the tag off; lstrip('SF:') strips a character set and
+                # would also eat a path starting with 'S', 'F' or ':'.
+                current_file = line[len('SF:'):]
+                if current_file.startswith(source_root):
+                    current_file = current_file[len(source_root):]
+                current_lines = set(diff_file_lines.get(current_file, ()))
+                # LF/LH are totals of one record, so the counters restart
+                # with every source file instead of accumulating.
+                current_lf = 0
+                current_lh = 0
+            elif line.startswith('DA:'):
+                da = line[len('DA:'):].split(',')
+                if int(da[0]) in current_lines:
+                    current_lf += 1
+                    if not line.endswith(',0'):
+                        current_lh += 1
+                    print(line)
+                continue
+            elif line.startswith('LF:'):
+                print('LF:{}'.format(current_lf))
+                continue
+            elif line.startswith('LH:'):
+                print('LH:{}'.format(current_lh))
+                continue
+            print(line)
+if __name__ == '__main__':
+    # Both the tracefile and the diff are required. With fewer arguments the
+    # script still ends quietly with status 0; sys.exit() is used because the
+    # bare exit() helper is installed by the site module and may be absent.
+    if len(sys.argv) < 3:
+        sys.exit()
+    get_info_file_lines(sys.argv[1], sys.argv[2])
--- a/tools/coverage/coverage_diff_list.py
+++ b/tools/coverage/coverage_diff_list.py
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+usage: coverage_diff_list.py list_file max_rate > coverage-diff-list-90.out
+"""
+import sys
+def filter_by(list_file, max_rate):
+    """Print the files whose coverage rate is below a threshold.
+    Each useful input line has the form ``<name> | <rate>% ...``. Lines
+    without a numeric rate in the second '|' column are skipped.
+    Args:
+        list_file (str): Path of the '|'-separated coverage list.
+        max_rate (float): Rates greater than or equal to this are left out.
+    Returns:
+        None: Every match is printed as ``<name> <rate>``.
+    """
+    source_root = '/paddle/'
+    with open(list_file) as list_stream:
+        for line in list_stream:
+            columns = line.strip().split('|')
+            # name
+            name = columns[0].strip()
+            if name.startswith(source_root):
+                name = name[len(source_root):]
+            # rate
+            try:
+                rate = float(columns[1].split()[0].strip('%'))
+            except (IndexError, ValueError):
+                # No numeric rate on this line. Printing it would either
+                # reuse the previous line's rate or hit an unbound name.
+                continue
+            if rate < max_rate:
+                print('{} {}'.format(name, rate))
+if __name__ == '__main__':
+    # Two arguments are read below (list_file and max_rate), so argv needs at
+    # least three entries; a check against 2 would let sys.argv[2] raise
+    # IndexError.
+    if len(sys.argv) < 3:
+        sys.exit()
+    list_file = sys.argv[1]
+    max_rate = float(sys.argv[2])
+    filter_by(list_file, max_rate)
--- a/tools/coverage/coverage_lines.py
+++ b/tools/coverage/coverage_lines.py
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+usage: coverage_lines.py info_file expected
+"""
+import os
+import sys
+def get_lines(info_file):
+    """Compute the line coverage rate recorded in an lcov tracefile.
+    Only ``DA:<line>,<count>`` records are read; a line counts as hit when
+    its count is greater than zero. If the file has no DA record the process
+    prints 'no data found' and exits with status 0.
+    Args:
+        info_file (str): File generated by lcov.
+    Returns:
+        float: Coverage rate, hit lines divided by instrumented lines.
+    """
+    tag = 'DA:'
+    hits = 0
+    total = 0
+    with open(info_file) as info_stream:
+        for line in info_stream:
+            line = line.strip()
+            if not line.startswith(tag):
+                continue
+            total += 1
+            if int(line[len(tag):].split(',')[1]) > 0:
+                hits += 1
+    if total == 0:
+        print('no data found')
+        sys.exit()
+    # float() keeps this a true division under Python 2 as well.
+    return float(hits) / total
+if __name__ == '__main__':
+    # usage: coverage_lines.py info_file expected
+    # Exit status is 1 only when the measured rate is below `expected`.
+    if len(sys.argv) < 3:
+        sys.exit()
+    info_file = sys.argv[1]
+    expected = float(sys.argv[2])
+    if not os.path.isfile(info_file):
+        # A missing tracefile is reported and ignored (status 0).
+        print('info file {} is not exists, ignored'.format(info_file))
+        sys.exit()
+    actual = round(get_lines(info_file), 3)
+    expected_pct = round(expected * 100, 1)
+    actual_pct = round(actual * 100, 1)
+    if actual < expected:
+        print('expected >= {} %, actual {} %, failed'.format(
+            expected_pct, actual_pct))
+        sys.exit(1)
+    print('expected >= {} %, actual {} %, passed'.format(
+        expected_pct, actual_pct))
--- a/tools/coverage/cuda_clean.py
+++ b/tools/coverage/cuda_clean.py
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+""" usage: cuda_clean.py pull_id. """
+import os
+import sys
+from github import Github
+def get_pull(pull_id):
+    """Fetch one pull request of the PaddlePaddle/Paddle repository.
+    Args:
+        pull_id (int): Pull id.
+    Returns:
+        github.PullRequest.PullRequest: The pull request.
+    """
+    # NOTE(review): the fallback value is an API token committed to source
+    # control; confirm it is meant to be public or supply it through
+    # GITHUB_API_TOKEN only.
+    api_token = os.getenv(
+        'GITHUB_API_TOKEN', 'e1f9c3cf211d5c20e65bd9ab7ec07983da284bca')
+    client = Github(api_token, timeout=60)
+    return client.get_repo('PaddlePaddle/Paddle').get_pull(pull_id)
+def get_files(pull_id):
+    """Lazily list the file names touched by a pull request.
+    Args:
+        pull_id (int): Pull id.
+    Returns:
+       iterable: A generator yielding every filename; the pull request is
+       only fetched once iteration starts.
+    """
+    for changed_file in get_pull(pull_id).get_files():
+        yield changed_file.filename
+def clean(pull_id):
+    """Delete the .gcda files under /paddle/build/ that do not map to a file
+    changed by the pull request, and print the source names that are kept.
+    Args:
+        pull_id (int): Pull id.
+    Returns:
+        None.
+    """
+    build_root = '/paddle/build/'
+    suffix = '.gcda'
+    # A set gives constant-time membership tests inside the directory walk.
+    changed = set(get_files(pull_id))
+    for parent, dirs, files in os.walk(build_root):
+        for gcda in files:
+            if not gcda.endswith(suffix):
+                continue
+            # Cut only the trailing suffix; replace() would also remove a
+            # '.gcda' occurring earlier in the name.
+            file_name = gcda[:-len(suffix)]
+            # The last two directory levels are dropped -- presumably CMake's
+            # "CMakeFiles/<target>.dir"; TODO confirm for CUDA objects.
+            dir_name_list = parent.replace(build_root, '').split('/')[:-2]
+            src_name = '/'.join(dir_name_list) + '/' + file_name
+            if src_name in changed:
+                print(src_name)
+            else:
+                # remove no changed gcda. The full path is required: `gcda`
+                # alone is just the file name inside `parent`.
+                os.remove(os.path.join(parent, gcda))
+if __name__ == '__main__':
+    # The pull request id is the only command-line argument.
+    clean(int(sys.argv[1]))
--- a/tools/coverage/gcda_clean.py
+++ b/tools/coverage/gcda_clean.py
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+""" usage: gcda_clean.py pull_id. """
+import os
+import sys
+from github import Github
+def get_pull(pull_id):
+    """Get the pull request with the given id from PaddlePaddle/Paddle.
+    Args:
+        pull_id (int): Pull id.
+    Returns:
+        github.PullRequest.PullRequest
+    """
+    # NOTE(review): the fallback value is an API token committed to source
+    # control; confirm it is meant to be public or supply it through
+    # GITHUB_API_TOKEN only.
+    api_token = os.getenv(
+        'GITHUB_API_TOKEN', 'e1f9c3cf211d5c20e65bd9ab7ec07983da284bca')
+    client = Github(api_token, timeout=60)
+    return client.get_repo('PaddlePaddle/Paddle').get_pull(pull_id)
+def get_files(pull_id):
+    """Yield the name of every file changed by a pull request.
+    Args:
+        pull_id (int): Pull id.
+    Returns:
+       iterable: A generator yielding every filename; the pull request is
+       only fetched once iteration starts.
+    """
+    for changed_file in get_pull(pull_id).get_files():
+        yield changed_file.filename
+def clean(pull_id):
+    """Delete every .gcda file under /paddle/build/ that does not belong to
+    a file changed by the pull request.
+    Args:
+        pull_id (int): Pull id.
+    Returns:
+        None.
+    """
+    # Expected .gcda path of each changed file. A set keeps the membership
+    # test constant-time for every file met during the walk.
+    changed = set(
+        '/paddle/build/{}.gcda'.format(name) for name in get_files(pull_id))
+    for parent, dirs, files in os.walk('/paddle/build/'):
+        for gcda in files:
+            if not gcda.endswith('.gcda'):
+                continue
+            trimmed = parent
+            # convert paddle/fluid/imperative/CMakeFiles/layer.dir/layer.cc.gcda
+            # to paddle/fluid/imperative/layer.cc.gcda
+            if trimmed.endswith('.dir'):
+                trimmed = os.path.dirname(trimmed)
+            if trimmed.endswith('CMakeFiles'):
+                trimmed = os.path.dirname(trimmed)
+            # remove no changed gcda
+            if os.path.join(trimmed, gcda) not in changed:
+                os.remove(os.path.join(parent, gcda))
+if __name__ == '__main__':
+    # The pull request id is the only command-line argument.
+    clean(int(sys.argv[1]))
--- a/tools/coverage/paddle_coverage.sh
+++ b/tools/coverage/paddle_coverage.sh
+#!/usr/bin/env bash
+# -x traces every command, -e aborts the script on the first failing command.
+set -xe
+PADDLE_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}")/../../" && pwd )"
+# install lcov
+curl -o /lcov-1.14.tar.gz -s https://paddle-ci.gz.bcebos.com/coverage%2Flcov-1.14.tar.gz
+tar -xf /lcov-1.14.tar.gz -C /
+cd /lcov-1.14
+make install
+# run paddle coverage
+cd /paddle/build
+# gcda_clean.py reads the pull request id from its first argument and fails
+# without one, which would abort the whole script under `set -e`. Skip the
+# clean-up when GIT_PR_ID is empty, like the diff report functions below do.
+if [ "${GIT_PR_ID}" != "" ]; then
+    python "${PADDLE_ROOT}/tools/coverage/gcda_clean.py" "${GIT_PR_ID}"
+fi
+lcov --capture -d ./ -o coverage.info --gcov-tool /usr/bin/gcov-4.8 --rc lcov_branch_coverage=0
+# full report data
+# Builds coverage-full.info from coverage.info in two passes, each written to
+# coverage-full.tmp and then moved into place:
+#   1. keep only the listed paddle/fluid subdirectories (lcov --extract);
+#   2. drop test sources and the inference demo_ci directory (lcov --remove).
+# Despite the name, no genhtml call is made here.
+function gen_full_html_report() {
+    lcov --extract coverage.info \
+        '/paddle/paddle/fluid/framework/*' \
+        '/paddle/paddle/fluid/imperative/*' \
+        '/paddle/paddle/fluid/inference/*' \
+        '/paddle/paddle/fluid/memory/*' \
+        '/paddle/paddle/fluid/operators/*' \
+        '/paddle/paddle/fluid/recordio/*' \
+        '/paddle/paddle/fluid/string/*' \
+        -o coverage-full.tmp \
+        --rc lcov_branch_coverage=0
+    mv -f coverage-full.tmp coverage-full.info
+    lcov --remove coverage-full.info \
+        '/paddle/paddle/fluid/framework/*_test*' \
+        '/paddle/paddle/fluid/*/*test*' \
+        '/paddle/paddle/fluid/*/*/*test*' \
+        '/paddle/paddle/fluid/inference/tests/*' \
+        '/paddle/paddle/fluid/inference/api/demo_ci/*' \
+        -o coverage-full.tmp \
+        --rc lcov_branch_coverage=0
+    mv -f coverage-full.tmp coverage-full.info
+}
+# `|| true` keeps a failure in this step from aborting the script (set -e).
+gen_full_html_report || true
+# diff html report
+# Restricts coverage-full.info to the files and added lines of the pull
+# request and renders the result as HTML into ./coverage-diff.
+function gen_diff_html_report() {
+    # Without a pull request id the pattern stays unset and git-diff.out is
+    # not written by this function.
+    if [ "${GIT_PR_ID}" != "" ]; then
+        COVERAGE_DIFF_PATTERN="`python ${PADDLE_ROOT}/tools/coverage/pull_request.py files ${GIT_PR_ID}`"
+        python ${PADDLE_ROOT}/tools/coverage/pull_request.py diff ${GIT_PR_ID} > git-diff.out
+    fi
+    # COVERAGE_DIFF_PATTERN is expanded unquoted so that every path printed
+    # by pull_request.py becomes its own pattern argument -- presumably
+    # intentional; paths containing whitespace would be split too.
+    lcov --extract coverage-full.info \
+        ${COVERAGE_DIFF_PATTERN} \
+        -o coverage-diff.info \
+        --rc lcov_branch_coverage=0
+    # Keep only the DA records of lines added by the diff.
+    python ${PADDLE_ROOT}/tools/coverage/coverage_diff.py coverage-diff.info git-diff.out > coverage-diff.tmp
+    mv -f coverage-diff.tmp coverage-diff.info
+    genhtml -o coverage-diff -t 'Diff Coverage' --no-function-coverage --no-branch-coverage coverage-diff.info
+}
+# `|| true` keeps a failure in this step from aborting the script (set -e).
+gen_diff_html_report || true
+# python coverage
+# Merge the python-coverage.data.* files into COVERAGE_FILE, export the
+# result as XML and convert that XML to an lcov-style tracefile.
+export COVERAGE_FILE=/paddle/build/python-coverage.data
+# Command tracing is switched off around the combine step -- presumably to
+# keep the expanded file list out of the log.
+set +x
+coverage combine `ls python-coverage.data.*`
+set -x
+coverage xml -i -o python-coverage.xml
+# python_coverage.py reads python-coverage.xml from the current directory.
+python ${PADDLE_ROOT}/tools/coverage/python_coverage.py > python-coverage.info
+# python full report data
+# Builds python-coverage-full.info from python-coverage.info: keep only
+# /paddle/python/*, then drop everything under a tests/ directory. Each pass
+# writes python-coverage-full.tmp and moves it into place. No genhtml call is
+# made here.
+function gen_python_full_html_report() {
+    lcov --extract python-coverage.info \
+        '/paddle/python/*' \
+        -o python-coverage-full.tmp \
+        --rc lcov_branch_coverage=0
+    mv -f python-coverage-full.tmp python-coverage-full.info
+    lcov --remove python-coverage-full.info \
+        '/*/tests/*' \
+        -o python-coverage-full.tmp \
+        --rc lcov_branch_coverage=0
+    mv -f python-coverage-full.tmp python-coverage-full.info
+}
+# `|| true` keeps a failure in this step from aborting the script (set -e).
+gen_python_full_html_report || true
+# python diff html report
+# Restricts python-coverage-full.info to the files and added lines of the
+# pull request and renders the result as HTML into ./python-coverage-diff.
+function gen_python_diff_html_report() {
+    # Without a pull request id the pattern is not set here and
+    # python-git-diff.out is not written by this function.
+    if [ "${GIT_PR_ID}" != "" ]; then
+        COVERAGE_DIFF_PATTERN="`python ${PADDLE_ROOT}/tools/coverage/pull_request.py files ${GIT_PR_ID}`"
+        python ${PADDLE_ROOT}/tools/coverage/pull_request.py diff ${GIT_PR_ID} > python-git-diff.out
+    fi
+    # COVERAGE_DIFF_PATTERN is expanded unquoted so that every path printed
+    # by pull_request.py becomes its own pattern argument -- presumably
+    # intentional; paths containing whitespace would be split too.
+    lcov --extract python-coverage-full.info \
+        ${COVERAGE_DIFF_PATTERN} \
+        -o python-coverage-diff.info \
+        --rc lcov_branch_coverage=0
+    # Keep only the DA records of lines added by the diff.
+    python ${PADDLE_ROOT}/tools/coverage/coverage_diff.py python-coverage-diff.info python-git-diff.out > python-coverage-diff.tmp
+    mv -f python-coverage-diff.tmp python-coverage-diff.info
+    genhtml -o python-coverage-diff \
+        -t 'Python Diff Coverage' \
+        --no-function-coverage \
+        --no-branch-coverage \
+        --ignore-errors source \
+        python-coverage-diff.info
+}
+# `|| true` keeps a failure in this step from aborting the script (set -e).
+gen_python_diff_html_report || true
+# assert coverage lines
+# Both diff tracefiles are checked against a 0.9 line-coverage threshold.
+# `|| VAR=1` records a failed check without tripping `set -e`, so the second
+# check still runs; the script exits with status 9 if either one failed.
+echo "Assert Diff Coverage"
+python ${PADDLE_ROOT}/tools/coverage/coverage_lines.py coverage-diff.info 0.9 || COVERAGE_LINES_ASSERT=1
+echo "Assert Python Diff Coverage"
+python ${PADDLE_ROOT}/tools/coverage/coverage_lines.py python-coverage-diff.info 0.9 || PYTHON_COVERAGE_LINES_ASSERT=1
+if [ "$COVERAGE_LINES_ASSERT" = "1" ] || [ "$PYTHON_COVERAGE_LINES_ASSERT" = "1" ]; then
+    exit 9
+fi
--- a/tools/coverage/pull_request.py
+++ b/tools/coverage/pull_request.py
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+usage: pull_request.py files pull_id
+       pull_request.py diff  pull_id
+"""
+import argparse
+import os
+from github import Github
+# GitHub API token: taken from GITHUB_API_TOKEN, else the literal fallback.
+# NOTE(review): the fallback is a credential committed to source control;
+# confirm it is meant to be public or supply it through the environment only.
+token = os.getenv('GITHUB_API_TOKEN',
+                  'e1f9c3cf211d5c20e65bd9ab7ec07983da284bca')
+def get_pull(pull_id):
+    """Fetch one pull request of the PaddlePaddle/Paddle repository.
+    Args:
+        pull_id (int): Pull id.
+    Returns:
+        github.PullRequest.PullRequest
+    """
+    client = Github(token, timeout=60)
+    return client.get_repo('PaddlePaddle/Paddle').get_pull(pull_id)
+def get_files(args):
+    """Print every file changed by the pull request as ``/paddle/<name>``.
+    Args:
+        args (argparse.ArgumentParser().parse_args()): Arguments; only
+            ``args.pull_id`` is read.
+    Returns:
+        None.
+    """
+    for changed_file in get_pull(args.pull_id).get_files():
+        print('/paddle/{}'.format(changed_file.filename))
+def diff(args):
+    """Print the patch of every file changed by the pull request.
+    Each file is introduced by a ``+++ <name>`` line followed by its patch
+    text, which is the layout coverage_diff.py parses.
+    Args:
+        args (argparse.ArgumentParser().parse_args()): Arguments; only
+            ``args.pull_id`` is read.
+    Returns:
+        None.
+    """
+    for changed_file in get_pull(args.pull_id).get_files():
+        print('+++ {}'.format(changed_file.filename))
+        patch = changed_file.patch
+        # A file can come without a textual patch (for example binary
+        # content); printing it would emit the literal word "None".
+        if patch is not None:
+            print(patch)
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    subparsers = parser.add_subparsers()
+    # Both sub-commands take one integer pull_id and differ only in handler.
+    for command, handler in (('files', get_files), ('diff', diff)):
+        command_parser = subparsers.add_parser(command)
+        command_parser.add_argument('pull_id', type=int)
+        command_parser.set_defaults(func=handler)
+    args = parser.parse_args()
+    args.func(args)
--- a/tools/coverage/python_coverage.py
+++ b/tools/coverage/python_coverage.py
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+usage: python_coverage.py > python-coverage.info
+"""
+from os import path
+from xml.etree import ElementTree
+# Convert python-coverage.xml (current directory) to lcov-style records
+# (TN/SF/BRDA/DA/end_of_record) on stdout.
+tree = ElementTree.parse('python-coverage.xml')
+root = tree.getroot()
+sources = root.findall('sources/source')
+# File names are resolved against exactly one <source> root. Zero roots used
+# to fall through to an IndexError; both cases now exit with status 1.
+if len(sources) != 1:
+    raise SystemExit(
+        'expected exactly one <source> in python-coverage.xml, found {}'
+        .format(len(sources)))
+source = sources[0].text
+build_prefix = '/paddle/build/python/'
+for clazz in root.findall('packages/package/classes/class'):
+    clazz_filename = path.join(source, clazz.attrib.get('filename'))
+    # Report files under the build tree at their /paddle/python/ location.
+    if clazz_filename.startswith(build_prefix):
+        clazz_filename = '/paddle/python/' + clazz_filename[len(build_prefix):]
+    if not path.exists(clazz_filename):
+        continue
+    print('TN:')
+    print('SF:{}'.format(clazz_filename))
+    # Branch ids keep counting up across all lines of one file.
+    branch_index = 0
+    for line in clazz.findall('lines/line'):
+        line_hits = line.attrib.get('hits')
+        line_number = line.attrib.get('number')
+        if line.attrib.get('branch') == 'true':
+            # The parsing below expects "<percent> (<taken>/<total>)" and
+            # keeps the <taken> part.
+            condition_coverage = line.attrib.get('condition-coverage')
+            taken = int(
+                condition_coverage.split()[1].strip('()').split('/')[0])
+            # One BRDA record per taken branch, carrying the line's hits.
+            for _ in range(taken):
+                print('BRDA:{},{},{},{}'.format(line_number, 0, branch_index,
+                                                line_hits))
+                branch_index += 1
+            # One BRDA record with a zero count per missing branch.
+            line_missing_branches = line.attrib.get('missing-branches')
+            if line_missing_branches:
+                for _ in line_missing_branches.split(','):
+                    print('BRDA:{},{},{},{}'.format(line_number, 0,
+                                                    branch_index, 0))
+                    branch_index += 1
+        print('DA:{},{}'.format(line_number, line_hits))
+    print('end_of_record')