get_pr_ut.py 19.4 KB
Newer Older
C
chalsliu 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" For the PR that only modified the unit test, get cases in pull request. """

import json
17 18
import os
import platform
19
import re
20
import ssl
21
import subprocess
22
import sys
23
import time
24
import urllib.request
25 26

import requests
C
chalsliu 已提交
27 28 29
from github import Github

# Root of the Paddle checkout, taken from the PADDLE_ROOT environment variable
# (default '/paddle/'). A '/' is appended and every '//' pair is collapsed so
# that the value can be used directly as a path prefix ending in a slash.
PADDLE_ROOT = (os.getenv('PADDLE_ROOT', '/paddle/') + '/').replace('//', '/')

# NOTE(review): this turns off HTTPS certificate verification for every
# default urllib context created in this process.
ssl._create_default_https_context = ssl._create_unverified_context
C
chalsliu 已提交
33 34


35
class PRChecker:
    """Work out which unit tests have to run for a pull request.

    The checker reads the pull request named by the ``GIT_PR_ID`` environment
    variable, looks every changed file up in a downloaded file-to-unit-test
    map and returns the selected test names. An empty selection ('') is
    returned whenever the change cannot be mapped precisely.
    """

    # The comment patterns are applied to text in which every source line is
    # prefixed with "<lineno>|", see get_comment_of_file().
    py_prog_oneline = re.compile(r'\d+\|\s*#.*')
    py_prog_multiline_a = re.compile('"""(.*?)"""', re.DOTALL)
    py_prog_multiline_b = re.compile("'''(.*?)'''", re.DOTALL)
    cc_prog_oneline = re.compile(r'\d+\|\s*//.*')
    # Earlier spelling of the attribute above, kept as an alias.
    cc_prog_online = cc_prog_oneline
    cc_prog_multiline = re.compile(r'\d+\|\s*/\*.*?\*/', re.DOTALL)
    # Unified-diff hunk header. Group 1 is the first new-side line number,
    # group 2 the new-side line count; the count may be omitted, meaning 1.
    lineno_prog = re.compile(r'@@ \-\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@')

    # Location of the precise-test artifacts downloaded by get_pr_ut().
    _PREC_BASE_URL = (
        'https://paddle-docker-tar.bj.bcebos.com/new_precise_test_map/'
    )
    # A pull request with at least this many changed files runs every case.
    # NOTE(review): presumably one full page of the GitHub files API - confirm.
    _FULL_RUN_FILE_COUNT = 30

    def __init__(self):
        """Connect to the PaddlePaddle/Paddle repository on GitHub."""
        self.github = Github(os.getenv('GITHUB_API_TOKEN'), timeout=60)
        self.repo = self.github.get_repo('PaddlePaddle/Paddle')
        self.pr = None
        self.suffix = ''
        self.full_case = False
        # Parsed PR diff, filled on first use by is_only_comment().
        self._diff_lines_cache = None

    def init(self):
        """Load the pull request and detect a ``test=allcase`` request.

        Exits the process with status 0 when ``GIT_PR_ID`` is not set. Sets
        ``self.full_case`` when the message of the last commit contains
        ``test=allcase``, or when no commit could be read at all.
        """
        pr_id = os.getenv('GIT_PR_ID')
        if not pr_id:
            print('PREC No PR ID')
            sys.exit(0)
        suffix = os.getenv('PREC_SUFFIX')
        if suffix:
            self.suffix = suffix
        self.pr = self.repo.get_pull(int(pr_id))

        last_commit = self._get_last_commit()
        if last_commit is None:
            # Without a commit message the opt-in flag cannot be read, so
            # fall back to running every case instead of crashing.
            print('PREC no commit found, run all cases')
            self.full_case = True
            return
        if 'test=allcase' in last_commit.message:
            print('PREC test=allcase is set')
            self.full_case = True

    def _get_last_commit(self):
        """Return the last commit of the pull request, or None if unknown."""
        last_commit = None
        page = 0
        while True:
            try:
                commits = self.pr.get_commits().get_page(page)
            except Exception as e:
                # Best effort: keep the newest commit seen so far.
                print(f'PREC get commits of page {page} failed: {e}')
                break
            if not commits:
                break
            last_commit = commits[-1].commit
            page += 1
        return last_commit

    def __wget_with_retry(self, url, max_tries=5, backoff_secs=10):
        """Download ``url`` with ``wget`` into the current directory.

        Odd attempts use the proxy settings of the environment, even attempts
        disable the proxy. After a failed attempt number ``k`` the method
        waits ``k * backoff_secs`` seconds before the next one.

        Returns:
            True when a wget call exits with status 0, otherwise False.
        """
        if platform.system() == 'Windows':
            no_proxy = ['-Y', 'off']
        else:
            no_proxy = ['--no-proxy']
        for attempt in range(1, max_tries + 1):
            proxy = [] if attempt % 2 == 1 else no_proxy
            # An argument list keeps characters such as '&' in the URL away
            # from the shell.
            command = ['wget', '-q', *proxy, '--no-check-certificate', url]
            try:
                code = subprocess.call(command)
            except OSError as e:
                # wget cannot be started at all; retrying would not help.
                print(f'PREC run wget failed: {e}')
                return False
            if code == 0:
                return True
            delay = attempt * backoff_secs
            print(
                'PREC download {} error, retry {} time(s) after {} secs.[proxy_option={}]'.format(
                    url, attempt, delay, ' '.join(proxy)
                )
            )
            if attempt < max_tries:
                time.sleep(delay)
        return False

    def __urlretrieve(self, url, filename, max_tries=5, backoff_secs=10):
        """Download ``url`` to ``filename`` with ``urllib``.

        Odd attempts bypass every proxy, even attempts use the proxies found
        in the environment. After a failed attempt number ``k`` the method
        waits ``k * backoff_secs`` seconds before the next one.

        Returns:
            True on the first successful download, False when every attempt
            failed.
        """
        env_proxies = urllib.request.getproxies()
        for attempt in range(1, max_tries + 1):
            if attempt % 2 == 1:
                # An empty mapping is how ProxyHandler disables proxies.
                proxy_option, proxies = 'without_proxy', {}
            else:
                proxy_option, proxies = 'with_proxy', env_proxies
            opener = urllib.request.build_opener(
                urllib.request.ProxyHandler(proxies),
                urllib.request.HTTPHandler,
            )
            # urlretrieve() always goes through the globally installed opener.
            urllib.request.install_opener(opener)
            try:
                urllib.request.urlretrieve(url, filename)
            except Exception as e:
                delay = attempt * backoff_secs
                print(e)
                print(
                    'PREC download {} error, retry {} time(s) after {} secs.[proxy_option={}]'.format(
                        url, attempt, delay, proxy_option
                    )
                )
                if attempt < max_tries:
                    time.sleep(delay)
            else:
                return True
        return False

    def get_pr_files(self):
        """Get files in pull request.

        Returns:
            A dict mapping ``PADDLE_ROOT + filename`` to the file status
            reported by GitHub. Paging stops as soon as
            ``_FULL_RUN_FILE_COUNT`` files were collected, because such a
            pull request runs every case anyway.
        """
        file_dict = {}
        file_count = 0
        page = 0
        while True:
            files = self.pr.get_files().get_page(page)
            if not files:
                break
            for changed in files:
                file_dict[PADDLE_ROOT + changed.filename] = changed.status
                file_count += 1
            if file_count >= self._FULL_RUN_FILE_COUNT:
                break
            page += 1
        print("pr modify files: %s" % file_dict)
        return file_dict

    def get_is_white_file(self, filename):
        """Tell whether a changed file may be ignored for test selection.

        Any path containing ``cmakelist`` (case-insensitive) and any path
        below the prefixes listed here is not a white file.
        """
        not_white_files = (
            PADDLE_ROOT + 'cmake/',
            PADDLE_ROOT + 'patches/',
            PADDLE_ROOT + 'tools/dockerfile/',
            PADDLE_ROOT + 'tools/windows/',
            PADDLE_ROOT + 'tools/test_runner.py',
            PADDLE_ROOT + 'tools/parallel_UT_rule.py',
        )
        if 'cmakelist' in filename.lower():
            return False
        return not filename.startswith(not_white_files)

    def __get_comment_by_filetype(self, content, filetype):
        """Collect the comment lines of ``content`` for 'py' or 'cc' text."""
        if filetype == 'py':
            progs = (
                self.py_prog_oneline,
                self.py_prog_multiline_a,
                self.py_prog_multiline_b,
            )
        elif filetype == 'cc':
            progs = (self.cc_prog_oneline, self.cc_prog_multiline)
        else:
            progs = ()
        result = []
        for prog in progs:
            result.extend(self.__get_comment_by_prog(content, prog))
        return result

    def __get_comment_by_prog(self, content, prog):
        """Return every match of ``prog`` in ``content``, split into lines."""
        return [
            piece
            for found in prog.findall(content)
            for piece in found.split('\n')
        ]

    def get_comment_of_file(self, f):
        """Return the comment lines of file ``f`` as ``"<lineno>|<text>"``.

        ``.h``, ``.cc`` and ``.cu`` files are scanned with the C++ patterns,
        ``.py`` files with the Python patterns. Any other file yields ``[]``.
        """
        # todo: get file from github
        if f.endswith(('.h', '.cc', '.cu')):
            filetype = 'cc'
        elif f.endswith('.py'):
            filetype = 'py'
        else:
            return []
        with open(f, encoding="utf-8") as fd:
            inputs = ''.join(
                f'{lineno}|{line}' for lineno, line in enumerate(fd, 1)
            )
        return self.__get_comment_by_filetype(inputs, filetype)

    def get_pr_diff_lines(self):
        """Download the PR diff and return its added lines per file.

        Returns:
            A dict mapping the file name taken from the ``+++`` header to a
            list of ``"<new lineno>|<text>"`` strings, one per added line.
        """
        r = requests.get(self.pr.diff_url, timeout=60)
        return self._parse_diff(r.text)

    def _parse_diff(self, diff_text):
        """Extract the added lines of every file from unified diff text."""
        file_to_diff_lines = {}
        data = diff_text.split('\n')
        total = len(data)
        ix = 0
        while ix < total:
            header = data[ix]
            ix += 1
            if not header.startswith('+++'):
                continue
            if header.rstrip('\r\n') == '+++ /dev/null':
                # Deleted file: there is no new side to record.
                continue
            # Drop the leading "+++ b/".
            filename = header[6:]
            while ix < total:
                hunk = self.lineno_prog.match(data[ix])
                if not hunk:
                    break
                ix += 1
                lineno = int(hunk.group(1))
                count = hunk.group(2)
                remaining = 1 if count is None else int(count)
                while ix < total:
                    text = data[ix]
                    marker = text[:1]
                    if marker in ('-', '\\'):
                        # Removed lines and "\ No newline at end of file" do
                        # not exist on the new side. They are consumed even
                        # after the new-side count is used up, so that the
                        # next hunk header is found.
                        pass
                    elif remaining <= 0:
                        break
                    else:
                        if marker == '+':
                            file_to_diff_lines.setdefault(filename, []).append(
                                '{}{}'.format(
                                    lineno, text.replace('+', '|', 1)
                                )
                            )
                        lineno += 1
                        remaining -= 1
                    ix += 1
        return file_to_diff_lines

    def is_only_comment(self, f):
        """Tell whether every line added to ``f`` by the PR is a comment.

        Returns False when the diff has no added line for ``f``. The diff is
        downloaded once and reused for later calls.
        """
        file_to_diff_lines = getattr(self, '_diff_lines_cache', None)
        if file_to_diff_lines is None:
            file_to_diff_lines = self.get_pr_diff_lines()
            self._diff_lines_cache = file_to_diff_lines
        diff_lines = file_to_diff_lines.get(f.replace(PADDLE_ROOT, '', 1))
        if not diff_lines:
            return False
        comment_lines = set(self.get_comment_of_file(f))
        if any(line not in comment_lines for line in diff_lines):
            return False
        print(f'PREC {f} is only comment')
        return True

    def get_all_count(self):
        """Return the number of tests listed by ``ctest -N`` in the build dir.

        Raises:
            RuntimeError: if the output has no ``Total Tests:`` line.
        """
        # cwd= instead of a "cd ... &&" shell string, so that a PADDLE_ROOT
        # containing spaces still works.
        proc = subprocess.run(
            ['ctest', '-N'],
            cwd=PADDLE_ROOT + 'build',
            stdout=subprocess.PIPE,
        )
        return self._parse_total_tests(proc.stdout)

    @staticmethod
    def _parse_total_tests(output):
        """Return the number on the last ``Total Tests:`` line of ``output``."""
        if isinstance(output, bytes):
            output = output.decode('utf-8', errors='replace')
        total = None
        for line in output.splitlines():
            if 'Total Tests:' in line:
                total = int(line.split()[-1])
        if total is None:
            raise RuntimeError("no 'Total Tests:' line in ctest -N output")
        return total

    @staticmethod
    def _name_listed_in(list_path, source_path):
        """Tell whether the stem of ``source_path`` is a line of ``list_path``.

        The stem is the base name up to its first '.'.
        """
        target = os.path.basename(source_path).split(".")[0]
        with open(list_path, 'r') as listing:
            return any(line.strip() == target for line in listing)

    def file_is_unnit_test(self, unittest_path):
        """Tell whether ``unittest_path`` names a test in ``build/all_ut_list``."""
        all_ut_file = PADDLE_ROOT + 'build/all_ut_list'
        print("PADDLE_ROOT:", PADDLE_ROOT)
        print("all_ut_file path:", all_ut_file)
        build_path = PADDLE_ROOT + 'build/'
        print("build_path:", build_path)
        return self._name_listed_in(all_ut_file, unittest_path)

    def _split_pr_files(self, file_dict):
        """Split changed files into (files to map, files filtered out)."""
        build_scripts = (
            PADDLE_ROOT + 'paddle/scripts/paddle_build.sh',
            PADDLE_ROOT + 'paddle/scripts/paddle_build.bat',
        )
        file_list = []
        filterFiles = []
        for filename, status in file_dict.items():
            if filename.startswith(PADDLE_ROOT + 'python/'):
                keep = True
            elif filename.startswith(PADDLE_ROOT + 'paddle/'):
                if filename.startswith(PADDLE_ROOT + 'paddle/scripts'):
                    keep = filename.startswith(build_scripts)
                else:
                    lowered = filename.lower()
                    keep = not any(
                        tag in lowered for tag in ('/xpu/', '/npu/', '/ipu/')
                    )
            elif filename.startswith(PADDLE_ROOT + 'test/'):
                keep = True
            elif status == 'added':
                keep = True
            else:
                keep = not self.get_is_white_file(filename)
            (file_list if keep else filterFiles).append(filename)
        return file_list, filterFiles

    def _load_prec_delta(self):
        """Download ``prec_delta`` and return its lines without line ends.

        Exits the process with status 1 when the download fails.
        """
        ret = self.__urlretrieve(
            self._PREC_BASE_URL + 'prec_delta',
            'prec_delta',
        )
        if not ret:
            print('PREC download prec_delta failed')
            sys.exit(1)
        with open('prec_delta') as delta:
            return [ut.rstrip('\r\n') for ut in delta]

    def _case_ratio(self, case_count):
        """Format ``case_count`` divided by the number of all tests."""
        return format(float(case_count) / float(self.get_all_count()), '.2f')

    def get_pr_ut(self):
        """Get unit tests in pull request.

        Returns:
            The selected test names joined by newlines, or '' when every
            case has to run: full_case is set, the PR has too many files, or
            a changed file is not covered by the map.
        """
        if self.full_case:
            return ''

        ret = self.__urlretrieve(
            self._PREC_BASE_URL + 'ut_file_map.json',
            'ut_file_map.json',
        )
        if not ret:
            print('PREC download file_ut.json failed')
            sys.exit(1)
        with open('ut_file_map.json') as jsonfile:
            file_ut_map = json.load(jsonfile)

        file_dict = self.get_pr_files()
        if len(file_dict) >= self._FULL_RUN_FILE_COUNT:
            return ''
        file_list, filterFiles = self._split_pr_files(file_dict)

        if not file_list:
            # Only filtered files changed: run just the delta cases.
            ut_list = ['filterfiles_placeholder']
            ut_list.extend(self._load_prec_delta())
            PRECISION_TEST_Cases_ratio = self._case_ratio(len(ut_list))
            print("filterFiles: %s" % filterFiles)
            print("ipipe_log_param_PRECISION_TEST: true")
            print(
                "ipipe_log_param_PRECISION_TEST_Cases_count: %s" % len(ut_list)
            )
            print(
                "ipipe_log_param_PRECISION_TEST_Cases_ratio: %s"
                % PRECISION_TEST_Cases_ratio
            )
            print(
                "The unittests in prec delta is shown as following: %s"
                % ut_list
            )
            return '\n'.join(ut_list)

        # On these platforms the map is looked up with '/paddle/' as root.
        use_default_root = (
            platform.system() in ("Darwin", "Windows")
            or self.suffix == ".py3"
        )
        ut_list = []
        notHitMapFiles = []
        hitMapFiles = {}
        for changed in file_list:
            if use_default_root:
                f_judge = changed.replace(PADDLE_ROOT, '/paddle/', 1)
                f_judge = f_judge.replace('//', '/')
            else:
                f_judge = changed
            status = file_dict[changed]

            if f_judge in file_ut_map:
                if status != 'removed' and self.is_only_comment(changed):
                    ut_list.append('comment_placeholder')
                else:
                    hitMapFiles[f_judge] = len(file_ut_map[f_judge])
                    ut_list.extend(file_ut_map[f_judge])
                continue

            if f_judge.endswith('.md'):
                ut_list.append('md_placeholder')
            elif (
                'test/xpu' in f_judge
                or 'tests/unittests/npu' in f_judge
                or 'op_npu.cc' in f_judge
            ):
                ut_list.append('xpu_npu_placeholder')
            elif f_judge.endswith(('.h', '.cu', '.cc', '.py')):
                if status == 'added':
                    # determine whether the new added file is a member of added_ut
                    path = PADDLE_ROOT + 'added_ut'
                    print("PADDLE_ROOT:", PADDLE_ROOT)
                    print("adde_ut path:", path)
                    if self._name_listed_in(path, f_judge):
                        print(
                            "Adding new unit tests not hit mapFiles: %s"
                            % f_judge
                        )
                    else:
                        notHitMapFiles.append(f_judge)
                elif status == 'removed':
                    print("remove file not hit mapFiles: %s" % f_judge)
                else:
                    only_comment = self.is_only_comment(changed)
                    if only_comment:
                        ut_list.append('comment_placeholder')
                    if self.file_is_unnit_test(f_judge):
                        ut_list.append(os.path.split(f_judge)[1].split(".")[0])
                    elif not only_comment:
                        # A comment-only change must not force a full run.
                        notHitMapFiles.append(f_judge)
            elif status != 'removed':
                notHitMapFiles.append(f_judge)
            else:
                print("remove file not hit mapFiles: %s" % f_judge)

        # Drop duplicates but keep first-seen order, so the output is stable.
        ut_list = list(dict.fromkeys(ut_list))
        if notHitMapFiles:
            print("ipipe_log_param_PRECISION_TEST: false")
            print("notHitMapFiles: %s" % notHitMapFiles)
            if filterFiles:
                print("filterFiles: %s" % filterFiles)
            return ''

        if ut_list:
            known = set(ut_list)
            for ut in self._load_prec_delta():
                if ut not in known:
                    known.add(ut)
                    ut_list.append(ut)
            print("hitMapFiles: %s" % hitMapFiles)
            print("ipipe_log_param_PRECISION_TEST: true")
            print(
                "ipipe_log_param_PRECISION_TEST_Cases_count: %s"
                % len(ut_list)
            )
            PRECISION_TEST_Cases_ratio = self._case_ratio(len(ut_list))
            print(
                "ipipe_log_param_PRECISION_TEST_Cases_ratio: %s"
                % PRECISION_TEST_Cases_ratio
            )
            if filterFiles:
                print("filterFiles: %s" % filterFiles)
        return '\n'.join(ut_list)
C
chalsliu 已提交
504 505 506 507 508


if __name__ == '__main__':
    checker = PRChecker()
    checker.init()
    # 'ut_list' is opened (and so truncated) before the selection is
    # computed; the selected test names are then written to it.
    with open('ut_list', 'w') as ut_file:
        ut_file.write(checker.get_pr_ut())