get_pr_ut.py 19.6 KB
Newer Older
C
chalsliu 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" For the PR that only modified the unit test, get cases in pull request. """

import json
17 18
import os
import platform
19
import re
20
import ssl
21
import subprocess
22
import sys
23
import time
24
import urllib.request
25 26

import requests
C
chalsliu 已提交
27 28 29
from github import Github

# Root directory of the Paddle checkout, taken from the environment and
# forced to end with '/' (a doubled '//' produced by the append is collapsed).
PADDLE_ROOT = (os.getenv('PADDLE_ROOT', '/paddle/') + '/').replace('//', '/')
# NOTE(review): replaces the default HTTPS context factory with the
# unverified one, i.e. certificate checks are skipped for stdlib HTTPS
# requests made by this process -- confirm this is still required.
ssl._create_default_https_context = ssl._create_unverified_context
C
chalsliu 已提交
33 34


35
class PRChecker:
36
    """PR Checker."""
C
chalsliu 已提交
37 38

    def __init__(self):
        """Create the GitHub client and compile the patterns used later.

        The token is read from the ``GITHUB_API_TOKEN`` environment
        variable. No pull request is loaded here; call ``init()`` for that.
        """
        self.github = Github(os.getenv('GITHUB_API_TOKEN'), timeout=60)
        self.repo = self.github.get_repo('PaddlePaddle/Paddle')
        # The comment patterns run on text whose lines are prefixed with
        # "<lineno>|" (see get_comment_of_file / get_pr_diff_lines).
        self.py_prog_oneline = re.compile(r'\d+\|\s*#.*')
        self.py_prog_multiline_a = re.compile('"""(.*?)"""', re.DOTALL)
        self.py_prog_multiline_b = re.compile("'''(.*?)'''", re.DOTALL)
        # The comment-extraction code reads `cc_prog_oneline`; the attribute
        # used to be created only under the misspelled name `cc_prog_online`.
        self.cc_prog_oneline = re.compile(r'\d+\|\s*//.*')
        # Misspelled name kept as an alias for backward compatibility.
        self.cc_prog_online = self.cc_prog_oneline
        self.cc_prog_multiline = re.compile(r'\d+\|\s*/\*.*?\*/', re.DOTALL)
        # Unified-diff hunk header; groups are (new start line, new length).
        self.lineno_prog = re.compile(r'@@ \-\d+,\d+ \+(\d+),(\d+) @@')
        self.pr = None  # set by init()
        self.suffix = ''  # overridden by PREC_SUFFIX in init()
        self.full_case = False  # True once "test=allcase" is requested
C
chalsliu 已提交
50 51

    def init(self):
52
        """Get pull request."""
C
chalsliu 已提交
53 54
        pr_id = os.getenv('GIT_PR_ID')
        if not pr_id:
55
            print('PREC No PR ID')
56
            sys.exit(0)
57 58 59
        suffix = os.getenv('PREC_SUFFIX')
        if suffix:
            self.suffix = suffix
C
chalsliu 已提交
60
        self.pr = self.repo.get_pull(int(pr_id))
C
chalsliu 已提交
61 62 63
        last_commit = None
        ix = 0
        while True:
Y
YUNSHEN XIE 已提交
64 65 66
            try:
                commits = self.pr.get_commits().get_page(ix)
                if len(commits) == 0:
67
                    raise ValueError(f"no commit found in {ix} page")
Y
YUNSHEN XIE 已提交
68 69
                last_commit = commits[-1].commit
            except Exception as e:
C
chalsliu 已提交
70
                break
Y
YUNSHEN XIE 已提交
71 72
            else:
                ix = ix + 1
73 74
        if last_commit.message.find('test=allcase') != -1:
            print('PREC test=allcase is set')
C
chalsliu 已提交
75
            self.full_case = True
C
chalsliu 已提交
76

77
    # todo: exception
78 79 80 81 82 83 84
    def __wget_with_retry(self, url):
        """Download ``url`` with wget, making at most five attempts.

        The first attempt passes no proxy option; every later attempt passes
        the option that disables proxies ('-Y off' on Windows, '--no-proxy'
        elsewhere). After a failed attempt number k the method sleeps
        k * 10 seconds.

        Returns:
            True as soon as wget exits with status 0, otherwise False.
        """
        for attempt in range(1, 6):
            # attempt // 2 is 0 only for the very first attempt.
            if attempt // 2 == 0:
                proxy = ''
            elif platform.system() == 'Windows':
                proxy = '-Y off'
            else:
                proxy = '--no-proxy'
            status = subprocess.call(
                f'wget -q {proxy} --no-check-certificate {url}',
                shell=True,
            )
            if status == 0:
                return True
            delay = attempt * 10
            print(
                'PREC download {} error, retry {} time(s) after {} secs.[proxy_option={}]'.format(
                    url, attempt, delay, proxy
                )
            )
            time.sleep(delay)
        return False

104 105 106 107 108 109 110 111 112
    def __urlretrieve(self, url, filename):
        ix = 1
        with_proxy = urllib.request.getproxies()
        without_proxy = {'http': '', 'http': ''}
        while ix < 6:
            if ix // 2 == 0:
                cur_proxy = urllib.request.ProxyHandler(without_proxy)
            else:
                cur_proxy = urllib.request.ProxyHandler(with_proxy)
113 114 115
            opener = urllib.request.build_opener(
                cur_proxy, urllib.request.HTTPHandler
            )
116 117 118 119 120 121
            urllib.request.install_opener(opener)
            try:
                urllib.request.urlretrieve(url, filename)
            except Exception as e:
                print(e)
                print(
122 123 124 125
                    'PREC download {} error, retry {} time(s) after {} secs.[proxy_option={}]'.format(
                        url, ix, ix * 10, cur_proxy
                    )
                )
126 127 128 129 130 131 132 133
                continue
            else:
                return True
            time.sleep(ix * 10)
            ix += 1

        return False

C
chalsliu 已提交
134
    def get_pr_files(self):
        """Get files in pull request.

        Pages through the pull request's file list and stops when a page is
        empty or when the running total is exactly 30 after a page.

        Returns:
            dict mapping ``PADDLE_ROOT + filename`` to the file's status.
        """
        status_by_path = {}
        collected = 0
        page_no = 0
        while True:
            page_files = self.pr.get_files().get_page(page_no)
            if not page_files:
                break
            for changed in page_files:
                status_by_path[PADDLE_ROOT + changed.filename] = changed.status
                collected += 1
            # Callers treat a result of 30 entries as "run all cases".
            if collected == 30:
                break
            page_no += 1
        print("pr modify files: %s" % status_by_path)
        return status_by_path

    def get_is_white_file(self, filename):
        """Tell whether ``filename`` is on the white list.

        A file is NOT white when its name contains 'cmakelist'
        (case-insensitive) or when it starts with one of the excluded
        prefixes below; every other file is white.
        """
        excluded_prefixes = tuple(
            PADDLE_ROOT + relative
            for relative in (
                'cmake/',
                'patches/',
                'tools/dockerfile/',
                'tools/windows/',
                'tools/test_runner.py',
                'tools/parallel_UT_rule.py',
            )
        )
        if 'cmakelist' in filename.lower():
            return False
        return not filename.startswith(excluded_prefixes)
C
chalsliu 已提交
170

171 172 173 174 175
    def __get_comment_by_filetype(self, content, filetype):
        result = []
        if filetype == 'py':
            result = self.__get_comment_by_prog(content, self.py_prog_oneline)
            result.extend(
176 177
                self.__get_comment_by_prog(content, self.py_prog_multiline_a)
            )
178
            result.extend(
179 180
                self.__get_comment_by_prog(content, self.py_prog_multiline_b)
            )
181 182 183
        if filetype == 'cc':
            result = self.__get_comment_by_prog(content, self.cc_prog_oneline)
            result.extend(
184 185
                self.__get_comment_by_prog(content, self.cc_prog_multiline)
            )
186 187 188 189 190
        return result

    def __get_comment_by_prog(self, content, prog):
        result_list = prog.findall(content)
        if not result_list:
C
chalsliu 已提交
191 192
            return []
        result = []
193 194 195 196 197
        for u in result_list:
            result.extend(u.split('\n'))
        return result

    def get_comment_of_file(self, f):
198 199
        # content = self.repo.get_contents(f.replace(PADDLE_ROOT, ''), 'pull/').decoded_content
        # todo: get file from github
200
        with open(f, encoding="utf-8") as fd:
201 202 203 204
            lines = fd.readlines()
        lineno = 1
        inputs = ''
        for line in lines:
205 206
            # for line in content.split('\n'):
            # input += str(lineno) + '|' + line + '\n'
207 208 209 210 211 212 213 214
            inputs += str(lineno) + '|' + line
            lineno += 1
        fietype = ''
        if f.endswith('.h') or f.endswith('.cc') or f.endswith('.cu'):
            filetype = 'cc'
        if f.endswith('.py'):
            filetype = 'py'
        else:
C
chalsliu 已提交
215
            return []
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243
        return self.__get_comment_by_filetype(inputs, filetype)

    def get_pr_diff_lines(self):
        """Download the PR diff and collect the added lines of every file.

        Returns:
            dict mapping a file name (the text after the first six
            characters of its '+++' header line) to a list of strings
            "<lineno>|<added text>", where lineno is the line number in the
            new version of the file.

        NOTE(review): hunk headers that omit a count (e.g. "@@ -1 +1 @@")
        do not match ``self.lineno_prog`` and are therefore skipped.
        """
        file_to_diff_lines = {}
        # A timeout keeps a stalled connection from hanging the job.
        r = requests.get(self.pr.diff_url, timeout=60)
        data = r.text.split('\n')
        total = len(data)
        ix = 0
        while ix < total:
            if data[ix].startswith('+++'):
                # Deleted files have no new content to inspect.
                if data[ix].rstrip('\r\n') == '+++ /dev/null':
                    ix += 1
                    continue
                filename = data[ix][6:]
                ix += 1
                # Consume consecutive hunks that belong to this file.
                while ix < total:
                    result = self.lineno_prog.match(data[ix])
                    if not result:
                        break
                    lineno = int(result.group(1))
                    length = int(result.group(2))
                    ix += 1
                    # `length` counts new-file lines only, so each removed
                    # line pushes the end of the hunk one line further.
                    end = ix + length
                    while ix < end and ix < total:
                        row = data[ix]
                        if row.startswith('-'):
                            end += 1
                        else:
                            if row.startswith('+'):
                                file_to_diff_lines.setdefault(
                                    filename, []
                                ).append(
                                    '{}{}'.format(
                                        lineno, row.replace('+', '|', 1)
                                    )
                                )
                            lineno += 1
                        ix += 1
            ix += 1
        return file_to_diff_lines

    def is_only_comment(self, f):
        """Tell whether every line the PR adds to ``f`` is a comment line.

        Args:
            f: local path of the file; the first ``PADDLE_ROOT`` occurrence
                is stripped to obtain the key used in the diff mapping.

        Returns:
            False when the file has no added lines in the diff or when any
            added line is not among the file's comment lines, else True.
        """
        diff_by_file = self.get_pr_diff_lines()
        comment_lines = self.get_comment_of_file(f)
        added_lines = diff_by_file.get(f.replace(PADDLE_ROOT, '', 1))
        if not added_lines:
            return False
        if any(added not in comment_lines for added in added_lines):
            return False
        print(f'PREC {f} is only comment')
        return True

Z
zhangchunle 已提交
271
    def get_all_count(self):
        """Return the total number of tests reported by ``ctest -N``.

        The command runs in ``PADDLE_ROOT + 'build'``; the count is the last
        token of the last output line that contains "Total Tests:".

        Raises:
            RuntimeError: if no "Total Tests:" line is present in the output.
        """
        p = subprocess.Popen(
            f"cd {PADDLE_ROOT}build && ctest -N",
            shell=True,
            stdout=subprocess.PIPE,
        )
        out, _ = p.communicate()
        all_counts = None
        for line in out.decode(errors='replace').splitlines():
            if 'Total Tests:' in line:
                all_counts = line.split()[-1]
        if all_counts is None:
            # Fail with a clear message rather than an unbound-variable error.
            raise RuntimeError(
                "'Total Tests:' not found in the output of ctest -N"
            )
        return int(all_counts)
Z
zhangchunle 已提交
282

R
risemeup1 已提交
283
    def file_is_unnit_test(self, unittest_path):
        """Tell whether ``unittest_path`` names a known unit test.

        The base name of the path, cut at its first '.', is compared with
        each line of ``PADDLE_ROOT + 'build/all_ut_list'``.
        """
        all_ut_file = PADDLE_ROOT + 'build/all_ut_list'
        print("PADDLE_ROOT:", PADDLE_ROOT)
        print("all_ut_file path:", all_ut_file)
        build_path = PADDLE_ROOT + 'build/'
        print("build_path:", build_path)
        target_name = os.path.split(unittest_path)[1].split(".")[0]
        with open(all_ut_file, 'r') as ut_file:
            known_tests = ut_file.readlines()
        return any(
            entry.replace('\n', '').strip() == target_name
            for entry in known_tests
        )
300

C
chalsliu 已提交
301
    def get_pr_ut(self):
302
        """Get unit tests in pull request."""
C
chalsliu 已提交
303 304
        if self.full_case:
            return ''
C
chalsliu 已提交
305
        check_added_ut = False
C
chalsliu 已提交
306 307
        ut_list = []
        file_ut_map = None
Z
zhangchunle 已提交
308

309
        ret = self.__urlretrieve(
310
            'https://paddle-docker-tar.bj.bcebos.com/new_precise_test_map/ut_file_map.json',
311 312
            'ut_file_map.json',
        )
313 314
        if not ret:
            print('PREC download file_ut.json failed')
315
            sys.exit(1)
Z
zhangchunle 已提交
316

Z
zhangchunle 已提交
317
        with open('ut_file_map.json') as jsonfile:
C
chalsliu 已提交
318
            file_ut_map = json.load(jsonfile)
Z
zhangchunle 已提交
319 320 321

        current_system = platform.system()
        notHitMapFiles = []
Z
zhangchunle 已提交
322
        hitMapFiles = {}
Z
zhangchunle 已提交
323
        onlyCommentsFilesOrXpu = []
Z
zhangchunle 已提交
324 325 326
        filterFiles = []
        file_list = []
        file_dict = self.get_pr_files()
327
        if len(file_dict) == 30:  # if pr file count = 31, nend to run all case
328
            return ''
Z
zhangchunle 已提交
329
        for filename in file_dict:
Z
zhangchunle 已提交
330
            if filename.startswith(PADDLE_ROOT + 'python/'):
Z
zhangchunle 已提交
331
                file_list.append(filename)
Z
zhangchunle 已提交
332
            elif filename.startswith(PADDLE_ROOT + 'paddle/'):
333
                if filename.startswith(PADDLE_ROOT + 'paddle/infrt'):
Z
zhangchunle 已提交
334 335 336
                    filterFiles.append(filename)
                elif filename.startswith(PADDLE_ROOT + 'paddle/scripts'):
                    if filename.startswith(
337 338 339 340 341
                        (
                            PADDLE_ROOT + 'paddle/scripts/paddle_build.sh',
                            PADDLE_ROOT + 'paddle/scripts/paddle_build.bat',
                        )
                    ):
Z
zhangchunle 已提交
342 343 344
                        file_list.append(filename)
                    else:
                        filterFiles.append(filename)
R
risemeup1 已提交
345
                elif (
Z
zhangbo9674 已提交
346 347 348 349
                    ('/xpu/' in filename.lower())
                    or ('/npu/' in filename.lower())
                    or ('/mlu/' in filename.lower())
                    or ('/ipu/' in filename.lower())
R
risemeup1 已提交
350 351
                ):
                    filterFiles.append(filename)
Z
zhangchunle 已提交
352 353
                else:
                    file_list.append(filename)
R
risemeup1 已提交
354 355
            elif filename.startswith(PADDLE_ROOT + 'test/'):
                file_list.append(filename)
Z
zhangchunle 已提交
356
            else:
357
                if file_dict[filename] == 'added':
Z
zhangchunle 已提交
358 359
                    file_list.append(filename)
                else:
360
                    isWhiteFile = self.get_is_white_file(filename)
361
                    if not isWhiteFile:
362 363 364
                        file_list.append(filename)
                    else:
                        filterFiles.append(filename)
Z
zhangchunle 已提交
365 366
        if len(file_list) == 0:
            ut_list.append('filterfiles_placeholder')
Z
zhangchunle 已提交
367
            ret = self.__urlretrieve(
368
                'https://paddle-docker-tar.bj.bcebos.com/new_precise_test_map/prec_delta',
369 370
                'prec_delta',
            )
Z
zhangchunle 已提交
371 372 373 374 375 376
            if ret:
                with open('prec_delta') as delta:
                    for ut in delta:
                        ut_list.append(ut.rstrip('\r\n'))
            else:
                print('PREC download prec_delta failed')
377
                sys.exit(1)
Z
zhangchunle 已提交
378
            PRECISION_TEST_Cases_ratio = format(
379 380
                float(len(ut_list)) / float(self.get_all_count()), '.2f'
            )
Z
zhangchunle 已提交
381 382
            print("filterFiles: %s" % filterFiles)
            print("ipipe_log_param_PRECISION_TEST: true")
383 384 385 386 387 388 389
            print(
                "ipipe_log_param_PRECISION_TEST_Cases_count: %s" % len(ut_list)
            )
            print(
                "ipipe_log_param_PRECISION_TEST_Cases_ratio: %s"
                % PRECISION_TEST_Cases_ratio
            )
R
risemeup1 已提交
390 391 392 393
            print(
                "The unittests in prec delta is shown as following: %s"
                % ut_list
            )
Z
zhangchunle 已提交
394
            return '\n'.join(ut_list)
Z
zhangchunle 已提交
395 396
        else:
            for f in file_list:
397 398 399 400 401
                if (
                    current_system == "Darwin"
                    or current_system == "Windows"
                    or self.suffix == ".py3"
                ):
Z
zhangchunle 已提交
402 403 404 405 406 407 408
                    f_judge = f.replace(PADDLE_ROOT, '/paddle/', 1)
                    f_judge = f_judge.replace('//', '/')
                else:
                    f_judge = f
                if f_judge not in file_ut_map:
                    if f_judge.endswith('.md'):
                        ut_list.append('md_placeholder')
Z
zhangchunle 已提交
409
                        onlyCommentsFilesOrXpu.append(f_judge)
410 411 412 413 414
                    elif (
                        'tests/unittests/xpu' in f_judge
                        or 'tests/unittests/npu' in f_judge
                        or 'op_npu.cc' in f_judge
                    ):
Z
zhangchunle 已提交
415 416
                        ut_list.append('xpu_npu_placeholder')
                        onlyCommentsFilesOrXpu.append(f_judge)
417
                    elif f_judge.endswith(('.h', '.cu', '.cc', '.py')):
418
                        # determine whether the new added file is a member of added_ut
419 420
                        if file_dict[f] in ['added']:
                            f_judge_in_added_ut = False
R
risemeup1 已提交
421 422 423 424 425 426 427 428 429 430 431
                            path = PADDLE_ROOT + 'added_ut'
                            print("PADDLE_ROOT:", PADDLE_ROOT)
                            print("adde_ut path:", path)
                            (unittest_directory, unittest_name) = os.path.split(
                                f_judge
                            )
                            with open(path, 'r') as f:
                                added_unittests = f.readlines()
                                for test in added_unittests:
                                    test = test.replace('\n', '').strip()
                                    if test == unittest_name.split(".")[0]:
432
                                        f_judge_in_added_ut = True
433
                            if f_judge_in_added_ut:
434 435
                                print(
                                    "Adding new unit tests not hit mapFiles: %s"
436 437
                                    % f_judge
                                )
438 439 440 441 442
                            else:
                                notHitMapFiles.append(f_judge)
                        elif file_dict[f] in ['removed']:
                            print("remove file not hit mapFiles: %s" % f_judge)
                        else:
Z
zhangchunle 已提交
443 444 445
                            if self.is_only_comment(f):
                                ut_list.append('comment_placeholder')
                                onlyCommentsFilesOrXpu.append(f_judge)
446
                            if self.file_is_unnit_test(f_judge):
R
risemeup1 已提交
447 448 449
                                ut_list.append(
                                    os.path.split(f_judge)[1].split(".")[0]
                                )
Z
zhangchunle 已提交
450 451 452
                            else:
                                notHitMapFiles.append(f_judge)
                    else:
453 454 455 456 457
                        notHitMapFiles.append(f_judge) if file_dict[
                            f
                        ] != 'removed' else print(
                            "remove file not hit mapFiles: %s" % f_judge
                        )
Z
zhangchunle 已提交
458 459
                else:
                    if file_dict[f] not in ['removed']:
Z
zhangchunle 已提交
460 461 462 463
                        if self.is_only_comment(f):
                            ut_list.append('comment_placeholder')
                            onlyCommentsFilesOrXpu.append(f_judge)
                        else:
Z
zhangchunle 已提交
464 465
                            hitMapFiles[f_judge] = len(file_ut_map[f_judge])
                            ut_list.extend(file_ut_map.get(f_judge))
466
                    else:
Z
zhangchunle 已提交
467
                        hitMapFiles[f_judge] = len(file_ut_map[f_judge])
Z
zhangchunle 已提交
468
                        ut_list.extend(file_ut_map.get(f_judge))
Z
zhangchunle 已提交
469

Z
zhangchunle 已提交
470 471 472 473
            ut_list = list(set(ut_list))
            if len(notHitMapFiles) != 0:
                print("ipipe_log_param_PRECISION_TEST: false")
                print("notHitMapFiles: %s" % notHitMapFiles)
Z
zhangchunle 已提交
474 475
                if len(filterFiles) != 0:
                    print("filterFiles: %s" % filterFiles)
Z
zhangchunle 已提交
476
                return ''
C
chalsliu 已提交
477
            else:
Z
zhangchunle 已提交
478 479
                if ut_list:
                    ret = self.__urlretrieve(
480
                        'https://paddle-docker-tar.bj.bcebos.com/new_precise_test_map/prec_delta',
481 482
                        'prec_delta',
                    )
Z
zhangchunle 已提交
483 484 485
                    if ret:
                        with open('prec_delta') as delta:
                            for ut in delta:
R
risemeup1 已提交
486 487
                                if ut not in ut_list:
                                    ut_list.append(ut.rstrip('\r\n'))
Z
zhangchunle 已提交
488 489
                    else:
                        print('PREC download prec_delta failed')
490
                        sys.exit(1)
Z
zhangchunle 已提交
491
                    print("hitMapFiles: %s" % hitMapFiles)
Z
zhangchunle 已提交
492
                    print("ipipe_log_param_PRECISION_TEST: true")
493 494 495 496
                    print(
                        "ipipe_log_param_PRECISION_TEST_Cases_count: %s"
                        % len(ut_list)
                    )
Z
zhangchunle 已提交
497
                    PRECISION_TEST_Cases_ratio = format(
498 499 500 501 502 503
                        float(len(ut_list)) / float(self.get_all_count()), '.2f'
                    )
                    print(
                        "ipipe_log_param_PRECISION_TEST_Cases_ratio: %s"
                        % PRECISION_TEST_Cases_ratio
                    )
Z
zhangchunle 已提交
504 505
                    if len(filterFiles) != 0:
                        print("filterFiles: %s" % filterFiles)
Z
zhangchunle 已提交
506
                return '\n'.join(ut_list)
C
chalsliu 已提交
507 508 509 510 511


if __name__ == '__main__':
    checker = PRChecker()
    checker.init()
    # The output file is opened before the selection runs, so 'ut_list'
    # exists (possibly empty) even if get_pr_ut() exits the process.
    with open('ut_list', 'w') as ut_file:
        ut_file.write(checker.get_pr_ut())