get_pr_ut.py 19.5 KB
Newer Older
C
chalsliu 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" For the PR that only modified the unit test, get cases in pull request. """

import json
17 18
import os
import platform
19
import re
20
import ssl
21
import subprocess
22
import sys
23
import time
24
import urllib.request
25 26

import requests
C
chalsliu 已提交
27 28 29
from github import Github

PADDLE_ROOT = os.getenv('PADDLE_ROOT', '/paddle/')
C
chalsliu 已提交
30 31
PADDLE_ROOT += '/'
PADDLE_ROOT = PADDLE_ROOT.replace('//', '/')
32
ssl._create_default_https_context = ssl._create_unverified_context
C
chalsliu 已提交
33 34


35
class PRChecker:
36
    """PR Checker."""
C
chalsliu 已提交
37 38

    def __init__(self):
39
        self.github = Github(os.getenv('GITHUB_API_TOKEN'), timeout=60)
C
chalsliu 已提交
40
        self.repo = self.github.get_repo('PaddlePaddle/Paddle')
41
        self.py_prog_oneline = re.compile(r'\d+\|\s*#.*')
42 43
        self.py_prog_multiline_a = re.compile('"""(.*?)"""', re.DOTALL)
        self.py_prog_multiline_b = re.compile("'''(.*?)'''", re.DOTALL)
44 45 46
        self.cc_prog_online = re.compile(r'\d+\|\s*//.*')
        self.cc_prog_multiline = re.compile(r'\d+\|\s*/\*.*?\*/', re.DOTALL)
        self.lineno_prog = re.compile(r'@@ \-\d+,\d+ \+(\d+),(\d+) @@')
C
chalsliu 已提交
47
        self.pr = None
48
        self.suffix = ''
C
chalsliu 已提交
49
        self.full_case = False
C
chalsliu 已提交
50 51

    def init(self):
52
        """Get pull request."""
C
chalsliu 已提交
53 54
        pr_id = os.getenv('GIT_PR_ID')
        if not pr_id:
55
            print('PREC No PR ID')
56
            sys.exit(0)
57 58 59
        suffix = os.getenv('PREC_SUFFIX')
        if suffix:
            self.suffix = suffix
C
chalsliu 已提交
60
        self.pr = self.repo.get_pull(int(pr_id))
C
chalsliu 已提交
61 62 63
        last_commit = None
        ix = 0
        while True:
Y
YUNSHEN XIE 已提交
64 65 66
            try:
                commits = self.pr.get_commits().get_page(ix)
                if len(commits) == 0:
67
                    raise ValueError(f"no commit found in {ix} page")
Y
YUNSHEN XIE 已提交
68 69
                last_commit = commits[-1].commit
            except Exception as e:
C
chalsliu 已提交
70
                break
Y
YUNSHEN XIE 已提交
71 72
            else:
                ix = ix + 1
73 74
        if last_commit.message.find('test=allcase') != -1:
            print('PREC test=allcase is set')
C
chalsliu 已提交
75
            self.full_case = True
C
chalsliu 已提交
76

77
    # todo: exception
78 79 80 81 82 83 84
    def __wget_with_retry(self, url):
        ix = 1
        proxy = '--no-proxy'
        while ix < 6:
            if ix // 2 == 0:
                proxy = ''
            else:
85 86 87 88
                if platform.system() == 'Windows':
                    proxy = '-Y off'
                else:
                    proxy = '--no-proxy'
89
            code = subprocess.call(
90
                f'wget -q {proxy} --no-check-certificate {url}',
91 92
                shell=True,
            )
93 94 95
            if code == 0:
                return True
            print(
96 97 98 99
                'PREC download {} error, retry {} time(s) after {} secs.[proxy_option={}]'.format(
                    url, ix, ix * 10, proxy
                )
            )
100 101 102 103
            time.sleep(ix * 10)
            ix += 1
        return False

104 105 106 107 108 109 110 111 112
    def __urlretrieve(self, url, filename):
        ix = 1
        with_proxy = urllib.request.getproxies()
        without_proxy = {'http': '', 'http': ''}
        while ix < 6:
            if ix // 2 == 0:
                cur_proxy = urllib.request.ProxyHandler(without_proxy)
            else:
                cur_proxy = urllib.request.ProxyHandler(with_proxy)
113 114 115
            opener = urllib.request.build_opener(
                cur_proxy, urllib.request.HTTPHandler
            )
116 117 118 119 120 121
            urllib.request.install_opener(opener)
            try:
                urllib.request.urlretrieve(url, filename)
            except Exception as e:
                print(e)
                print(
122 123 124 125
                    'PREC download {} error, retry {} time(s) after {} secs.[proxy_option={}]'.format(
                        url, ix, ix * 10, cur_proxy
                    )
                )
126 127 128 129 130 131 132 133
                continue
            else:
                return True
            time.sleep(ix * 10)
            ix += 1

        return False

C
chalsliu 已提交
134
    def get_pr_files(self):
135
        """Get files in pull request."""
C
chalsliu 已提交
136
        page = 0
Z
zhangchunle 已提交
137
        file_dict = {}
138
        file_count = 0
C
chalsliu 已提交
139 140 141 142 143
        while True:
            files = self.pr.get_files().get_page(page)
            if not files:
                break
            for f in files:
Z
zhangchunle 已提交
144
                file_dict[PADDLE_ROOT + f.filename] = f.status
145
                file_count += 1
146
            if file_count == 30:  # if pr file count = 31, nend to run all case
147
                break
C
chalsliu 已提交
148
            page += 1
Z
zhangchunle 已提交
149 150 151 152
        print("pr modify files: %s" % file_dict)
        return file_dict

    def get_is_white_file(self, filename):
153
        """judge is white file in pr's files."""
Z
zhangchunle 已提交
154
        isWhiteFile = False
155 156 157 158 159 160 161 162
        not_white_files = (
            PADDLE_ROOT + 'cmake/',
            PADDLE_ROOT + 'patches/',
            PADDLE_ROOT + 'tools/dockerfile/',
            PADDLE_ROOT + 'tools/windows/',
            PADDLE_ROOT + 'tools/test_runner.py',
            PADDLE_ROOT + 'tools/parallel_UT_rule.py',
        )
Z
zhangchunle 已提交
163 164
        if 'cmakelist' in filename.lower():
            isWhiteFile = False
165
        elif filename.startswith(not_white_files):
Z
zhangchunle 已提交
166 167 168 169
            isWhiteFile = False
        else:
            isWhiteFile = True
        return isWhiteFile
C
chalsliu 已提交
170

171 172 173 174 175
    def __get_comment_by_filetype(self, content, filetype):
        result = []
        if filetype == 'py':
            result = self.__get_comment_by_prog(content, self.py_prog_oneline)
            result.extend(
176 177
                self.__get_comment_by_prog(content, self.py_prog_multiline_a)
            )
178
            result.extend(
179 180
                self.__get_comment_by_prog(content, self.py_prog_multiline_b)
            )
181 182 183
        if filetype == 'cc':
            result = self.__get_comment_by_prog(content, self.cc_prog_oneline)
            result.extend(
184 185
                self.__get_comment_by_prog(content, self.cc_prog_multiline)
            )
186 187 188 189 190
        return result

    def __get_comment_by_prog(self, content, prog):
        result_list = prog.findall(content)
        if not result_list:
C
chalsliu 已提交
191 192
            return []
        result = []
193 194 195 196 197
        for u in result_list:
            result.extend(u.split('\n'))
        return result

    def get_comment_of_file(self, f):
198 199
        # content = self.repo.get_contents(f.replace(PADDLE_ROOT, ''), 'pull/').decoded_content
        # todo: get file from github
200
        with open(f, encoding="utf-8") as fd:
201 202 203 204
            lines = fd.readlines()
        lineno = 1
        inputs = ''
        for line in lines:
205 206
            # for line in content.split('\n'):
            # input += str(lineno) + '|' + line + '\n'
207 208 209 210 211 212 213 214
            inputs += str(lineno) + '|' + line
            lineno += 1
        fietype = ''
        if f.endswith('.h') or f.endswith('.cc') or f.endswith('.cu'):
            filetype = 'cc'
        if f.endswith('.py'):
            filetype = 'py'
        else:
C
chalsliu 已提交
215
            return []
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243
        return self.__get_comment_by_filetype(inputs, filetype)

    def get_pr_diff_lines(self):
        file_to_diff_lines = {}
        r = requests.get(self.pr.diff_url)
        data = r.text
        data = data.split('\n')
        ix = 0
        while ix < len(data):
            if data[ix].startswith('+++'):
                if data[ix].rstrip('\r\n') == '+++ /dev/null':
                    ix += 1
                    continue
                filename = data[ix][6:]
                ix += 1
                while ix < len(data):
                    result = self.lineno_prog.match(data[ix])
                    if not result:
                        break
                    lineno = int(result.group(1))
                    length = int(result.group(2))
                    ix += 1
                    end = ix + length
                    while ix < end:
                        if data[ix][0] == '-':
                            end += 1
                        if data[ix][0] == '+':
                            line_list = file_to_diff_lines.get(filename)
244 245 246
                            line = '{}{}'.format(
                                lineno, data[ix].replace('+', '|', 1)
                            )
247 248 249
                            if line_list:
                                line_list.append(line)
                            else:
250 251 252
                                file_to_diff_lines[filename] = [
                                    line,
                                ]
253 254 255 256 257 258 259 260 261
                        if data[ix][0] != '-':
                            lineno += 1
                        ix += 1
            ix += 1
        return file_to_diff_lines

    def is_only_comment(self, f):
        file_to_diff_lines = self.get_pr_diff_lines()
        comment_lines = self.get_comment_of_file(f)
C
chalsliu 已提交
262 263 264
        diff_lines = file_to_diff_lines.get(f.replace(PADDLE_ROOT, '', 1))
        if not diff_lines:
            return False
265 266 267
        for l in diff_lines:
            if l not in comment_lines:
                return False
268
        print(f'PREC {f} is only comment')
269 270
        return True

Z
zhangchunle 已提交
271
    def get_all_count(self):
272
        p = subprocess.Popen(
273
            f"cd {PADDLE_ROOT}build && ctest -N",
274 275 276
            shell=True,
            stdout=subprocess.PIPE,
        )
Y
YUNSHEN XIE 已提交
277 278 279 280 281
        out, err = p.communicate()
        for line in out.splitlines():
            if 'Total Tests:' in str(line):
                all_counts = line.split()[-1]
        return int(all_counts)
Z
zhangchunle 已提交
282

R
risemeup1 已提交
283
    def file_is_unnit_test(self, unittest_path):
284
        # get all testcases by ctest-N
R
risemeup1 已提交
285 286 287 288 289 290
        all_ut_file = PADDLE_ROOT + 'build/all_ut_list'
        # all_ut_file = '%s/build/all_ut_file' % PADDLE_ROOT
        print("PADDLE_ROOT:", PADDLE_ROOT)
        print("all_ut_file path:", all_ut_file)
        build_path = PADDLE_ROOT + 'build/'
        print("build_path:", build_path)
R
risemeup1 已提交
291
        (unittest_directory, unittest_name) = os.path.split(unittest_path)
292
        # determine whether filename is in all_ut_case
293
        with open(all_ut_file, 'r') as f:
R
risemeup1 已提交
294 295 296 297
            all_unittests = f.readlines()
            for test in all_unittests:
                test = test.replace('\n', '').strip()
                if test == unittest_name.split(".")[0]:
298
                    return True
R
risemeup1 已提交
299
        return False
300

C
chalsliu 已提交
301
    def get_pr_ut(self):
302
        """Get unit tests in pull request."""
C
chalsliu 已提交
303 304
        if self.full_case:
            return ''
C
chalsliu 已提交
305
        check_added_ut = False
C
chalsliu 已提交
306 307
        ut_list = []
        file_ut_map = None
Z
zhangchunle 已提交
308

309
        ret = self.__urlretrieve(
310
            'https://paddle-docker-tar.bj.bcebos.com/new_precise_test_map/ut_file_map.json',
311 312
            'ut_file_map.json',
        )
313 314
        if not ret:
            print('PREC download file_ut.json failed')
315
            sys.exit(1)
Z
zhangchunle 已提交
316

Z
zhangchunle 已提交
317
        with open('ut_file_map.json') as jsonfile:
C
chalsliu 已提交
318
            file_ut_map = json.load(jsonfile)
Z
zhangchunle 已提交
319 320 321

        current_system = platform.system()
        notHitMapFiles = []
Z
zhangchunle 已提交
322
        hitMapFiles = {}
Z
zhangchunle 已提交
323
        onlyCommentsFilesOrXpu = []
Z
zhangchunle 已提交
324 325 326
        filterFiles = []
        file_list = []
        file_dict = self.get_pr_files()
327
        if len(file_dict) == 30:  # if pr file count = 31, nend to run all case
328
            return ''
Z
zhangchunle 已提交
329
        for filename in file_dict:
Z
zhangchunle 已提交
330
            if filename.startswith(PADDLE_ROOT + 'python/'):
Z
zhangchunle 已提交
331
                file_list.append(filename)
Z
zhangchunle 已提交
332
            elif filename.startswith(PADDLE_ROOT + 'paddle/'):
333
                if filename.startswith(PADDLE_ROOT + 'paddle/infrt'):
Z
zhangchunle 已提交
334 335 336
                    filterFiles.append(filename)
                elif filename.startswith(PADDLE_ROOT + 'paddle/scripts'):
                    if filename.startswith(
337 338 339 340 341
                        (
                            PADDLE_ROOT + 'paddle/scripts/paddle_build.sh',
                            PADDLE_ROOT + 'paddle/scripts/paddle_build.bat',
                        )
                    ):
Z
zhangchunle 已提交
342 343 344
                        file_list.append(filename)
                    else:
                        filterFiles.append(filename)
R
risemeup1 已提交
345
                elif (
Z
zhangbo9674 已提交
346 347 348 349
                    ('/xpu/' in filename.lower())
                    or ('/npu/' in filename.lower())
                    or ('/mlu/' in filename.lower())
                    or ('/ipu/' in filename.lower())
R
risemeup1 已提交
350 351
                ):
                    filterFiles.append(filename)
Z
zhangchunle 已提交
352 353
                else:
                    file_list.append(filename)
Z
zhangchunle 已提交
354
            else:
355
                if file_dict[filename] == 'added':
Z
zhangchunle 已提交
356 357
                    file_list.append(filename)
                else:
358
                    isWhiteFile = self.get_is_white_file(filename)
359
                    if not isWhiteFile:
360 361 362
                        file_list.append(filename)
                    else:
                        filterFiles.append(filename)
Z
zhangchunle 已提交
363 364
        if len(file_list) == 0:
            ut_list.append('filterfiles_placeholder')
Z
zhangchunle 已提交
365
            ret = self.__urlretrieve(
366
                'https://paddle-docker-tar.bj.bcebos.com/new_precise_test_map/prec_delta',
367 368
                'prec_delta',
            )
Z
zhangchunle 已提交
369 370 371 372 373 374
            if ret:
                with open('prec_delta') as delta:
                    for ut in delta:
                        ut_list.append(ut.rstrip('\r\n'))
            else:
                print('PREC download prec_delta failed')
375
                sys.exit(1)
Z
zhangchunle 已提交
376
            PRECISION_TEST_Cases_ratio = format(
377 378
                float(len(ut_list)) / float(self.get_all_count()), '.2f'
            )
Z
zhangchunle 已提交
379 380
            print("filterFiles: %s" % filterFiles)
            print("ipipe_log_param_PRECISION_TEST: true")
381 382 383 384 385 386 387
            print(
                "ipipe_log_param_PRECISION_TEST_Cases_count: %s" % len(ut_list)
            )
            print(
                "ipipe_log_param_PRECISION_TEST_Cases_ratio: %s"
                % PRECISION_TEST_Cases_ratio
            )
R
risemeup1 已提交
388 389 390 391
            print(
                "The unittests in prec delta is shown as following: %s"
                % ut_list
            )
Z
zhangchunle 已提交
392
            return '\n'.join(ut_list)
Z
zhangchunle 已提交
393 394
        else:
            for f in file_list:
395 396 397 398 399
                if (
                    current_system == "Darwin"
                    or current_system == "Windows"
                    or self.suffix == ".py3"
                ):
Z
zhangchunle 已提交
400 401 402 403 404 405 406
                    f_judge = f.replace(PADDLE_ROOT, '/paddle/', 1)
                    f_judge = f_judge.replace('//', '/')
                else:
                    f_judge = f
                if f_judge not in file_ut_map:
                    if f_judge.endswith('.md'):
                        ut_list.append('md_placeholder')
Z
zhangchunle 已提交
407
                        onlyCommentsFilesOrXpu.append(f_judge)
408 409 410 411 412
                    elif (
                        'tests/unittests/xpu' in f_judge
                        or 'tests/unittests/npu' in f_judge
                        or 'op_npu.cc' in f_judge
                    ):
Z
zhangchunle 已提交
413 414
                        ut_list.append('xpu_npu_placeholder')
                        onlyCommentsFilesOrXpu.append(f_judge)
415
                    elif f_judge.endswith(('.h', '.cu', '.cc', '.py')):
416
                        # determine whether the new added file is a member of added_ut
417 418
                        if file_dict[f] in ['added']:
                            f_judge_in_added_ut = False
R
risemeup1 已提交
419 420 421 422 423 424 425 426 427 428 429
                            path = PADDLE_ROOT + 'added_ut'
                            print("PADDLE_ROOT:", PADDLE_ROOT)
                            print("adde_ut path:", path)
                            (unittest_directory, unittest_name) = os.path.split(
                                f_judge
                            )
                            with open(path, 'r') as f:
                                added_unittests = f.readlines()
                                for test in added_unittests:
                                    test = test.replace('\n', '').strip()
                                    if test == unittest_name.split(".")[0]:
430
                                        f_judge_in_added_ut = True
431
                            if f_judge_in_added_ut:
432 433
                                print(
                                    "Adding new unit tests not hit mapFiles: %s"
434 435
                                    % f_judge
                                )
436 437 438 439 440
                            else:
                                notHitMapFiles.append(f_judge)
                        elif file_dict[f] in ['removed']:
                            print("remove file not hit mapFiles: %s" % f_judge)
                        else:
Z
zhangchunle 已提交
441 442 443
                            if self.is_only_comment(f):
                                ut_list.append('comment_placeholder')
                                onlyCommentsFilesOrXpu.append(f_judge)
444
                            if self.file_is_unnit_test(f_judge):
R
risemeup1 已提交
445 446 447
                                ut_list.append(
                                    os.path.split(f_judge)[1].split(".")[0]
                                )
Z
zhangchunle 已提交
448 449 450
                            else:
                                notHitMapFiles.append(f_judge)
                    else:
451 452 453 454 455
                        notHitMapFiles.append(f_judge) if file_dict[
                            f
                        ] != 'removed' else print(
                            "remove file not hit mapFiles: %s" % f_judge
                        )
Z
zhangchunle 已提交
456 457
                else:
                    if file_dict[f] not in ['removed']:
Z
zhangchunle 已提交
458 459 460 461
                        if self.is_only_comment(f):
                            ut_list.append('comment_placeholder')
                            onlyCommentsFilesOrXpu.append(f_judge)
                        else:
Z
zhangchunle 已提交
462 463
                            hitMapFiles[f_judge] = len(file_ut_map[f_judge])
                            ut_list.extend(file_ut_map.get(f_judge))
464
                    else:
Z
zhangchunle 已提交
465
                        hitMapFiles[f_judge] = len(file_ut_map[f_judge])
Z
zhangchunle 已提交
466
                        ut_list.extend(file_ut_map.get(f_judge))
Z
zhangchunle 已提交
467

Z
zhangchunle 已提交
468 469 470 471
            ut_list = list(set(ut_list))
            if len(notHitMapFiles) != 0:
                print("ipipe_log_param_PRECISION_TEST: false")
                print("notHitMapFiles: %s" % notHitMapFiles)
Z
zhangchunle 已提交
472 473
                if len(filterFiles) != 0:
                    print("filterFiles: %s" % filterFiles)
Z
zhangchunle 已提交
474
                return ''
C
chalsliu 已提交
475
            else:
Z
zhangchunle 已提交
476 477
                if ut_list:
                    ret = self.__urlretrieve(
478
                        'https://paddle-docker-tar.bj.bcebos.com/new_precise_test_map/prec_delta',
479 480
                        'prec_delta',
                    )
Z
zhangchunle 已提交
481 482 483
                    if ret:
                        with open('prec_delta') as delta:
                            for ut in delta:
R
risemeup1 已提交
484 485
                                if ut not in ut_list:
                                    ut_list.append(ut.rstrip('\r\n'))
Z
zhangchunle 已提交
486 487
                    else:
                        print('PREC download prec_delta failed')
488
                        sys.exit(1)
Z
zhangchunle 已提交
489
                    print("hitMapFiles: %s" % hitMapFiles)
Z
zhangchunle 已提交
490
                    print("ipipe_log_param_PRECISION_TEST: true")
491 492 493 494
                    print(
                        "ipipe_log_param_PRECISION_TEST_Cases_count: %s"
                        % len(ut_list)
                    )
Z
zhangchunle 已提交
495
                    PRECISION_TEST_Cases_ratio = format(
496 497 498 499 500 501
                        float(len(ut_list)) / float(self.get_all_count()), '.2f'
                    )
                    print(
                        "ipipe_log_param_PRECISION_TEST_Cases_ratio: %s"
                        % PRECISION_TEST_Cases_ratio
                    )
Z
zhangchunle 已提交
502 503
                    if len(filterFiles) != 0:
                        print("filterFiles: %s" % filterFiles)
Z
zhangchunle 已提交
504
                return '\n'.join(ut_list)
C
chalsliu 已提交
505 506 507 508 509


if __name__ == '__main__':
    pr_checker = PRChecker()
    pr_checker.init()
510 511
    with open('ut_list', 'w') as f:
        f.write(pr_checker.get_pr_ut())