get_pr_ut.py 19.2 KB
Newer Older
C
chalsliu 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" For the PR that only modified the unit test, get cases in pull request. """

import json
17 18
import os
import platform
19
import re
20
import ssl
21
import subprocess
22
import sys
23
import time
24
import urllib.request
25 26

import requests
C
chalsliu 已提交
27 28 29
from github import Github

# Root of the Paddle checkout, taken from the PADDLE_ROOT environment variable
# (default '/paddle/'). A slash is appended and each '//' pair is then
# collapsed, so the value always ends with a slash.
PADDLE_ROOT = (os.getenv('PADDLE_ROOT', '/paddle/') + '/').replace('//', '/')

# NOTE(review): this replaces the default HTTPS context factory with the
# unverified one, i.e. certificate checks are skipped by stdlib HTTPS clients
# that rely on the default context. Confirm this is still intended.
ssl._create_default_https_context = ssl._create_unverified_context
C
chalsliu 已提交
33 34


35
class PRChecker:
36
    """PR Checker."""
C
chalsliu 已提交
37 38

    def __init__(self):
        """Create the GitHub client and compile the regexes used later.

        The API token is read from the ``GITHUB_API_TOKEN`` environment
        variable and the ``PaddlePaddle/Paddle`` repository is looked up
        right away.
        """
        api_token = os.getenv('GITHUB_API_TOKEN')
        self.github = Github(api_token, timeout=60)
        self.repo = self.github.get_repo('PaddlePaddle/Paddle')

        # The comment patterns are applied to text in which every line has
        # been prefixed with "<lineno>|".
        dotall = re.DOTALL
        self.py_prog_oneline = re.compile(r'\d+\|\s*#.*')
        self.py_prog_multiline_a = re.compile('"""(.*?)"""', dotall)
        self.py_prog_multiline_b = re.compile("'''(.*?)'''", dotall)
        # NOTE(review): spelled "online", unlike py_prog_oneline above.
        self.cc_prog_online = re.compile(r'\d+\|\s*//.*')
        self.cc_prog_multiline = re.compile(r'\d+\|\s*/\*.*?\*/', dotall)
        # Unified-diff hunk header; captures the new-file start line and
        # the new-file line count.
        self.lineno_prog = re.compile(r'@@ \-\d+,\d+ \+(\d+),(\d+) @@')

        # Filled in by init().
        self.pr = None
        self.suffix = ''
        self.full_case = False
C
chalsliu 已提交
50 51

    def init(self):
52
        """Get pull request."""
C
chalsliu 已提交
53 54
        pr_id = os.getenv('GIT_PR_ID')
        if not pr_id:
55
            print('PREC No PR ID')
56
            sys.exit(0)
57 58 59
        suffix = os.getenv('PREC_SUFFIX')
        if suffix:
            self.suffix = suffix
C
chalsliu 已提交
60
        self.pr = self.repo.get_pull(int(pr_id))
C
chalsliu 已提交
61 62 63
        last_commit = None
        ix = 0
        while True:
Y
YUNSHEN XIE 已提交
64 65 66
            try:
                commits = self.pr.get_commits().get_page(ix)
                if len(commits) == 0:
67
                    raise ValueError(f"no commit found in {ix} page")
Y
YUNSHEN XIE 已提交
68 69
                last_commit = commits[-1].commit
            except Exception as e:
C
chalsliu 已提交
70
                break
Y
YUNSHEN XIE 已提交
71 72
            else:
                ix = ix + 1
73 74
        if last_commit.message.find('test=allcase') != -1:
            print('PREC test=allcase is set')
C
chalsliu 已提交
75
            self.full_case = True
C
chalsliu 已提交
76

77
    # todo: exception
78 79 80 81 82 83 84
    def __wget_with_retry(self, url):
        """Download ``url`` with ``wget``, trying up to five times.

        After each failure the method waits ``attempt * 10`` seconds.

        Returns:
            bool: True as soon as wget exits with status 0, False when all
            five attempts failed.
        """
        for attempt in range(1, 6):
            # NOTE(review): ``attempt // 2 == 0`` is only true for the first
            # attempt; if alternating proxy settings was intended this
            # should probably be a modulo test.
            if attempt // 2 == 0:
                proxy = ''
            elif platform.system() == 'Windows':
                proxy = '-Y off'
            else:
                proxy = '--no-proxy'
            command = f'wget -q {proxy} --no-check-certificate {url}'
            if subprocess.call(command, shell=True) == 0:
                return True
            wait_secs = attempt * 10
            print(
                'PREC download {} error, retry {} time(s) after {} secs.[proxy_option={}]'.format(
                    url, attempt, wait_secs, proxy
                )
            )
            time.sleep(wait_secs)
        return False

104 105 106 107 108 109 110 111 112
    def __urlretrieve(self, url, filename):
        ix = 1
        with_proxy = urllib.request.getproxies()
        without_proxy = {'http': '', 'http': ''}
        while ix < 6:
            if ix // 2 == 0:
                cur_proxy = urllib.request.ProxyHandler(without_proxy)
            else:
                cur_proxy = urllib.request.ProxyHandler(with_proxy)
113 114 115
            opener = urllib.request.build_opener(
                cur_proxy, urllib.request.HTTPHandler
            )
116 117 118 119 120 121
            urllib.request.install_opener(opener)
            try:
                urllib.request.urlretrieve(url, filename)
            except Exception as e:
                print(e)
                print(
122 123 124 125
                    'PREC download {} error, retry {} time(s) after {} secs.[proxy_option={}]'.format(
                        url, ix, ix * 10, cur_proxy
                    )
                )
126 127 128 129 130 131 132 133
                continue
            else:
                return True
            time.sleep(ix * 10)
            ix += 1

        return False

C
chalsliu 已提交
134
    def get_pr_files(self):
        """Get files in pull request.

        Walks the pages of ``self.pr.get_files()`` and records each file.
        Collection stops at the first empty page, or when the running total
        is exactly 30 at the end of a page.

        Returns:
            dict: ``PADDLE_ROOT + filename`` mapped to the file's status.
        """
        statuses = {}
        collected = 0
        page_no = 0
        files = self.pr.get_files().get_page(page_no)
        while files:
            for entry in files:
                statuses[PADDLE_ROOT + entry.filename] = entry.status
            collected += len(files)
            # get_pr_ut() treats a result of exactly 30 entries as the signal
            # to give up on precise selection, so stop collecting here.
            if collected == 30:
                break
            page_no += 1
            files = self.pr.get_files().get_page(page_no)
        print("pr modify files: %s" % statuses)
        return statuses

    def get_is_white_file(self, filename):
        """Tell whether ``filename`` counts as a white file.

        Returns:
            bool: False when the name contains ``cmakelist`` (case
            insensitive) or starts with one of the excluded paths below,
            True otherwise.
        """
        excluded_prefixes = (
            PADDLE_ROOT + 'cmake/',
            PADDLE_ROOT + 'patches/',
            PADDLE_ROOT + 'tools/dockerfile/',
            PADDLE_ROOT + 'tools/windows/',
            PADDLE_ROOT + 'tools/test_runner.py',
            PADDLE_ROOT + 'tools/parallel_UT_rule.py',
        )
        if 'cmakelist' in filename.lower():
            return False
        return not filename.startswith(excluded_prefixes)
C
chalsliu 已提交
170

171 172 173 174 175
    def __get_comment_by_filetype(self, content, filetype):
        result = []
        if filetype == 'py':
            result = self.__get_comment_by_prog(content, self.py_prog_oneline)
            result.extend(
176 177
                self.__get_comment_by_prog(content, self.py_prog_multiline_a)
            )
178
            result.extend(
179 180
                self.__get_comment_by_prog(content, self.py_prog_multiline_b)
            )
181 182 183
        if filetype == 'cc':
            result = self.__get_comment_by_prog(content, self.cc_prog_oneline)
            result.extend(
184 185
                self.__get_comment_by_prog(content, self.cc_prog_multiline)
            )
186 187 188 189 190
        return result

    def __get_comment_by_prog(self, content, prog):
        result_list = prog.findall(content)
        if not result_list:
C
chalsliu 已提交
191 192
            return []
        result = []
193 194 195 196 197
        for u in result_list:
            result.extend(u.split('\n'))
        return result

    def get_comment_of_file(self, f):
198 199
        # content = self.repo.get_contents(f.replace(PADDLE_ROOT, ''), 'pull/').decoded_content
        # todo: get file from github
200
        with open(f, encoding="utf-8") as fd:
201 202 203 204
            lines = fd.readlines()
        lineno = 1
        inputs = ''
        for line in lines:
205 206
            # for line in content.split('\n'):
            # input += str(lineno) + '|' + line + '\n'
207 208 209 210 211 212 213 214
            inputs += str(lineno) + '|' + line
            lineno += 1
        fietype = ''
        if f.endswith('.h') or f.endswith('.cc') or f.endswith('.cu'):
            filetype = 'cc'
        if f.endswith('.py'):
            filetype = 'py'
        else:
C
chalsliu 已提交
215
            return []
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243
        return self.__get_comment_by_filetype(inputs, filetype)

    def get_pr_diff_lines(self):
        """Map each file changed by the pull request to its added lines.

        Downloads ``self.pr.diff_url`` and parses it as a unified diff.

        Returns:
            dict: the path taken from the ``+++`` header (its first six
            characters removed) mapped to a list of
            ``"<new line number>|<text>"`` strings, one per added line.

        Note:
            A non-empty result is kept on ``self._pr_diff_lines`` and
            returned again by later calls, so the diff is not downloaded
            once per inspected file. Callers must not modify the result.
        """
        cached = getattr(self, '_pr_diff_lines', None)
        if cached:
            return cached

        # A timeout keeps a stalled connection from hanging the job; 60 s
        # matches the GitHub client configured in __init__.
        r = requests.get(self.pr.diff_url, timeout=60)
        data = r.text.split('\n')
        total = len(data)
        file_to_diff_lines = {}
        ix = 0
        while ix < total:
            header = data[ix]
            ix += 1
            if not header.startswith('+++'):
                continue
            if header.rstrip('\r\n') == '+++ /dev/null':
                # Deleted file: there are no added lines to record.
                continue
            filename = header[6:]
            # Consume every hunk that follows this file header.
            while ix < total:
                result = self.lineno_prog.match(data[ix])
                if not result:
                    break
                lineno = int(result.group(1))
                # Number of new-file lines (context + added) still expected.
                remaining = int(result.group(2))
                ix += 1
                while ix < total:
                    line = data[ix]
                    # Slicing instead of indexing tolerates empty lines.
                    marker = line[:1]
                    if marker in ('-', '\\'):
                        # Removed lines and "\ No newline at end of file"
                        # markers do not exist in the new file. They are
                        # skipped even after the last counted line, so
                        # deletions at the end of a hunk no longer stop the
                        # scan of the hunks that follow.
                        ix += 1
                        continue
                    if remaining == 0:
                        break
                    if marker == '+':
                        file_to_diff_lines.setdefault(filename, []).append(
                            '{}{}'.format(lineno, line.replace('+', '|', 1))
                        )
                    lineno += 1
                    remaining -= 1
                    ix += 1
        self._pr_diff_lines = file_to_diff_lines
        return file_to_diff_lines

    def is_only_comment(self, f):
        """Check whether the PR only added comment lines to file ``f``.

        Returns:
            bool: True when the diff has added lines for ``f`` and every one
            of them is among the file's comment lines, False otherwise
            (including when the diff has no added lines for ``f``).
        """
        diffs_by_file = self.get_pr_diff_lines()
        comment_lines = self.get_comment_of_file(f)
        # The diff is keyed by repository-relative paths.
        relative_path = f.replace(PADDLE_ROOT, '', 1)
        added_lines = diffs_by_file.get(relative_path)
        if not added_lines:
            return False
        if any(added not in comment_lines for added in added_lines):
            return False
        print(f'PREC {f} is only comment')
        return True

Z
zhangchunle 已提交
271
    def get_all_count(self):
        """Return the number of tests that ``ctest -N`` lists.

        ``ctest -N`` is run inside ``PADDLE_ROOT + 'build'`` and the last
        token of the last output line containing ``Total Tests:`` is used.

        Returns:
            int: the reported total.

        Raises:
            RuntimeError: if ctest cannot be started or its output has no
                ``Total Tests:`` line (previously an UnboundLocalError).
        """
        build_dir = PADDLE_ROOT + 'build'
        try:
            # Argument list plus cwd replaces the former shell string
            # "cd ... && ctest -N", so odd characters in the path are safe.
            completed = subprocess.run(
                ['ctest', '-N'],
                cwd=build_dir,
                stdout=subprocess.PIPE,
            )
        except OSError as err:
            raise RuntimeError(
                f'PREC cannot run ctest in {build_dir}'
            ) from err

        all_counts = None
        output = completed.stdout.decode('utf-8', errors='replace')
        for line in output.splitlines():
            if 'Total Tests:' in line:
                all_counts = line.split()[-1]
        if all_counts is None:
            raise RuntimeError(
                f'PREC no "Total Tests:" line in ctest output from {build_dir}'
            )
        return int(all_counts)
Z
zhangchunle 已提交
282

R
risemeup1 已提交
283
    def file_is_unnit_test(self, unittest_path):
        """Check whether ``unittest_path`` names a known unit test.

        The file name without its directory, cut at the first dot, is
        compared with every line of ``PADDLE_ROOT + 'build/all_ut_list'``.

        Returns:
            bool: True when a line of that list equals the name.
        """
        all_ut_file = PADDLE_ROOT + 'build/all_ut_list'
        print("PADDLE_ROOT:", PADDLE_ROOT)
        print("all_ut_file path:", all_ut_file)
        build_path = PADDLE_ROOT + 'build/'
        print("build_path:", build_path)
        target_name = os.path.split(unittest_path)[1].split(".")[0]
        with open(all_ut_file, 'r') as ut_file:
            return any(
                entry.replace('\n', '').strip() == target_name
                for entry in ut_file.readlines()
            )
300

C
chalsliu 已提交
301
    def get_pr_ut(self):
        """Get unit tests in pull request.

        Downloads the file-to-unit-test map, sorts the changed files into
        those that are inspected and those that are filtered out, and
        builds the list of unit tests (plus placeholder entries) for the
        inspected files. The names listed in ``prec_delta`` are added too.

        Returns:
            str: the selected names joined by newlines. An empty string is
            returned when ``self.full_case`` is set, when exactly 30 changed
            files were collected, or when some inspected file could not be
            mapped to unit tests.
        """
        if self.full_case:
            return ''
        ut_list = []

        ret = self.__urlretrieve(
            'https://paddle-docker-tar.bj.bcebos.com/new_precise_test_map/ut_file_map.json',
            'ut_file_map.json',
        )
        if not ret:
            print('PREC download file_ut.json failed')
            sys.exit(1)

        with open('ut_file_map.json') as jsonfile:
            file_ut_map = json.load(jsonfile)

        current_system = platform.system()
        notHitMapFiles = []
        hitMapFiles = {}
        onlyCommentsFilesOrXpu = []
        filterFiles = []
        file_list = []
        file_dict = self.get_pr_files()
        # get_pr_files() stops collecting once it holds 30 files, so this
        # size means the PR may contain more files than were listed.
        if len(file_dict) == 30:
            return ''

        # Decide which changed files are inspected and which are filtered.
        for filename in file_dict:
            if filename.startswith(PADDLE_ROOT + 'python/'):
                file_list.append(filename)
            elif filename.startswith(PADDLE_ROOT + 'paddle/'):
                if filename.startswith(PADDLE_ROOT + 'paddle/scripts'):
                    if filename.startswith(
                        (
                            PADDLE_ROOT + 'paddle/scripts/paddle_build.sh',
                            PADDLE_ROOT + 'paddle/scripts/paddle_build.bat',
                        )
                    ):
                        file_list.append(filename)
                    else:
                        filterFiles.append(filename)
                elif ('/xpu/' in filename.lower()) or (
                    '/ipu/' in filename.lower()
                ):
                    filterFiles.append(filename)
                else:
                    file_list.append(filename)
            elif filename.startswith(PADDLE_ROOT + 'test/'):
                file_list.append(filename)
            else:
                if file_dict[filename] == 'added':
                    file_list.append(filename)
                else:
                    isWhiteFile = self.get_is_white_file(filename)
                    if not isWhiteFile:
                        file_list.append(filename)
                    else:
                        filterFiles.append(filename)

        if len(file_list) == 0:
            # Nothing to inspect: only the prec_delta tests are selected.
            ut_list.append('filterfiles_placeholder')
            ret = self.__urlretrieve(
                'https://paddle-docker-tar.bj.bcebos.com/new_precise_test_map/prec_delta',
                'prec_delta',
            )
            if ret:
                with open('prec_delta') as delta:
                    for ut in delta:
                        ut_list.append(ut.rstrip('\r\n'))
            else:
                print('PREC download prec_delta failed')
                sys.exit(1)
            PRECISION_TEST_Cases_ratio = format(
                float(len(ut_list)) / float(self.get_all_count()), '.2f'
            )
            print("filterFiles: %s" % filterFiles)
            print("ipipe_log_param_PRECISION_TEST: true")
            print(
                "ipipe_log_param_PRECISION_TEST_Cases_count: %s" % len(ut_list)
            )
            print(
                "ipipe_log_param_PRECISION_TEST_Cases_ratio: %s"
                % PRECISION_TEST_Cases_ratio
            )
            print(
                "The unittests in prec delta is shown as following: %s"
                % ut_list
            )
            return '\n'.join(ut_list)

        for f in file_list:
            # The map is keyed by '/paddle/...' paths; on these platforms
            # the local root is rewritten to that prefix before the lookup.
            if (
                current_system == "Darwin"
                or current_system == "Windows"
                or self.suffix == ".py3"
            ):
                f_judge = f.replace(PADDLE_ROOT, '/paddle/', 1)
                f_judge = f_judge.replace('//', '/')
            else:
                f_judge = f
            status = file_dict[f]

            if f_judge in file_ut_map:
                if status != 'removed' and self.is_only_comment(f):
                    ut_list.append('comment_placeholder')
                    onlyCommentsFilesOrXpu.append(f_judge)
                else:
                    hitMapFiles[f_judge] = len(file_ut_map[f_judge])
                    ut_list.extend(file_ut_map.get(f_judge))
                continue

            # From here on the file has no entry in the map.
            if f_judge.endswith('.md'):
                ut_list.append('md_placeholder')
                onlyCommentsFilesOrXpu.append(f_judge)
            elif 'test/xpu' in f_judge:
                ut_list.append('xpu_npu_placeholder')
                onlyCommentsFilesOrXpu.append(f_judge)
            elif f_judge.endswith(('.h', '.cu', '.cc', '.py')):
                if status == 'added':
                    # determine whether the new added file is a member of added_ut
                    f_judge_in_added_ut = False
                    path = PADDLE_ROOT + 'added_ut'
                    print("PADDLE_ROOT:", PADDLE_ROOT)
                    print("adde_ut path:", path)
                    unittest_name = os.path.split(f_judge)[1]
                    # A distinct handle name avoids shadowing the loop
                    # variable `f`, as the old `with ... as f` did.
                    with open(path, 'r') as added_ut_file:
                        for test in added_ut_file.readlines():
                            test = test.replace('\n', '').strip()
                            if test == unittest_name.split(".")[0]:
                                f_judge_in_added_ut = True
                    if f_judge_in_added_ut:
                        print(
                            "Adding new unit tests not hit mapFiles: %s"
                            % f_judge
                        )
                    else:
                        notHitMapFiles.append(f_judge)
                elif status == 'removed':
                    print("remove file not hit mapFiles: %s" % f_judge)
                else:
                    if self.is_only_comment(f):
                        ut_list.append('comment_placeholder')
                        onlyCommentsFilesOrXpu.append(f_judge)
                    # NOTE(review): this is a separate `if`, so a
                    # comment-only file that is not itself a unit test still
                    # lands in notHitMapFiles. Kept as is; confirm whether
                    # `elif` was intended.
                    if self.file_is_unnit_test(f_judge):
                        ut_list.append(
                            os.path.split(f_judge)[1].split(".")[0]
                        )
                    else:
                        notHitMapFiles.append(f_judge)
            elif status != 'removed':
                notHitMapFiles.append(f_judge)
            else:
                print("remove file not hit mapFiles: %s" % f_judge)

        ut_list = list(set(ut_list))
        if len(notHitMapFiles) != 0:
            print("ipipe_log_param_PRECISION_TEST: false")
            print("notHitMapFiles: %s" % notHitMapFiles)
            if len(filterFiles) != 0:
                print("filterFiles: %s" % filterFiles)
            return ''

        if ut_list:
            ret = self.__urlretrieve(
                'https://paddle-docker-tar.bj.bcebos.com/new_precise_test_map/prec_delta',
                'prec_delta',
            )
            if ret:
                known = set(ut_list)
                with open('prec_delta') as delta:
                    for ut in delta:
                        # Strip before the membership test: the raw line
                        # ends with a newline and never matched, so every
                        # delta entry used to be appended again.
                        ut = ut.rstrip('\r\n')
                        if ut not in known:
                            known.add(ut)
                            ut_list.append(ut)
            else:
                print('PREC download prec_delta failed')
                sys.exit(1)
            print("hitMapFiles: %s" % hitMapFiles)
            print("ipipe_log_param_PRECISION_TEST: true")
            print(
                "ipipe_log_param_PRECISION_TEST_Cases_count: %s"
                % len(ut_list)
            )
            PRECISION_TEST_Cases_ratio = format(
                float(len(ut_list)) / float(self.get_all_count()), '.2f'
            )
            print(
                "ipipe_log_param_PRECISION_TEST_Cases_ratio: %s"
                % PRECISION_TEST_Cases_ratio
            )
            if len(filterFiles) != 0:
                print("filterFiles: %s" % filterFiles)
        return '\n'.join(ut_list)
C
chalsliu 已提交
498 499 500 501 502


if __name__ == '__main__':
    # Build the checker, load the pull request, then write the selected
    # unit tests to the file 'ut_list' in the current directory.
    checker = PRChecker()
    checker.init()
    # The output file is opened before the selection runs, as before.
    with open('ut_list', 'w') as ut_list_file:
        ut_list_file.write(checker.get_pr_ut())