get_pr_ut.py 18.7 KB
Newer Older
C
chalsliu 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" For the PR that only modified the unit test, get cases in pull request. """

import os
import json
18
import re
19 20
import time
import subprocess
21
import requests
22 23
import urllib.request
import ssl
Y
YUNSHEN XIE 已提交
24
import platform
C
chalsliu 已提交
25 26 27
from github import Github

# Root of the Paddle checkout, taken from the PADDLE_ROOT environment variable
# (default '/paddle/'). A '/' is appended and every '//' pair is collapsed to
# '/', so the value can be used directly as a path prefix.
PADDLE_ROOT = (os.getenv('PADDLE_ROOT', '/paddle/') + '/').replace('//', '/')

# NOTE(security): replaces the default HTTPS context factory with the
# unverified one, i.e. certificate verification is turned off for every
# default HTTPS context created in this process.
ssl._create_default_https_context = ssl._create_unverified_context
C
chalsliu 已提交
31 32 33


class PRChecker(object):
34
    """PR Checker."""
C
chalsliu 已提交
35 36

    def __init__(self):
        """Create the GitHub client and compile the comment/diff patterns.

        The API token is read from the ``GITHUB_API_TOKEN`` environment
        variable and the ``PaddlePaddle/Paddle`` repository is resolved
        immediately, so constructing the object already talks to GitHub.
        """
        self.github = Github(os.getenv('GITHUB_API_TOKEN'), timeout=60)
        self.repo = self.github.get_repo('PaddlePaddle/Paddle')

        # The comment patterns are applied to text in which every line is
        # prefixed with "<lineno>|" (see get_comment_of_file).
        self.py_prog_oneline = re.compile(r'\d+\|\s*#.*')
        self.py_prog_multiline_a = re.compile('"""(.*?)"""', re.DOTALL)
        self.py_prog_multiline_b = re.compile("'''(.*?)'''", re.DOTALL)
        # The C/C++ one-line pattern is published under both spellings:
        # `cc_prog_oneline` mirrors `py_prog_oneline`, `cc_prog_online` is
        # the historical name. Readers of either attribute keep working.
        self.cc_prog_oneline = re.compile(r'\d+\|\s*//.*')
        self.cc_prog_online = self.cc_prog_oneline
        self.cc_prog_multiline = re.compile(r'\d+\|\s*/\*.*?\*/', re.DOTALL)
        # Unified-diff hunk header; captures the new-side start line and
        # the new-side line count.
        self.lineno_prog = re.compile(r'@@ \-\d+,\d+ \+(\d+),(\d+) @@')

        self.pr = None  # pull request object, filled in by init()
        self.suffix = ''  # value of PREC_SUFFIX, filled in by init()
        self.full_case = False  # True once "test=allcase" is seen by init()
C
chalsliu 已提交
48 49

    def init(self):
50
        """Get pull request."""
C
chalsliu 已提交
51 52
        pr_id = os.getenv('GIT_PR_ID')
        if not pr_id:
53
            print('PREC No PR ID')
C
chalsliu 已提交
54
            exit(0)
55 56 57
        suffix = os.getenv('PREC_SUFFIX')
        if suffix:
            self.suffix = suffix
C
chalsliu 已提交
58
        self.pr = self.repo.get_pull(int(pr_id))
C
chalsliu 已提交
59 60 61
        last_commit = None
        ix = 0
        while True:
Y
YUNSHEN XIE 已提交
62 63 64 65 66 67
            try:
                commits = self.pr.get_commits().get_page(ix)
                if len(commits) == 0:
                    raise ValueError("no commit found in {} page".format(ix))
                last_commit = commits[-1].commit
            except Exception as e:
C
chalsliu 已提交
68
                break
Y
YUNSHEN XIE 已提交
69 70
            else:
                ix = ix + 1
71 72
        if last_commit.message.find('test=allcase') != -1:
            print('PREC test=allcase is set')
C
chalsliu 已提交
73
            self.full_case = True
C
chalsliu 已提交
74

75
    # todo: exception
76 77 78 79 80 81 82
    def __wget_with_retry(self, url):
        """Download ``url`` with the ``wget`` command line tool.

        Up to five attempts are made. A failed attempt ``k`` is reported and
        followed by a pause of ``k * 10`` seconds.

        Returns:
            True as soon as ``wget`` exits with status 0, False when all
            five attempts failed.
        """
        for attempt in range(1, 6):
            # `attempt // 2 == 0` is true for the first attempt only: that
            # one passes no proxy switch, all later ones disable the proxy.
            if attempt // 2 == 0:
                proxy = ''
            elif platform.system() == 'Windows':
                proxy = '-Y off'
            else:
                proxy = '--no-proxy'
            command = 'wget -q {} --no-check-certificate {}'.format(proxy, url)
            if subprocess.call(command, shell=True) == 0:
                return True
            delay = attempt * 10
            print(
                'PREC download {} error, retry {} time(s) after {} secs.[proxy_option={}]'.format(
                    url, attempt, delay, proxy
                )
            )
            time.sleep(delay)
        return False

102 103 104 105 106 107 108 109 110
    def __urlretrieve(self, url, filename):
        ix = 1
        with_proxy = urllib.request.getproxies()
        without_proxy = {'http': '', 'http': ''}
        while ix < 6:
            if ix // 2 == 0:
                cur_proxy = urllib.request.ProxyHandler(without_proxy)
            else:
                cur_proxy = urllib.request.ProxyHandler(with_proxy)
111 112 113
            opener = urllib.request.build_opener(
                cur_proxy, urllib.request.HTTPHandler
            )
114 115 116 117 118 119
            urllib.request.install_opener(opener)
            try:
                urllib.request.urlretrieve(url, filename)
            except Exception as e:
                print(e)
                print(
120 121 122 123
                    'PREC download {} error, retry {} time(s) after {} secs.[proxy_option={}]'.format(
                        url, ix, ix * 10, cur_proxy
                    )
                )
124 125 126 127 128 129 130 131
                continue
            else:
                return True
            time.sleep(ix * 10)
            ix += 1

        return False

C
chalsliu 已提交
132
    def get_pr_files(self):
        """Collect the files changed by the pull request.

        Pages through ``self.pr.get_files()`` and stops at the first empty
        page, or as soon as exactly 30 files have been collected after a
        page.

        Returns:
            dict: ``PADDLE_ROOT + <file name>`` -> file status as reported
            by the GitHub API.
        """
        changed = {}
        collected = 0
        page_no = 0
        while True:
            batch = self.pr.get_files().get_page(page_no)
            if not batch:
                break
            for entry in batch:
                changed[PADDLE_ROOT + entry.filename] = entry.status
                collected += 1
            # Stop once 30 files are collected: the caller treats a PR
            # with that many files as one that needs to run all cases.
            if collected == 30:
                break
            page_no += 1
        print("pr modify files: %s" % changed)
        return changed

    def get_is_white_file(self, filename):
        """Tell whether ``filename`` is a white-listed file of the PR.

        A file is NOT white-listed when its name contains ``cmakelist``
        (case-insensitive) or when it starts with one of the excluded
        prefixes below. Every other file is.

        Returns:
            bool: True for a white-listed file, False otherwise.
        """
        excluded_prefixes = tuple(
            PADDLE_ROOT + tail
            for tail in (
                'cmake/',
                'patches/',
                'tools/dockerfile/',
                'tools/windows/',
                'tools/test_runner.py',
                'tools/parallel_UT_rule.py',
            )
        )
        if 'cmakelist' in filename.lower():
            return False
        return not filename.startswith(excluded_prefixes)
C
chalsliu 已提交
168

169 170 171 172 173
    def __get_comment_by_filetype(self, content, filetype):
        result = []
        if filetype == 'py':
            result = self.__get_comment_by_prog(content, self.py_prog_oneline)
            result.extend(
174 175
                self.__get_comment_by_prog(content, self.py_prog_multiline_a)
            )
176
            result.extend(
177 178
                self.__get_comment_by_prog(content, self.py_prog_multiline_b)
            )
179 180 181
        if filetype == 'cc':
            result = self.__get_comment_by_prog(content, self.cc_prog_oneline)
            result.extend(
182 183
                self.__get_comment_by_prog(content, self.cc_prog_multiline)
            )
184 185 186 187 188
        return result

    def __get_comment_by_prog(self, content, prog):
        result_list = prog.findall(content)
        if not result_list:
C
chalsliu 已提交
189 190
            return []
        result = []
191 192 193 194 195
        for u in result_list:
            result.extend(u.split('\n'))
        return result

    def get_comment_of_file(self, f):
196 197
        # content = self.repo.get_contents(f.replace(PADDLE_ROOT, ''), 'pull/').decoded_content
        # todo: get file from github
198
        with open(f, encoding="utf-8") as fd:
199 200 201 202
            lines = fd.readlines()
        lineno = 1
        inputs = ''
        for line in lines:
203 204
            # for line in content.split('\n'):
            # input += str(lineno) + '|' + line + '\n'
205 206 207 208 209 210 211 212
            inputs += str(lineno) + '|' + line
            lineno += 1
        fietype = ''
        if f.endswith('.h') or f.endswith('.cc') or f.endswith('.cu'):
            filetype = 'cc'
        if f.endswith('.py'):
            filetype = 'py'
        else:
C
chalsliu 已提交
213
            return []
214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241
        return self.__get_comment_by_filetype(inputs, filetype)

    def get_pr_diff_lines(self):
        """Map every file touched by the PR to the lines the PR adds to it.

        Downloads the unified diff from ``self.pr.diff_url`` and walks it
        file by file and hunk by hunk.

        Returns:
            dict: file name (the ``+++`` header line with its first six
            characters removed) -> list of ``"<lineno>|<text>"`` strings,
            one per added line, where ``lineno`` is the line number in the
            new version of the file. Files without added lines and deleted
            files (``+++ /dev/null``) have no entry.
        """
        file_to_diff_lines = {}
        r = requests.get(self.pr.diff_url)
        data = r.text.split('\n')
        total = len(data)
        ix = 0
        while ix < total:
            header = data[ix]
            ix += 1
            if not header.startswith('+++'):
                continue
            if header.rstrip('\r\n') == '+++ /dev/null':
                continue
            filename = header[6:]
            # Consume all consecutive hunks that belong to this file.
            # NOTE(review): lineno_prog requires explicit line counts on
            # both sides; hunk headers that omit a count (single-line
            # hunks) are not matched and end the walk for this file.
            while ix < total:
                result = self.lineno_prog.match(data[ix])
                if not result:
                    break
                lineno = int(result.group(1))
                # Number of new-side (context or added) lines still owed
                # by this hunk.
                remaining = int(result.group(2))
                ix += 1
                while ix < total:
                    marker = data[ix][:1]
                    if marker in ('-', '\\'):
                        # Deletions and "\ No newline at end of file"
                        # markers are not part of the new file: skip them
                        # wherever they occur, including after the last
                        # new-side line, so the next hunk header is found.
                        ix += 1
                        continue
                    if remaining == 0:
                        break
                    if marker == '+':
                        file_to_diff_lines.setdefault(filename, []).append(
                            '{}{}'.format(
                                lineno, data[ix].replace('+', '|', 1)
                            )
                        )
                    lineno += 1
                    remaining -= 1
                    ix += 1
        return file_to_diff_lines

    def is_only_comment(self, f):
        """Tell whether the PR only adds comment lines to file ``f``.

        Args:
            f: File path starting with ``PADDLE_ROOT``; the diff is looked
                up under the path with the first ``PADDLE_ROOT`` occurrence
                removed.

        Returns:
            bool: True when the diff has added lines for ``f`` and each of
            them is one of the file's comment lines; False otherwise,
            including when the diff has no added lines for ``f``.
        """
        added_by_file = self.get_pr_diff_lines()
        comments = self.get_comment_of_file(f)
        relative_path = f.replace(PADDLE_ROOT, '', 1)
        added = added_by_file.get(relative_path)
        if not added:
            return False
        if any(entry not in comments for entry in added):
            return False
        print('PREC {} is only comment'.format(f))
        return True

Z
zhangchunle 已提交
269
    def get_all_count(self):
        """Return the total number of tests reported by ``ctest -N``.

        Runs ``ctest -N`` inside ``<PADDLE_ROOT>build`` and reads the last
        whitespace-separated token of the "Total Tests:" line.

        Returns:
            int: The reported test count.

        Raises:
            RuntimeError: If the output contains no "Total Tests:" line
                (for example because the ctest command failed).
        """
        proc = subprocess.Popen(
            "cd {}build && ctest -N".format(PADDLE_ROOT),
            shell=True,
            stdout=subprocess.PIPE,
        )
        out, _ = proc.communicate()
        all_counts = None
        for line in out.splitlines():
            # `out` is bytes; str() of a bytes line still contains the
            # marker text, and int() accepts the bytes token below.
            if 'Total Tests:' in str(line):
                all_counts = line.split()[-1]
        if all_counts is None:
            raise RuntimeError(
                "'Total Tests:' not found in the output of ctest -N"
            )
        return int(all_counts)
Z
zhangchunle 已提交
280

281
    def file_is_unnit_test(self, filename):
        """Tell whether ``filename`` is one of the tests listed by ctest.

        The names printed by ``ctest -N`` are written to
        ``<PADDLE_ROOT>/build/all_ut_file``. The file counts as a unit test
        when its base name, cut at the first ``.``, equals one of those
        names.

        Returns:
            bool: True on a match, False otherwise.
        """
        all_ut_file = '%s/build/all_ut_file' % PADDLE_ROOT
        # List all test cases with ctest -N, keeping only the name column.
        list_cmd = (
            "cd %s/build && ctest -N | awk -F ': ' '{print $2}' | sed '/^$/d' | sed '$d' > %s"
            % (PADDLE_ROOT, all_ut_file)
        )
        os.system(list_cmd)
        # Check whether the file's stem is among the listed test names.
        with open(all_ut_file, 'r') as listing:
            stem = os.path.basename(filename).split(".")[0]
            return any(name.strip('\n') == stem for name in listing)

C
chalsliu 已提交
297
    def get_pr_ut(self):
        """Get unit tests in pull request.

        Decides which unit tests have to run for the files changed by the
        pull request, using the downloaded ``ut_file_map.json`` (file path
        -> list of unit tests) and the ``prec_delta`` list.

        Returns:
            str: Newline-joined unit test names and placeholders. The empty
            string is returned when ``self.full_case`` is set, when the PR
            file listing contains exactly 30 entries, or when at least one
            changed file could not be resolved (``notHitMapFiles``).
            Presumably the caller treats the empty string as "run all
            cases" -- confirm against the consumer of ``ut_list``.

        The process exits with status 1 when a required download fails.
        """
        if self.full_case:
            return ''
        check_added_ut = False  # not read again in this method
        ut_list = []
        file_ut_map = None

        ret = self.__urlretrieve(
            'https://paddle-docker-tar.bj.bcebos.com/pre_test_tmp/ut_file_map.json',
            'ut_file_map.json',
        )
        if not ret:
            print('PREC download file_ut.json failed')
            exit(1)

        with open('ut_file_map.json') as jsonfile:
            file_ut_map = json.load(jsonfile)

        current_system = platform.system()
        notHitMapFiles = []  # changed files no test could be selected for
        hitMapFiles = {}  # mapped file -> number of tests it selects
        onlyCommentsFilesOrXpu = []  # collected, but only appended to here
        filterFiles = []  # changed files excluded from test selection
        file_list = []  # changed files that take part in test selection
        file_dict = self.get_pr_files()
        # get_pr_files stops collecting once it holds 30 files, so a PR
        # with that many files is treated as needing to run all cases.
        if len(file_dict) == 30:
            return ''
        # Step 1: split the changed files into "relevant" and "filtered".
        for filename in file_dict:
            if filename.startswith(PADDLE_ROOT + 'python/'):
                file_list.append(filename)
            elif filename.startswith(PADDLE_ROOT + 'paddle/'):
                if filename.startswith((PADDLE_ROOT + 'paddle/infrt')):
                    filterFiles.append(filename)
                elif filename.startswith(PADDLE_ROOT + 'paddle/scripts'):
                    # Of the scripts, only the two build entry points are
                    # considered relevant.
                    if filename.startswith(
                        (
                            PADDLE_ROOT + 'paddle/scripts/paddle_build.sh',
                            PADDLE_ROOT + 'paddle/scripts/paddle_build.bat',
                        )
                    ):
                        file_list.append(filename)
                    else:
                        filterFiles.append(filename)
                else:
                    file_list.append(filename)
            else:
                # Outside python/ and paddle/: newly added files are always
                # relevant, others only when they are not white-listed.
                if file_dict[filename] == 'added':
                    file_list.append(filename)
                else:
                    isWhiteFile = self.get_is_white_file(filename)
                    if isWhiteFile == False:
                        file_list.append(filename)
                    else:
                        filterFiles.append(filename)
        if len(file_list) == 0:
            # Step 2a: nothing relevant changed. Return a placeholder plus
            # the tests listed in prec_delta.
            ut_list.append('filterfiles_placeholder')
            ret = self.__urlretrieve(
                'https://paddle-docker-tar.bj.bcebos.com/pre_test_tmp/prec_delta',
                'prec_delta',
            )
            if ret:
                with open('prec_delta') as delta:
                    for ut in delta:
                        ut_list.append(ut.rstrip('\r\n'))
            else:
                print('PREC download prec_delta failed')
                exit(1)
            # Share of selected cases among all ctest cases, two decimals.
            PRECISION_TEST_Cases_ratio = format(
                float(len(ut_list)) / float(self.get_all_count()), '.2f'
            )
            print("filterFiles: %s" % filterFiles)
            print("ipipe_log_param_PRECISION_TEST: true")
            print(
                "ipipe_log_param_PRECISION_TEST_Cases_count: %s" % len(ut_list)
            )
            print(
                "ipipe_log_param_PRECISION_TEST_Cases_ratio: %s"
                % PRECISION_TEST_Cases_ratio
            )
            return '\n'.join(ut_list)
        else:
            # Step 2b: resolve every relevant file to its unit tests.
            for f in file_list:
                # On macOS, Windows and for the ".py3" suffix the map is
                # looked up with a '/paddle/'-rooted path instead of the
                # local PADDLE_ROOT.
                if (
                    current_system == "Darwin"
                    or current_system == "Windows"
                    or self.suffix == ".py3"
                ):
                    f_judge = f.replace(PADDLE_ROOT, '/paddle/', 1)
                    f_judge = f_judge.replace('//', '/')
                else:
                    f_judge = f
                if f_judge not in file_ut_map:
                    if f_judge.endswith('.md'):
                        ut_list.append('md_placeholder')
                        onlyCommentsFilesOrXpu.append(f_judge)
                    elif (
                        'tests/unittests/xpu' in f_judge
                        or 'tests/unittests/npu' in f_judge
                        or 'op_npu.cc' in f_judge
                    ):
                        ut_list.append('xpu_npu_placeholder')
                        onlyCommentsFilesOrXpu.append(f_judge)
                    elif f_judge.endswith(('.h', '.cu', '.cc', '.py')):
                        # determine whether the new added file is a member of added_ut
                        if file_dict[f] in ['added']:
                            f_judge_in_added_ut = False
                            with open(
                                '{}/added_ut'.format(PADDLE_ROOT)
                            ) as utfile:
                                (filepath, tempfilename) = os.path.split(
                                    f_judge
                                )
                                for f_file in utfile:
                                    if (
                                        f_file.strip('\n')
                                        == tempfilename.split(".")[0]
                                    ):
                                        f_judge_in_added_ut = True
                            if f_judge_in_added_ut == True:
                                print(
                                    "Adding new unit tests not hit mapFiles: %s"
                                    % f_judge
                                )
                            else:
                                notHitMapFiles.append(f_judge)
                        elif file_dict[f] in ['removed']:
                            print("remove file not hit mapFiles: %s" % f_judge)
                        else:
                            if self.is_only_comment(f):
                                ut_list.append('comment_placeholder')
                                onlyCommentsFilesOrXpu.append(f_judge)
                            # NOTE(review): this is a separate `if`, not an
                            # `elif`, so a comment-only file is still checked
                            # here and may end up in notHitMapFiles. The
                            # appended value is also the path up to its first
                            # '.', not a bare test name. Confirm both are
                            # intended.
                            if self.file_is_unnit_test(f_judge):
                                ut_list.append(f_judge.split(".")[0])
                            else:
                                notHitMapFiles.append(f_judge)
                    else:
                        # Any other extension: unresolved, unless the file
                        # was removed (then it is only logged).
                        notHitMapFiles.append(f_judge) if file_dict[
                            f
                        ] != 'removed' else print(
                            "remove file not hit mapFiles: %s" % f_judge
                        )
                else:
                    # The file is in the map: use its tests, unless the
                    # change only touches comments.
                    if file_dict[f] not in ['removed']:
                        if self.is_only_comment(f):
                            ut_list.append('comment_placeholder')
                            onlyCommentsFilesOrXpu.append(f_judge)
                        else:
                            hitMapFiles[f_judge] = len(file_ut_map[f_judge])
                            ut_list.extend(file_ut_map.get(f_judge))
                    else:
                        hitMapFiles[f_judge] = len(file_ut_map[f_judge])
                        ut_list.extend(file_ut_map.get(f_judge))

            # Deduplicate; the resulting order is whatever set() yields.
            ut_list = list(set(ut_list))
            if len(notHitMapFiles) != 0:
                # At least one file could not be resolved: report it and
                # return the empty selection.
                print("ipipe_log_param_PRECISION_TEST: false")
                print("notHitMapFiles: %s" % notHitMapFiles)
                if len(filterFiles) != 0:
                    print("filterFiles: %s" % filterFiles)
                return ''
            else:
                if ut_list:
                    # Add the tests listed in prec_delta to the selection.
                    ret = self.__urlretrieve(
                        'https://paddle-docker-tar.bj.bcebos.com/pre_test_tmp/prec_delta',
                        'prec_delta',
                    )
                    if ret:
                        with open('prec_delta') as delta:
                            for ut in delta:
                                ut_list.append(ut.rstrip('\r\n'))
                    else:
                        print('PREC download prec_delta failed')
                        exit(1)
                    print("hitMapFiles: %s" % hitMapFiles)
                    print("ipipe_log_param_PRECISION_TEST: true")
                    print(
                        "ipipe_log_param_PRECISION_TEST_Cases_count: %s"
                        % len(ut_list)
                    )
                    PRECISION_TEST_Cases_ratio = format(
                        float(len(ut_list)) / float(self.get_all_count()), '.2f'
                    )
                    print(
                        "ipipe_log_param_PRECISION_TEST_Cases_ratio: %s"
                        % PRECISION_TEST_Cases_ratio
                    )
                    if len(filterFiles) != 0:
                        print("filterFiles: %s" % filterFiles)
                return '\n'.join(ut_list)
C
chalsliu 已提交
487 488 489 490 491


if __name__ == '__main__':
    # Resolve the pull request, then write the selected unit tests to the
    # file 'ut_list' in the current directory. The file is opened before
    # the selection runs, so it exists (empty) even if selection exits.
    checker = PRChecker()
    checker.init()
    with open('ut_list', 'w') as ut_list_file:
        ut_list_file.write(checker.get_pr_ut())