get_pr_ut.py 18.0 KB
Newer Older
C
chalsliu 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" For the PR that only modified the unit test, get cases in pull request. """

import os
import json
18
import re
19 20
import time
import subprocess
21
import requests
22 23
import urllib.request
import ssl
Y
YUNSHEN XIE 已提交
24
import platform
C
chalsliu 已提交
25 26 27
from github import Github

# Root directory of the Paddle checkout, taken from the PADDLE_ROOT
# environment variable (default '/paddle/'). A '/' is appended and doubled
# slashes are collapsed so that path fragments can be concatenated directly.
PADDLE_ROOT = os.getenv('PADDLE_ROOT', '/paddle/')
PADDLE_ROOT += '/'
PADDLE_ROOT = PADDLE_ROOT.replace('//', '/')
# NOTE(review): this replaces the default HTTPS context factory with the
# unverified one, i.e. certificate verification is turned off for urllib
# HTTPS requests made by this process. Confirm this is still required.
ssl._create_default_https_context = ssl._create_unverified_context
C
chalsliu 已提交
31 32 33 34 35 36


class PRChecker(object):
    """ PR Checker.

    Inspects the files changed by a GitHub pull request and works out which
    unit tests have to be run for it. The pull request id is read from the
    ``GIT_PR_ID`` environment variable and the API token from
    ``GITHUB_API_TOKEN``.
    """

    # Once this many changed files have been collected the checker stops
    # paging and get_pr_ut() returns '' (the "run all cases" answer).
    _MAX_PR_FILES = 30

    # Patterns applied to file content whose lines are prefixed with
    # "<lineno>|" (see get_comment_of_file).
    py_prog_oneline = re.compile(r'\d+\|\s*#.*')
    py_prog_multiline_a = re.compile('"""(.*?)"""', re.DOTALL)
    py_prog_multiline_b = re.compile("'''(.*?)'''", re.DOTALL)
    cc_prog_oneline = re.compile(r'\d+\|\s*//.*')
    # Misspelled name kept as an alias for any external user of the attribute.
    cc_prog_online = cc_prog_oneline
    cc_prog_multiline = re.compile(r'\d+\|\s*/\*.*?\*/', re.DOTALL)
    # Unified-diff hunk header. The ",<count>" parts are optional because they
    # are omitted when the count is 1 (e.g. "@@ -0,0 +1 @@").
    lineno_prog = re.compile(r'@@ \-\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@')

    # Parsed diff of the pull request, filled lazily by is_only_comment().
    _diff_lines_cache = None

    def __init__(self):
        self.github = Github(os.getenv('GITHUB_API_TOKEN'), timeout=60)
        self.repo = self.github.get_repo('PaddlePaddle/Paddle')
        self.pr = None
        self.suffix = ''
        self.full_case = False
        self._diff_lines_cache = None

    def init(self):
        """ Get pull request.

        Exits with status 0 when ``GIT_PR_ID`` is not set. Sets
        ``self.full_case`` when the message of the last commit contains
        ``test=allcase``.
        """
        pr_id = os.getenv('GIT_PR_ID')
        if not pr_id:
            print('PREC No PR ID')
            raise SystemExit(0)
        suffix = os.getenv('PREC_SUFFIX')
        if suffix:
            self.suffix = suffix
        self.pr = self.repo.get_pull(int(pr_id))

        # Walk the commit pages until an empty page (or an API error) to find
        # the last commit of the pull request.
        last_commit = None
        page = 0
        while True:
            try:
                commits = self.pr.get_commits().get_page(page)
                if len(commits) == 0:
                    break
                last_commit = commits[-1].commit
            except Exception as e:
                print('PREC get commits of page {} failed: {}'.format(page, e))
                break
            page += 1

        # last_commit stays None when no commit could be fetched at all.
        if last_commit is not None and 'test=allcase' in last_commit.message:
            print('PREC test=allcase is set')
            self.full_case = True

    #todo: exception
    def __wget_with_retry(self, url):
        """ Download ``url`` with wget, up to 5 attempts.

        Returns True on the first attempt that exits with status 0, False
        when every attempt failed. Sleeps ``attempt * 10`` seconds after
        each failure.
        """
        attempt = 1
        while attempt < 6:
            # NOTE(review): `attempt // 2 == 0` is only true for the first
            # attempt; `%` may have been intended. Behaviour kept as is.
            if attempt // 2 == 0:
                proxy = ''
            elif platform.system() == 'Windows':
                proxy = '-Y off'
            else:
                proxy = '--no-proxy'
            code = subprocess.call(
                'wget -q {} --no-check-certificate {}'.format(proxy, url),
                shell=True)
            if code == 0:
                return True
            print(
                'PREC download {} error, retry {} time(s) after {} secs.[proxy_option={}]'
                .format(url, attempt, attempt * 10, proxy))
            time.sleep(attempt * 10)
            attempt += 1
        return False

    def __urlretrieve(self, url, filename):
        """ Download ``url`` to ``filename`` with urllib, up to 5 attempts.

        Returns True as soon as one attempt succeeds and False after the
        last failed attempt. Sleeps ``attempt * 10`` seconds after each
        failure. Installs a global urllib opener on every attempt.
        """
        with_proxy = urllib.request.getproxies()
        # An empty mapping is the documented way to disable proxies.
        without_proxy = {}
        attempt = 1
        while attempt < 6:
            # NOTE(review): `attempt // 2 == 0` is only true for the first
            # attempt; `%` may have been intended. Behaviour kept as is.
            if attempt // 2 == 0:
                cur_proxy = urllib.request.ProxyHandler(without_proxy)
            else:
                cur_proxy = urllib.request.ProxyHandler(with_proxy)
            opener = urllib.request.build_opener(cur_proxy,
                                                 urllib.request.HTTPHandler)
            urllib.request.install_opener(opener)
            try:
                urllib.request.urlretrieve(url, filename)
            except Exception as e:
                print(e)
                print(
                    'PREC download {} error, retry {} time(s) after {} secs.[proxy_option={}]'
                    .format(url, attempt, attempt * 10, cur_proxy))
            else:
                return True
            # Back off and count the attempt so that the loop terminates.
            time.sleep(attempt * 10)
            attempt += 1

        return False

    def get_pr_files(self):
        """ Get files in pull request.

        Returns a dict mapping ``PADDLE_ROOT + filename`` to the file status
        reported by GitHub. Paging stops once ``_MAX_PR_FILES`` entries have
        been collected.
        """
        page = 0
        file_dict = {}
        while True:
            files = self.pr.get_files().get_page(page)
            if not files:
                break
            for f in files:
                file_dict[PADDLE_ROOT + f.filename] = f.status
            # if pr file count reaches the limit, need to run all case
            if len(file_dict) >= self._MAX_PR_FILES:
                break
            page += 1
        print("pr modify files: %s" % file_dict)
        return file_dict

    def get_is_white_file(self, filename):
        """ judge is white file in pr's files.

        Returns False for any path containing 'cmakelist' (case-insensitive)
        or starting with one of the listed build/tooling paths, True
        otherwise.
        """
        not_white_files = (PADDLE_ROOT + 'cmake/', PADDLE_ROOT + 'patches/',
                           PADDLE_ROOT + 'tools/dockerfile/',
                           PADDLE_ROOT + 'tools/windows/',
                           PADDLE_ROOT + 'tools/test_runner.py',
                           PADDLE_ROOT + 'tools/parallel_UT_rule.py')
        if 'cmakelist' in filename.lower():
            return False
        return not filename.startswith(not_white_files)

    def __get_comment_by_filetype(self, content, filetype):
        """ Return the comment lines found in ``content``.

        ``filetype`` is 'py' or 'cc'; any other value yields an empty list.
        """
        if filetype == 'py':
            progs = (self.py_prog_oneline, self.py_prog_multiline_a,
                     self.py_prog_multiline_b)
        elif filetype == 'cc':
            progs = (self.cc_prog_oneline, self.cc_prog_multiline)
        else:
            progs = ()
        result = []
        for prog in progs:
            result.extend(self.__get_comment_by_prog(content, prog))
        return result

    def __get_comment_by_prog(self, content, prog):
        """ Return every match of ``prog`` in ``content``, split into lines. """
        result = []
        for matched in prog.findall(content):
            result.extend(matched.split('\n'))
        return result

    def get_comment_of_file(self, f):
        """ Return the comment lines of local file ``f``.

        Every line of the file is prefixed with "<lineno>|" before the
        comment patterns are applied. Files that are neither C++ sources
        ('.h', '.cc', '.cu') nor '.py' yield an empty list and are not read.
        """
        #todo: get file from github
        if f.endswith(('.h', '.cc', '.cu')):
            filetype = 'cc'
        elif f.endswith('.py'):
            filetype = 'py'
        else:
            return []
        with open(f, encoding="utf-8") as fd:
            inputs = ''.join('{}|{}'.format(lineno, line)
                             for lineno, line in enumerate(fd, 1))
        return self.__get_comment_by_filetype(inputs, filetype)

    def get_pr_diff_lines(self):
        """ Download the diff of the pull request and return its added lines.

        Returns a dict mapping the file name taken from each "+++" header to
        a list of "<lineno>|<text>" strings, one per added line.
        """
        r = requests.get(self.pr.diff_url)
        return self._parse_diff(r.text)

    def _parse_diff(self, text):
        """ Collect the added lines of a unified diff, keyed by file name. """
        file_to_diff_lines = {}
        data = text.split('\n')
        total = len(data)
        ix = 0
        while ix < total:
            if data[ix].startswith('+++'):
                if data[ix].rstrip('\r\n') == '+++ /dev/null':
                    ix += 1
                    continue
                # Drop the leading "+++ b/".
                filename = data[ix][6:]
                ix += 1
                while ix < total:
                    result = self.lineno_prog.match(data[ix])
                    if not result:
                        break
                    lineno = int(result.group(1))
                    # An omitted count in the hunk header means one line.
                    length = 1 if result.group(2) is None else int(
                        result.group(2))
                    ix += 1
                    end = ix + length
                    while ix < end and ix < total:
                        marker = data[ix][:1]
                        if marker == '-' or marker == '\\':
                            # Removed lines and "\ No newline at end of file"
                            # markers are not part of the new file.
                            end += 1
                        else:
                            if marker == '+':
                                line = '{}{}'.format(
                                    lineno, data[ix].replace('+', '|', 1))
                                file_to_diff_lines.setdefault(
                                    filename, []).append(line)
                            lineno += 1
                        ix += 1
            ix += 1
        return file_to_diff_lines

    def is_only_comment(self, f):
        """ Return True when every line added to ``f`` is a comment line.

        Returns False when the diff has no added line for ``f``. The diff is
        downloaded once and reused for subsequent calls.
        """
        if self._diff_lines_cache is None:
            self._diff_lines_cache = self.get_pr_diff_lines()
        diff_lines = self._diff_lines_cache.get(f.replace(PADDLE_ROOT, '', 1))
        if not diff_lines:
            return False
        comment_lines = set(self.get_comment_of_file(f))
        for diff_line in diff_lines:
            if diff_line not in comment_lines:
                return False
        print('PREC {} is only comment'.format(f))
        return True

    def get_all_count(self):
        """ Return the number printed on the 'Total Tests:' line of `ctest -N`.

        Raises RuntimeError when the output contains no such line.
        """
        p = subprocess.Popen("cd {}build && ctest -N".format(PADDLE_ROOT),
                             shell=True,
                             stdout=subprocess.PIPE)
        out, _ = p.communicate()
        all_counts = None
        for line in out.decode('utf-8', errors='replace').splitlines():
            if 'Total Tests:' in line:
                all_counts = line.split()[-1]
        if all_counts is None:
            raise RuntimeError(
                "'Total Tests:' not found in the output of ctest -N")
        return int(all_counts)

    def file_is_unnit_test(self, filename):
        """ Return True when the base name of ``filename`` (without
        extension) is one of the test names listed by `ctest -N`. """
        #get all testcases by ctest-N
        all_ut_file = '%s/build/all_ut_file' % PADDLE_ROOT
        os.system(
            "cd %s/build && ctest -N | awk -F ': ' '{print $2}' | sed '/^$/d' | sed '$d' > %s"
            % (PADDLE_ROOT, all_ut_file))
        #determine whether filename is in all_ut_case
        case_name = os.path.split(filename)[1].split(".")[0]
        with open(all_ut_file, 'r') as f:
            for f_file in f:
                if f_file.strip('\n') == case_name:
                    return True
        return False

    def _is_in_added_ut(self, filename):
        """ Return True when the base name of ``filename`` (without
        extension) is listed in ``PADDLE_ROOT/added_ut``. """
        case_name = os.path.split(filename)[1].split(".")[0]
        with open('{}/added_ut'.format(PADDLE_ROOT)) as utfile:
            for f_file in utfile:
                if f_file.strip('\n') == case_name:
                    return True
        return False

    def _append_prec_delta(self, ut_list):
        """ Download 'prec_delta' and append each of its lines to ``ut_list``.

        Exits with status 1 when the download fails.
        """
        ret = self.__urlretrieve(
            'https://paddle-docker-tar.bj.bcebos.com/pre_test/prec_delta',
            'prec_delta')
        if not ret:
            print('PREC download prec_delta failed')
            raise SystemExit(1)
        with open('prec_delta') as delta:
            for ut in delta:
                ut_list.append(ut.rstrip('\r\n'))

    def _split_pr_files(self, file_dict):
        """ Split the changed files into (files to analyse, filtered files). """
        file_list = []
        filterFiles = []
        for filename in file_dict:
            if filename.startswith(PADDLE_ROOT + 'python/'):
                file_list.append(filename)
            elif filename.startswith(PADDLE_ROOT + 'paddle/'):
                if filename.startswith((PADDLE_ROOT + 'paddle/infrt',
                                        PADDLE_ROOT + 'paddle/utils')):
                    filterFiles.append(filename)
                elif filename.startswith(PADDLE_ROOT + 'paddle/scripts'):
                    if filename.startswith(
                        (PADDLE_ROOT + 'paddle/scripts/paddle_build.sh',
                         PADDLE_ROOT + 'paddle/scripts/paddle_build.bat')):
                        file_list.append(filename)
                    else:
                        filterFiles.append(filename)
                else:
                    file_list.append(filename)
            elif file_dict[filename] == 'added':
                file_list.append(filename)
            elif self.get_is_white_file(filename):
                filterFiles.append(filename)
            else:
                file_list.append(filename)
        return file_list, filterFiles

    def get_pr_ut(self):
        """ Get unit tests in pull request.

        Returns the selected test names joined by newlines, or '' when all
        cases have to be run (``test=allcase`` set, too many changed files,
        or a changed file that cannot be mapped to tests). Exits with status
        1 when a required download fails.
        """
        if self.full_case:
            return ''

        ret = self.__urlretrieve(
            'https://paddle-docker-tar.bj.bcebos.com/pre_test/ut_file_map.json',
            'ut_file_map.json')
        if not ret:
            print('PREC download file_ut.json failed')
            raise SystemExit(1)

        with open('ut_file_map.json') as jsonfile:
            file_ut_map = json.load(jsonfile)

        current_system = platform.system()
        file_dict = self.get_pr_files()
        # if pr file count reaches the limit, need to run all case
        if len(file_dict) >= self._MAX_PR_FILES:
            return ''
        file_list, filterFiles = self._split_pr_files(file_dict)

        if len(file_list) == 0:
            ut_list = ['filterfiles_placeholder']
            self._append_prec_delta(ut_list)
            PRECISION_TEST_Cases_ratio = format(
                float(len(ut_list)) / float(self.get_all_count()), '.2f')
            print("filterFiles: %s" % filterFiles)
            print("ipipe_log_param_PRECISION_TEST: true")
            print("ipipe_log_param_PRECISION_TEST_Cases_count: %s" %
                  len(ut_list))
            print("ipipe_log_param_PRECISION_TEST_Cases_ratio: %s" %
                  PRECISION_TEST_Cases_ratio)
            return '\n'.join(ut_list)

        ut_list = []
        notHitMapFiles = []
        hitMapFiles = {}
        # On these platforms the lookup key is rebased onto '/paddle/'.
        rebase = (current_system == "Darwin" or current_system == "Windows" or
                  self.suffix == ".py3")
        for f in file_list:
            if rebase:
                f_judge = f.replace(PADDLE_ROOT, '/paddle/', 1)
                f_judge = f_judge.replace('//', '/')
            else:
                f_judge = f
            status = file_dict[f]

            if f_judge in file_ut_map:
                if status != 'removed' and self.is_only_comment(f):
                    ut_list.append('comment_placeholder')
                else:
                    hitMapFiles[f_judge] = len(file_ut_map[f_judge])
                    ut_list.extend(file_ut_map.get(f_judge))
            elif f_judge.endswith('.md'):
                ut_list.append('md_placeholder')
            elif ('tests/unittests/xpu' in f_judge or
                  'tests/unittests/npu' in f_judge or 'op_npu.cc' in f_judge):
                ut_list.append('xpu_npu_placeholder')
            elif f_judge.endswith(('.h', '.cu', '.cc', '.py')):
                if status == 'added':
                    #determine whether the new added file is a member of added_ut
                    if self._is_in_added_ut(f_judge):
                        print("Adding new unit tests not hit mapFiles: %s" %
                              f_judge)
                    else:
                        notHitMapFiles.append(f_judge)
                elif status == 'removed':
                    print("remove file not hit mapFiles: %s" % f_judge)
                else:
                    only_comment = self.is_only_comment(f)
                    if only_comment:
                        ut_list.append('comment_placeholder')
                    if self.file_is_unnit_test(f_judge):
                        # Use the same name file_is_unnit_test() matched
                        # against the ctest list: base name, no extension.
                        ut_list.append(
                            os.path.split(f_judge)[1].split(".")[0])
                    elif not only_comment:
                        # A comment-only change must not force a full run.
                        notHitMapFiles.append(f_judge)
            elif status != 'removed':
                notHitMapFiles.append(f_judge)
            else:
                print("remove file not hit mapFiles: %s" % f_judge)

        # De-duplicate while keeping a deterministic (insertion) order.
        ut_list = list(dict.fromkeys(ut_list))
        if len(notHitMapFiles) != 0:
            print("ipipe_log_param_PRECISION_TEST: false")
            print("notHitMapFiles: %s" % notHitMapFiles)
            if len(filterFiles) != 0:
                print("filterFiles: %s" % filterFiles)
            return ''

        if ut_list:
            self._append_prec_delta(ut_list)
            print("hitMapFiles: %s" % hitMapFiles)
            print("ipipe_log_param_PRECISION_TEST: true")
            print("ipipe_log_param_PRECISION_TEST_Cases_count: %s" %
                  len(ut_list))
            PRECISION_TEST_Cases_ratio = format(
                float(len(ut_list)) / float(self.get_all_count()), '.2f')
            print("ipipe_log_param_PRECISION_TEST_Cases_ratio: %s" %
                  PRECISION_TEST_Cases_ratio)
            if len(filterFiles) != 0:
                print("filterFiles: %s" % filterFiles)
        return '\n'.join(ut_list)
C
chalsliu 已提交
443 444 445 446 447


if __name__ == '__main__':
    # Resolve the pull request, then write the selected unit tests (one per
    # line, possibly empty) to the file 'ut_list' in the working directory.
    pr_checker = PRChecker()
    pr_checker.init()
    with open('ut_list', 'w') as f:
        f.write(pr_checker.get_pr_ut())