From 597897e3aeff05f6ee5061a4e516c33d4b9d068c Mon Sep 17 00:00:00 2001 From: chalsliu <45041955+chalsliu@users.noreply.github.com> Date: Wed, 2 Dec 2020 14:29:58 +0800 Subject: [PATCH] Supprot precision test for code analysis --- tools/get_pr_ut.py | 137 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 127 insertions(+), 10 deletions(-) diff --git a/tools/get_pr_ut.py b/tools/get_pr_ut.py index ce1af24190c..46c051bdd2e 100644 --- a/tools/get_pr_ut.py +++ b/tools/get_pr_ut.py @@ -15,6 +15,9 @@ import os import json +import re +import sys +import requests from github import Github PADDLE_ROOT = os.getenv('PADDLE_ROOT', '/paddle/') @@ -26,7 +29,14 @@ class PRChecker(object): def __init__(self): self.github = Github(os.getenv('GITHUB_API_TOKEN'), timeout=60) self.repo = self.github.get_repo('PaddlePaddle/Paddle') + self.py_prog_oneline = re.compile('\d+\|\s*#.*') + self.py_prog_multiline_a = re.compile('\d+\|\s*""".*?"""', re.DOTALL) + self.py_prog_multiline_b = re.compile("\d+\|\s*'''.*?'''", re.DOTALL) + self.cc_prog_online = re.compile('\d+\|\s*//.*') + self.cc_prog_multiline = re.compile('\d+\|\s*/\*.*?\*/', re.DOTALL) + self.lineno_prog = re.compile('@@ \-\d+,\d+ \+(\d+),(\d+) @@') self.pr = None + self.suffix = '' def init(self): """ Get pull request. """ @@ -34,6 +44,9 @@ class PRChecker(object): if not pr_id: print('No PR ID') exit(0) + suffix = os.getenv('PREC_SUFFIX') + if suffix: + self.suffix = suffix self.pr = self.repo.get_pull(int(pr_id)) def get_pr_files(self): @@ -49,30 +62,134 @@ class PRChecker(object): page += 1 return file_list + def __get_comment_by_filetype(self, content, filetype): + result = [] + if filetype == 'py': + result = self.__get_comment_by_prog(content, self.py_prog_oneline) + result.extend( + self.__get_comment_by_prog(content, self.py_prog_multiline_a)) + result.extend( + self.__get_comment_by_prog(content, self.py_prog_multiline_b)) + if filetype == 'cc': + result = self.__get_comment_by_prog(content, self.cc_prog_oneline) + result.extend( + self.__get_comment_by_prog(content, self.cc_prog_multiline)) + return result + + def __get_comment_by_prog(self, content, prog): + result = [] + result_list = prog.findall(content) + if not result_list: + return None + for u in result_list: + result.extend(u.split('\n')) + return result + + def get_comment_of_file(self, f): + #content = self.repo.get_contents(f.replace(PADDLE_ROOT, ''), 'pull/').decoded_content + with open(f) as fd: + lines = fd.readlines() + lineno = 1 + inputs = '' + for line in lines: + #for line in content.split('\n'): + #input += str(lineno) + '|' + line + '\n' + inputs += str(lineno) + '|' + line + lineno += 1 + fietype = '' + if f.endswith('.h') or f.endswith('.cc') or f.endswith('.cu'): + filetype = 'cc' + if f.endswith('.py'): + filetype = 'py' + else: + return None + return self.__get_comment_by_filetype(inputs, filetype) + + def get_pr_diff_lines(self): + file_to_diff_lines = {} + r = requests.get(self.pr.diff_url) + data = r.text + data = data.split('\n') + ix = 0 + while ix < len(data): + if data[ix].startswith('+++'): + if data[ix].rstrip('\r\n') == '+++ /dev/null': + ix += 1 + continue + filename = data[ix][6:] + ix += 1 + while ix < len(data): + result = self.lineno_prog.match(data[ix]) + if not result: + break + lineno = int(result.group(1)) + length = int(result.group(2)) + ix += 1 + end = ix + length + while ix < end: + if data[ix][0] == '-': + end += 1 + if data[ix][0] == '+': + line_list = file_to_diff_lines.get(filename) + line = '{}{}'.format(lineno, data[ix].replace('+', + '|')) + if line_list: + line_list.append(line) + else: + file_to_diff_lines[filename] = [line, ] + if data[ix][0] != '-': + lineno += 1 + ix += 1 + ix += 1 + return file_to_diff_lines + + def is_only_comment(self, f): + file_to_diff_lines = self.get_pr_diff_lines() + comment_lines = self.get_comment_of_file(f) + #for l in comment_lines: + # print(l) + diff_lines = file_to_diff_lines.get(f.replace(PADDLE_ROOT, '')) + for l in diff_lines: + if l not in comment_lines: + return False + return True + def get_pr_ut(self): """ Get unit tests in pull request. """ check_added_ut = False ut_list = [] file_ut_map = None - cmd = 'wget -q --no-check-certificate https://sys-p0.bj.bcebos.com/prec/file_ut.json' + cmd = 'wget -q --no-check-certificate https://sys-p0.bj.bcebos.com/prec/file_ut.json' + self.suffix os.system(cmd) - with open('file_ut.json') as jsonfile: + with open('file_ut.json' + self.suffix) as jsonfile: file_ut_map = json.load(jsonfile) for f in self.get_pr_files(): - if f.endswith('.h') or f.endswith('.cu'): - return '' if f not in file_ut_map: - if f.find('test_') != -1 or f.find('_test') != -1: - check_added_ut = True - continue + if f.endswith('.md'): + ut_list.append('md_placeholder') + elif f.endswith('.h') or f.endswith('.cu'): + if self.is_only_comment(f): + ut_list.append('h_cu_comment_placeholder') + else: + return '' + elif f.endswith('.cc'): + if f.find('test_') != -1 or f.find('_test') != -1: + check_added_ut = True + elif self.is_only_comment(f): + ut_list.append('cc_comment_placeholder') + else: + return '' else: return '' else: - ut_list.extend(file_ut_map.get(f)) + if self.is_only_comment(f): + ut_list.append('cc_comment_placeholder') + else: + ut_list.extend(file_ut_map.get(f)) ut_list = list(set(ut_list)) - cmd = 'wget -q --no-check-certificate https://sys-p0.bj.bcebos.com/prec/prec_delta' + cmd = 'wget -q --no-check-certificate https://sys-p0.bj.bcebos.com/prec/prec_delta' + self.suffix os.system(cmd) - with open('prec_delta') as delta: + with open('prec_delta' + self.suffix) as delta: for ut in delta: ut_list.append(ut.rstrip('\r\n')) -- GitLab