sampcd_processor.py 23.1 KB
Newer Older
1
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 3 4 5 6 7 8 9 10 11 12 13
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
T
tianshuo78520a 已提交
14 15

import os
16
import sys
T
tianshuo78520a 已提交
17
import subprocess
18 19 20
import multiprocessing
import math
import platform
21 22 23
import inspect
import paddle
import paddle.fluid
Z
zhangchunle 已提交
24
import json
25 26
"""
please make sure to run in the tools path
27
usage: python sample_test.py {arg1} 
28 29 30
arg1: the first arg defined running in gpu version or cpu version

for example, you can run cpu version python2 testing like this:
31 32 33

    python sampcd_processor.py cpu 

34
"""
T
tianshuo78520a 已提交
35 36 37


def find_all(srcstr, substr):
    """
    Collect the starting index of every occurrence of ``substr`` inside
    ``srcstr``. Overlapping occurrences are reported as well, because
    the search resumes one character after each hit.

    Args:
        srcstr(str): the parent string
        substr(str): the substring to look for

    Returns:
        list: the starting indices of the occurrences, in ascending
              order (empty when there is no occurrence)
    """
    positions = []
    cursor = 0
    while True:
        hit = srcstr.find(substr, cursor)
        if hit == -1:
            return positions
        positions.append(hit)
        # resume right after the start of this hit
        cursor = hit + 1


def check_indent(cdline):
    """
    Measure the indent of a code line, i.e. the width of its leading
    run of blank characters (spaces and tabs).

    A tab character counts as 4 spaces, every space counts as 1.

    Args:
        cdline(str) : a single line of code from the source file

    Returns:
        int : the indent expressed as a number of spaces
    """
    width = 0
    for ch in cdline:
        if ch == ' ':
            width += 1
        elif ch == '\t':
            width += 4
        else:
            # first non-blank character ends the indent
            break
    return width


86 87 88
# srccom: raw comments in the source, including ''' and original indent
def sampcd_extract_and_run(srccom, name, htype="def", hname=""):
    """
    Extract and run sample codes from source comment and
    the result will be returned.

    Every `` code-block:: python`` directive found in ``srccom`` is
    dedented, written to a file under ``samplecode_temp/`` and executed
    in a child Python interpreter. ``sys.argv[1]`` ("cpu" or "gpu")
    decides which ``CUDA_VISIBLE_DEVICES`` value is set at the top of
    each generated sample file.

    Args:
        srccom(str): the source comment of some API whose
                     example codes will be extracted and run.
        name(str): the name of the API.
        htype(str): the type of hint banners, def/class/method.
        hname(str): the name of the hint  banners , e.t. def hname.

    Returns:
        bool: False when the comment has no "Examples:" section, uses
              the deprecated ">>>" style, contains an empty code block,
              or when a sample exits with a non-zero return code;
              True otherwise.
    """
    directive = " code-block:: python"
    result = True

    def sampcd_header_print(name, sampcd, htype, hname, index):
        """
        print hint banner headers.

        Args:
            name(str): the name of the API.
            sampcd(str): sample code string
            htype(str): the type of hint banners, def/class/method.
            hname(str): the name of the hint  banners , e.t. def hname.
            index(int): 1-based position of the sample in the comment.
        """
        print_header(htype, hname)
        print("Sample code ", str(index), " extracted for ", name, "   :")
        print(sampcd)
        print("----example code check----\n")
        print("executing sample code .....")
        print("execution result:")

    sampcd_begins = find_all(srccom, directive)
    if len(sampcd_begins) == 0:
        print_header(htype, hname)
        # sample codes written in the ">>>" format are detected here
        # and considered wrong
        if srccom.find("Examples:") != -1:
            print("----example code check----\n")
            if srccom.find(">>>") != -1:
                print(
                    "Deprecated sample code style:\n\n    Examples:\n\n        >>>codeline\n        >>>codeline\n\n\n ",
                    "Please use '.. code-block:: python' to ",
                    "format sample code.\n")
                result = False
        else:
            print("Error: No sample code!\n")
            result = False

    for y, sampcd_begin in enumerate(sampcd_begins, 1):
        sampcd = srccom[sampcd_begin + len(directive) + 1:].split("\n")
        # remove starting empty lines; the emptiness guard keeps a
        # directive that is followed by nothing from raising IndexError
        while sampcd and sampcd[0].replace(' ', '').replace('\t', '') == '':
            sampcd.pop(0)
        if not sampcd:
            print_header(htype, hname)
            print("Error: No sample code!\n")
            result = False
            continue

        # the minimum indent, which is the indent of the first
        # non-empty line
        min_indent = check_indent(sampcd[0])
        sampcd_to_write = []
        for cdline in sampcd:
            # skip empty lines or those only with spaces/tabs
            if cdline.strip() == '':
                continue
            # a line indented less than the first one ends the block
            if check_indent(cdline) < min_indent:
                break
            cdline = cdline.replace('\t', '    ')
            sampcd_to_write.append(cdline[min_indent:])

        sampcd = '\n'.join(sampcd_to_write)
        if sys.argv[1] == "cpu":
            sampcd = '\nimport os\n' + 'os.environ["CUDA_VISIBLE_DEVICES"] = ""\n' + sampcd
        if sys.argv[1] == "gpu":
            sampcd = '\nimport os\n' + 'os.environ["CUDA_VISIBLE_DEVICES"] = "0"\n' + sampcd
        sampcd += '\nprint(' + '\"' + name + ' sample code is executed successfully!\")'

        if len(sampcd_begins) > 1:
            tfname = name + "_example_" + str(y) + ".py"
        else:
            tfname = name + "_example" + ".py"
        tfpath = "samplecode_temp/" + tfname
        # the context manager closes the file even if write() fails
        with open(tfpath, 'w') as tempf:
            tempf.write(sampcd)

        py_major = platform.python_version()[0]
        if py_major == "2":
            cmd = ["python", tfpath]
        elif py_major == "3":
            cmd = ["python3", tfpath]
        else:
            print("Error: fail to parse python version!")
            result = False
            sys.exit(1)

        subprc = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        output, error = subprc.communicate()
        msg = output.decode(encoding='utf-8')
        err = error.decode(encoding='utf-8')

        if subprc.returncode != 0:
            print("\nSample code error found in ", name, ":\n")
            sampcd_header_print(name, sampcd, htype, hname, y)
            print("subprocess return code: ", str(subprc.returncode))
            print("Error Raised from Sample Code ", name, " :\n")
            print(err)
            print(msg)
            result = False
        # The generated file is intentionally left in samplecode_temp/;
        # this function does not delete it.

    return result
T
tianshuo78520a 已提交
207 208 209


def single_defcom_extract(start_from, srcls, is_class_begin=False):
    """
    Extract the comment (docstring) body that follows a
    function/class/method header.

    Scanning starts on the line after ``start_from``. The first line
    whose stripped form starts with three double quotes or three single
    quotes opens the comment; the next line that starts with the same
    quote mark closes it. Only the lines strictly between these two
    lines are returned.

    Args:
        start_from(int): the line num of "def" header
        srcls(list): the source file in lines
        is_class_begin(bool): whether ``start_from`` is the beginning of
            a class. A class without its own docstring ends at its first
            method (a line starting with 4 blanks + "def "), whereas a
            common def function is only ended by a none-indented
            def/class.

    Returns:
        string : the lines between the opening and the closing quote
                 lines, concatenated; "" when no comment is found.
    """
    collected = []
    opening_quote = None  # None until the comment has started
    for idx in range(start_from + 1, len(srcls)):
        line = srcls[idx]
        if is_class_begin and line.replace('\t', '    ').startswith(
                '    def '):
            break
        if line.startswith('def ') or line.startswith('class '):
            break

        compact = line.replace(" ", '').replace("\t", '').replace("\n", '')
        if opening_quote is None:
            # still looking for the line that opens the comment
            if compact.startswith("\"\"\""):
                opening_quote = "\"\"\""
            elif compact.startswith("\'\'\'"):
                opening_quote = "\'\'\'"
            continue

        # inside the comment: only the matching quote style closes it
        if compact.startswith(opening_quote):
            break
        collected.append(line)
    return "".join(collected)


263 264 265
def print_header(htype, name):
    """
    Print a two-line banner: the hint type with the API name, followed
    by a separator rule.

    Args:
        htype(str): the type of hint banners, def/class/method.
        name(str): the name shown in the banner.
    """
    separator = "-----------------------"
    print(htype, " name:", name)
    print(separator)
266

T
tianshuo78520a 已提交
267

268
def srccoms_extract(srcfile, wlist):
    """
    Given a source file ``srcfile``, this function will
    extract its API(doc comments) and run sample codes in the
    API.

    A top-level function or class is checked only when its name is in
    the file's ``__all__`` list and its qualified name is in the
    module-level ``methods`` list (filled by ``get_filenames``). For a
    checked class, its methods indented by 4 blanks whose names do not
    start with "_" are checked as well. Names found in ``wlist``
    (either bare or as "name@file") are skipped.

    Args:
        srcfile(file): the opened source file
        wlist(list): white list

    Returns:
        bool: False if any extracted sample code failed, True otherwise
              (a missing comment only prints a warning).
    """
    process_result = True
    srcc = srcfile.read()
    # set file pointer back to its beginning to read the lines as well
    srcfile.seek(0, 0)
    srcls = srcfile.readlines()  # source lines

    # 1. fetch the __all__ list; without one there is nothing to check
    allidx = srcc.find("__all__")
    if allidx == -1:
        return process_result

    # Path components from index 4 on form the prefix used in
    # ``methods``, e.g. '../python/paddle/fluid/layers/nn.py' gives
    # 'layers.nn.'
    srcfile_list = srcfile.name.replace('.py', '').split('/')
    srcfile_str = ''.join(part + '.' for part in srcfile_list[4:])

    is_ops_file = srcfile.name.find("ops.py") != -1
    alllist = []
    if is_ops_file:
        # layers/ops.py: every line starting with __all__ is parsed
        for line in srcls:
            if not line.startswith("__all__"):
                continue
            lb = line.find('[')
            rb = line.find(']')
            if lb == -1:
                continue
            allele = line[lb + 1:rb].replace("'", '').replace(
                " ", '').replace("\"", '')
            # split so that several names on one line are kept apart
            alllist.extend(allele.split(','))
    else:
        alllist_b = allidx + len("__all__")
        tail = srcc[alllist_b:]
        list_begin = alllist_b + tail.find("[") + 1
        list_end = alllist_b + tail.find("]")
        allstr = srcc[list_begin:list_end]
        allstr = allstr.replace("\n", '').replace(" ", '').replace(
            "'", '').replace("\"", '')
        alllist = allstr.split(',')
    alllist = [entry for entry in alllist if entry != '']

    # ops.py also has normal formatted functions; names whose comment
    # is assigned through ``__doc__`` are recorded in 'handled' so that
    # they are ignored in the following step.
    # NOTE(review): the sample codes of these ops are not run here.
    handled = []
    if is_ops_file:
        for line in srcls:
            doc_pos = line.find("__doc__")
            if doc_pos == -1:
                continue
            opname = line[:doc_pos - 1]
            if opname not in wlist:
                handled.append(opname)

    # 2. walk through the def and class headers
    for i, line in enumerate(srcls):
        if line.startswith('def '):  # a function header is detected
            f_header = line.replace(" ", '')
            fn = f_header[len('def'):f_header.find('(')]  # function name
            if "%s%s" % (srcfile_str, fn) not in methods:
                continue
            if fn in handled or fn not in alllist:
                continue
            if fn in wlist or fn + "@" + srcfile.name in wlist:
                continue
            fcombody = single_defcom_extract(i, srcls)
            if fcombody == "":  # if no comment
                print_header("def", fn)
                print("WARNING: no comments in function ", fn,
                      ", but it deserves.")
            elif not sampcd_extract_and_run(fcombody, fn, "def", fn):
                process_result = False
            continue

        if not line.startswith('class '):
            continue

        c_header = line.replace(" ", '')
        # the class name ends at the first '(' or ':' so that headers
        # without a base list ("class Foo:") are parsed correctly too
        name_ends = [
            pos for pos in (c_header.find('('), c_header.find(':'))
            if pos != -1
        ]
        if name_ends:
            cn = c_header[len('class'):min(name_ends)]  # class name
        else:
            cn = c_header[len('class'):].strip()
        if '%s%s' % (srcfile_str, cn) not in methods:
            continue
        if cn in handled or cn not in alllist:
            continue
        if cn in wlist or cn + "@" + srcfile.name in wlist:
            continue

        # class comment
        classcom = single_defcom_extract(i, srcls, True)
        if classcom != "":
            if not sampcd_extract_and_run(classcom, cn, "class", cn):
                process_result = False
        else:
            print("WARNING: no comments in class itself ", cn,
                  ", but it deserves.\n")

        # handling methods in class bodies,
        # from the next line of class header
        for x in range(i + 1, len(srcls)):
            if srcls[x].startswith('def ') or srcls[x].startswith('class '):
                break
            srcls[x] = srcls[x].replace('\t', '    ')
            if not srcls[x].startswith('    def '):
                continue

            # a member method header is detected
            thisl = srcls[x]
            indent = len(thisl) - len(thisl.lstrip())
            mn = thisl[indent + len('def '):thisl.find('(')]  # method name
            name = cn + "." + mn  # full name
            if '%s%s' % (srcfile_str, name) not in methods:
                # class method not in api.spec
                continue
            if mn.startswith('_'):
                continue
            if name in wlist or name + "@" + srcfile.name in wlist:
                continue

            # get all the lines of a single method body into
            # thismethod(list), dedented by the method's indent,
            # and send it to single_defcom_extract
            thismethod = [thisl[indent:]]
            for y in range(x + 1, len(srcls)):
                srcls[y] = srcls[y].replace('\t', '    ')
                if (srcls[y].startswith('def ') or
                        srcls[y].startswith('class ') or
                        srcls[y].startswith('    def ')):
                    # end of method
                    break
                thismethod.append(srcls[y][indent:])
            thismtdcom = single_defcom_extract(0, thismethod)
            if thismtdcom != "":
                if not sampcd_extract_and_run(thismtdcom, name, "method",
                                              name):
                    process_result = False

    return process_result
T
tianshuo78520a 已提交
438 439


440
def test(file_list):
    """
    Check the sample codes of every source file in ``file_list`` against
    the module-level white list ``wlist``.

    Args:
        file_list(list): paths of the source files to check

    Returns:
        bool: True only if ``srccoms_extract`` succeeded for all files
    """
    all_passed = True
    for path in file_list:
        with open(path, 'r') as src:
            passed = srccoms_extract(src, wlist)
        if not passed:
            # keep going so that every file gets reported
            all_passed = False
    return all_passed
447 448


449
def get_filenames():
    '''
    this function will get the modules that pending for check.

    It regenerates ``dev_pr_diff_api.spec`` through
    ``get_incrementapi``, resolves every API listed there, and (re)sets
    two module-level lists as a side effect:

    * ``methods``: the qualified names (module path below the first two
      packages + API name) of the APIs to check;
    * ``whl_error``: the APIs whose lookup raised AttributeError.

    The spec file is deleted afterwards.

    Returns:

        list: the modules pending for check .

    '''
    global methods
    global whl_error
    filenames = []
    methods = []
    whl_error = []
    get_incrementapi()
    API_spec = 'dev_pr_diff_api.spec'
    with open(API_spec) as f:
        for line in f.readlines():
            api = line.replace('\n', '')
            try:
                # NOTE(review): eval() executes the spec line as Python;
                # the spec file must only come from a trusted source.
                # The object is resolved once and reused below.
                api_obj = eval(api)
                module = api_obj.__module__
            except AttributeError:
                whl_error.append(api)
                continue

            module_parts = module.split('.')
            if len(module_parts) > 2:
                # a.b.c -> ../python/a/b/c.py
                filename = '../python/'
                for part in module_parts[:-1]:
                    filename = filename + '%s/' % part
                filename = filename + '%s.py' % module_parts[-1]
                if filename not in filenames:
                    filenames.append(filename)
            else:
                print("\nWARNING:----Exception in get api filename----\n")
                print("\n" + api + ' module is ' + module + "\n")

            # get all methods
            api_parts = api.split('.')
            if inspect.isclass(api_obj) or inspect.isfunction(api_obj):
                name = api_parts[-1]
            elif inspect.ismethod(api_obj):
                name = '%s.%s' % (api_parts[-2], api_parts[-1])
            else:
                name = ''
                print("\nWARNING:----Exception in get api methods----\n")
                print("\n" + line + "\n")
                print("\n" + api + ' method is None!!!' + "\n")
            method = ''
            for part in module_parts[2:]:
                method = method + '%s.' % part
            method = method + name
            if method not in methods:
                methods.append(method)
    os.remove(API_spec)
    return filenames


508 509 510 511 512 513 514 515 516 517 518 519
def get_incrementapi():
    '''
    Write to ``dev_pr_diff_api.spec`` (one name per line) the APIs of
    API_PR.spec that are either missing from API_DEV.spec or whose
    'document' value differs between the two files.
    '''

    def get_api_md5(path):
        # ``path`` is relative to the parent of the current directory
        parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
        spec_path = '%s/%s' % (parent_dir, path)
        digests = {}
        with open(spec_path) as spec:
            for record in spec.readlines():
                # the API name is the text before the first blank
                signature = record.split(' ', 1)[0]
                digest = record.split("'document', ")[1].replace(
                    ')', '').replace('\n', '')
                digests[signature] = digest
        return digests

    dev_api = get_api_md5('paddle/fluid/API_DEV.spec')
    pr_api = get_api_md5('paddle/fluid/API_PR.spec')
    with open('dev_pr_diff_api.spec', 'w') as diff_file:
        for key in pr_api:
            # new API, or an existing API whose document value changed
            if key not in dev_api or dev_api[key] != pr_api[key]:
                diff_file.write(key)
                diff_file.write('\n')


538 539 540
# APIs that are white-listed only on CPU: when the script runs with the
# "gpu" argument these names are removed from the white list.
gpu_not_white = [
    "deformable_conv", "cuda_places", "CUDAPinnedPlace", "CUDAPlace",
    "cuda_profiler", 'DGCMomentumOptimizer'
]
543

Z
zhangchunle 已提交
544

545
def get_wlist():
    '''
    Load the white list of API from ``wlist.json`` in the current
    directory. The lists stored under every key of the JSON object are
    concatenated into one list.

    Returns:

        wlist: a list of API that should not trigger the example check .

    '''
    with open("wlist.json", 'r') as load_f:
        groups = json.load(load_f)
    merged = []
    for group_name in groups:
        merged = merged + groups[group_name]
    return merged


562
# Module-level white list; read by test() and, in "gpu" mode, trimmed below.
wlist = get_wlist()

# The script needs one argument: "cpu" or "gpu".
if len(sys.argv) < 2:
    print("Error: inadequate number of arguments")
    print('''If you are going to run it on 
        "CPU: >>> python sampcd_processor.py cpu
        "GPU: >>> python sampcd_processor.py gpu
        ''')
    sys.exit("lack arguments")
else:
    if sys.argv[1] == "gpu":
        # the CPU-only white-list entries must be checked on GPU
        for _gnw in gpu_not_white:
            wlist.remove(_gnw)
    elif sys.argv[1] != "cpu":
        print("Unrecognized argument:'", sys.argv[1], "' , 'cpu' or 'gpu' is ",
              "desired\n")
        sys.exit("Invalid arguments")
    print("API check -- Example Code")
    print("sample_test running under python", platform.python_version())
    # directory that receives the generated sample code files
    if not os.path.isdir("./samplecode_temp"):
        os.mkdir("./samplecode_temp")
    cpus = multiprocessing.cpu_count()
    # also sets the module-level ``methods`` and ``whl_error`` lists
    filenames = get_filenames()
    if len(filenames) == 0 and len(whl_error) == 0:
        print("-----API_PR.spec is the same as API_DEV.spec-----")
        exit(0)
    elif '../python/paddle/fluid/core_avx.py' in filenames:
        filenames.remove('../python/paddle/fluid/core_avx.py')
    print("API_PR is diff from API_DEV: %s" % filenames)
    # split the files into chunks of at least one file, sized from the CPU count
    one_part_filenum = int(math.ceil(len(filenames) / cpus))
    if one_part_filenum == 0:
        one_part_filenum = 1
    divided_file_list = [
        filenames[i:i + one_part_filenum]
        for i in range(0, len(filenames), one_part_filenum)
    ]

    # run test() on every chunk in a process pool and wait for completion
    po = multiprocessing.Pool()
    results = po.map_async(test, divided_file_list)
    po.close()
    po.join()

    # one boolean per chunk, as returned by test()
    result = results.get()

    # delete temp files
    for root, dirs, files in os.walk("./samplecode_temp"):
        for fntemp in files:
            os.remove("./samplecode_temp/" + fntemp)
    os.rmdir("./samplecode_temp")

    print("----------------End of the Check--------------------")
    if len(whl_error) != 0:
        # some APIs of the spec could not be resolved by get_filenames()
        print("%s is not in whl." % whl_error)
        print("")
        print("Please check the whl package and API_PR.spec!")
        print("You can follow these steps in order to generate API.spec:")
        print("1. cd ${paddle_path}, compile paddle;")
        print("2. pip install build/python/dist/(build whl package);")
        print(
            "3. run 'python tools/print_signatures.py paddle > paddle/fluid/API.spec'."
        )
        for temp in result:
            if not temp:
                print("")
                print("In addition, mistakes found in sample codes.")
                print("Please check sample codes.")
        print("----------------------------------------------------")
        exit(1)
    else:
        # exit with an error as soon as one chunk reported a failure
        for temp in result:
            if not temp:
                print("Mistakes found in sample codes.")
                print("Please check sample codes.")
                exit(1)
    print("Sample code check is successful!")