# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import inspect
import json
import math
import multiprocessing
import os
import platform
import subprocess
import sys

import paddle
import paddle.fluid
"""
Please make sure to run this script from the tools directory.

usage: python sampcd_processor.py {arg1}
arg1: selects whether the sample codes run in the gpu version or the cpu version

For example, you can run the cpu version like this:

    python sampcd_processor.py cpu

"""


def find_all(srcstr, substr):
    """
    Find every occurrence of a substring in the source string
    and return the starting indices as a list.

    The search resumes one character after each hit, so overlapping
    occurrences are all reported.

    Args:
        srcstr(str): the parent string
        substr(str): the substring to look for

    Returns:
        list: the starting indices of the substrings found, in
              ascending order (empty when there is no match)
    """
    indices = []
    gotone = srcstr.find(substr)
    while gotone != -1:
        indices.append(gotone)
        # restart just past the previous hit so overlaps are found too
        gotone = srcstr.find(substr, gotone + 1)
    return indices


def check_indent(cdline):
    """
    Measure the indent of a given code line.

    The indent is the number of leading blank characters, i.e.
    blank spaces and tabs. A tab is counted as 4 single blank
    spaces.

    Args:
        cdline(str) : a single line of code from the source file

    Returns:
        int : the indent, expressed as a number of blank spaces
    """
    indent = 0
    for c in cdline:
        if c == '\t':
            indent += 4
        elif c == ' ':
            indent += 1
        else:
            # first character that is neither a space nor a tab
            break
    return indent


# srccom: raw comments in the source, including ''' and original indent
def sampcd_extract_and_run(srccom, name, htype="def", hname=""):
    """
    Extract and run sample codes from source comment and
    the result will be returned.

    Every " code-block:: python" section found in the comment is
    dedented, written to a file under samplecode_temp/ and executed
    in a subprocess. The temporary files are left in place.

    Args:
        srccom(str): the source comment of some API whose
                     example codes will be extracted and run.
        name(str): the name of the API.
        htype(str): the type of hint banners, def/class/method.
        hname(str): the name of the hint banners, e.g. def hname.

    Returns:
        result: True if every sample code ran with return code 0,
                False if a sample failed, is empty, is missing, or
                uses the deprecated ">>>" style.
    """
    marker = " code-block:: python"
    result = True

    def sampcd_header_print(name, sampcd, htype, hname, index):
        """
        Print the hint banner shown before the output of a failed sample.

        Args:
            name(str): the name of the API.
            sampcd(str): sample code string
            htype(str): the type of hint banners, def/class/method.
            hname(str): the name of the hint banners, e.g. def hname.
            index(int): 1-based position of the sample in the comment.
        """
        print_header(htype, hname)
        print("Sample code ", str(index), " extracted for ", name, "   :")
        print(sampcd)
        print("----example code check----\n")
        print("executing sample code .....")
        print("execution result:")

    sampcd_begins = find_all(srccom, marker)
    if len(sampcd_begins) == 0:
        print_header(htype, hname)
        # Samples written with ">>>" are a deprecated format and are
        # reported as an error.
        if srccom.find("Examples:") != -1:
            print("----example code check----\n")
            if srccom.find(">>>") != -1:
                print(
                    "Deprecated sample code style:\n\n    Examples:\n\n        >>>codeline\n        >>>codeline\n\n\n ",
                    "Please use '.. code-block:: python' to ",
                    "format sample code.\n")
                result = False
        else:
            print("Error: No sample code!\n")
            result = False

    for y, sampcd_begin in enumerate(sampcd_begins, 1):
        sampcd = srccom[sampcd_begin + len(marker) + 1:].split("\n")
        # remove starting empty lines
        while sampcd and sampcd[0].replace(' ', '').replace('\t', '') == '':
            sampcd.pop(0)
        if not sampcd:
            # the marker is followed by nothing but blank lines
            print_header(htype, hname)
            print("Error: No sample code!\n")
            result = False
            continue

        # the minimum indent, which is the indent of the first
        # non-empty line
        min_indent = check_indent(sampcd[0])
        sampcd_to_write = []
        for cdline in sampcd:
            # skip empty lines or those only with spaces/tabs
            if cdline.strip() == '':
                continue
            if check_indent(cdline) < min_indent:
                # a less indented line marks the end of the code block
                break
            cdline = cdline.replace('\t', '    ')
            sampcd_to_write.append(cdline[min_indent:])

        sampcd = '\n'.join(sampcd_to_write)
        if sys.argv[1] == "cpu":
            sampcd = '\nimport os\n' + 'os.environ["CUDA_VISIBLE_DEVICES"] = ""\n' + sampcd
        if sys.argv[1] == "gpu":
            sampcd = '\nimport os\n' + 'os.environ["CUDA_VISIBLE_DEVICES"] = "0"\n' + sampcd
        sampcd += '\nprint(' + '\"' + name + ' sample code is executed successfully!\")'

        if len(sampcd_begins) > 1:
            tfname = name + "_example_" + str(y) + ".py"
        else:
            tfname = name + "_example" + ".py"
        with open("samplecode_temp/" + tfname, 'w') as tempf:
            tempf.write(sampcd)

        major_version = platform.python_version()[0]
        if major_version == "2":
            cmd = ["python", "samplecode_temp/" + tfname]
        elif major_version == "3":
            cmd = ["python3", "samplecode_temp/" + tfname]
        else:
            print("Error: fail to parse python version!")
            result = False
            sys.exit(1)

        subprc = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        output, error = subprc.communicate()
        # msg is the returned code execution report
        msg = output.decode(encoding='utf-8')
        err = error.decode(encoding='utf-8')

        if subprc.returncode != 0:
            print("\nSample code error found in ", name, ":\n")
            sampcd_header_print(name, sampcd, htype, hname, y)
            print("subprocess return code: ", str(subprc.returncode))
            print("Error Raised from Sample Code ", name, " :\n")
            print(err)
            print(msg)
            result = False

    return result


def single_defcom_extract(start_from, srcls, is_class_begin=False):
    """
    Extract the comment (docstring) body of a def function/class/method.

    The lines after ``start_from`` are scanned until a docstring
    delimited by triple double quotes or triple single quotes is
    found and closed, or until the next definition starts.

    Args:
        start_from(int): the line num of "def" header
        srcls(list): the source file in lines
        is_class_begin(bool): whether the start_from is the beginning of
            a class. A class body without a docstring ends at its first
            method (a line starting with 4 blanks and "def "), whereas
            the body of a common def function can only be ended by a
            non-indented def/class.

    Returns:
        string : the extracted comment body. The lines holding only the
            quote marks are not included, and text sharing the opening
            line of a multi-line docstring is dropped. A docstring
            opened and closed on one line yields its inner text
            followed by a newline. Empty if no docstring is found.
    """
    fcombody = ""  # def comment body
    quote = None  # the mark that opened the docstring; None before it starts
    for line in srcls[start_from + 1:]:
        if is_class_begin and line.replace('\t', '    ').startswith('    def '):
            break
        if line.startswith('def ') or line.startswith('class '):
            break
        compact = line.replace(" ", '').replace("\t", '').replace("\n", '')
        if quote is None:
            for mark in ("\"\"\"", "\'\'\'"):
                if compact.startswith(mark):
                    quote = mark
                    break
            if quote is None:
                # still in front of the docstring
                continue
            first = line.find(quote)
            if first != -1:
                rest = line[first + len(quote):]
                close = rest.find(quote)
                if close != -1:
                    # the docstring is opened and closed on the same line
                    inner = rest[:close].strip()
                    if inner:
                        fcombody += inner + "\n"
                    break
            continue
        if compact.startswith(quote):
            # a line starting with the closing mark ends the docstring
            break
        close = line.find(quote)
        if close != -1:
            # closing mark at the end of a text line: keep the text only
            fcombody += line[:close] + "\n"
            break
        fcombody += line
    return fcombody


def print_header(htype, name):
    """
    Print a two-line banner naming the API that is being reported on.

    Args:
        htype(str): the type of hint banners, def/class/method.
        name(str): the name printed after the banner type.
    """
    print(htype, " name:", name)
    print("-----------------------")


def _split_names(text):
    """Split a comma separated ``__all__`` fragment into bare names."""
    for junk in ("\n", " ", "'", "\""):
        text = text.replace(junk, '')
    return [item for item in text.split(',') if item]


def _collect_all_list(is_ops_file, srcc, srcls):
    """Return the names found in the ``__all__`` list(s) of a source file."""
    if is_ops_file:
        # layers/ops.py style: every line starting with __all__ that
        # holds a list literal contributes its names
        alllist = []
        for line in srcls:
            if not line.startswith("__all__"):
                continue
            lb = line.find('[')
            if lb == -1:
                continue
            rb = line.find(']')
            if rb == -1:
                rb = len(line)
            alllist.extend(_split_names(line[lb + 1:rb]))
        return alllist
    # other files: the first bracketed list after the first __all__
    alllist_b = srcc.find("__all__") + len("__all__")
    lb = srcc.find("[", alllist_b)
    rb = srcc.find("]", alllist_b)
    if lb == -1 or rb < lb:
        return []
    return _split_names(srcc[lb + 1:rb])


def _header_name(header, keyword):
    """Return the identifier that follows ``keyword`` in a def/class header."""
    rest = header.replace(" ", '')[len(keyword):]
    end = len(rest)
    # the name ends at '(' or, for a class without bases, at ':'
    for stop in ('(', ':'):
        pos = rest.find(stop)
        if pos != -1:
            end = min(end, pos)
    return rest[:end].strip()


def _check_class_methods(class_line, srcls, cn, srcfile_str, srcfile_name,
                         wlist):
    """Run the sample codes of the methods of the class starting at class_line."""
    ok = True
    # from the next line of class header
    for x in range(class_line + 1, len(srcls)):
        if srcls[x].startswith('def ') or srcls[x].startswith('class '):
            break
        srcls[x] = srcls[x].replace('\t', '    ')
        if not srcls[x].startswith('    def '):
            continue
        # member method def header
        thisl = srcls[x]
        indent = len(thisl) - len(thisl.lstrip())
        mn = thisl[indent + len('def '):thisl.find('(')]  # method name
        name = cn + "." + mn  # full name
        if '%s%s' % (srcfile_str, name) not in methods:
            # class method not in api.spec
            continue
        if mn.startswith('_'):
            continue
        if name in wlist or name + "@" + srcfile_name in wlist:
            continue
        # collect the dedented lines of this single method body and
        # send them to single_defcom_extract
        thismethod = [thisl[indent:]]
        for y in range(x + 1, len(srcls)):
            srcls[y] = srcls[y].replace('\t', '    ')
            if srcls[y].startswith(('def ', 'class ', '    def ')):
                # end of method
                break
            thismethod.append(srcls[y][indent:])
        thismtdcom = single_defcom_extract(0, thismethod)
        if thismtdcom != "":
            if not sampcd_extract_and_run(thismtdcom, name, "method", name):
                ok = False
    return ok


def srccoms_extract(srcfile, wlist):
    """
    Given a source file ``srcfile``, this function will
    extract its API(doc comments) and run sample codes in the
    API.

    Only top-level functions and classes that are listed in the
    file's ``__all__`` and in the module-level ``methods`` list are
    checked, together with the public methods of those classes. A
    file without ``__all__`` is not checked at all.

    Args:
        srcfile(file): the source file
        wlist(list): white list; entries are either a name or
            "name@<source file name>"

    Returns:
        result: True if no checked sample code failed, else False
    """
    process_result = True
    srcc = srcfile.read()
    # set file pointer to its beginning and get the source lines
    srcfile.seek(0, 0)
    srcls = srcfile.readlines()

    if srcc.find("__all__") == -1:
        return process_result

    # dotted prefix built from the path components after the first four
    path_parts = srcfile.name.replace('.py', '').split('/')
    srcfile_str = ''.join(part + '.' for part in path_parts[4:])

    is_ops_file = srcfile.name.find("ops.py") != -1
    alllist = _collect_all_list(is_ops_file, srcc, srcls)

    # ops.py also has normal formatted functions; names assigned through
    # __doc__ are recorded in 'handled' and ignored in the following step
    handled = []
    if is_ops_file:
        for line in srcls:
            pos = line.find("__doc__")
            if pos == -1:
                continue
            opname = line[:pos - 1]
            if opname not in wlist:
                handled.append(opname)

    for i in range(len(srcls)):
        if srcls[i].startswith('def '):
            # a function header is detected in line i
            fn = _header_name(srcls[i], 'def')  # function name
            if "%s%s" % (srcfile_str, fn) not in methods:
                continue
            if fn in handled or fn not in alllist:
                continue
            if fn in wlist or fn + "@" + srcfile.name in wlist:
                continue
            fcombody = single_defcom_extract(i, srcls)
            if fcombody == "":  # if no comment
                print_header("def", fn)
                print("WARNING: no comments in function ", fn,
                      ", but it deserves.")
                continue
            if not sampcd_extract_and_run(fcombody, fn, "def", fn):
                process_result = False
        elif srcls[i].startswith('class '):
            cn = _header_name(srcls[i], 'class')  # class name
            if '%s%s' % (srcfile_str, cn) not in methods:
                continue
            if cn in handled or cn not in alllist:
                continue
            if cn in wlist or cn + "@" + srcfile.name in wlist:
                continue
            # class comment
            classcom = single_defcom_extract(i, srcls, True)
            if classcom != "":
                if not sampcd_extract_and_run(classcom, cn, "class", cn):
                    process_result = False
            else:
                print("WARNING: no comments in class itself ", cn,
                      ", but it deserves.\n")
            # handling methods in class bodies
            if not _check_class_methods(i, srcls, cn, srcfile_str,
                                        srcfile.name, wlist):
                process_result = False

    return process_result


def test(file_list):
    """
    Check the sample codes of every source file in ``file_list``.

    Each file is opened and handed to srccoms_extract together with
    the module-level white list ``wlist``.

    Args:
        file_list(list): paths of the source files to check

    Returns:
        bool: True if every file passed the check, else False
    """
    process_result = True
    for src_path in file_list:
        with open(src_path, 'r') as src:
            if not srccoms_extract(src, wlist):
                process_result = False
    return process_result


def get_filenames():
    '''
    this function will get the modules that pending for check.

    It regenerates dev_pr_diff_api.spec through get_incrementapi,
    resolves every API listed there, fills the module-level list
    ``methods`` with the API names and removes the spec file again.

    Returns:

        list: the modules pending for check .

    '''
    filenames = []
    global methods
    methods = []
    get_incrementapi()
    API_spec = 'dev_pr_diff_api.spec'
    with open(API_spec) as f:
        for line in f.readlines():
            api = line.replace('\n', '')
            if not api:
                continue
            # NOTE(review): eval() runs on the contents of the generated
            # spec file; this is only acceptable while the API spec files
            # come from a trusted build.
            try:
                api_obj = eval(api)
                module = api_obj.__module__
            except AttributeError:
                continue
            module_parts = module.split('.')
            if len(module_parts) > 2:
                filename = '../python/'
                for part in module_parts[:-1]:
                    filename = filename + '%s/' % part
                filename = filename + '%s.py' % module_parts[-1]
                if filename not in filenames:
                    filenames.append(filename)
            else:
                # no source file can be derived from such a short module path
                print("\n----Exception in get api filename----\n")
                print("\n" + api + 'module is ' + module + "\n")
            # get all methods
            api_parts = api.split('.')
            if inspect.isclass(api_obj):
                name = api_parts[-1]
            elif inspect.isfunction(api_obj):
                name = api_parts[-1]
            elif inspect.ismethod(api_obj):
                # NOTE(review): on Python 3 a function reached through its
                # class is a plain function, so it takes the branch above
                # and gets no "Class." prefix - confirm this is intended.
                name = '%s.%s' % (api_parts[-2], api_parts[-1])
            else:
                name = ''
                print("\n----Exception in get api methods----\n")
                print("\n" + line + "\n")
                print("\n" + api + ' method is None!!!' + "\n")
            method = ''
            for part in module_parts[2:]:
                method = method + '%s.' % part
            method = method + name
            if method not in methods:
                methods.append(method)
    os.remove(API_spec)
    return filenames


def get_incrementapi():
    '''
    this function will get the apis that differ between API_DEV.spec and API_PR.spec.

    The names of the APIs that are new in API_PR.spec, or whose
    'document' field differs from API_DEV.spec, are written one per
    line to dev_pr_diff_api.spec in the current directory.
    '''

    def get_api_md5(path):
        '''
        map every API name of a spec file to its 'document' field.

        Args:
            path(str): spec file path, relative to the parent of the
                current working directory

        Returns:
            dict: API name -> text after the 'document' marker with the
                closing parentheses removed; an empty string when the
                line has no such marker
        '''
        api_md5 = {}
        API_spec = '%s/%s' % (os.path.abspath(os.path.join(os.getcwd(), "..")),
                              path)
        with open(API_spec) as f:
            for line in f:
                line = line.strip()
                if not line:
                    # blank lines carry no API
                    continue
                api = line.split(' ', 1)[0]
                fields = line.split("'document', ")
                if len(fields) > 1:
                    md5 = fields[1].replace(')', '')
                else:
                    md5 = ''
                api_md5[api] = md5
        return api_md5

    dev_api = get_api_md5('paddle/fluid/API_DEV.spec')
    pr_api = get_api_md5('paddle/fluid/API_PR.spec')
    with open('dev_pr_diff_api.spec', 'w') as f:
        for key in pr_api:
            if key not in dev_api or dev_api[key] != pr_api[key]:
                f.write(key)
                f.write('\n')


# APIs that are white-listed only on CPU: they are removed from the
# white list when the script runs with the "gpu" argument.
gpu_not_white = [
    "deformable_conv", "cuda_places", "CUDAPinnedPlace", "CUDAPlace",
    "cuda_profiler", 'DGCMomentumOptimizer'
]


def get_wlist():
    '''
    Load the white list of APIs from wlist.json.

    The json file maps group names to lists of API names; the lists
    of all groups are concatenated in the order of the file.

    Returns:

        wlist: a list of API that should not trigger the example check .

    '''
    with open("wlist.json", 'r') as json_file:
        groups = json.load(json_file)
    merged = []
    for group_name in groups:
        merged = merged + groups[group_name]
    return merged


# The white list stays at module level because test() reads it as a global.
wlist = get_wlist()

# Entry point of the script. The guard keeps the check (and the process
# pool it starts) from running when this module is merely imported.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Error: inadequate number of arguments")
        print('''If you are going to run it on 
        "CPU: >>> python sampcd_processor.py cpu
        "GPU: >>> python sampcd_processor.py gpu
        ''')
        sys.exit("lack arguments")

    if sys.argv[1] == "gpu":
        # these APIs are exempt on CPU only, so check them on GPU
        for _gnw in gpu_not_white:
            if _gnw in wlist:
                wlist.remove(_gnw)
    elif sys.argv[1] != "cpu":
        print("Unrecognized argument:'", sys.argv[1], "' , 'cpu' or 'gpu' is ",
              "desired\n")
        sys.exit("Invalid arguments")
    print("API check -- Example Code")
    print("sample_test running under python", platform.python_version())
    if not os.path.isdir("./samplecode_temp"):
        os.mkdir("./samplecode_temp")
    cpus = multiprocessing.cpu_count()
    filenames = get_filenames()
    if len(filenames) == 0:
        print("-----API_PR.spec is the same as API_DEV.spec-----")
        sys.exit(0)
    elif '../python/paddle/fluid/core_avx.py' in filenames:
        filenames.remove('../python/paddle/fluid/core_avx.py')
    print("API_PR is diff from API_DEV: %s" % filenames)

    # split the files into chunks, about one chunk per cpu; float()
    # keeps the division exact on Python 2 as well
    one_part_filenum = max(1, int(math.ceil(len(filenames) / float(cpus))))
    divided_file_list = [
        filenames[i:i + one_part_filenum]
        for i in range(0, len(filenames), one_part_filenum)
    ]

    po = multiprocessing.Pool()
    results = po.map_async(test, divided_file_list)
    po.close()
    po.join()

    result = results.get()

    # delete temp files
    for fntemp in os.listdir("./samplecode_temp"):
        os.remove("./samplecode_temp/" + fntemp)
    os.rmdir("./samplecode_temp")

    print("----------------End of the Check--------------------")
    if not all(result):
        print("Mistakes found in sample codes")
        sys.exit(1)
    print("Sample code check is successful!")