# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
T
tianshuo78520a 已提交
14 15

import os
16
import sys
T
tianshuo78520a 已提交
17
import subprocess
18 19 20
import multiprocessing
import math
import platform
21
import inspect
Z
zhangchunle 已提交
22
import json
23 24 25 26
import argparse
import shutil
import re
import logging
27 28
"""
please make sure to run in the tools path
29 30
usage: python sample_test.py {cpu or gpu} 
    {cpu or gpu}: running in cpu version or gpu version
31 32

for example, you can run cpu version python2 testing like this:
33 34 35

    python sampcd_processor.py cpu 

36
"""
T
tianshuo78520a 已提交
37

38 39 40 41 42 43 44
logger = logging.getLogger()
# Reuse the root logger's first handler when one is already installed (it is
# assumed to be the one we want to configure); otherwise attach a fresh
# StreamHandler.
console = logger.handlers[0] if logger.handlers else None
if console is None:
    console = logging.StreamHandler()
    logger.addHandler(console)
# Console records carry the bare message only.
console.setFormatter(logging.Formatter("%(message)s"))
46 47 48 49 50 51 52 53 54 55

# Device the generated sample-code files are prepared for ('cpu' or 'gpu');
# overwritten from the command line by the script entry point.
RUN_ON_DEVICE = "cpu"
# Value written into CUDA_VISIBLE_DEVICES when RUN_ON_DEVICE is 'gpu'.
GPU_ID = 0

# NOTE(review): `methods` is declared global in get_filenames() but is not
# assigned anywhere in the visible code -- confirm whether it is still needed.
methods = []
# API names from the diff spec that could not be resolved (AttributeError).
whl_error = []

# Spec files; get_api_md5() resolves them against the parent of the cwd.
API_DEV_SPEC_FN = "paddle/fluid/API_DEV.spec"
API_PR_SPEC_FN = "paddle/fluid/API_PR.spec"
# List of changed/new API names, written to (and read from) the cwd.
API_DIFF_SPEC_FN = "dev_pr_diff_api.spec"
# Directory that receives the extracted sample-code files.
SAMPLECODE_TEMPDIR = "samplecode_temp"

T
tianshuo78520a 已提交
56 57

def find_all(srcstr, substr):
    """
    Find every occurrence of a substring and return the starting indices.

    The search resumes one character after each hit, so overlapping
    occurrences are reported as well.

    Args:
        srcstr(str): the parent string
        substr(str): the substring to look for

    Returns:
        list: the starting indices of all occurrences found, in ascending
              order (empty when there is none)
    """
    indices = []
    pos = srcstr.find(substr)
    while pos != -1:
        indices.append(pos)
        pos = srcstr.find(substr, pos + 1)
    return indices


def check_indent(cdline):
    """
    Measure the indent of a given code line.

    Counts the leading blank characters (spaces and tabs) of the line and
    stops at the first other character. A tab is counted as 4 single
    spaces.

    Args:
        cdline(str) : a single line of code from the source file

    Returns:
        int : the indent, expressed as the number of interpreted spaces
    """
    indent = 0
    for c in cdline:
        if c == '\t':
            indent += 4
        elif c == ' ':
            indent += 1
        else:
            # first non-blank character: the indent ends here
            break
    return indent


106 107 108
def sampcd_extract_to_file(srccom, name, htype="def", hname=""):
    """
    Extract sample codes from __doc__, and write them to files.

    Every ``code-block:: python`` section found in the comment is dedented,
    prefixed with a CUDA_VISIBLE_DEVICES setup matching RUN_ON_DEVICE,
    suffixed with a "executed successfully" print, and written to its own
    file under SAMPLECODE_TEMPDIR. Blank lines inside a section are dropped;
    a section ends at the first line indented less than its first line.

    Args:
        srccom(str): the source comment of some API whose
                     example codes will be extracted and run.
        name(str): the name of the API.
        htype(str): the type of hint banners, def/class/method.
        hname(str): the name of the hint banners, e.g. def hname.

    Returns:
        sample_code_filenames(list of str): paths of the files written;
            empty when the comment holds no usable sample code.
    """
    CODE_BLOCK_INTERDUCTORY = "code-block:: python"

    sampcd_begins = find_all(srccom, CODE_BLOCK_INTERDUCTORY)
    if len(sampcd_begins) == 0:
        # detect sample codes using >>> to format and consider this situation as wrong
        print(htype, " name:", hname)
        print("-----------------------")
        if srccom.find("Examples:") != -1:
            print("----example code check----\n")
            if srccom.find(">>>") != -1:
                print(
                    "Deprecated sample code style:\n\n    Examples:\n\n        >>>codeline\n        >>>codeline\n\n\n ",
                    "Please use '.. code-block:: python' to ",
                    "format sample code.\n")
        else:
            print("Error: No sample code!\n")
        # nothing can be extracted without a code-block marker
        return []

    sample_code_filenames = []
    for y, sampcd_begin in enumerate(sampcd_begins, 1):
        # "+ 1" also skips the character right after the marker
        sampcd = srccom[sampcd_begin + len(CODE_BLOCK_INTERDUCTORY) + 1:]
        sampcd = sampcd.split("\n")
        # remove starting empty lines (lines made only of spaces/tabs)
        while sampcd and sampcd[0].strip(' \t') == '':
            sampcd.pop(0)
        if not sampcd:
            # the marker is followed by blank lines only: there is no code
            # to extract (indexing sampcd[0] here would raise IndexError)
            print("Error: empty code-block found in", name)
            continue

        # the minimum indent, which is the indent of the first
        # non-empty line
        min_indent = check_indent(sampcd[0])
        sampcd_to_write = []
        for cdline in sampcd:
            # handle empty lines or those only with spaces/tabs
            if cdline.strip() == '':
                continue
            if check_indent(cdline) < min_indent:
                # dedent below the first line: the code block is over
                break
            # expand tabs first so that the slice matches check_indent()
            sampcd_to_write.append(cdline.replace('\t', '    ')[min_indent:])

        sampcd = '\n'.join(sampcd_to_write)
        if RUN_ON_DEVICE == "cpu":
            sampcd = '\nimport os\nos.environ["CUDA_VISIBLE_DEVICES"] = ""\n' + sampcd
        if RUN_ON_DEVICE == "gpu":
            sampcd = '\nimport os\nos.environ["CUDA_VISIBLE_DEVICES"] = "{}"\n'.format(
                GPU_ID) + sampcd
        sampcd += '\nprint("' + name + ' sample code is executed successfully!")'

        # a single block gets "<name>_example.py", several get a "_<n>" suffix
        suffix = '.py' if len(sampcd_begins) == 1 else '_{}.py'.format(y)
        tfname = os.path.join(SAMPLECODE_TEMPDIR,
                              '{}_example{}'.format(name, suffix))
        with open(tfname, 'w') as tempf:
            tempf.write(sampcd)
        sample_code_filenames.append(tfname)
    return sample_code_filenames


180 181 182 183 184 185 186 187 188 189 190 191
def execute_samplecode(tfname):
    """
    Execute a sample-code test.

    The file is skipped (and reported as a success) when one of its lines
    starts with a ``# required: distributed|gpu|skip`` marker. Otherwise it
    is run with the current interpreter and succeeds when the exit code is 0.

    Args:
        tfname: the filename of the samplecode.

    Returns:
        result: success or not
        tfname: same as the input argument
        msg: the stdout output of the samplecode executing, or the skip
             reason when the file was skipped.
    """
    if platform.python_version()[0] not in ["2", "3"]:
        print("Error: fail to parse python version!")
        sys.exit(1)
    cmd = [sys.executable, tfname]

    # check required environment
    with open(tfname, 'r') as f:
        for line in f:
            if re.match(r'#\s*required\s*:\s*(distributed|gpu|skip)', line):
                return True, tfname, '{} is skipped. cause: {}'.format(tfname,
                                                                       line)

    logging.info('running %s', tfname)
    print("\n----example code check----")
    print("executing sample code .....", tfname)
    subprc = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, error = subprc.communicate()
    # 'replace' keeps a sample that emits non-UTF-8 bytes from crashing the
    # checker itself with a UnicodeDecodeError
    msg = output.decode('utf-8', 'replace')
    err = error.decode('utf-8', 'replace')

    result = subprc.returncode == 0
    if result:
        print("----example code check success----\n")
    else:
        print("Sample code error found in ", tfname, ":")
        print("-----------------------")
        # context manager: do not leak the file handle
        with open(tfname) as src:
            print(src.read())
        print("-----------------------")
        print("subprocess return code: ", str(subprc.returncode))
        print("Error Raised from Sample Code ", tfname, " :")
        print(err)
        print(msg)
        print("----example code check failed----\n")
        logging.warning('%s error: %s', tfname, err)
        logging.warning('%s msg: %s', tfname, msg)

    # msg is the returned code execution report
    return result, tfname, msg
T
tianshuo78520a 已提交
236 237


238
def get_filenames():
    '''
    Collect the sample code files that are pending for check.

    Regenerates the diff spec via get_incrementapi(), resolves every API
    name listed in it, extracts the sample codes from the API's docstring
    into files, and records names that cannot be resolved in the global
    ``whl_error`` list.

    Returns:

        dict: maps each generated sample code filename to the name of the
              API it was extracted from.

    '''
    global whl_error
    # `paddle` must be bound in this scope: eval() below resolves the
    # 'paddle.xxx' names against this function's namespace.
    import paddle
    whl_error = []
    get_incrementapi()
    all_sample_code_filenames = {}
    with open(API_DIFF_SPEC_FN) as f:
        for line in f:
            api = line.replace('\n', '')
            try:
                # NOTE(review): eval() executes whatever the spec file
                # contains; only run this on spec files you generated.
                api_obj = eval(api)
            except AttributeError:
                # the name is in the spec but missing from the installed whl
                whl_error.append(api)
                continue
            except SyntaxError:
                logger.warning('line:%s, api:%s', line, api)
                # paddle.Tensor.<lambda>
                continue
            doc = getattr(api_obj, '__doc__', None)
            if doc:
                for tfname in sampcd_extract_to_file(doc, api):
                    all_sample_code_filenames[tfname] = api
    return all_sample_code_filenames
271 272


273
def get_api_md5(path):
    """
    Read the api spec file and collect the md5sum of every api's docstring.

    Two line layouts are recognised: one where the api name directly follows
    an opening parenthesis, and the older ``<name> (ArgSpec...`` layout. In
    both, the 32-character md5 follows the word ``document``. Lines matching
    neither layout are ignored.

    Args:
        path: the api spec file. ATTENTION: a relative path is resolved
              against the parent directory of the current working
              directory; an absolute path is used as is.

    Returns:
        api_md5(dict): key is the api's real fullname, value is the md5sum.
    """
    api_md5 = {}
    parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
    # os.path.join (instead of '%s/%s') leaves absolute paths untouched
    API_spec = os.path.join(parent_dir, path)
    pat = re.compile(r'\((paddle[^,]+)\W*document\W*([0-9a-z]{32})')
    patArgSpec = re.compile(
        r'^(paddle[^,]+)\s+\(ArgSpec.*document\W*([0-9a-z]{32})')
    with open(API_spec) as f:
        for line in f:
            mo = pat.search(line) or patArgSpec.search(line)
            if mo:
                api_md5[mo.group(1)] = mo.group(2)
    return api_md5


299 300 301 302
def get_incrementapi():
    '''
    Write the APIs that differ between API_DEV.spec and API_PR.spec.

    An API of the PR spec is written to API_DIFF_SPEC_FN (one name per
    line) when it is absent from the dev spec or when its docstring md5
    differs. APIs that only exist in the dev spec are not listed.
    '''
    dev_api = get_api_md5(API_DEV_SPEC_FN)
    pr_api = get_api_md5(API_PR_SPEC_FN)
    with open(API_DIFF_SPEC_FN, 'w') as f:
        for key in pr_api:
            if key not in dev_api:
                logger.debug("%s is not in dev", key)
            elif dev_api[key] != pr_api[key]:
                logger.debug("%s in dev is %s, different from pr's %s", key,
                             dev_api[key], pr_api[key])
            else:
                # unchanged API: nothing to re-check
                continue
            f.write(key)
            f.write('\n')


320
def get_wlist(fn="wlist.json"):
    '''
    Load the white list of API from a json file.

    Args:

        fn(str): path of the json file. The keys 'wlist_dir' and 'wlist_api'
            hold lists of objects with a "name" entry, 'gpu_not_white'
            holds a list that is returned as is, and the value of any other
            key is appended to the API white list.

    Returns:

        wlist: a list of API that should not trigger the example check .
        wlist_file: the "name" entries found under 'wlist_dir'.
        gpu_not_white: the APIs that are white-listed on CPU only.

    '''
    wlist = []
    wlist_file = []
    # only white on CPU
    gpu_not_white = []
    with open(fn, 'r') as load_f:
        load_dict = json.load(load_f)
    for key, value in load_dict.items():
        if key == 'wlist_dir':
            wlist_file.extend(item["name"] for item in value)
        elif key == "gpu_not_white":
            gpu_not_white = value
        elif key == "wlist_api":
            wlist.extend(item["name"] for item in value)
        else:
            wlist = wlist + value
    return wlist, wlist_file, gpu_not_white
Z
zhangchunle 已提交
347 348


349 350 351 352 353 354
# Optional command-line flags; each row is read positionally by parse_args()
# as: flags, dest, type, default, help.
arguments = [
    [
        '--gpu_id',
        'gpu_id',
        int,
        0,
        'GPU device id to use [0]',
    ],
    [
        '--logf',
        'logf',
        str,
        None,
        'file for logging',
    ],
    [
        '--threads',
        'threads',
        int,
        0,
        'sub processes number',
    ],
]
355

356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398

def parse_args():
    """
    Parse input arguments

    Recognised arguments are the optional positional ``mode`` (defaults to
    'cpu'), the ``--debug`` switch, and every option listed in the
    module-level ``arguments`` table.

    Returns:
        argparse.Namespace: the parsed command line.
    """
    parser = argparse.ArgumentParser(description='run Sample Code Test')
    parser.add_argument('--debug', dest='debug', action="store_true")
    # nargs='?' makes the positional optional so that default='cpu' really
    # applies, including when only options such as --debug are given.
    parser.add_argument(
        'mode', type=str, nargs='?', help='run on device', default='cpu')
    for flags, dest, arg_type, default, help_text in arguments:
        parser.add_argument(
            flags, dest=dest, help=help_text, type=arg_type, default=default)

    return parser.parse_args()


if __name__ == '__main__':
    # Script entry point: extract the sample codes of every API whose
    # docstring changed between the dev and PR specs, run them in a process
    # pool, and exit non-zero when an API is missing or a sample fails.
    args = parse_args()
    # The root logger defaults to WARNING, which would silently drop every
    # logger.info() report below; INFO is the baseline, DEBUG on request.
    logger.setLevel(logging.DEBUG if args.debug else logging.INFO)
    if args.logf:
        logfHandler = logging.FileHandler(args.logf)
        logfHandler.setFormatter(
            logging.Formatter(
                "%(asctime)s - %(funcName)s:%(lineno)d - %(levelname)s - %(message)s"
            ))
        logger.addHandler(logfHandler)

    wlist, wlist_file, gpu_not_white = get_wlist()

    if args.mode == "gpu":
        GPU_ID = args.gpu_id
        logger.info("using GPU_ID %d", GPU_ID)
        for _gnw in gpu_not_white:
            # list.remove() raises ValueError on a missing item
            if _gnw in wlist:
                wlist.remove(_gnw)
    elif args.mode != "cpu":
        logger.error("Unrecognized argument:%s, 'cpu' or 'gpu' is desired.",
                     args.mode)
        sys.exit("Invalid arguments")
    RUN_ON_DEVICE = args.mode
    logger.info("API check -- Example Code")
    logger.info("sample_test running under python %s",
                platform.python_version())

    # make sure SAMPLECODE_TEMPDIR exists and is a directory
    if os.path.exists(SAMPLECODE_TEMPDIR):
        if not os.path.isdir(SAMPLECODE_TEMPDIR):
            os.remove(SAMPLECODE_TEMPDIR)
            os.mkdir(SAMPLECODE_TEMPDIR)
    else:
        os.mkdir(SAMPLECODE_TEMPDIR)

    filenames = get_filenames()
    if len(filenames) == 0 and len(whl_error) == 0:
        logger.info("-----API_PR.spec is the same as API_DEV.spec-----")
        sys.exit(0)
    logger.info("API_PR is diff from API_DEV: %s", filenames)

    threads = multiprocessing.cpu_count()
    if args.threads:
        threads = args.threads
    po = multiprocessing.Pool(threads)
    results = po.map_async(execute_samplecode, filenames.keys())
    po.close()
    po.join()

    result = results.get()

    # delete temp files
    if not args.debug:
        shutil.rmtree(SAMPLECODE_TEMPDIR)

    # entries are (result, tfname, msg) tuples from execute_samplecode()
    failed = [temp for temp in result if not temp[0]]

    logger.info("----------------End of the Check--------------------")
    if len(whl_error) != 0:
        logger.info("%s is not in whl.", whl_error)
        logger.info("")
        logger.info("Please check the whl package and API_PR.spec!")
        logger.info("You can follow these steps in order to generate API.spec:")
        logger.info("1. cd ${paddle_path}, compile paddle;")
        logger.info("2. pip install build/python/dist/(build whl package);")
        logger.info(
            "3. run 'python tools/print_signatures.py paddle > paddle/fluid/API.spec'."
        )
    for temp in failed:
        logger.info("In addition, mistakes found in sample codes: %s", temp[1])
        logger.info("error_methods: %s", str(temp[2]))
    if len(whl_error) != 0:
        logger.info("----------------------------------------------------")
        sys.exit(1)
    if failed:
        logger.info("Mistakes found in sample codes.")
        logger.info("Please check sample codes.")
        sys.exit(1)
    logger.info("Sample code check is successful!")