sampcd_processor.py 18.0 KB
Newer Older
1
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 3 4 5 6 7 8 9 10 11 12 13
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14 15
"""
please make sure to run in the tools path
16
usage: python sampcd_processor.py {cpu or gpu}
17 18
    {cpu or gpu}: running in cpu version or gpu version

19
for example, you can run cpu version testing like this:
20

21
    python sampcd_processor.py cpu
T
tianshuo78520a 已提交
22

23
"""
24
import logging
25
import multiprocessing
26
import os
27
import platform
28
import re
29 30 31
import shutil
import subprocess
import sys
32
import time
T
tianshuo78520a 已提交
33

34 35 36 37 38 39 40 41 42 43 44
from sampcd_processor_utils import ENV_KEY_TEST_CAPACITY  # noqa: F401
from sampcd_processor_utils import (
    API_DIFF_SPEC_FN,
    extract_code_blocks_from_docstr,
    get_full_api_from_pr_spec,
    get_incrementapi,
    parse_args,
    run_doctest,
)
from sampcd_processor_xdoctest import Xdoctester

45 46 47
# Root logger used by every function in this file. Messages are emitted bare
# ("%(message)s") on one console handler.
logger = logging.getLogger()
if not logger.handlers:
    # nothing installed yet: create a stderr handler of our own
    console = logging.StreamHandler(stream=sys.stderr)
    logger.addHandler(console)
else:
    # we assume the first handler is the one we want to configure
    console = logger.handlers[0]
console.setFormatter(logging.Formatter("%(message)s"))

# Device the sample codes run on; overwritten from the command line.
RUN_ON_DEVICE = 'cpu'
# Lower-cased capability names available in this run (see get_test_capacity).
SAMPLE_CODE_TEST_CAPACITY = set()
# GPU index written into CUDA_VISIBLE_DEVICES for gpu runs.
GPU_ID = 0
# APIs listed in the spec file that could not be resolved (see get_filenames).
whl_error = []
# Directory that receives the generated sample-code files.
SAMPLECODE_TEMPDIR = 'samplecode_temp'
# Environment variable whose value, when set, replaces the default code
# inserted in front of each sample.
ENV_KEY_CODES_FRONTEND = 'CODES_INSERTED_INTO_FRONTEND'
# Result buckets. Besides the four fixed keys below, a key is added per
# `required` string whose requirement could not be matched.
SUMMARY_INFO = {
    bucket: [] for bucket in ('success', 'failed', 'skiptest', 'nocodes')
}
68

T
tianshuo78520a 已提交
69 70

def find_all(srcstr, substr):
    """
    collect the starting index of every occurrence of a substring,
    overlapping occurrences included.

    Args:
        srcstr(str): the string to search in
        substr(str): the string to search for

    Returns:
        list: the starting indices, in ascending order; empty when
              the substring does not occur
    """
    positions = []
    cursor = 0
    while True:
        hit = srcstr.find(substr, cursor)
        if hit < 0:
            return positions
        positions.append(hit)
        # advance by one character only, so overlapping matches are kept
        cursor = hit + 1


91 92 93 94 95 96 97 98
def find_last_future_line_end(cbstr):
    """
    find the end of the last line that mentions `__future__`.

    Args:
        cbstr(str): the source text of a code-block
    Return:
        int: the index just past the last `__future__` line (its trailing
             newline included when there is one), or None when no line
             mentions `__future__`.
    """
    last_end = None
    # `(?:\n|$)` also accepts a `__future__` line that ends the text without
    # a newline; requiring `\n` alone would miss that line.
    for mo in re.finditer(r'__future__.*(?:\n|$)', cbstr):
        last_end = mo.end()
    return last_end
112 113


114 115 116 117 118 119 120 121 122 123 124 125 126
def get_test_capacity():
    """
    collect capacities and set to SAMPLE_CODE_TEST_CAPACITY

    The comma-separated value of the environment variable named by
    ENV_KEY_TEST_CAPACITY is split, stripped and lower-cased; empty items
    are ignored. 'cpu' is always added, and so is RUN_ON_DEVICE when it is
    set. The function updates the global set in place and returns nothing.
    """
    global SAMPLE_CODE_TEST_CAPACITY  # write
    global ENV_KEY_TEST_CAPACITY, RUN_ON_DEVICE  # readonly
    if ENV_KEY_TEST_CAPACITY in os.environ:
        for raw in os.environ[ENV_KEY_TEST_CAPACITY].split(','):
            capacity = raw.strip().lower()
            # test the stripped value: an item made only of blanks must not
            # put an empty string into the set
            if capacity:
                SAMPLE_CODE_TEST_CAPACITY.add(capacity)
    SAMPLE_CODE_TEST_CAPACITY.add('cpu')

    if RUN_ON_DEVICE:
        SAMPLE_CODE_TEST_CAPACITY.add(RUN_ON_DEVICE)


def is_required_match(requirestr, cbtitle='not-specified'):
    """
    check whether the requirement of a code-block matches the current running environment.

    The requirement is a comma-separated list of names; each name is
    stripped and lower-cased, and empty names are ignored. 'cpu' is always
    required. When no requirement is given, RUN_ON_DEVICE is required
    instead. The names 'skip' and 'skiptest' are special flags: when either
    is present, the code-block is skipped and None is returned.

    Args:
        requirestr(str): the required string.
        cbtitle(str): the title of the code-block, used in log messages only.
    returns:
        True - every required name is in SAMPLE_CODE_TEST_CAPACITY
        False - at least one required name is missing
        None - skipped  # trick
    """
    global SAMPLE_CODE_TEST_CAPACITY, RUN_ON_DEVICE  # readonly
    requires = {'cpu'}
    if requirestr:
        names = (raw.strip().lower() for raw in requirestr.split(','))
        requires.update(name for name in names if name)
    else:
        requires.add(RUN_ON_DEVICE)

    if 'skip' in requires or 'skiptest' in requires:
        logger.info('%s: skipped', cbtitle)
        return None

    # the skip flags were handled above, so every remaining name is a real
    # requirement
    if requires.issubset(SAMPLE_CODE_TEST_CAPACITY):
        return True

    # sorted so that the message does not depend on set iteration order
    logger.info(
        '%s: the equipments [%s] not match the required [%s].',
        cbtitle,
        ','.join(sorted(SAMPLE_CODE_TEST_CAPACITY)),
        ','.join(sorted(requires)),
    )
    return False


def insert_codes_into_codeblock(codeblock, apiname='not-specified'):
    """
    wrap the codes of a code-block with a prologue and an epilogue.

    The prologue is the value of the environment variable named by
    ENV_KEY_CODES_FRONTEND when it is set and non-empty. Otherwise it is a
    snippet that sets CUDA_VISIBLE_DEVICES: to "" for 'cpu', to GPU_ID for
    'gpu', chosen by the code-block's `required` value, or by RUN_ON_DEVICE
    when `required` is absent or empty. Any other value gives no prologue.
    The epilogue prints a success message. The prologue is placed after the
    last `__future__` line when the codes have one.

    Args:
        codeblock(dict): the code-block; the keys 'codes', 'name', 'id' and
                         optionally 'required' are read.
        apiname(str): the API name shown in the success message.
    Returns:
        str: the codes with the prologue and the epilogue inserted.
    """
    global ENV_KEY_CODES_FRONTEND, GPU_ID, RUN_ON_DEVICE  # readonly

    prologue = ''
    custom_prologue = os.environ.get(ENV_KEY_CODES_FRONTEND)
    if custom_prologue:
        prologue = custom_prologue
    else:
        hide_gpus = '\nimport os\nos.environ["CUDA_VISIBLE_DEVICES"] = ""\n'
        pin_gpu = (
            f'\nimport os\nos.environ["CUDA_VISIBLE_DEVICES"] = "{GPU_ID}"\n'
        )
        required = codeblock['required'] if 'required' in codeblock else None
        # an explicit requirement wins over the device of the whole run
        device = required if required else RUN_ON_DEVICE
        if device == 'cpu':
            prologue = hide_gpus
        elif device == 'gpu':
            prologue = pin_gpu

    block_name = codeblock['name']
    block_id = codeblock['id']
    epilogue = f'\nprint("{apiname}\'s sample code (name:{block_name}, id:{block_id}) is executed successfully!")'

    codes = codeblock['codes']
    split_at = find_last_future_line_end(codes)
    if not split_at:
        return prologue + codes + epilogue
    # keep the `__future__` lines in front of the inserted prologue
    return codes[:split_at] + prologue + codes[split_at:] + epilogue
T
tianshuo78520a 已提交
220 221


222 223 224 225 226 227 228 229 230
def is_ps_wrapped_codeblock(codeblock):
    """Tell whether the codeblock is wrapped by PS1(>>> ).

    Such a codeblock is not tested here; xdoctest is used instead.
    A codeblock counts as wrapped when at least one of its lines starts
    with `>>>` at the very first column.
    """
    lines = codeblock['codes'].split('\n')
    return any(line.startswith('>>>') for line in lines)


231 232 233
def sampcd_extract_to_file(srccom, name, htype="def", hname=""):
    """
    Extract sample codes from __doc__, and write them to files.

    Every extracted code-block falls into one of these cases:
      - it is wrapped by PS1(>>> ): recorded under 'skiptest', no file;
      - its requirement matches: written to a file in SAMPLECODE_TEMPDIR;
      - it is flagged to be skipped: recorded under 'skiptest', no file;
      - its requirement does not match: recorded in SUMMARY_INFO under its
        `required` string, no file.
    When no code-block is extracted at all, the API is recorded under
    'nocodes' and an empty list is returned.

    Args:
        srccom(str): the source comment of some API whose
                     example codes will be extracted and run.
        name(str): the name of the API.
        htype(str): the type of hint banners, def/class/method.
        hname(str): the name of the hint banners, e.g. def hname.
                    It is not used by this function.

    Returns:
        sample_code_filenames(list of str)
    """
    global GPU_ID, RUN_ON_DEVICE, SAMPLECODE_TEMPDIR  # readonly
    global SUMMARY_INFO  # update

    codeblocks = extract_code_blocks_from_docstr(srccom)
    if not codeblocks:
        SUMMARY_INFO['nocodes'].append(name)
        # detect sample codes using >>> to format and consider this situation as wrong
        logger.info(htype + " name:" + name)
        logger.info("-----------------------")
        if srccom.find("Examples:") != -1:
            logger.info("----example code check----")
            if srccom.find(">>>") != -1:
                logger.warning(
                    r"""Deprecated sample code style:
    Examples:
        >>>codeline
        >>>codeline

Please use '.. code-block:: python' to format the sample code."""
                )
        else:
            logger.error(
                "Error: No sample code found! Please check if the API comment contais string 'Examples:' correctly"
            )
        # nothing to write, whichever message was logged
        return []

    sample_code_filenames = []
    single_block = len(codeblocks) == 1
    for y, cb in enumerate(codeblocks):
        if is_ps_wrapped_codeblock(cb):
            SUMMARY_INFO['skiptest'].append("{}-{}".format(name, cb['id']))
            logger.info(
                '{}\' code block (name:{}, id:{}) is wrapped by PS1(>>> ), which will be tested by xdoctest.'.format(
                    name, cb['name'], cb['id']
                )
            )
            continue

        matched = is_required_match(cb['required'], name)
        # matched has three states:
        # True - please execute it;
        # None - the code-block is flagged to be skipped;
        # False - it need other special equipment or environment.
        # so, the following conditional statements are intentionally arranged.
        if matched:
            tfname = os.path.join(
                SAMPLECODE_TEMPDIR,
                '{}_example{}'.format(
                    name,
                    '.py' if single_block else f'_{y + 1}.py',
                ),
            )
            # assemble the codes first, so that a failure here does not
            # leave an empty file behind
            sampcd = insert_codes_into_codeblock(cb, name)
            # Python reads source files as UTF-8 by default, so the file
            # must not be written with the locale's encoding
            with open(tfname, 'w', encoding='utf-8') as tempf:
                tempf.write(sampcd)
            sample_code_filenames.append(tfname)
        elif matched is None:
            logger.info(
                '{}\' code block (name:{}, id:{}) is skipped.'.format(
                    name, cb['name'], cb['id']
                )
            )
            SUMMARY_INFO['skiptest'].append("{}-{}".format(name, cb['id']))
        else:
            logger.info(
                '{}\' code block (name:{}, id:{}) required({}) not match capacity({}).'.format(
                    name,
                    cb['name'],
                    cb['id'],
                    cb['required'],
                    SAMPLE_CODE_TEST_CAPACITY,
                )
            )
            # one bucket per distinct `required` string
            SUMMARY_INFO.setdefault(cb['required'], []).append(
                "{}-{}".format(name, cb['id'])
            )

    return sample_code_filenames


325 326
def execute_samplecode(tfname):
    """
    Execute a sample-code test

    The file is run with the current interpreter (sys.executable) in a
    child process. The test fails when the child exits with a non-zero
    return code; in that case the file content, the return code, stderr
    and stdout are logged as a warning.

    Args:
        tfname: the filename of the sample code

    Returns:
        result: success or not
        tfname: same as the input argument
        msg: the stdout output of the sample code executing
        time: time consumed by sample code, in seconds
    """
    if platform.python_version()[0] != "3":
        logger.error("Error: fail to parse python version!")
        sys.exit(1)
    cmd = [sys.executable, tfname]

    logger.info("----example code check----")
    logger.info("executing sample code: %s", tfname)
    start_time = time.time()
    completed = subprocess.run(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    # errors='replace': output that is not valid UTF-8 must not raise here
    # instead of being reported
    msg = completed.stdout.decode(encoding='utf-8', errors='replace')
    err = completed.stderr.decode(encoding='utf-8', errors='replace')
    end_time = time.time()

    result = completed.returncode == 0
    if result:
        logger.info("----example code check success----")
    else:
        # the file is read back only to show it in the report, so it is
        # decoded leniently as well
        with open(tfname, 'r', encoding='utf-8', errors='replace') as f:
            logger.warning(
                """Sample code error found in %s:
-----------------------
%s
-----------------------
subprocess return code: %d
Error Raised from Sample Code:
stderr: %s
stdout: %s
""",
                tfname,
                f.read(),
                completed.returncode,
                err,
                msg,
            )
        logger.info("----example code check failed----")

    # msg is the returned code execution report
    return result, tfname, msg, end_time - start_time
T
tianshuo78520a 已提交
383 384


385
def get_filenames(full_test=False):
    '''
    prepare the sample code files of the APIs listed in the spec file.

    The spec file (API_DIFF_SPEC_FN) is first produced for either the full
    API list or the increment. Each of its lines is then evaluated as a
    Python expression to get the API object, and the sample codes in the
    object's docstring are written to files. APIs whose evaluation raises
    AttributeError are collected in the global `whl_error`; lines that are
    not valid expressions are logged and ignored.

    Args:
        full_test: the full apis or the increment

    Returns:

        dict: the sample code files pending for check, mapping each
              filename to the API it was extracted from.

    '''
    global whl_error
    # these imports must stay in this function: `eval` below resolves the
    # API names through the local `paddle`
    import paddle  # noqa: F401
    import paddle.static.quantization  # noqa: F401

    whl_error = []
    prepare_spec = get_full_api_from_pr_spec if full_test else get_incrementapi
    prepare_spec()

    pending = {}
    with open(API_DIFF_SPEC_FN) as spec_file:
        for line in spec_file:
            api = line.replace('\n', '')
            try:
                # NOTE(review): eval runs whatever the spec file contains;
                # presumably the file is always generated by our own
                # tooling - confirm before feeding it anything else.
                api_obj = eval(api)
            except AttributeError:
                whl_error.append(api)
                continue
            except SyntaxError:
                logger.warning('line:%s, api:%s', line, api)
                # paddle.Tensor.<lambda>
                continue

            docstring = getattr(api_obj, '__doc__', None)
            if not docstring:
                continue
            for tfname in sampcd_extract_to_file(docstring, api):
                pending[tfname] = api
    return pending
426 427


428 429 430 431
if __name__ == '__main__':
    # Script entry: configure logging and the device, extract the sample
    # codes of the changed APIs, run them in a process pool, report, and
    # finally hand over to xdoctest.
    args = parse_args()
    logger.setLevel(logging.DEBUG if args.debug else logging.INFO)
    if args.logf:
        logfHandler = logging.FileHandler(args.logf)
        logfHandler.setFormatter(
            logging.Formatter(
                "%(asctime)s - %(funcName)s:%(lineno)d - %(levelname)s - %(message)s"
            )
        )
        logger.addHandler(logfHandler)

    if args.mode == "gpu":
        GPU_ID = args.gpu_id
        logger.info("using GPU_ID %d", GPU_ID)
    elif args.mode != "cpu":
        logger.error(
            "Unrecognized argument:%s, 'cpu' or 'gpu' is desired.", args.mode
        )
        sys.exit("Invalid arguments")
    RUN_ON_DEVICE = args.mode
    get_test_capacity()
    logger.info("API check -- Example Code")
    logger.info(
        "sample_test running under python %s", platform.python_version()
    )

    # make sure the temp path is a directory; a plain file of that name is
    # replaced, an existing directory is reused as it is
    if not os.path.exists(SAMPLECODE_TEMPDIR):
        os.mkdir(SAMPLECODE_TEMPDIR)
    elif not os.path.isdir(SAMPLECODE_TEMPDIR):
        os.remove(SAMPLECODE_TEMPDIR)
        os.mkdir(SAMPLECODE_TEMPDIR)

    filenames = get_filenames(args.full_test)
    if not filenames and not whl_error:
        logger.info("-----API_PR.spec is the same as API_DEV.spec-----")
        # not exit if no filenames, we should do xdoctest later.
        # sys.exit(0)

        # delete temp files
        if not args.debug:
            shutil.rmtree(SAMPLECODE_TEMPDIR)

    else:
        logger.info("API_PR is diff from API_DEV: %s", filenames)

        threads = multiprocessing.cpu_count()
        if args.threads:
            threads = args.threads
        po = multiprocessing.Pool(threads)
        results = po.map_async(execute_samplecode, filenames.keys())
        po.close()
        po.join()

        # each item is (result, tfname, msg, time), see execute_samplecode
        result = results.get()

        # delete temp files
        if not args.debug:
            shutil.rmtree(SAMPLECODE_TEMPDIR)

        stdout_handler = logging.StreamHandler(stream=sys.stdout)
        logger.addHandler(stdout_handler)
        logger.info("----------------End of the Check--------------------")

        if whl_error:
            logger.info("%s is not in whl.", whl_error)
            logger.info("")
            logger.info("Please check the whl package and API_PR.spec!")
            logger.info(
                "You can follow these steps in order to generate API.spec:"
            )
            logger.info("1. cd ${paddle_path}, compile paddle;")
            logger.info("2. pip install build/python/dist/(build whl package);")
            logger.info(
                "3. run 'python tools/print_signatures.py paddle > paddle/fluid/API.spec'."
            )
            for passed, tfname, _msg, _elapsed in result:
                if not passed:
                    logger.info(
                        "In addition, mistakes found in sample codes: %s",
                        tfname,
                    )
            logger.info("----------------------------------------------------")
            sys.exit(1)

        # from here on every listed API was found in the whl package
        for passed, tfname, _msg, _elapsed in result:
            if passed:
                SUMMARY_INFO['success'].append(tfname)
            else:
                logger.info(
                    "In addition, mistakes found in sample codes: %s",
                    tfname,
                )
                SUMMARY_INFO['failed'].append(tfname)
        timeovered_test = {
            tfname: elapsed
            for _passed, tfname, _msg, elapsed in result
            if elapsed > 10
        }

        if timeovered_test:
            logger.info(
                "%d sample codes ran time over 10s", len(timeovered_test)
            )
            if args.debug:
                for k, v in timeovered_test.items():
                    logger.info(f'{k} - {v}s')
        if SUMMARY_INFO['success']:
            logger.info(
                "%d sample codes ran success", len(SUMMARY_INFO['success'])
            )
        # every key other than the fixed buckets is a `required` string
        # that could not be matched
        fixed_buckets = ('success', 'failed', 'skiptest', 'nocodes')
        for k, v in SUMMARY_INFO.items():
            if k in fixed_buckets:
                continue
            logger.info("%d sample codes required not match for %s", len(v), k)
        if SUMMARY_INFO['skiptest']:
            logger.info(
                "%d sample codes skipped", len(SUMMARY_INFO['skiptest'])
            )
            if args.debug:
                logger.info('\n'.join(SUMMARY_INFO['skiptest']))
        if SUMMARY_INFO['nocodes']:
            logger.info(
                "%d apis don't have sample codes",
                len(SUMMARY_INFO['nocodes']),
            )
            if args.debug:
                logger.info('\n'.join(SUMMARY_INFO['nocodes']))
        if SUMMARY_INFO['failed']:
            logger.info(
                "%d sample codes ran failed", len(SUMMARY_INFO['failed'])
            )
            logger.info('\n'.join(SUMMARY_INFO['failed']))
            logger.info(
                "Mistakes found in sample codes. Please recheck the sample codes."
            )
            sys.exit(1)

        logger.info("Sample code check is successful!")

    # run xdoctest
    run_doctest(args, doctester=Xdoctester(debug=args.debug))