[Change] 不再兼容旧格式示例代码 (#56573)

* [Change] forbid legacy sample code style
* [Change] remove legacy sampcd_processor.py
* [Fix] fix command
* [Change] check plain style sample code
* [Fix] fix logger
* [Change] remove arguments
* [Change] remove cb_required
* [Change] change parse args
* [Change] restore creation.py

[Change] 不再兼容旧格式示例代码 (#56573)
* [Change] forbid legacy sample code style
* [Change] remove legacy sampcd_processor.py
* [Fix] fix command
* [Change] check plain style sample code
* [Fix] fix logger
* [Change] remove arguments
* [Change] remove cb_required
* [Change] change parse args
* [Change] restore creation.py
92080699 · megemini · GitHub · e0841332 · 92080699 · 92080699
6 changed files
--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -3140,10 +3140,10 @@ function exec_samplecode_test() {

    cd ${PADDLE_ROOT}/tools
    if [ "$1" = "cpu" ] ; then
-        python sampcd_processor.py --debug cpu; example_error=$?
+        python sampcd_processor.py --debug --mode cpu; example_error=$?
    elif [ "$1" = "gpu" ] ; then
        SAMPLE_CODE_EXEC_THREADS=${SAMPLE_CODE_EXEC_THREADS:-2}
-        python sampcd_processor.py --threads=${SAMPLE_CODE_EXEC_THREADS} --debug gpu; example_error=$?
+        python sampcd_processor.py --threads=${SAMPLE_CODE_EXEC_THREADS} --debug --mode gpu; example_error=$?
    fi
    if [ "$example_error" != "0" ];then
      echo "Code instance execution failed" >&2

--- a/tools/sampcd_processor.py
+++ b/tools/sampcd_processor.py
--- a/tools/sampcd_processor_utils.py
+++ b/tools/sampcd_processor_utils.py
@@ -265,7 +265,7 @@ def extract_code_blocks_from_docstr(docstr, google_style=True):
    Return:
        code_blocks: A list of code-blocks, indent removed.
                     element {'name': the code-block's name, 'id': sequence id.
-                              'codes': codes, 'required': 'gpu', 'in_examples': bool, code block in `Examples` or not,}
+                              'codes': codes, 'in_examples': bool, code block in `Examples` or not,}
    """
    code_blocks = []

@@ -290,7 +290,6 @@ def extract_code_blocks_from_docstr(docstr, google_style=True):

    cb_start_pat = re.compile(r"code-block::\s*python")
    cb_param_pat = re.compile(r"^\s*:(\w+):\s*(\S*)\s*$")
-    cb_required_pat = re.compile(r"^\s*#\s*require[s|d]\s*:\s*(\S+)\s*$")

    cb_info = {}
    cb_info['cb_started'] = False
@@ -298,23 +297,20 @@ def extract_code_blocks_from_docstr(docstr, google_style=True):
    cb_info['cb_cur_indent'] = -1
    cb_info['cb_cur_name'] = None
    cb_info['cb_cur_seq_id'] = 0
-    cb_info['cb_required'] = None

    def _cb_started():
-        # nonlocal cb_started, cb_cur_name, cb_required, cb_cur_seq_id
+        # nonlocal cb_started, cb_cur_name, cb_cur_seq_id
        cb_info['cb_started'] = True
        cb_info['cb_cur_seq_id'] += 1
        cb_info['cb_cur_name'] = None
-        cb_info['cb_required'] = None

    def _append_code_block(in_examples):
-        # nonlocal code_blocks, cb_cur, cb_cur_name, cb_cur_seq_id, cb_required
+        # nonlocal code_blocks, cb_cur, cb_cur_name, cb_cur_seq_id
        code_blocks.append(
            {
                'codes': inspect.cleandoc("\n" + "\n".join(cb_info['cb_cur'])),
                'name': cb_info['cb_cur_name'],
                'id': cb_info['cb_cur_seq_id'],
-                'required': cb_info['cb_required'],
                'in_examples': in_examples,
            }
        )
@@ -339,10 +335,6 @@ def extract_code_blocks_from_docstr(docstr, google_style=True):
                    if mo_p.group(1) == 'name':
                        cb_info['cb_cur_name'] = mo_p.group(2)
                    continue
-                # read the required directive
-                mo_r = cb_required_pat.match(linecont)
-                if mo_r:
-                    cb_info['cb_required'] = mo_r.group(1)
                # docstring end
                if lineno == lastlineindex:
                    mo = re.search(r"\S", linecont)
@@ -479,6 +471,49 @@ def get_docstring(full_test=False):
    return docstrings_to_test, whl_error


+def check_old_style(docstrings_to_test: typing.Dict[str, str]):
+    """Fail the run if any code block is written in plain (prompt-less) style.
+
+    Every code block extracted from each docstring is inspected. A block in
+    which no line starts with ``>>>`` (after stripping whitespace) is treated
+    as plain style; a block that has no lines at all is therefore flagged too.
+
+    Args:
+        docstrings_to_test: Mapping from API name to its raw docstring.
+
+    Returns:
+        None when no plain-style code block is found.
+
+    Raises:
+        SystemExit: With exit code 1, after the offending code blocks have
+            been logged, when at least one plain-style block is found.
+    """
+    old_style_apis = []
+    for api_name, raw_docstring in docstrings_to_test.items():
+        # NOTE(review): google_style=False presumably widens extraction beyond
+        # the `Examples` section -- confirm in extract_code_blocks_from_docstr.
+        for codeblock in extract_code_blocks_from_docstr(
+            raw_docstring, google_style=False
+        ):
+            # Assume plain style until a '>>>' prompt line proves otherwise.
+            old_style = True
+
+            for line in codeblock['codes'].splitlines():
+                if line.strip().startswith('>>>'):
+                    old_style = False
+                    break
+
+            if old_style:
+                codeblock_name = codeblock['name']
+                codeblock_id = codeblock['id']
+
+                # Label the block by its `:name:` option when it has one,
+                # otherwise by its sequence id within the docstring.
+                docstring_name = '{}:{}'.format(
+                    api_name, codeblock_name or codeblock_id
+                )
+
+                old_style_apis.append(docstring_name)
+
+    if old_style_apis:
+        # Attach a stdout handler so the report below is written to stdout.
+        stdout_handler = logging.StreamHandler(stream=sys.stdout)
+        logger.addHandler(stdout_handler)
+
+        logger.info(
+            ">>> %d apis use plain sample code style.",
+            len(old_style_apis),
+        )
+        logger.info('=======================')
+        logger.info('\n'.join(old_style_apis))
+        logger.info('=======================')
+        logger.info("Check Failed!")
+        logger.info("DEPRECATION: Please do not use plain sample code style.")
+        logger.info(
+            "For more information: https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/dev_guides/style_guide_and_references/code_example_writing_specification_cn.html "
+        )
+        logger.info("----------------End of the Check--------------------")
+        # Exit with a non-zero status so the check is reported as failed.
+        sys.exit(1)
+
+
 def exec_gen_doc():
    result = True
    cmd = ["bash", "document_preview.sh"]
@@ -568,27 +603,30 @@ def get_test_results(


 def run_doctest(args, doctester: DocTester):
-    logger.info("----------------Codeblock Check Start--------------------")
-
    # init logger
    init_logger(debug=args.debug, log_file=args.logf)

-    logger.info("Check test mode ...")
+    logger.info("----------------Codeblock Check Start--------------------")
+
+    logger.info(">>> Check test mode ...")
    run_on_device = check_test_mode(mode=args.mode, gpu_id=args.gpu_id)

-    logger.info("Get test capacity ...")
+    logger.info(">>> Get test capacity ...")
    sample_code_test_capacity = get_test_capacity(run_on_device)

-    logger.info("Get docstring from api ...")
+    logger.info(">>> Get docstring from api ...")
    docstrings_to_test, whl_error = get_docstring(full_test=args.full_test)

-    logger.info("Prepare doctester ...")
+    logger.info(">>> Checking plain sample code style before Paddle 2.5 ...")
+    check_old_style(docstrings_to_test)
+
+    logger.info(">>> Prepare doctester ...")
    doctester.prepare(sample_code_test_capacity)

-    logger.info("Running doctester ...")
+    logger.info(">>> Running doctester ...")
    test_results = get_test_results(doctester, docstrings_to_test)

-    logger.info("Print summary ...")
+    logger.info(">>> Print summary ...")
    doctester.print_summary(test_results, whl_error)

    if args.mode == "cpu":
@@ -596,37 +634,39 @@ def run_doctest(args, doctester: DocTester):
        exec_gen_doc()


-arguments = [
-    # flags, dest, type, default, help
-    ['--gpu_id', 'gpu_id', int, 0, 'GPU device id to use [0]'],
-    ['--logf', 'logf', str, None, 'file for logging'],
-    ['--threads', 'threads', int, 0, 'sub processes number'],
-]
-
-
 def parse_args():
    """
    Parse input arguments
    """
-    global arguments
    parser = argparse.ArgumentParser(description='run Sample Code Test')
    parser.add_argument('--debug', dest='debug', action="store_true")
    parser.add_argument('--full-test', dest='full_test', action="store_true")
-    parser.add_argument('mode', type=str, help='run on device', default='cpu')
+    parser.add_argument(
+        '--mode', dest='mode', type=str, default='cpu', help='run on device'
+    )
    parser.add_argument(
        '--build-doc',
        dest='build_doc',
        action='store_true',
        help='build doc if need.',
    )
-    for item in arguments:
-        parser.add_argument(
-            item[0], dest=item[1], help=item[4], type=item[2], default=item[3]
-        )
-
-    if len(sys.argv) == 1:
-        args = parser.parse_args(['cpu'])
-        return args
+    parser.add_argument(
+        '--gpu_id',
+        dest='gpu_id',
+        type=int,
+        default=0,
+        help='GPU device id to use [0]',
+    )
+    parser.add_argument(
+        '--logf', dest='logf', type=str, default=None, help='file for logging'
+    )
+    parser.add_argument(
+        '--threads',
+        dest='threads',
+        type=int,
+        default=0,
+        help='sub processes number',
+    )

    args = parser.parse_args()
    return args
--- a/tools/sampcd_processor_xdoctest.py
+++ b/tools/sampcd_processor_xdoctest.py
-# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-please make sure to run in the tools path
-usage: python sampcd_processor_xdoctest.py {cpu or gpu}
-    {cpu or gpu}: running in cpu version or gpu version
-
-for example, you can run cpu version testing like this:
-
-    python sampcd_processor_xdoctest.py cpu
-
-"""
-
-import functools
-import logging
-import multiprocessing
-import os
-import platform
-import queue
-import re
-import sys
-import threading
-import time
-import typing
-
-import xdoctest
-from sampcd_processor_utils import (
-    TEST_TIMEOUT,
-    DocTester,
-    TestResult,
-    logger,
-    parse_args,
-    run_doctest,
-)
-
-XDOCTEST_CONFIG = {
-    "global_exec": r"\n".join(
-        [
-            "import paddle",
-            "paddle.device.set_device('cpu')",
-            "paddle.set_default_dtype('float32')",
-            "paddle.disable_static()",
-        ]
-    ),
-    "default_runtime_state": {"IGNORE_WHITESPACE": True},
-}
-
-
-def _patch_global_state(debug, verbose):
-    # patch xdoctest global_state
-    from xdoctest import global_state
-
-    _debug_xdoctest = debug and verbose > 2
-    global_state.DEBUG = _debug_xdoctest
-    global_state.DEBUG_PARSER = global_state.DEBUG_PARSER and _debug_xdoctest
-    global_state.DEBUG_CORE = global_state.DEBUG_CORE and _debug_xdoctest
-    global_state.DEBUG_RUNNER = global_state.DEBUG_RUNNER and _debug_xdoctest
-    global_state.DEBUG_DOCTEST = global_state.DEBUG_DOCTEST and _debug_xdoctest
-
-
-def _patch_tensor_place():
-    from xdoctest import checker
-
-    pattern_tensor = re.compile(
-        r"""
-        (Tensor\(.*?place=)     # Tensor start
-        (.*?)                   # Place=(XXX)
-        (\,.*?\))
-        """,
-        re.X | re.S,
-    )
-
-    _check_output = checker.check_output
-
-    def check_output(got, want, runstate=None):
-        if not want:  # nocover
-            return True
-
-        return _check_output(
-            got=pattern_tensor.sub(r'\1Place(cpu)\3', got),
-            want=pattern_tensor.sub(r'\1Place(cpu)\3', want),
-            runstate=runstate,
-        )
-
-    checker.check_output = check_output
-
-
-def _patch_float_precision(digits):
-    from xdoctest import checker
-
-    pattern_number = re.compile(
-        r"""
-        (?:
-            (?<=[\s*\[\(\'\"\:])                        # number starts
-            (?:                                         # int/float or complex-real
-                (?:
-                    [+-]?
-                    (?:
-                        (?: \d*\.\d+) | (?: \d+\.?)     # int/float
-                    )
-                )
-                (?:[Ee][+-]?\d+)?
-            )
-            (?:                                         # complex-imag
-                (?:
-                    (?:
-                        [+-]?
-                        (?:
-                            (?: \d*\.\d+) | (?: \d+\.?)
-                        )
-                    )
-                    (?:[Ee][+-]?\d+)?
-                )
-            (?:[Jj])
-            )?
-        )
-        """,
-        re.X | re.S,
-    )
-
-    _check_output = checker.check_output
-
-    def _sub_number(match_obj, digits):
-        match_str = match_obj.group()
-
-        if 'j' in match_str or 'J' in match_str:
-            try:
-                match_num = complex(match_str)
-            except ValueError:
-                return match_str
-
-            return (
-                str(
-                    complex(
-                        round(match_num.real, digits),
-                        round(match_num.imag, digits),
-                    )
-                )
-                .strip('(')
-                .strip(')')
-            )
-        else:
-            try:
-                return str(round(float(match_str), digits))
-            except ValueError:
-                return match_str
-
-    sub_number = functools.partial(_sub_number, digits=digits)
-
-    def check_output(got, want, runstate=None):
-        if not want:  # nocover
-            return True
-
-        return _check_output(
-            got=pattern_number.sub(sub_number, got),
-            want=pattern_number.sub(sub_number, want),
-            runstate=runstate,
-        )
-
-    checker.check_output = check_output
-
-
-class Directive:
-    """Base class of global direvtives just for `xdoctest`."""
-
-    pattern: typing.Pattern
-
-    def parse_directive(self, docstring: str) -> typing.Tuple[str, typing.Any]:
-        pass
-
-
-class TimeoutDirective(Directive):
-    pattern = re.compile(
-        r"""
-        (?:
-            (?:
-                \s*\>{3}\s*\#\s*x?doctest\:\s*
-            )
-            (?P<op>[\+\-])
-            (?:
-                TIMEOUT
-            )
-            \(
-                (?P<time>\d+)
-            \)
-            (?:
-                \s*?
-            )
-        )
-        """,
-        re.X | re.S,
-    )
-
-    def __init__(self, timeout):
-        self._timeout = timeout
-
-    def parse_directive(self, docstring):
-        match_obj = self.pattern.search(docstring)
-        if match_obj is not None:
-            op_time = match_obj.group('time')
-            match_start = match_obj.start()
-            match_end = match_obj.end()
-
-            return (
-                (docstring[:match_start] + '\n' + docstring[match_end:]),
-                float(op_time),
-            )
-
-        return docstring, float(self._timeout)
-
-
-class Xdoctester(DocTester):
-    """A Xdoctest doctester."""
-
-    directives: typing.Dict[str, typing.Tuple[typing.Type[Directive], ...]] = {
-        'timeout': (TimeoutDirective, TEST_TIMEOUT)
-    }
-
-    def __init__(
-        self,
-        debug=False,
-        style='freeform',
-        target='codeblock',
-        mode='native',
-        verbose=2,
-        patch_global_state=True,
-        patch_tensor_place=True,
-        patch_float_precision=5,
-        use_multiprocessing=True,
-        **config,
-    ):
-        self.debug = debug
-
-        self.style = style
-        self.target = target
-        self.mode = mode
-        self.verbose = verbose
-        self.config = {**XDOCTEST_CONFIG, **(config or {})}
-
-        self._patch_global_state = patch_global_state
-        self._patch_tensor_place = patch_tensor_place
-        self._patch_float_precision = patch_float_precision
-        self._use_multiprocessing = use_multiprocessing
-
-        # patch xdoctest before `xdoctest.core.parse_docstr_examples`
-        self._patch_xdoctest()
-
-        self.docstring_parser = functools.partial(
-            xdoctest.core.parse_docstr_examples, style=self.style
-        )
-
-        self.directive_pattern = re.compile(
-            r"""
-            (?<=(\#\s))     # positive lookbehind, directive begins
-            (doctest)       # directive prefix, which should be replaced
-            (?=(:\s*.*\n))  # positive lookahead, directive content
-            """,
-            re.X,
-        )
-
-        self.directive_prefix = 'xdoctest'
-
-    def _patch_xdoctest(self):
-        if self._patch_global_state:
-            _patch_global_state(self.debug, self.verbose)
-
-        if self._patch_tensor_place:
-            _patch_tensor_place()
-
-        if self._patch_float_precision is not None:
-            _patch_float_precision(self._patch_float_precision)
-
-    def _parse_directive(
-        self, docstring: str
-    ) -> typing.Tuple[str, typing.Dict[str, Directive]]:
-        directives = {}
-        for name, directive_cls in self.directives.items():
-            docstring, direct = directive_cls[0](
-                *directive_cls[1:]
-            ).parse_directive(docstring)
-            directives[name] = direct
-
-        return docstring, directives
-
-    def convert_directive(self, docstring: str) -> str:
-        """Replace directive prefix with xdoctest"""
-        return self.directive_pattern.sub(self.directive_prefix, docstring)
-
-    def prepare(self, test_capacity: set):
-        """Set environs for xdoctest directive.
-        The keys in environs, which also used in `# xdoctest: +REQUIRES(env:XX)`, should be UPPER case.
-
-        If `test_capacity = {"cpu"}`, then we set:
-
-            - `os.environ["CPU"] = "True"`
-
-        which makes this SKIPPED:
-
-            - # xdoctest: +REQUIRES(env:GPU)
-
-        If `test_capacity = {"cpu", "gpu"}`, then we set:
-
-            - `os.environ["CPU"] = "True"`
-            - `os.environ["GPU"] = "True"`
-
-        which makes this SUCCESS:
-
-            - # xdoctest: +REQUIRES(env:GPU)
-        """
-        logger.info("Set xdoctest environ ...")
-        for capacity in test_capacity:
-            key = capacity.upper()
-            os.environ[key] = "True"
-            logger.info("Environ: %s , set to True.", key)
-
-        logger.info("API check using Xdoctest prepared!-- Example Code")
-        logger.info("running under python %s", platform.python_version())
-        logger.info("running under xdoctest %s", xdoctest.__version__)
-
-    def run(self, api_name: str, docstring: str) -> typing.List[TestResult]:
-        """Run the xdoctest with a docstring."""
-        # parse global directive
-        docstring, directives = self._parse_directive(docstring)
-
-        # extract xdoctest examples
-        examples_to_test, examples_nocode = self._extract_examples(
-            api_name, docstring, **directives
-        )
-
-        # run xdoctest
-        try:
-            result = self._execute_xdoctest(
-                examples_to_test, examples_nocode, **directives
-            )
-        except queue.Empty:
-            result = [
-                TestResult(
-                    name=api_name,
-                    timeout=True,
-                    time=directives.get('timeout', TEST_TIMEOUT),
-                )
-            ]
-
-        return result
-
-    def _extract_examples(self, api_name, docstring, **directives):
-        """Extract code block examples from docstring."""
-        examples_to_test = {}
-        examples_nocode = {}
-        for example_idx, example in enumerate(
-            self.docstring_parser(docstr=docstring, callname=api_name)
-        ):
-            example.mode = self.mode
-            example.config.update(self.config)
-            example_key = f"{api_name}_{example_idx}"
-
-            # check whether there are some parts parsed by xdoctest
-            if not example._parts:
-                examples_nocode[example_key] = example
-                continue
-
-            examples_to_test[example_key] = example
-
-        if not examples_nocode and not examples_to_test:
-            examples_nocode[api_name] = api_name
-
-        return examples_to_test, examples_nocode
-
-    def _execute_xdoctest(
-        self, examples_to_test, examples_nocode, **directives
-    ):
-        if self._use_multiprocessing:
-            _ctx = multiprocessing.get_context('spawn')
-            result_queue = _ctx.Queue()
-            exec_processer = functools.partial(_ctx.Process, daemon=True)
-        else:
-            result_queue = queue.Queue()
-            exec_processer = functools.partial(threading.Thread, daemon=True)
-
-        processer = exec_processer(
-            target=self._execute_with_queue,
-            args=(
-                result_queue,
-                examples_to_test,
-                examples_nocode,
-            ),
-        )
-
-        processer.start()
-        result = result_queue.get(
-            timeout=directives.get('timeout', TEST_TIMEOUT)
-        )
-        processer.join()
-
-        return result
-
-    def _execute(self, examples_to_test, examples_nocode):
-        """Run xdoctest for each example"""
-        # patch xdoctest first in each process
-        self._patch_xdoctest()
-
-        # run the xdoctest
-        test_results = []
-        for _, example in examples_to_test.items():
-            start_time = time.time()
-            result = example.run(verbose=self.verbose, on_error='return')
-            end_time = time.time()
-
-            test_results.append(
-                TestResult(
-                    name=str(example),
-                    passed=result['passed'],
-                    skipped=result['skipped'],
-                    failed=result['failed'],
-                    test_msg=str(result['exc_info']),
-                    time=end_time - start_time,
-                )
-            )
-
-        for _, example in examples_nocode.items():
-            test_results.append(TestResult(name=str(example), nocode=True))
-
-        return test_results
-
-    def _execute_with_queue(self, queue, examples_to_test, examples_nocode):
-        queue.put(self._execute(examples_to_test, examples_nocode))
-
-    def print_summary(self, test_results, whl_error=None):
-        summary_success = []
-        summary_failed = []
-        summary_skiptest = []
-        summary_timeout = []
-        summary_nocodes = []
-
-        stdout_handler = logging.StreamHandler(stream=sys.stdout)
-        logger.addHandler(stdout_handler)
-        logger.info("----------------End of the Check--------------------")
-        if whl_error is not None and whl_error:
-            logger.info("%s is not in whl.", whl_error)
-            logger.info("")
-            logger.info("Please check the whl package and API_PR.spec!")
-            logger.info(
-                "You can follow these steps in order to generate API.spec:"
-            )
-            logger.info("1. cd ${paddle_path}, compile paddle;")
-            logger.info("2. pip install build/python/dist/(build whl package);")
-            logger.info(
-                "3. run 'python tools/print_signatures.py paddle > paddle/fluid/API.spec'."
-            )
-            for test_result in test_results:
-                if test_result.failed:
-                    logger.info(
-                        "In addition, mistakes found in sample codes: %s",
-                        test_result.name,
-                    )
-            logger.info("----------------------------------------------------")
-            sys.exit(1)
-        else:
-            for test_result in test_results:
-                if not test_result.nocode:
-                    if test_result.passed:
-                        summary_success.append(test_result.name)
-
-                    if test_result.skipped:
-                        summary_skiptest.append(test_result.name)
-
-                    if test_result.failed:
-                        summary_failed.append(test_result.name)
-
-                    if test_result.timeout:
-                        summary_timeout.append(
-                            {
-                                'api_name': test_result.name,
-                                'run_time': test_result.time,
-                            }
-                        )
-                else:
-                    summary_nocodes.append(test_result.name)
-
-            if len(summary_success):
-                logger.info("%d sample codes ran success", len(summary_success))
-                logger.info('\n'.join(summary_success))
-
-            if len(summary_skiptest):
-                logger.info("%d sample codes skipped", len(summary_skiptest))
-                logger.info('\n'.join(summary_skiptest))
-
-            if len(summary_nocodes):
-                logger.info(
-                    "%d apis could not run test or don't have sample codes",
-                    len(summary_nocodes),
-                )
-                logger.info('\n'.join(summary_nocodes))
-
-            if len(summary_timeout):
-                logger.info("%d sample codes ran timeout", len(summary_timeout))
-                for _result in summary_timeout:
-                    logger.info(
-                        f"{_result['api_name']} - more than {_result['run_time']}s"
-                    )
-
-            if len(summary_failed):
-                logger.info("%d sample codes ran failed", len(summary_failed))
-                logger.info('\n'.join(summary_failed))
-                logger.info(
-                    "Mistakes found in sample codes. Please recheck the sample codes."
-                )
-                sys.exit(1)
-
-        logger.info("Sample code check is successful!")
-
-
-if __name__ == '__main__':
-    args = parse_args()
-    run_doctest(args, doctester=Xdoctester(debug=args.debug))
--- a/tools/test_sampcd_processor.py
+++ b/tools/test_sampcd_processor.py
--- a/tools/test_sampcd_processor_xdoctest.py
+++ b/tools/test_sampcd_processor_xdoctest.py