未验证 提交 92080699 编写于 作者: M megemini 提交者: GitHub

[Change] 不再兼容旧格式示例代码 (#56573)

* [Change] forbid legacy sample code style

* [Change] remove legacy sampcd_processor.py

* [Fix] fix command

* [Change] check plain style sample code

* [Fix] fix logger

* [Change] remove arguments

* [Change] remove cb_required

* [Change] change parse args

* [Change] restore creation.py
上级 e0841332
...@@ -3140,10 +3140,10 @@ function exec_samplecode_test() { ...@@ -3140,10 +3140,10 @@ function exec_samplecode_test() {
cd ${PADDLE_ROOT}/tools cd ${PADDLE_ROOT}/tools
if [ "$1" = "cpu" ] ; then if [ "$1" = "cpu" ] ; then
python sampcd_processor.py --debug cpu; example_error=$? python sampcd_processor.py --debug --mode cpu; example_error=$?
elif [ "$1" = "gpu" ] ; then elif [ "$1" = "gpu" ] ; then
SAMPLE_CODE_EXEC_THREADS=${SAMPLE_CODE_EXEC_THREADS:-2} SAMPLE_CODE_EXEC_THREADS=${SAMPLE_CODE_EXEC_THREADS:-2}
python sampcd_processor.py --threads=${SAMPLE_CODE_EXEC_THREADS} --debug gpu; example_error=$? python sampcd_processor.py --threads=${SAMPLE_CODE_EXEC_THREADS} --debug --mode gpu; example_error=$?
fi fi
if [ "$example_error" != "0" ];then if [ "$example_error" != "0" ];then
echo "Code instance execution failed" >&2 echo "Code instance execution failed" >&2
......
此差异已折叠。
...@@ -265,7 +265,7 @@ def extract_code_blocks_from_docstr(docstr, google_style=True): ...@@ -265,7 +265,7 @@ def extract_code_blocks_from_docstr(docstr, google_style=True):
Return: Return:
code_blocks: A list of code-blocks, indent removed. code_blocks: A list of code-blocks, indent removed.
element {'name': the code-block's name, 'id': sequence id. element {'name': the code-block's name, 'id': sequence id.
'codes': codes, 'required': 'gpu', 'in_examples': bool, code block in `Examples` or not,} 'codes': codes, 'in_examples': bool, code block in `Examples` or not,}
""" """
code_blocks = [] code_blocks = []
...@@ -290,7 +290,6 @@ def extract_code_blocks_from_docstr(docstr, google_style=True): ...@@ -290,7 +290,6 @@ def extract_code_blocks_from_docstr(docstr, google_style=True):
cb_start_pat = re.compile(r"code-block::\s*python") cb_start_pat = re.compile(r"code-block::\s*python")
cb_param_pat = re.compile(r"^\s*:(\w+):\s*(\S*)\s*$") cb_param_pat = re.compile(r"^\s*:(\w+):\s*(\S*)\s*$")
cb_required_pat = re.compile(r"^\s*#\s*require[s|d]\s*:\s*(\S+)\s*$")
cb_info = {} cb_info = {}
cb_info['cb_started'] = False cb_info['cb_started'] = False
...@@ -298,23 +297,20 @@ def extract_code_blocks_from_docstr(docstr, google_style=True): ...@@ -298,23 +297,20 @@ def extract_code_blocks_from_docstr(docstr, google_style=True):
cb_info['cb_cur_indent'] = -1 cb_info['cb_cur_indent'] = -1
cb_info['cb_cur_name'] = None cb_info['cb_cur_name'] = None
cb_info['cb_cur_seq_id'] = 0 cb_info['cb_cur_seq_id'] = 0
cb_info['cb_required'] = None
def _cb_started(): def _cb_started():
# nonlocal cb_started, cb_cur_name, cb_required, cb_cur_seq_id # nonlocal cb_started, cb_cur_name, cb_cur_seq_id
cb_info['cb_started'] = True cb_info['cb_started'] = True
cb_info['cb_cur_seq_id'] += 1 cb_info['cb_cur_seq_id'] += 1
cb_info['cb_cur_name'] = None cb_info['cb_cur_name'] = None
cb_info['cb_required'] = None
def _append_code_block(in_examples): def _append_code_block(in_examples):
# nonlocal code_blocks, cb_cur, cb_cur_name, cb_cur_seq_id, cb_required # nonlocal code_blocks, cb_cur, cb_cur_name, cb_cur_seq_id
code_blocks.append( code_blocks.append(
{ {
'codes': inspect.cleandoc("\n" + "\n".join(cb_info['cb_cur'])), 'codes': inspect.cleandoc("\n" + "\n".join(cb_info['cb_cur'])),
'name': cb_info['cb_cur_name'], 'name': cb_info['cb_cur_name'],
'id': cb_info['cb_cur_seq_id'], 'id': cb_info['cb_cur_seq_id'],
'required': cb_info['cb_required'],
'in_examples': in_examples, 'in_examples': in_examples,
} }
) )
...@@ -339,10 +335,6 @@ def extract_code_blocks_from_docstr(docstr, google_style=True): ...@@ -339,10 +335,6 @@ def extract_code_blocks_from_docstr(docstr, google_style=True):
if mo_p.group(1) == 'name': if mo_p.group(1) == 'name':
cb_info['cb_cur_name'] = mo_p.group(2) cb_info['cb_cur_name'] = mo_p.group(2)
continue continue
# read the required directive
mo_r = cb_required_pat.match(linecont)
if mo_r:
cb_info['cb_required'] = mo_r.group(1)
# docstring end # docstring end
if lineno == lastlineindex: if lineno == lastlineindex:
mo = re.search(r"\S", linecont) mo = re.search(r"\S", linecont)
...@@ -479,6 +471,49 @@ def get_docstring(full_test=False): ...@@ -479,6 +471,49 @@ def get_docstring(full_test=False):
return docstrings_to_test, whl_error return docstrings_to_test, whl_error
def check_old_style(docstrings_to_test: typing.Dict[str, str]):
    """Abort the check when any code block is written in plain style.

    A code block counts as "plain" (old style) when none of its lines starts
    with the ``>>>`` prompt after stripping surrounding whitespace.

    Args:
        docstrings_to_test: Mapping of api name to its raw docstring.

    Raises:
        SystemExit: With exit code 1 (via ``sys.exit``) when at least one
            plain style code block is found; the offenders are logged first.
    """
    plain_style_blocks = []

    for api_name, raw_docstring in docstrings_to_test.items():
        code_blocks = extract_code_blocks_from_docstr(
            raw_docstring, google_style=False
        )
        for codeblock in code_blocks:
            has_prompt = any(
                line.strip().startswith('>>>')
                for line in codeblock['codes'].splitlines()
            )
            if has_prompt:
                continue

            # Identify the block by its name, falling back to its sequence id.
            block_label = codeblock['name'] or codeblock['id']
            plain_style_blocks.append('{}:{}'.format(api_name, block_label))

    if not plain_style_blocks:
        return

    # Report on stdout as well, then fail the whole check.
    logger.addHandler(logging.StreamHandler(stream=sys.stdout))
    logger.info(
        ">>> %d apis use plain sample code style.",
        len(plain_style_blocks),
    )
    logger.info('=======================')
    logger.info('\n'.join(plain_style_blocks))
    logger.info('=======================')
    logger.info("Check Failed!")
    logger.info("DEPRECATION: Please do not use plain sample code style.")
    logger.info(
        "For more information: https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/dev_guides/style_guide_and_references/code_example_writing_specification_cn.html "
    )
    logger.info("----------------End of the Check--------------------")
    sys.exit(1)
def exec_gen_doc(): def exec_gen_doc():
result = True result = True
cmd = ["bash", "document_preview.sh"] cmd = ["bash", "document_preview.sh"]
...@@ -568,27 +603,30 @@ def get_test_results( ...@@ -568,27 +603,30 @@ def get_test_results(
def run_doctest(args, doctester: DocTester): def run_doctest(args, doctester: DocTester):
logger.info("----------------Codeblock Check Start--------------------")
# init logger # init logger
init_logger(debug=args.debug, log_file=args.logf) init_logger(debug=args.debug, log_file=args.logf)
logger.info("Check test mode ...") logger.info("----------------Codeblock Check Start--------------------")
logger.info(">>> Check test mode ...")
run_on_device = check_test_mode(mode=args.mode, gpu_id=args.gpu_id) run_on_device = check_test_mode(mode=args.mode, gpu_id=args.gpu_id)
logger.info("Get test capacity ...") logger.info(">>> Get test capacity ...")
sample_code_test_capacity = get_test_capacity(run_on_device) sample_code_test_capacity = get_test_capacity(run_on_device)
logger.info("Get docstring from api ...") logger.info(">>> Get docstring from api ...")
docstrings_to_test, whl_error = get_docstring(full_test=args.full_test) docstrings_to_test, whl_error = get_docstring(full_test=args.full_test)
logger.info("Prepare doctester ...") logger.info(">>> Checking plain sample code style before Paddle 2.5 ...")
check_old_style(docstrings_to_test)
logger.info(">>> Prepare doctester ...")
doctester.prepare(sample_code_test_capacity) doctester.prepare(sample_code_test_capacity)
logger.info("Running doctester ...") logger.info(">>> Running doctester ...")
test_results = get_test_results(doctester, docstrings_to_test) test_results = get_test_results(doctester, docstrings_to_test)
logger.info("Print summary ...") logger.info(">>> Print summary ...")
doctester.print_summary(test_results, whl_error) doctester.print_summary(test_results, whl_error)
if args.mode == "cpu": if args.mode == "cpu":
...@@ -596,37 +634,39 @@ def run_doctest(args, doctester: DocTester): ...@@ -596,37 +634,39 @@ def run_doctest(args, doctester: DocTester):
exec_gen_doc() exec_gen_doc()
arguments = [
# flags, dest, type, default, help
['--gpu_id', 'gpu_id', int, 0, 'GPU device id to use [0]'],
['--logf', 'logf', str, None, 'file for logging'],
['--threads', 'threads', int, 0, 'sub processes number'],
]
def parse_args(): def parse_args():
""" """
Parse input arguments Parse input arguments
""" """
global arguments
parser = argparse.ArgumentParser(description='run Sample Code Test') parser = argparse.ArgumentParser(description='run Sample Code Test')
parser.add_argument('--debug', dest='debug', action="store_true") parser.add_argument('--debug', dest='debug', action="store_true")
parser.add_argument('--full-test', dest='full_test', action="store_true") parser.add_argument('--full-test', dest='full_test', action="store_true")
parser.add_argument('mode', type=str, help='run on device', default='cpu') parser.add_argument(
'--mode', dest='mode', type=str, default='cpu', help='run on device'
)
parser.add_argument( parser.add_argument(
'--build-doc', '--build-doc',
dest='build_doc', dest='build_doc',
action='store_true', action='store_true',
help='build doc if need.', help='build doc if need.',
) )
for item in arguments: parser.add_argument(
parser.add_argument( '--gpu_id',
item[0], dest=item[1], help=item[4], type=item[2], default=item[3] dest='gpu_id',
) type=int,
default=0,
if len(sys.argv) == 1: help='GPU device id to use [0]',
args = parser.parse_args(['cpu']) )
return args parser.add_argument(
'--logf', dest='logf', type=str, default=None, help='file for logging'
)
parser.add_argument(
'--threads',
dest='threads',
type=int,
default=0,
help='sub processes number',
)
args = parser.parse_args() args = parser.parse_args()
return args return args
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
please make sure to run in the tools path
usage: python sampcd_processor_xdoctest.py {cpu or gpu}
{cpu or gpu}: running in cpu version or gpu version
for example, you can run cpu version testing like this:
python sampcd_processor_xdoctest.py cpu
"""
import functools
import logging
import multiprocessing
import os
import platform
import queue
import re
import sys
import threading
import time
import typing
import xdoctest
from sampcd_processor_utils import (
TEST_TIMEOUT,
DocTester,
TestResult,
logger,
parse_args,
run_doctest,
)
# Default xdoctest configuration; `Xdoctester` merges user overrides on top of
# it and copies the result into every example's config.
XDOCTEST_CONFIG = dict(
    # The statements are joined with a literal backslash + "n" (raw string),
    # not with a real newline character.
    global_exec=r"\n".join(
        (
            "import paddle",
            "paddle.device.set_device('cpu')",
            "paddle.set_default_dtype('float32')",
            "paddle.disable_static()",
        )
    ),
    default_runtime_state=dict(IGNORE_WHITESPACE=True),
)
def _patch_global_state(debug, verbose):
    """Override the debug switches in ``xdoctest.global_state``.

    ``DEBUG`` is set to ``debug and verbose > 2``. Each of the finer-grained
    switches keeps its current value only while that expression is truthy;
    otherwise it is turned off as well.

    Args:
        debug: Whether the doctester runs in debug mode.
        verbose: Verbosity level; xdoctest debugging needs a level above 2.
    """
    from xdoctest import global_state

    debug_enabled = debug and verbose > 2

    global_state.DEBUG = debug_enabled
    for switch in (
        'DEBUG_PARSER',
        'DEBUG_CORE',
        'DEBUG_RUNNER',
        'DEBUG_DOCTEST',
    ):
        current = getattr(global_state, switch)
        setattr(global_state, switch, current and debug_enabled)
def _patch_tensor_place():
    """Make xdoctest's output check ignore the ``place`` of printed Tensors.

    ``xdoctest.checker.check_output`` is replaced by a wrapper that rewrites
    the place inside every ``Tensor(... place=XXX, ...)`` text to
    ``Place(cpu)`` in both the actual and the expected output, then delegates
    to the previously installed ``check_output``.
    """
    from xdoctest import checker

    # Verbose-mode pattern, kept verbatim.
    place_pattern = re.compile(
        r"""
(Tensor\(.*?place=) # Tensor start
(.*?) # Place=(XXX)
(\,.*?\))
""",
        re.X | re.S,
    )
    cpu_place_template = r'\1Place(cpu)\3'

    wrapped_check_output = checker.check_output

    def check_output(got, want, runstate=None):
        # Nothing is expected, so there is nothing to compare.
        if not want:  # nocover
            return True

        normalized_got = place_pattern.sub(cpu_place_template, got)
        normalized_want = place_pattern.sub(cpu_place_template, want)
        return wrapped_check_output(
            got=normalized_got,
            want=normalized_want,
            runstate=runstate,
        )

    checker.check_output = check_output
def _patch_float_precision(digits):
    """Make xdoctest's output check compare numbers at a fixed precision.

    ``xdoctest.checker.check_output`` is replaced by a wrapper that rounds
    every number found in the actual and the expected output to ``digits``
    decimal places, then delegates to the previously installed
    ``check_output``.

    Args:
        digits: Number of decimal places passed to ``round``.
    """
    from xdoctest import checker

    # Verbose-mode pattern, kept verbatim: an int/float (optionally with an
    # exponent), optionally followed by an imaginary part ending in j/J.
    number_pattern = re.compile(
        r"""
(?:
(?<=[\s*\[\(\'\"\:]) # number starts
(?: # int/float or complex-real
(?:
[+-]?
(?:
(?: \d*\.\d+) | (?: \d+\.?) # int/float
)
)
(?:[Ee][+-]?\d+)?
)
(?: # complex-imag
(?:
(?:
[+-]?
(?:
(?: \d*\.\d+) | (?: \d+\.?)
)
)
(?:[Ee][+-]?\d+)?
)
(?:[Jj])
)?
)
""",
        re.X | re.S,
    )

    wrapped_check_output = checker.check_output

    def round_number(match_obj):
        """Return the matched number rounded, or unchanged if unparsable."""
        text = match_obj.group()

        if 'j' in text or 'J' in text:
            try:
                value = complex(text)
            except ValueError:
                return text
            rounded = complex(
                round(value.real, digits),
                round(value.imag, digits),
            )
            # str(complex) may wrap the value in parentheses; drop them.
            return str(rounded).strip('(').strip(')')

        try:
            return str(round(float(text), digits))
        except ValueError:
            return text

    def check_output(got, want, runstate=None):
        # Nothing is expected, so there is nothing to compare.
        if not want:  # nocover
            return True

        return wrapped_check_output(
            got=number_pattern.sub(round_number, got),
            want=number_pattern.sub(round_number, want),
            runstate=runstate,
        )

    checker.check_output = check_output
class Directive:
    """Base class of global directives used only by `xdoctest`."""

    # Compiled pattern that locates the directive inside a docstring;
    # declared here, assigned by subclasses.
    pattern: typing.Pattern

    def parse_directive(self, docstring: str) -> typing.Tuple[str, typing.Any]:
        """Parse the directive out of ``docstring``.

        The base implementation does nothing and returns ``None``; subclasses
        are expected to return the (docstring, value) pair.
        """
        return None
class TimeoutDirective(Directive):
    """Directive ``>>> # doctest: +TIMEOUT(N)`` setting a timeout.

    The ``x?doctest`` prefix and both ``+`` and ``-`` operators are accepted
    by the pattern; the operator itself is not used when parsing.
    """

    # Verbose-mode pattern, kept verbatim.
    pattern = re.compile(
        r"""
(?:
(?:
\s*\>{3}\s*\#\s*x?doctest\:\s*
)
(?P<op>[\+\-])
(?:
TIMEOUT
)
\(
(?P<time>\d+)
\)
(?:
\s*?
)
)
""",
        re.X | re.S,
    )

    def __init__(self, timeout):
        """Store the fallback ``timeout`` used when no directive is found."""
        self._timeout = timeout

    def parse_directive(self, docstring):
        """Strip the first timeout directive and return its value.

        Returns:
            A ``(docstring, timeout)`` tuple. When a directive is found, the
            matched text is replaced by a single newline and ``timeout`` is
            the directive's value as a float; otherwise the docstring is
            returned untouched together with the fallback timeout as a float.
        """
        found = self.pattern.search(docstring)
        if found is None:
            return docstring, float(self._timeout)

        before = docstring[: found.start()]
        after = docstring[found.end() :]
        return before + '\n' + after, float(found.group('time'))
class Xdoctester(DocTester):
    """A Xdoctest doctester.

    Parses the examples of a docstring with xdoctest, runs them in a worker
    (a spawned process by default, a thread otherwise) guarded by a timeout,
    and reports the results.
    """

    # name -> (directive class, *constructor arguments)
    directives: typing.Dict[str, typing.Tuple[typing.Type[Directive], ...]] = {
        'timeout': (TimeoutDirective, TEST_TIMEOUT)
    }

    def __init__(
        self,
        debug=False,
        style='freeform',
        target='codeblock',
        mode='native',
        verbose=2,
        patch_global_state=True,
        patch_tensor_place=True,
        patch_float_precision=5,
        use_multiprocessing=True,
        **config,
    ):
        """Configure the doctester and patch xdoctest.

        Args:
            debug: Debug flag, forwarded to the xdoctest global state patch.
            style: Docstring style handed to xdoctest's example parser.
            target: Stored on the instance as ``self.target``.
            mode: Assigned to ``example.mode`` of every extracted example.
            verbose: Verbosity passed to ``example.run``.
            patch_global_state: Whether to patch xdoctest's debug switches.
            patch_tensor_place: Whether to ignore Tensor places in outputs.
            patch_float_precision: Digits used to round numbers in outputs,
                or ``None`` to leave numbers untouched.
            use_multiprocessing: Run examples in a spawned process when true,
                in a thread otherwise.
            **config: Extra xdoctest config, overriding ``XDOCTEST_CONFIG``.
        """
        self.debug = debug

        self.style = style
        self.target = target
        self.mode = mode
        self.verbose = verbose
        self.config = {**XDOCTEST_CONFIG, **(config or {})}
        self._patch_global_state = patch_global_state
        self._patch_tensor_place = patch_tensor_place
        self._patch_float_precision = patch_float_precision
        self._use_multiprocessing = use_multiprocessing

        # patch xdoctest before `xdoctest.core.parse_docstr_examples`
        self._patch_xdoctest()

        self.docstring_parser = functools.partial(
            xdoctest.core.parse_docstr_examples, style=self.style
        )

        # Verbose-mode pattern, kept verbatim.
        self.directive_pattern = re.compile(
            r"""
(?<=(\#\s)) # positive lookbehind, directive begins
(doctest) # directive prefix, which should be replaced
(?=(:\s*.*\n)) # positive lookahead, directive content
""",
            re.X,
        )
        self.directive_prefix = 'xdoctest'

    def _patch_xdoctest(self):
        """Apply the enabled xdoctest patches to the current process."""
        if self._patch_global_state:
            _patch_global_state(self.debug, self.verbose)

        if self._patch_tensor_place:
            _patch_tensor_place()

        if self._patch_float_precision is not None:
            _patch_float_precision(self._patch_float_precision)

    def _parse_directive(
        self, docstring: str
    ) -> typing.Tuple[str, typing.Dict[str, typing.Any]]:
        """Run every registered directive over ``docstring``.

        Returns:
            The docstring as rewritten by the directives, and a dict mapping
            each directive name to the value its parser returned.
        """
        directives = {}
        for name, (directive_cls, *directive_args) in self.directives.items():
            docstring, value = directive_cls(*directive_args).parse_directive(
                docstring
            )
            directives[name] = value

        return docstring, directives

    def convert_directive(self, docstring: str) -> str:
        """Replace directive prefix with xdoctest"""
        return self.directive_pattern.sub(self.directive_prefix, docstring)

    def prepare(self, test_capacity: set):
        """Set environs for xdoctest directive.
        The keys in environs, which also used in `# xdoctest: +REQUIRES(env:XX)`, should be UPPER case.

        If `test_capacity = {"cpu"}`, then we set:

            - `os.environ["CPU"] = "True"`

        which makes this SKIPPED:

            - # xdoctest: +REQUIRES(env:GPU)

        If `test_capacity = {"cpu", "gpu"}`, then we set:

            - `os.environ["CPU"] = "True"`
            - `os.environ["GPU"] = "True"`

        which makes this SUCCESS:

            - # xdoctest: +REQUIRES(env:GPU)
        """
        logger.info("Set xdoctest environ ...")
        for capacity in test_capacity:
            key = capacity.upper()
            os.environ[key] = "True"
            logger.info("Environ: %s , set to True.", key)

        logger.info("API check using Xdoctest prepared!-- Example Code")
        logger.info("running under python %s", platform.python_version())
        logger.info("running under xdoctest %s", xdoctest.__version__)

    def run(self, api_name: str, docstring: str) -> typing.List[TestResult]:
        """Run the xdoctest with a docstring.

        Returns:
            The test results of the docstring's examples, or a single
            timed-out ``TestResult`` named ``api_name`` when no result
            arrived within the timeout.
        """
        # parse global directive
        docstring, directives = self._parse_directive(docstring)

        # extract xdoctest examples
        examples_to_test, examples_nocode = self._extract_examples(
            api_name, docstring, **directives
        )

        # run xdoctest
        try:
            result = self._execute_xdoctest(
                examples_to_test, examples_nocode, **directives
            )
        except queue.Empty:
            result = [
                TestResult(
                    name=api_name,
                    timeout=True,
                    time=directives.get('timeout', TEST_TIMEOUT),
                )
            ]

        return result

    def _extract_examples(self, api_name, docstring, **directives):
        """Extract code block examples from docstring.

        Returns:
            Two dicts keyed by ``"{api_name}_{index}"``: the examples that
            have parsed parts and should run, and those without any code.
            When the docstring yields no example at all, the second dict
            holds ``api_name`` mapped to itself.
        """
        examples_to_test = {}
        examples_nocode = {}
        for example_idx, example in enumerate(
            self.docstring_parser(docstr=docstring, callname=api_name)
        ):
            example.mode = self.mode
            example.config.update(self.config)
            example_key = f"{api_name}_{example_idx}"

            # check whether there are some parts parsed by xdoctest
            if not example._parts:
                examples_nocode[example_key] = example
                continue

            examples_to_test[example_key] = example

        if not examples_nocode and not examples_to_test:
            examples_nocode[api_name] = api_name

        return examples_to_test, examples_nocode

    def _execute_xdoctest(
        self, examples_to_test, examples_nocode, **directives
    ):
        """Run the examples in a worker and wait for the results.

        Raises:
            queue.Empty: When the worker delivers no result within the
                ``timeout`` directive (``TEST_TIMEOUT`` by default).
        """
        if self._use_multiprocessing:
            _ctx = multiprocessing.get_context('spawn')
            result_queue = _ctx.Queue()
            exec_processer = functools.partial(_ctx.Process, daemon=True)
        else:
            result_queue = queue.Queue()
            exec_processer = functools.partial(threading.Thread, daemon=True)

        processer = exec_processer(
            target=self._execute_with_queue,
            args=(
                result_queue,
                examples_to_test,
                examples_nocode,
            ),
        )

        processer.start()
        try:
            result = result_queue.get(
                timeout=directives.get('timeout', TEST_TIMEOUT)
            )
        except queue.Empty:
            # The worker is still busy with a timed-out example. Stop it so
            # it does not keep consuming resources while the remaining
            # docstrings are checked. Only processes offer `terminate`;
            # a thread cannot be stopped and is left to its daemon flag.
            terminate = getattr(processer, 'terminate', None)
            if terminate is not None:
                terminate()
            raise

        processer.join()

        return result

    def _execute(self, examples_to_test, examples_nocode):
        """Run xdoctest for each example"""
        # A spawned process starts without the patches applied in
        # `__init__`, so apply them again there. A thread shares the already
        # patched process; patching again would wrap `check_output` once
        # more on every run and keep growing the wrapper chain.
        if self._use_multiprocessing:
            self._patch_xdoctest()

        # run the xdoctest
        test_results = []
        for example in examples_to_test.values():
            start_time = time.time()
            result = example.run(verbose=self.verbose, on_error='return')
            end_time = time.time()

            test_results.append(
                TestResult(
                    name=str(example),
                    passed=result['passed'],
                    skipped=result['skipped'],
                    failed=result['failed'],
                    test_msg=str(result['exc_info']),
                    time=end_time - start_time,
                )
            )

        for example in examples_nocode.values():
            test_results.append(TestResult(name=str(example), nocode=True))

        return test_results

    def _execute_with_queue(self, queue, examples_to_test, examples_nocode):
        """Worker entry point: run the examples and put the results on ``queue``."""
        queue.put(self._execute(examples_to_test, examples_nocode))

    def print_summary(self, test_results, whl_error=None):
        """Log the summary of the check and exit on failure.

        Args:
            test_results: Iterable of ``TestResult``.
            whl_error: APIs that are not in the whl package, if any.

        Raises:
            SystemExit: With exit code 1 (via ``sys.exit``) when ``whl_error``
                is non-empty or when any sample code failed.
        """
        summary_success = []
        summary_failed = []
        summary_skiptest = []
        summary_timeout = []
        summary_nocodes = []

        stdout_handler = logging.StreamHandler(stream=sys.stdout)
        logger.addHandler(stdout_handler)
        logger.info("----------------End of the Check--------------------")

        if whl_error:
            logger.info("%s is not in whl.", whl_error)
            logger.info("")
            logger.info("Please check the whl package and API_PR.spec!")
            logger.info(
                "You can follow these steps in order to generate API.spec:"
            )
            logger.info("1. cd ${paddle_path}, compile paddle;")
            logger.info("2. pip install build/python/dist/(build whl package);")
            logger.info(
                "3. run 'python tools/print_signatures.py paddle > paddle/fluid/API.spec'."
            )
            for test_result in test_results:
                if test_result.failed:
                    logger.info(
                        "In addition, mistakes found in sample codes: %s",
                        test_result.name,
                    )
            logger.info("----------------------------------------------------")
            sys.exit(1)

        for test_result in test_results:
            if test_result.nocode:
                summary_nocodes.append(test_result.name)
                continue

            if test_result.passed:
                summary_success.append(test_result.name)

            if test_result.skipped:
                summary_skiptest.append(test_result.name)

            if test_result.failed:
                summary_failed.append(test_result.name)

            if test_result.timeout:
                summary_timeout.append(
                    {
                        'api_name': test_result.name,
                        'run_time': test_result.time,
                    }
                )

        if summary_success:
            logger.info("%d sample codes ran success", len(summary_success))
            logger.info('\n'.join(summary_success))

        if summary_skiptest:
            logger.info("%d sample codes skipped", len(summary_skiptest))
            logger.info('\n'.join(summary_skiptest))

        if summary_nocodes:
            logger.info(
                "%d apis could not run test or don't have sample codes",
                len(summary_nocodes),
            )
            logger.info('\n'.join(summary_nocodes))

        if summary_timeout:
            logger.info("%d sample codes ran timeout", len(summary_timeout))
            for _result in summary_timeout:
                logger.info(
                    f"{_result['api_name']} - more than {_result['run_time']}s"
                )

        if summary_failed:
            logger.info("%d sample codes ran failed", len(summary_failed))
            logger.info('\n'.join(summary_failed))
            logger.info(
                "Mistakes found in sample codes. Please recheck the sample codes."
            )
            sys.exit(1)

        logger.info("Sample code check is successful!")
# Script entry point: parse the command line, then run the sample code check
# with an xdoctest based doctester.
if __name__ == '__main__':
    args = parse_args()
    doctester = Xdoctester(debug=args.debug)
    run_doctest(args, doctester=doctester)
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册