From afa4bf517f0c8fe5101d97ce98d3474dc7f43342 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=20Wei=20=28=E4=BB=BB=E5=8D=AB=29?= Date: Thu, 10 Jun 2021 17:35:24 +0800 Subject: [PATCH] fix the bug that `print_signatures.py` cannot get all the public apis (#33423) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 增加方法获取和执行全量api的示例代码进行测试 * start the sample code test for gpu * should import paddle separately * add a stdout handler, the default is stderr. the paddle_build.sh will catch the stdout content. * add RUN_ON_DEVICE into the requires set * if codeblock['required'] is empty, use the RUN_ON_DEVICE instead * set the threads to 16 http://agroup.baidu.com/paddlepaddle-org-cn/md/article/4036225 * 设置默认日志级别为INFO级别 * using the logic from gen_doc.py * using modulelist to get all the apis * as we don't care which name is the shortest, so fetch the first name in the all_names list * the new list from project * 先不启用gpu测试,先把print_signature获取不到全部API的问题解决了 --- paddle/scripts/paddle_build.sh | 18 +++-- tools/print_signatures.py | 135 ++++++++++++++++++++++++++++++++- tools/sampcd_processor.py | 56 +++++++++++--- 3 files changed, 189 insertions(+), 20 deletions(-) diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 7fa79ede7f9..96dc8c67969 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -2009,12 +2009,16 @@ function build_document_preview() { sh /paddle/tools/document_preview.sh ${PORT} } - -function example() { +# origin name: example +function exec_samplecode_test() { pip install ${PADDLE_ROOT}/build/python/dist/*.whl paddle version cd ${PADDLE_ROOT}/tools - python sampcd_processor.py cpu;example_error=$? + if [ "$1" = "cpu" ] ; then + python sampcd_processor.py cpu; example_error=$? + elif [ "$1" = "gpu" ] ; then + python sampcd_processor.py --threads=16 --full-test gpu; example_error=$? 
+ fi if [ "$example_error" != "0" ];then echo "Code instance execution failed" >&2 exit 5 @@ -2127,7 +2131,7 @@ function main() { check_sequence_op_unittest generate_api_spec ${PYTHON_ABI:-""} "PR" set +e - example_info=$(example) + example_info=$(exec_samplecode_test cpu) example_code=$? summary_check_problems $check_style_code $example_code "$check_style_info" "$example_info" assert_api_spec_approvals @@ -2286,7 +2290,11 @@ function main() { build_document_preview ;; api_example) - example + example_info=$(exec_samplecode_test cpu) + example_code=$? + check_style_code=0 + check_style_info= + summary_check_problems $check_style_code $example_code "$check_style_info" "$example_info" ;; test_op_benchmark) test_op_benchmark diff --git a/tools/print_signatures.py b/tools/print_signatures.py index 6de9d84379f..3fa9e9b782c 100644 --- a/tools/print_signatures.py +++ b/tools/print_signatures.py @@ -27,11 +27,25 @@ import pydoc import hashlib import platform import functools +import pkgutil +import logging +import paddle member_dict = collections.OrderedDict() visited_modules = set() +logger = logging.getLogger() +if logger.handlers: + # we assume the first handler is the one we want to configure + console = logger.handlers[0] +else: + console = logging.StreamHandler(sys.stderr) + logger.addHandler(console) +console.setFormatter( + logging.Formatter( + "%(asctime)s - %(funcName)s:%(lineno)d - %(levelname)s - %(message)s")) + def md5(doc): try: @@ -199,11 +213,124 @@ def visit_all_module(mod): visit_member(mod.__name__, instance) +# all from gen_doc.py +api_info_dict = {} # maps id(object) -> api info dict; used by get_all_api + + +# step 1: walk through the paddle package to collect all the apis in api_info_dict +def get_all_api(root_path='paddle', attr="__all__"): + """ + walk through the paddle package to collect all the apis. 
+ """ + global api_info_dict + api_counter = 0 + for filefinder, name, ispkg in pkgutil.walk_packages( + path=paddle.__path__, prefix=paddle.__name__ + '.'): + try: + if name in sys.modules: + m = sys.modules[name] + else: + # not imported yet: only check that the name resolves, then skip it + m = eval(name) + continue + except AttributeError: + logger.warning("AttributeError occurred when `eval(%s)`", name) + pass + else: + api_counter += process_module(m, attr) + + api_counter += process_module(paddle, attr) + + logger.info('%s: collected %d apis, %d distinct apis.', attr, api_counter, + len(api_info_dict)) + + return [sorted(api_info['all_names'])[0] for api_info in api_info_dict.values()] # all_names is a set, so it cannot be indexed directly + + +def insert_api_into_dict(full_name, gen_doc_anno=None): + """ + evaluate full_name and record the resulting object in api_info_dict, keyed by id(object) + Return: + api_info object or None + """ + try: + obj = eval(full_name) + fc_id = id(obj) + except AttributeError: + logger.warning("AttributeError occurred when `id(eval(%s))`", full_name) + return None + except Exception: + logger.warning("Exception occurred when `id(eval(%s))`", full_name) + return None + else: + logger.debug("adding %s to api_info_dict.", full_name) + if fc_id in api_info_dict: + api_info_dict[fc_id]["all_names"].add(full_name) + else: + api_info_dict[fc_id] = { + "all_names": set([full_name]), + "id": fc_id, + "object": obj, + "type": type(obj).__name__, + } + docstr = inspect.getdoc(obj) + if docstr: + api_info_dict[fc_id]["docstring"] = inspect.cleandoc(docstr) + if gen_doc_anno: + api_info_dict[fc_id]["gen_doc_anno"] = gen_doc_anno + return api_info_dict[fc_id] + + +# step 1: fill the fields `id`, `all_names`, `type` and `docstring`; returns the number of apis inserted +def process_module(m, attr="__all__"): + api_counter = 0 + if hasattr(m, attr): + # may have duplication of api + for api in set(getattr(m, attr)): + if api.startswith('_'): continue + # Exception occurred when `id(eval(paddle.dataset.conll05.test, get_dict))` + if ',' in api: continue + + # api's fullname + full_name = m.__name__ + "." 
+ api + api_info = insert_api_into_dict(full_name) + if api_info is not None: + api_counter += 1 + if inspect.isclass(api_info['object']): + for name, value in inspect.getmembers(api_info['object']): + if (not name.startswith("_")) and hasattr(value, + '__name__'): + method_full_name = full_name + '.' + name # value.__name__ + method_api_info = insert_api_into_dict( + method_full_name, 'class_method') + if method_api_info is not None: + api_counter += 1 + return api_counter + + +def get_all_api_from_modulelist(): + modulelist = [ + paddle, paddle.amp, paddle.nn, paddle.nn.functional, + paddle.nn.initializer, paddle.nn.utils, paddle.static, paddle.static.nn, + paddle.io, paddle.jit, paddle.metric, paddle.distribution, + paddle.optimizer, paddle.optimizer.lr, paddle.regularizer, paddle.text, + paddle.utils, paddle.utils.download, paddle.utils.profiler, + paddle.utils.cpp_extension, paddle.sysconfig, paddle.vision, + paddle.distributed, paddle.distributed.fleet, + paddle.distributed.fleet.utils, paddle.distributed.parallel, + paddle.distributed.utils, paddle.callbacks, paddle.hub, paddle.autograd + ] + for m in modulelist: + visit_all_module(m) + + return member_dict + + if __name__ == '__main__': - import paddle - modules = sys.argv[1].split(",") - for m in modules: - visit_all_module(importlib.import_module(m)) + # modules = sys.argv[1].split(",") + # for m in modules: + # visit_all_module(importlib.import_module(m)) + get_all_api_from_modulelist() for name in member_dict: print(name, member_dict[name]) diff --git a/tools/sampcd_processor.py b/tools/sampcd_processor.py index a1658e3c2ed..0ac6c929c5d 100644 --- a/tools/sampcd_processor.py +++ b/tools/sampcd_processor.py @@ -39,14 +39,13 @@ if logger.handlers: console = logger.handlers[ 0] # we assume the first handler is the one we want to configure else: - console = logging.StreamHandler() + console = logging.StreamHandler(stream=sys.stderr) logger.addHandler(console) 
console.setFormatter(logging.Formatter("%(message)s")) RUN_ON_DEVICE = 'cpu' SAMPLE_CODE_TEST_CAPACITY = set() GPU_ID = 0 -methods = [] whl_error = [] API_DEV_SPEC_FN = 'paddle/fluid/API_DEV.spec' API_PR_SPEC_FN = 'paddle/fluid/API_PR.spec' @@ -247,13 +246,15 @@ def is_required_match(requirestr, cbtitle='not-specified'): False - not match None - skipped # trick """ - global SAMPLE_CODE_TEST_CAPACITY # readonly + global SAMPLE_CODE_TEST_CAPACITY, RUN_ON_DEVICE # readonly requires = set(['cpu']) if requirestr: for r in requirestr.split(','): rr = r.strip().lower() if rr: requires.add(rr) + else: + requires.add(RUN_ON_DEVICE) if 'skip' in requires or 'skiptest' in requires: logger.info('%s: skipped', cbtitle) return None @@ -283,8 +284,8 @@ def insert_codes_into_codeblock(codeblock, apiname='not-specified'): cpu_str = '\nimport os\nos.environ["CUDA_VISIBLE_DEVICES"] = ""\n' gpu_str = '\nimport os\nos.environ["CUDA_VISIBLE_DEVICES"] = "{}"\n'.format( GPU_ID) - if 'required' in codeblock: - if codeblock['required'] is None or codeblock['required'] == 'cpu': + if 'required' in codeblock and codeblock['required']: + if codeblock['required'] == 'cpu': inserted_codes_f = cpu_str elif codeblock['required'] == 'gpu': inserted_codes_f = gpu_str @@ -426,20 +427,25 @@ stdout: %s return result, tfname, msg, end_time - start_time -def get_filenames(): +def get_filenames(full_test=False): ''' this function will get the sample code files that pending for check. + Args: + full_test: the full apis or the increment + Returns: dict: the sample code files pending for check . ''' - global methods # write global whl_error import paddle whl_error = [] - get_incrementapi() + if full_test: + get_full_api() + else: + get_incrementapi() all_sample_code_filenames = {} with open(API_DIFF_SPEC_FN) as f: for line in f.readlines(): @@ -472,8 +478,9 @@ def get_api_md5(path): api_md5(dict): key is the api's real fullname, value is the md5sum. 
""" api_md5 = {} - API_spec = '%s/%s' % (os.path.abspath(os.path.join(os.getcwd(), "..")), - path) + API_spec = os.path.abspath(os.path.join(os.getcwd(), "..", path)) + if not os.path.isfile(API_spec): + return api_md5 pat = re.compile(r'\((paddle[^,]+)\W*document\W*([0-9a-z]{32})') patArgSpec = re.compile( r'^(paddle[^,]+)\s+\(ArgSpec.*document\W*([0-9a-z]{32})') @@ -487,6 +494,28 @@ def get_api_md5(path): return api_md5 +def get_full_api(): + """ + write the keys of the dict returned by get_all_api_from_modulelist() into API_DIFF_SPEC_FN, one per line + """ + global API_DIFF_SPEC_FN ## readonly + from print_signatures import get_all_api_from_modulelist + member_dict = get_all_api_from_modulelist() + with open(API_DIFF_SPEC_FN, 'w') as f: + f.write("\n".join(member_dict.keys())) + + +def get_full_api_by_walk(): + """ + write the api names returned by get_all_api() into API_DIFF_SPEC_FN, one per line + """ + global API_DIFF_SPEC_FN ## readonly + from print_signatures import get_all_api + apilist = get_all_api() + with open(API_DIFF_SPEC_FN, 'w') as f: + f.write("\n".join(apilist)) + + def get_incrementapi(): ''' this function will get the apis that difference between API_DEV.spec and API_PR.spec. 
@@ -526,6 +555,7 @@ def parse_args(): # help='Use CPU mode (overrides --gpu)') # parser.add_argument('--gpu', dest='gpu_mode', action="store_true") parser.add_argument('--debug', dest='debug', action="store_true") + parser.add_argument('--full-test', dest='full_test', action="store_true") parser.add_argument('mode', type=str, help='run on device', default='cpu') for item in arguments: parser.add_argument( @@ -545,6 +575,8 @@ if __name__ == '__main__': args = parse_args() if args.debug: logger.setLevel(logging.DEBUG) + else: + logger.setLevel(logging.INFO) if args.logf: logfHandler = logging.FileHandler(args.logf) logfHandler.setFormatter( @@ -573,7 +605,7 @@ if __name__ == '__main__': else: os.mkdir(SAMPLECODE_TEMPDIR) - filenames = get_filenames() + filenames = get_filenames(args.full_test) if len(filenames) == 0 and len(whl_error) == 0: logger.info("-----API_PR.spec is the same as API_DEV.spec-----") exit(0) @@ -593,6 +625,8 @@ if __name__ == '__main__': if not args.debug: shutil.rmtree(SAMPLECODE_TEMPDIR) + stdout_handler = logging.StreamHandler(stream=sys.stdout) + logger.addHandler(stdout_handler) logger.info("----------------End of the Check--------------------") if len(whl_error) != 0: logger.info("%s is not in whl.", whl_error) -- GitLab