未验证 提交 afa4bf51 编写于 作者: R Ren Wei (任卫) 提交者: GitHub

fix the bug that `print_signature.py` cannot get all the public apis (#33423)

* 增加方法获取和执行全量api的示例代码进行测试

* start the sample code test for gpu

* should import paddle separately

* add a stdout handler, the default is stderr. the paddle_build.sh will catch the stdout content.

* add RUN_ON_DEVICE into the requires set

* if codeblock['required'] is empty, use the RUN_ON_DEVICE instead

* set the threads to 16

http://agroup.baidu.com/paddlepaddle-org-cn/md/article/4036225

* 设置默认日志级别为INFO级别

* using the logic from gen_doc.py

* using modulelist to get the all apis

* as we don't care which name is the shortest, fetch the first name in the all_names list

* the new list from project

* 先不启用gpu测试,先把print_signature获取不到全部API的问题解决了
上级 f89a7b55
...@@ -2009,12 +2009,16 @@ function build_document_preview() { ...@@ -2009,12 +2009,16 @@ function build_document_preview() {
sh /paddle/tools/document_preview.sh ${PORT} sh /paddle/tools/document_preview.sh ${PORT}
} }
# origin name: example
function example() { function exec_samplecode_test() {
pip install ${PADDLE_ROOT}/build/python/dist/*.whl pip install ${PADDLE_ROOT}/build/python/dist/*.whl
paddle version paddle version
cd ${PADDLE_ROOT}/tools cd ${PADDLE_ROOT}/tools
python sampcd_processor.py cpu;example_error=$? if [ "$1" = "cpu" ] ; then
python sampcd_processor.py cpu; example_error=$?
elif [ "$1" = "gpu" ] ; then
python sampcd_processor.py --threads=16 --full-test gpu; example_error=$?
fi
if [ "$example_error" != "0" ];then if [ "$example_error" != "0" ];then
echo "Code instance execution failed" >&2 echo "Code instance execution failed" >&2
exit 5 exit 5
...@@ -2127,7 +2131,7 @@ function main() { ...@@ -2127,7 +2131,7 @@ function main() {
check_sequence_op_unittest check_sequence_op_unittest
generate_api_spec ${PYTHON_ABI:-""} "PR" generate_api_spec ${PYTHON_ABI:-""} "PR"
set +e set +e
example_info=$(example) example_info=$(exec_samplecode_test cpu)
example_code=$? example_code=$?
summary_check_problems $check_style_code $example_code "$check_style_info" "$example_info" summary_check_problems $check_style_code $example_code "$check_style_info" "$example_info"
assert_api_spec_approvals assert_api_spec_approvals
...@@ -2286,7 +2290,11 @@ function main() { ...@@ -2286,7 +2290,11 @@ function main() {
build_document_preview build_document_preview
;; ;;
api_example) api_example)
example example_info=$(exec_samplecode_test cpu)
example_code=$?
check_style_code=0
check_style_info=
summary_check_problems $check_style_code $example_code "$check_style_info" "$example_info"
;; ;;
test_op_benchmark) test_op_benchmark)
test_op_benchmark test_op_benchmark
......
...@@ -27,11 +27,25 @@ import pydoc ...@@ -27,11 +27,25 @@ import pydoc
import hashlib import hashlib
import platform import platform
import functools import functools
import pkgutil
import logging
import paddle
member_dict = collections.OrderedDict() member_dict = collections.OrderedDict()
visited_modules = set() visited_modules = set()
logger = logging.getLogger()
# Make sure the root logger owns at least one handler, then configure the
# first one: it is assumed to be the console handler we care about.
if not logger.handlers:
    logger.addHandler(logging.StreamHandler(sys.stderr))
console = logger.handlers[0]
console.setFormatter(
    logging.Formatter(
        "%(asctime)s - %(funcName)s:%(lineno)d - %(levelname)s - %(message)s"))
def md5(doc): def md5(doc):
try: try:
...@@ -199,11 +213,124 @@ def visit_all_module(mod): ...@@ -199,11 +213,124 @@ def visit_all_module(mod):
visit_member(mod.__name__, instance) visit_member(mod.__name__, instance)
# all from gen_doc.py
# Maps id(obj) -> info dict describing that object. It is filled by
# insert_api_into_dict and read by get_all_api.
api_info_dict = {}  # used by get_all_api
# step 1: walkthrough the paddle package to collect all the apis in api_set
def get_all_api(root_path='paddle', attr="__all__"):
    """
    Walk through the paddle package to collect all the apis.

    Args:
        root_path: kept for backward compatibility. It is not read; the walk
            always starts from ``paddle.__path__``.
        attr: name of the module attribute that lists the public names.

    Returns:
        list: one full name for every distinct object in ``api_info_dict``.
    """
    api_counter = 0
    for filefinder, name, ispkg in pkgutil.walk_packages(
            path=paddle.__path__, prefix=paddle.__name__ + '.'):
        try:
            if name in sys.modules:
                m = sys.modules[name]
            else:
                # Modules that are not imported yet are only probed here and
                # then skipped; they are never passed to process_module.
                # NOTE(review): confirm that skipping them is intended.
                # importlib.import_module(name)
                m = eval(name)
                continue
        except AttributeError:
            logger.warning("AttributeError occurred when `eval(%s)`", name)
        else:
            api_counter += process_module(m, attr)

    # The top-level package itself is not yielded by walk_packages.
    api_counter += process_module(paddle, attr)

    logger.info('%s: collected %d apis, %d distinct apis.', attr, api_counter,
                len(api_info_dict))

    # `all_names` is a set, so it cannot be indexed. Any alias is acceptable;
    # the lexicographically smallest one keeps the result deterministic.
    return [min(api_info['all_names']) for api_info in api_info_dict.values()]
def insert_api_into_dict(full_name, gen_doc_anno=None):
    """
    Insert an api into the api_info_dict.

    Args:
        full_name: dotted name of the api; it is resolved with ``eval``.
        gen_doc_anno: optional annotation stored with a newly created entry.

    Return:
        api_info object or None (when the name cannot be resolved)
    """
    try:
        # The names come from the package's own `__all__` lists; never feed
        # this function names from an untrusted source.
        obj = eval(full_name)
        fc_id = id(obj)
    except AttributeError:
        logger.warning("AttributeError occurred when `id(eval(%s))`", full_name)
        return None
    except Exception:
        # Catch Exception rather than everything, so that KeyboardInterrupt
        # and SystemExit still stop the run.
        logger.warning("Exception occurred when `id(eval(%s))`", full_name)
        return None

    logger.debug("adding %s to api_info_dict.", full_name)
    if fc_id in api_info_dict:
        # The same object reached through another name: record the alias.
        api_info_dict[fc_id]["all_names"].add(full_name)
    else:
        api_info_dict[fc_id] = {
            "all_names": {full_name},
            "id": fc_id,
            "object": obj,
            "type": type(obj).__name__,
        }
        # NOTE(review): docstring and annotation are assumed to be recorded
        # only when the entry is first created -- confirm against the
        # original indentation.
        docstr = inspect.getdoc(obj)
        if docstr:
            api_info_dict[fc_id]["docstring"] = inspect.cleandoc(docstr)
        if gen_doc_anno:
            api_info_dict[fc_id]["gen_doc_anno"] = gen_doc_anno
    return api_info_dict[fc_id]
# step 1 fill field : `id` & `all_names`, type, docstring
def process_module(m, attr="__all__"):
    """
    Register every public name listed in ``getattr(m, attr)``.

    For each listed name that resolves, the api is inserted into the api
    dict. When the resolved object is a class, its public members that have
    a ``__name__`` are inserted as well, annotated as 'class_method'.

    Args:
        m: the module (or any object with ``__name__``) to inspect.
        attr: name of the attribute holding the list of public names.

    Returns:
        int: how many names were inserted successfully (aliases included).
    """
    api_counter = 0
    if not hasattr(m, attr):
        return api_counter

    # The list may contain duplicates; sorting the de-duplicated names makes
    # the processing order independent of string hashing.
    for api in sorted(set(getattr(m, attr))):
        # Skip empty and private names.
        if not api or api.startswith('_'):
            continue
        # Exception occurred when `id(eval(paddle.dataset.conll05.test, get_dict))`
        if ',' in api:
            continue

        # api's fullname
        full_name = m.__name__ + "." + api
        api_info = insert_api_into_dict(full_name)
        if api_info is None:
            continue
        api_counter += 1

        if not inspect.isclass(api_info['object']):
            continue
        for name, value in inspect.getmembers(api_info['object']):
            if name.startswith("_") or not hasattr(value, '__name__'):
                continue
            method_full_name = full_name + '.' + name  # value.__name__
            method_api_info = insert_api_into_dict(method_full_name,
                                                   'class_method')
            if method_api_info is not None:
                api_counter += 1
    return api_counter
def get_all_api_from_modulelist():
    """
    Visit a fixed set of paddle modules and return the collected members.

    Returns:
        the module-level ``member_dict`` after every listed module has been
        passed to ``visit_all_module``.
    """
    public_modules = (
        paddle,
        paddle.amp,
        paddle.nn,
        paddle.nn.functional,
        paddle.nn.initializer,
        paddle.nn.utils,
        paddle.static,
        paddle.static.nn,
        paddle.io,
        paddle.jit,
        paddle.metric,
        paddle.distribution,
        paddle.optimizer,
        paddle.optimizer.lr,
        paddle.regularizer,
        paddle.text,
        paddle.utils,
        paddle.utils.download,
        paddle.utils.profiler,
        paddle.utils.cpp_extension,
        paddle.sysconfig,
        paddle.vision,
        paddle.distributed,
        paddle.distributed.fleet,
        paddle.distributed.fleet.utils,
        paddle.distributed.parallel,
        paddle.distributed.utils,
        paddle.callbacks,
        paddle.hub,
        paddle.autograd,
    )
    for module in public_modules:
        visit_all_module(module)
    return member_dict
if __name__ == '__main__': if __name__ == '__main__':
import paddle # modules = sys.argv[1].split(",")
modules = sys.argv[1].split(",") # for m in modules:
for m in modules: # visit_all_module(importlib.import_module(m))
visit_all_module(importlib.import_module(m)) get_all_api_from_modulelist()
for name in member_dict: for name in member_dict:
print(name, member_dict[name]) print(name, member_dict[name])
...@@ -39,14 +39,13 @@ if logger.handlers: ...@@ -39,14 +39,13 @@ if logger.handlers:
console = logger.handlers[ console = logger.handlers[
0] # we assume the first handler is the one we want to configure 0] # we assume the first handler is the one we want to configure
else: else:
console = logging.StreamHandler() console = logging.StreamHandler(stream=sys.stderr)
logger.addHandler(console) logger.addHandler(console)
console.setFormatter(logging.Formatter("%(message)s")) console.setFormatter(logging.Formatter("%(message)s"))
RUN_ON_DEVICE = 'cpu' RUN_ON_DEVICE = 'cpu'
SAMPLE_CODE_TEST_CAPACITY = set() SAMPLE_CODE_TEST_CAPACITY = set()
GPU_ID = 0 GPU_ID = 0
methods = []
whl_error = [] whl_error = []
API_DEV_SPEC_FN = 'paddle/fluid/API_DEV.spec' API_DEV_SPEC_FN = 'paddle/fluid/API_DEV.spec'
API_PR_SPEC_FN = 'paddle/fluid/API_PR.spec' API_PR_SPEC_FN = 'paddle/fluid/API_PR.spec'
...@@ -247,13 +246,15 @@ def is_required_match(requirestr, cbtitle='not-specified'): ...@@ -247,13 +246,15 @@ def is_required_match(requirestr, cbtitle='not-specified'):
False - not match False - not match
None - skipped # trick None - skipped # trick
""" """
global SAMPLE_CODE_TEST_CAPACITY # readonly global SAMPLE_CODE_TEST_CAPACITY, RUN_ON_DEVICE # readonly
requires = set(['cpu']) requires = set(['cpu'])
if requirestr: if requirestr:
for r in requirestr.split(','): for r in requirestr.split(','):
rr = r.strip().lower() rr = r.strip().lower()
if rr: if rr:
requires.add(rr) requires.add(rr)
else:
requires.add(RUN_ON_DEVICE)
if 'skip' in requires or 'skiptest' in requires: if 'skip' in requires or 'skiptest' in requires:
logger.info('%s: skipped', cbtitle) logger.info('%s: skipped', cbtitle)
return None return None
...@@ -283,8 +284,8 @@ def insert_codes_into_codeblock(codeblock, apiname='not-specified'): ...@@ -283,8 +284,8 @@ def insert_codes_into_codeblock(codeblock, apiname='not-specified'):
cpu_str = '\nimport os\nos.environ["CUDA_VISIBLE_DEVICES"] = ""\n' cpu_str = '\nimport os\nos.environ["CUDA_VISIBLE_DEVICES"] = ""\n'
gpu_str = '\nimport os\nos.environ["CUDA_VISIBLE_DEVICES"] = "{}"\n'.format( gpu_str = '\nimport os\nos.environ["CUDA_VISIBLE_DEVICES"] = "{}"\n'.format(
GPU_ID) GPU_ID)
if 'required' in codeblock: if 'required' in codeblock and codeblock['required']:
if codeblock['required'] is None or codeblock['required'] == 'cpu': if codeblock['required'] == 'cpu':
inserted_codes_f = cpu_str inserted_codes_f = cpu_str
elif codeblock['required'] == 'gpu': elif codeblock['required'] == 'gpu':
inserted_codes_f = gpu_str inserted_codes_f = gpu_str
...@@ -426,20 +427,25 @@ stdout: %s ...@@ -426,20 +427,25 @@ stdout: %s
return result, tfname, msg, end_time - start_time return result, tfname, msg, end_time - start_time
def get_filenames(): def get_filenames(full_test=False):
''' '''
this function will get the sample code files that pending for check. this function will get the sample code files that pending for check.
Args:
full_test: the full apis or the increment
Returns: Returns:
dict: the sample code files pending for check . dict: the sample code files pending for check .
''' '''
global methods # write
global whl_error global whl_error
import paddle import paddle
whl_error = [] whl_error = []
get_incrementapi() if full_test:
get_full_api()
else:
get_incrementapi()
all_sample_code_filenames = {} all_sample_code_filenames = {}
with open(API_DIFF_SPEC_FN) as f: with open(API_DIFF_SPEC_FN) as f:
for line in f.readlines(): for line in f.readlines():
...@@ -472,8 +478,9 @@ def get_api_md5(path): ...@@ -472,8 +478,9 @@ def get_api_md5(path):
api_md5(dict): key is the api's real fullname, value is the md5sum. api_md5(dict): key is the api's real fullname, value is the md5sum.
""" """
api_md5 = {} api_md5 = {}
API_spec = '%s/%s' % (os.path.abspath(os.path.join(os.getcwd(), "..")), API_spec = os.path.abspath(os.path.join(os.getcwd(), "..", path))
path) if not os.path.isfile(API_spec):
return api_md5
pat = re.compile(r'\((paddle[^,]+)\W*document\W*([0-9a-z]{32})') pat = re.compile(r'\((paddle[^,]+)\W*document\W*([0-9a-z]{32})')
patArgSpec = re.compile( patArgSpec = re.compile(
r'^(paddle[^,]+)\s+\(ArgSpec.*document\W*([0-9a-z]{32})') r'^(paddle[^,]+)\s+\(ArgSpec.*document\W*([0-9a-z]{32})')
...@@ -487,6 +494,28 @@ def get_api_md5(path): ...@@ -487,6 +494,28 @@ def get_api_md5(path):
return api_md5 return api_md5
def get_full_api():
    """
    Write the name of every api, one per line, to API_DIFF_SPEC_FN.

    The names are the keys of the mapping returned by
    print_signatures.get_all_api_from_modulelist.
    """
    from print_signatures import get_all_api_from_modulelist

    all_members = get_all_api_from_modulelist()
    with open(API_DIFF_SPEC_FN, 'w') as spec_file:
        spec_file.write("\n".join(all_members))
def get_full_api_by_walk():
    """
    Write the name of every api, one per line, to API_DIFF_SPEC_FN.

    The names are the list returned by print_signatures.get_all_api.
    """
    from print_signatures import get_all_api

    api_names = get_all_api()
    with open(API_DIFF_SPEC_FN, 'w') as spec_file:
        spec_file.write("\n".join(api_names))
def get_incrementapi(): def get_incrementapi():
''' '''
this function will get the apis that difference between API_DEV.spec and API_PR.spec. this function will get the apis that difference between API_DEV.spec and API_PR.spec.
...@@ -526,6 +555,7 @@ def parse_args(): ...@@ -526,6 +555,7 @@ def parse_args():
# help='Use CPU mode (overrides --gpu)') # help='Use CPU mode (overrides --gpu)')
# parser.add_argument('--gpu', dest='gpu_mode', action="store_true") # parser.add_argument('--gpu', dest='gpu_mode', action="store_true")
parser.add_argument('--debug', dest='debug', action="store_true") parser.add_argument('--debug', dest='debug', action="store_true")
parser.add_argument('--full-test', dest='full_test', action="store_true")
parser.add_argument('mode', type=str, help='run on device', default='cpu') parser.add_argument('mode', type=str, help='run on device', default='cpu')
for item in arguments: for item in arguments:
parser.add_argument( parser.add_argument(
...@@ -545,6 +575,8 @@ if __name__ == '__main__': ...@@ -545,6 +575,8 @@ if __name__ == '__main__':
args = parse_args() args = parse_args()
if args.debug: if args.debug:
logger.setLevel(logging.DEBUG) logger.setLevel(logging.DEBUG)
else:
logger.setLevel(logging.INFO)
if args.logf: if args.logf:
logfHandler = logging.FileHandler(args.logf) logfHandler = logging.FileHandler(args.logf)
logfHandler.setFormatter( logfHandler.setFormatter(
...@@ -573,7 +605,7 @@ if __name__ == '__main__': ...@@ -573,7 +605,7 @@ if __name__ == '__main__':
else: else:
os.mkdir(SAMPLECODE_TEMPDIR) os.mkdir(SAMPLECODE_TEMPDIR)
filenames = get_filenames() filenames = get_filenames(args.full_test)
if len(filenames) == 0 and len(whl_error) == 0: if len(filenames) == 0 and len(whl_error) == 0:
logger.info("-----API_PR.spec is the same as API_DEV.spec-----") logger.info("-----API_PR.spec is the same as API_DEV.spec-----")
exit(0) exit(0)
...@@ -593,6 +625,8 @@ if __name__ == '__main__': ...@@ -593,6 +625,8 @@ if __name__ == '__main__':
if not args.debug: if not args.debug:
shutil.rmtree(SAMPLECODE_TEMPDIR) shutil.rmtree(SAMPLECODE_TEMPDIR)
stdout_handler = logging.StreamHandler(stream=sys.stdout)
logger.addHandler(stdout_handler)
logger.info("----------------End of the Check--------------------") logger.info("----------------End of the Check--------------------")
if len(whl_error) != 0: if len(whl_error) != 0:
logger.info("%s is not in whl.", whl_error) logger.info("%s is not in whl.", whl_error)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册