From 97794eca9200515bbe4e771ebcf2e048d13500ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=20Wei=20=28=E4=BB=BB=E5=8D=AB=29?= Date: Tue, 27 Apr 2021 13:10:07 +0800 Subject: [PATCH] str in python2 is different from python3's; it causes errors for some APIs' docstrings (#32588) * UnicodeDecodeError: 'ascii' codec can't decode byte 0xe2 in position 1788: ordinal not in range(128) test=document_fix str(doc) in python2 test=document_fix * update md5 function in count_api_without_core_ops.py str in py2 is different. test=document_fix --- tools/count_api_without_core_ops.py | 18 +++++++++++++++--- tools/print_signatures.py | 18 +++++++++++++++--- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/tools/count_api_without_core_ops.py b/tools/count_api_without_core_ops.py index 99e84074158..664b94a059f 100644 --- a/tools/count_api_without_core_ops.py +++ b/tools/count_api_without_core_ops.py @@ -22,6 +22,7 @@ import pydoc import hashlib import six import functools +import platform __all__ = ['get_apis_with_and_without_core_ops', ] @@ -34,9 +35,20 @@ omitted_list = [ def md5(doc): - hash = hashlib.md5() - hash.update(str(doc).encode('utf-8')) - return hash.hexdigest() + try: + hashinst = hashlib.md5() + if platform.python_version()[0] == "2": + hashinst.update(str(doc)) + else: + hashinst.update(str(doc).encode('utf-8')) + md5sum = hashinst.hexdigest() + except UnicodeDecodeError as e: + md5sum = None + print( + "Error({}) occurred when `md5({})`, discard it.".format( + str(e), doc), + file=sys.stderr) + return md5sum def split_with_and_without_core_ops(member, cur_name): diff --git a/tools/print_signatures.py b/tools/print_signatures.py index cfe34fa3426..6de9d84379f 100644 --- a/tools/print_signatures.py +++ b/tools/print_signatures.py @@ -34,9 +34,21 @@ visited_modules = set() def md5(doc): - hash = hashlib.md5() - hash.update(str(doc).encode('utf-8')) - return hash.hexdigest() + try: + hashinst = hashlib.md5() + if platform.python_version()[0] == "2": + 
hashinst.update(str(doc)) + else: + hashinst.update(str(doc).encode('utf-8')) + md5sum = hashinst.hexdigest() + except UnicodeDecodeError as e: + md5sum = None + print( + "Error({}) occurred when `md5({})`, discard it.".format( + str(e), doc), + file=sys.stderr) + + return md5sum def get_functools_partial_spec(func): -- GitLab