From a810cd4e5cec177f76535d7d7537030625e5e7a0 Mon Sep 17 00:00:00 2001
From: KP <109694228@qq.com>
Date: Thu, 6 Jan 2022 15:32:36 +0800
Subject: [PATCH] Add cli logging. (#1274)

---
 paddlespeech/cli/asr/infer.py  |   2 +
 paddlespeech/cli/cls/infer.py  |   2 +
 paddlespeech/cli/st/infer.py   |   2 +
 paddlespeech/cli/text/infer.py |   2 +
 paddlespeech/cli/tts/infer.py  |   2 +
 paddlespeech/cli/utils.py      | 189 +++++++++++++++++++++++++++++++++
 6 files changed, 199 insertions(+)

diff --git a/paddlespeech/cli/asr/infer.py b/paddlespeech/cli/asr/infer.py
index db1e8341..8de96476 100644
--- a/paddlespeech/cli/asr/infer.py
+++ b/paddlespeech/cli/asr/infer.py
@@ -31,6 +31,7 @@ from ..log import logger
 from ..utils import cli_register
 from ..utils import download_and_decompress
 from ..utils import MODEL_HOME
+from ..utils import stats_wrapper
 from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
 from paddlespeech.s2t.transform.transformation import Transformation
 from paddlespeech.s2t.utils.dynamic_import import dynamic_import
@@ -425,6 +426,7 @@ class ASRExecutor(BaseExecutor):
             logger.exception(e)
             return False
 
+    @stats_wrapper
     def __call__(self,
                  audio_file: os.PathLike,
                  model: str='conformer_wenetspeech',
diff --git a/paddlespeech/cli/cls/infer.py b/paddlespeech/cli/cls/infer.py
index c31ad361..52bc1972 100644
--- a/paddlespeech/cli/cls/infer.py
+++ b/paddlespeech/cli/cls/infer.py
@@ -26,6 +26,7 @@ from ..log import logger
 from ..utils import cli_register
 from ..utils import download_and_decompress
 from ..utils import MODEL_HOME
+from ..utils import stats_wrapper
 from paddleaudio import load
 from paddleaudio.features import LogMelSpectrogram
 from paddlespeech.s2t.utils.dynamic_import import dynamic_import
@@ -245,6 +246,7 @@ class CLSExecutor(BaseExecutor):
             logger.exception(e)
             return False
 
+    @stats_wrapper
     def __call__(self,
                  audio_file: os.PathLike,
                  model: str='panns_cnn14',
diff --git a/paddlespeech/cli/st/infer.py b/paddlespeech/cli/st/infer.py
index 553b025f..d6bd6304 100644
--- a/paddlespeech/cli/st/infer.py
+++ b/paddlespeech/cli/st/infer.py
@@ -30,6 +30,7 @@ from ..log import logger
 from ..utils import cli_register
 from ..utils import download_and_decompress
 from ..utils import MODEL_HOME
+from ..utils import stats_wrapper
 from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
 from paddlespeech.s2t.utils.dynamic_import import dynamic_import
 from paddlespeech.s2t.utils.utility import UpdateConfig
@@ -334,6 +335,7 @@ class STExecutor(BaseExecutor):
             logger.exception(e)
             return False
 
+    @stats_wrapper
     def __call__(self,
                  audio_file: os.PathLike,
                  model: str='fat_st_ted',
diff --git a/paddlespeech/cli/text/infer.py b/paddlespeech/cli/text/infer.py
index da9c5fe0..1cef8fcf 100644
--- a/paddlespeech/cli/text/infer.py
+++ b/paddlespeech/cli/text/infer.py
@@ -26,6 +26,7 @@ from ..log import logger
 from ..utils import cli_register
 from ..utils import download_and_decompress
 from ..utils import MODEL_HOME
+from ..utils import stats_wrapper
 
 __all__ = ['TextExecutor']
 
@@ -272,6 +273,7 @@ class TextExecutor(BaseExecutor):
             logger.exception(e)
             return False
 
+    @stats_wrapper
     def __call__(
             self,
             text: str,
diff --git a/paddlespeech/cli/tts/infer.py b/paddlespeech/cli/tts/infer.py
index 75470e89..d66bc30d 100644
--- a/paddlespeech/cli/tts/infer.py
+++ b/paddlespeech/cli/tts/infer.py
@@ -29,6 +29,7 @@ from ..log import logger
 from ..utils import cli_register
 from ..utils import download_and_decompress
 from ..utils import MODEL_HOME
+from ..utils import stats_wrapper
 from paddlespeech.s2t.utils.dynamic_import import dynamic_import
 from paddlespeech.t2s.frontend import English
 from paddlespeech.t2s.frontend.zh_frontend import Frontend
@@ -645,6 +646,7 @@ class TTSExecutor(BaseExecutor):
             logger.exception(e)
             return False
 
+    @stats_wrapper
     def __call__(self,
                  text: str,
                  am: str='fastspeech2_csmsc',
diff --git a/paddlespeech/cli/utils.py b/paddlespeech/cli/utils.py
index ee31b771..63b670c8 100644
--- a/paddlespeech/cli/utils.py
+++ b/paddlespeech/cli/utils.py
@@ -11,22 +11,36 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import hashlib
+import inspect
+import json
 import os
 import tarfile
+import threading
+import time
+import uuid
 import zipfile
 from typing import Any
 from typing import Dict
 
+import paddle
+import paddleaudio
+import requests
+import yaml
 from paddle.framework import load
 
 from . import download
+from .. import __version__
 from .entry import commands
 
+requests.adapters.DEFAULT_RETRIES = 3
+
 __all__ = [
     'cli_register',
     'get_command',
     'download_and_decompress',
     'load_state_dict_from_url',
+    'stats_wrapper',
 ]
 
 
@@ -101,6 +115,13 @@ def download_and_decompress(archive: Dict[str, str], path: str) -> os.PathLike:
         if not os.path.isdir(uncompress_path):
             download._decompress(filepath)
     else:
+        StatsWorker(
+            task='download',
+            version=__version__,
+            extra_info={
+                'download_url': archive['url'],
+                'paddle_version': paddle.__version__
+            }).start()
         uncompress_path = download.get_path_from_url(archive['url'], path,
                                                      archive['md5'])
 
@@ -146,3 +167,171 @@ def _get_sub_home(directory):
 
 PPSPEECH_HOME = _get_paddlespcceh_home()
 MODEL_HOME = _get_sub_home('models')
+CONF_HOME = _get_sub_home('conf')
+
+
+def _md5(text: str):
+    '''Calculate the md5 value of the input text.'''
+    md5code = hashlib.md5(text.encode())
+    return md5code.hexdigest()
+
+
+class ConfigCache:
+    def __init__(self):
+        self._data = {}
+        self._initialize()
+        self.file = os.path.join(CONF_HOME, 'cache.yaml')
+        if not os.path.exists(self.file):
+            self.flush()
+            return
+
+        with open(self.file, 'r') as file:
+            try:
+                cfg = yaml.load(file, Loader=yaml.FullLoader)
+                self._data.update(cfg)
+            except:
+                self.flush()
+
+    @property
+    def cache_info(self):
+        return self._data['cache_info']
+
+    def _initialize(self):
+        # Set default configuration values.
+        cache_info = _md5(str(uuid.uuid1())[-12:]) + "-" + str(int(time.time()))
+        self._data['cache_info'] = cache_info
+
+    def flush(self):
+        '''Flush the current configuration into the configuration file.'''
+        with open(self.file, 'w') as file:
+            cfg = json.loads(json.dumps(self._data))
+            yaml.dump(cfg, file)
+
+
+stats_api = "http://paddlepaddle.org.cn/paddlehub/stat"
+cache_info = ConfigCache().cache_info
+
+
+class StatsWorker(threading.Thread):
+    def __init__(self,
+                 task="asr",
+                 model=None,
+                 version=__version__,
+                 extra_info={}):
+        threading.Thread.__init__(self)
+        self._task = task
+        self._model = model
+        self._version = version
+        self._extra_info = extra_info
+
+    def run(self):
+        params = {
+            'task': self._task,
+            'version': self._version,
+            'from': 'ppspeech'
+        }
+        if self._model:
+            params['model'] = self._model
+
+        self._extra_info.update({
+            'cache_info': cache_info,
+        })
+        params.update({"extra": json.dumps(self._extra_info)})
+
+        try:
+            requests.get(stats_api, params)
+        except Exception:
+            pass
+
+        return
+
+
+def _note_one_stat(cls_name, params={}):
+    task = cls_name.replace('Executor', '').lower()  # XXExecutor
+    extra_info = {
+        'paddle_version': paddle.__version__,
+    }
+
+    if 'model' in params:
+        model = params['model']
+    else:
+        model = None
+
+    if 'audio_file' in params:
+        try:
+            _, sr = paddleaudio.load(params['audio_file'])
+        except Exception:
+            sr = -1
+
+    if task == 'asr':
+        extra_info.update({
+            'lang': params['lang'],
+            'inp_sr': sr,
+            'model_sr': params['sample_rate'],
+        })
+    elif task == 'st':
+        extra_info.update({
+            'lang':
+            params['src_lang'] + '-' + params['tgt_lang'],
+            'inp_sr':
+            sr,
+            'model_sr':
+            params['sample_rate'],
+        })
+    elif task == 'tts':
+        model = params['am']
+        extra_info.update({
+            'lang': params['lang'],
+            'vocoder': params['voc'],
+        })
+    elif task == 'cls':
+        extra_info.update({
+            'inp_sr': sr,
+        })
+    elif task == 'text':
+        extra_info.update({
+            'sub_task': params['task'],
+            'lang': params['lang'],
+        })
+    else:
+        return
+
+    StatsWorker(
+        task=task,
+        model=model,
+        version=__version__,
+        extra_info=extra_info, ).start()
+
+
+def _parse_args(func, *args, **kwargs):
+    # FullArgSpec(args, varargs, varkw, defaults, kwonlyargs, kwonlydefaults, annotations)
+    argspec = inspect.getfullargspec(func)
+
+    keys = argspec[0]
+    if keys[0] == 'self':  # Remove self pointer.
+        keys = keys[1:]
+
+    default_values = argspec[3]
+    values = [None] * (len(keys) - len(default_values))
+    values.extend(list(default_values))
+    params = dict(zip(keys, values))
+
+    for idx, v in enumerate(args):
+        params[keys[idx]] = v
+    for k, v in kwargs.items():
+        params[k] = v
+
+    return params
+
+
+def stats_wrapper(executor_func):
+    def _warpper(self, *args, **kwargs):
+        try:
+            _note_one_stat(
+                type(self).__name__, _parse_args(executor_func, *args,
+                                                 **kwargs))
+        except Exception:
+            pass
+        return executor_func(self, *args, **kwargs)
+
+    return _warpper
-- 
GitLab
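
Note (not part of the patch): the snippet below is a minimal, self-contained sketch of the decorator pattern this patch introduces in paddlespeech/cli/utils.py. The wrapper inspects the decorated __call__ signature, merges the declared defaults with the actual call arguments, and reports the resulting parameter dict, while any reporting failure is swallowed so the real call is never affected. The names parse_call_args, stats_wrapper_demo and DummyExecutor are illustrative only, and print() stands in for the HTTP reporting done by the real stats_wrapper.

import inspect


def parse_call_args(func, *args, **kwargs):
    # Map positional and keyword arguments onto parameter names,
    # starting from the declared defaults (mirrors _parse_args above).
    spec = inspect.getfullargspec(func)
    keys = spec.args[1:] if spec.args and spec.args[0] == 'self' else spec.args
    defaults = list(spec.defaults or ())
    values = [None] * (len(keys) - len(defaults)) + defaults
    params = dict(zip(keys, values))
    params.update(zip(keys, args))
    params.update(kwargs)
    return params


def stats_wrapper_demo(executor_func):
    # Report usage first, then run the real call; reporting errors are
    # swallowed so they cannot propagate to the caller.
    def wrapper(self, *args, **kwargs):
        try:
            print(type(self).__name__,
                  parse_call_args(executor_func, *args, **kwargs))
        except Exception:
            pass
        return executor_func(self, *args, **kwargs)

    return wrapper


class DummyExecutor:
    @stats_wrapper_demo
    def __call__(self, audio_file, model='conformer_wenetspeech', lang='zh'):
        return 'transcribed: ' + audio_file


print(DummyExecutor()('test.wav', lang='en'))
# Expected output:
# DummyExecutor {'audio_file': 'test.wav', 'model': 'conformer_wenetspeech', 'lang': 'en'}
# transcribed: test.wav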