diff --git a/paddlespeech/cli/asr/infer.py b/paddlespeech/cli/asr/infer.py index db1e8341f4f25cd871265b51b86cbbc7e68f112e..8de96476818d749ab8c164543c59a8aafc320c75 100644 --- a/paddlespeech/cli/asr/infer.py +++ b/paddlespeech/cli/asr/infer.py @@ -31,6 +31,7 @@ from ..log import logger from ..utils import cli_register from ..utils import download_and_decompress from ..utils import MODEL_HOME +from ..utils import stats_wrapper from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer from paddlespeech.s2t.transform.transformation import Transformation from paddlespeech.s2t.utils.dynamic_import import dynamic_import @@ -425,6 +426,7 @@ class ASRExecutor(BaseExecutor): logger.exception(e) return False + @stats_wrapper def __call__(self, audio_file: os.PathLike, model: str='conformer_wenetspeech', diff --git a/paddlespeech/cli/cls/infer.py b/paddlespeech/cli/cls/infer.py index c31ad36105e599596fc023b87496f37d993b399c..52bc1972df9b726c068f3a804ef7b0c37be88aca 100644 --- a/paddlespeech/cli/cls/infer.py +++ b/paddlespeech/cli/cls/infer.py @@ -26,6 +26,7 @@ from ..log import logger from ..utils import cli_register from ..utils import download_and_decompress from ..utils import MODEL_HOME +from ..utils import stats_wrapper from paddleaudio import load from paddleaudio.features import LogMelSpectrogram from paddlespeech.s2t.utils.dynamic_import import dynamic_import @@ -245,6 +246,7 @@ class CLSExecutor(BaseExecutor): logger.exception(e) return False + @stats_wrapper def __call__(self, audio_file: os.PathLike, model: str='panns_cnn14', diff --git a/paddlespeech/cli/st/infer.py b/paddlespeech/cli/st/infer.py index 553b025f022b064e936a9312624249106c3c95bb..d6bd6304dfeed14bcb6b94496d381d6f407d2de6 100644 --- a/paddlespeech/cli/st/infer.py +++ b/paddlespeech/cli/st/infer.py @@ -30,6 +30,7 @@ from ..log import logger from ..utils import cli_register from ..utils import download_and_decompress from ..utils import MODEL_HOME +from ..utils import stats_wrapper 
from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer from paddlespeech.s2t.utils.dynamic_import import dynamic_import from paddlespeech.s2t.utils.utility import UpdateConfig @@ -334,6 +335,7 @@ class STExecutor(BaseExecutor): logger.exception(e) return False + @stats_wrapper def __call__(self, audio_file: os.PathLike, model: str='fat_st_ted', diff --git a/paddlespeech/cli/text/infer.py b/paddlespeech/cli/text/infer.py index da9c5fe05710d9262e8ab8461020820720ed481d..1cef8fcfd2919f1dc9ede0baa22b2f1e80eb9d2c 100644 --- a/paddlespeech/cli/text/infer.py +++ b/paddlespeech/cli/text/infer.py @@ -26,6 +26,7 @@ from ..log import logger from ..utils import cli_register from ..utils import download_and_decompress from ..utils import MODEL_HOME +from ..utils import stats_wrapper __all__ = ['TextExecutor'] @@ -272,6 +273,7 @@ class TextExecutor(BaseExecutor): logger.exception(e) return False + @stats_wrapper def __call__( self, text: str, diff --git a/paddlespeech/cli/tts/infer.py b/paddlespeech/cli/tts/infer.py index 75470e89768f4168c37757c0f363f71fb3bf0d6a..d66bc30d90a8289640f524c01c5d1c008793c765 100644 --- a/paddlespeech/cli/tts/infer.py +++ b/paddlespeech/cli/tts/infer.py @@ -29,6 +29,7 @@ from ..log import logger from ..utils import cli_register from ..utils import download_and_decompress from ..utils import MODEL_HOME +from ..utils import stats_wrapper from paddlespeech.s2t.utils.dynamic_import import dynamic_import from paddlespeech.t2s.frontend import English from paddlespeech.t2s.frontend.zh_frontend import Frontend @@ -645,6 +646,7 @@ class TTSExecutor(BaseExecutor): logger.exception(e) return False + @stats_wrapper def __call__(self, text: str, am: str='fastspeech2_csmsc', diff --git a/paddlespeech/cli/utils.py b/paddlespeech/cli/utils.py index ee31b771bd7a0e4cbbb2e5d29fa70ebc8e8a2de4..63b670c863111719c3158d82dc4517bf0dc29d6a 100644 --- a/paddlespeech/cli/utils.py +++ b/paddlespeech/cli/utils.py @@ -11,22 +11,36 @@ # WITHOUT WARRANTIES OR 
CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import hashlib +import inspect +import json import os import tarfile +import threading +import time +import uuid import zipfile from typing import Any from typing import Dict +import paddle +import paddleaudio +import requests +import yaml from paddle.framework import load from . import download +from .. import __version__ from .entry import commands +requests.adapters.DEFAULT_RETRIES = 3 + __all__ = [ 'cli_register', 'get_command', 'download_and_decompress', 'load_state_dict_from_url', + 'stats_wrapper', ] @@ -101,6 +115,13 @@ def download_and_decompress(archive: Dict[str, str], path: str) -> os.PathLike: if not os.path.isdir(uncompress_path): download._decompress(filepath) else: + StatsWorker( + task='download', + version=__version__, + extra_info={ + 'download_url': archive['url'], + 'paddle_version': paddle.__version__ + }).start() uncompress_path = download.get_path_from_url(archive['url'], path, archive['md5']) @@ -146,3 +167,171 @@ def _get_sub_home(directory): PPSPEECH_HOME = _get_paddlespcceh_home() MODEL_HOME = _get_sub_home('models') +CONF_HOME = _get_sub_home('conf') + + +def _md5(text: str): + '''Calculate the md5 value of the input text.''' + md5code = hashlib.md5(text.encode()) + return md5code.hexdigest() + + +class ConfigCache: + def __init__(self): + self._data = {} + self._initialize() + self.file = os.path.join(CONF_HOME, 'cache.yaml') + if not os.path.exists(self.file): + self.flush() + return + + with open(self.file, 'r') as file: + try: + cfg = yaml.load(file, Loader=yaml.FullLoader) + self._data.update(cfg) + except: + self.flush() + + @property + def cache_info(self): + return self._data['cache_info'] + + def _initialize(self): + # Set default configuration values. 
class ConfigCache:
    """Small persistent store backed by ``<CONF_HOME>/cache.yaml``.

    On construction a fresh ``cache_info`` identifier is generated. If the
    cache file already exists and parses to a mapping, the stored values
    override the fresh ones; otherwise the fresh values are written out.
    """

    def __init__(self):
        self._data = {}
        self._initialize()
        self.file = os.path.join(CONF_HOME, 'cache.yaml')
        if not os.path.exists(self.file):
            self.flush()
            return

        try:
            with open(self.file, 'r') as file:
                # The file only ever holds plain scalars written by flush(),
                # so the restricted loader is sufficient.
                cfg = yaml.safe_load(file)
        except (OSError, UnicodeDecodeError, yaml.YAMLError):
            cfg = None

        # The read handle is closed at this point, so rewriting the file
        # below never races with our own open descriptor.
        if isinstance(cfg, dict):
            self._data.update(cfg)
        else:
            # Empty, unreadable or malformed cache: replace it with defaults.
            self.flush()

    @property
    def cache_info(self):
        """Identifier string stored under the ``'cache_info'`` key."""
        return self._data['cache_info']

    def _initialize(self):
        """Populate ``self._data`` with default configuration values."""
        # md5 of the last 12 characters of a uuid1 string (the UUID's node
        # field), followed by "-" and the current Unix time in whole seconds.
        cache_info = _md5(str(uuid.uuid1())[-12:]) + "-" + str(
            int(time.time()))
        self._data['cache_info'] = cache_info

    def flush(self):
        '''Flush the current configuration into the configuration file.'''
        # Round-trip through JSON first so that only plain JSON-compatible
        # types reach yaml.dump, and so that a serialisation error cannot
        # leave behind a truncated file.
        cfg = json.loads(json.dumps(self._data))
        with open(self.file, 'w') as file:
            yaml.dump(cfg, file)


# NOTE(review): plain-HTTP endpoint; confirm whether an HTTPS URL is available.
stats_api = "http://paddlepaddle.org.cn/paddlehub/stat"
cache_info = ConfigCache().cache_info


class StatsWorker(threading.Thread):
    """Background thread that sends one usage record to ``stats_api``.

    Args:
        task: Task name reported as the ``task`` query parameter.
        model: Optional model name; omitted from the request when falsy.
        version: Version string reported as the ``version`` query parameter.
        extra_info: Optional mapping of additional fields. It is copied, so
            the caller's dict is never modified. The fields are sent
            JSON-encoded in the ``extra`` query parameter together with the
            module-level ``cache_info``.
    """

    # Seconds to wait for the statistics endpoint. Without a timeout a
    # stalled connection would keep this non-daemon thread, and therefore
    # the interpreter, alive indefinitely.
    _REQUEST_TIMEOUT = 5

    def __init__(self,
                 task="asr",
                 model=None,
                 version=__version__,
                 extra_info=None):
        threading.Thread.__init__(self)
        self._task = task
        self._model = model
        self._version = version
        self._extra_info = dict(extra_info) if extra_info else {}

    def run(self):
        """Send the record; every failure is deliberately ignored."""
        params = {
            'task': self._task,
            'version': self._version,
            'from': 'ppspeech'
        }
        if self._model:
            params['model'] = self._model

        try:
            extra = dict(self._extra_info)
            extra['cache_info'] = cache_info
            params['extra'] = json.dumps(extra)
            requests.get(stats_api, params, timeout=self._REQUEST_TIMEOUT)
        except Exception:
            # Usage statistics are best-effort: a network or serialisation
            # problem must never surface to the user.
            pass
def _note_one_stat(cls_name, params=None):
    """Report one executor invocation through a background ``StatsWorker``.

    Args:
        cls_name: Executor class name such as ``'ASRExecutor'``. The task
            name is this string with ``'Executor'`` removed, lower-cased.
        params: Mapping from the executor call's argument names to their
            values (as produced by ``_parse_args``). ``None`` is treated as
            an empty mapping.

    Raises:
        KeyError: If ``params`` lacks a key that the detected task reports
            (for example ``'lang'`` for the ``asr`` task).
    """
    params = {} if params is None else params
    task = cls_name.replace('Executor', '').lower()  # XXExecutor -> xx
    extra_info = {
        'paddle_version': paddle.__version__,
    }

    model = params.get('model')

    # -1 means "unknown". Initialising it here keeps audio tasks from
    # failing with an unbound name when no 'audio_file' argument is present.
    sr = -1
    if 'audio_file' in params:
        try:
            # NOTE(review): this loads the file only to read the second
            # return value (presumably the sample rate) - confirm whether a
            # cheaper metadata query exists.
            _, sr = paddleaudio.load(params['audio_file'])
        except Exception:
            sr = -1

    if task == 'asr':
        extra_info.update({
            'lang': params['lang'],
            'inp_sr': sr,
            'model_sr': params['sample_rate'],
        })
    elif task == 'st':
        extra_info.update({
            'lang': params['src_lang'] + '-' + params['tgt_lang'],
            'inp_sr': sr,
            'model_sr': params['sample_rate'],
        })
    elif task == 'tts':
        model = params['am']
        extra_info.update({
            'lang': params['lang'],
            'vocoder': params['voc'],
        })
    elif task == 'cls':
        extra_info.update({
            'inp_sr': sr,
        })
    elif task == 'text':
        extra_info.update({
            'sub_task': params['task'],
            'lang': params['lang'],
        })
    else:
        # Unknown executor type: nothing to report.
        return

    StatsWorker(
        task=task,
        model=model,
        version=__version__,
        extra_info=extra_info, ).start()


def _parse_args(func, *args, **kwargs):
    """Map a call's arguments onto the parameter names of ``func``.

    Args:
        func: The function whose signature is inspected. A leading ``self``
            parameter is dropped from the result.
        *args: Positional arguments of the call, excluding ``self``.
        **kwargs: Keyword arguments of the call.

    Returns:
        A dict from parameter name to value. Parameters that were not passed
        take their declared default, or ``None`` when they have no default.
        Keyword-only parameters appear when they have a default or were
        passed explicitly. Positional arguments beyond the named parameters
        (absorbed by ``*varargs``) are ignored.
    """
    # FullArgSpec(args, varargs, varkw, defaults, kwonlyargs, kwonlydefaults, annotations)
    argspec = inspect.getfullargspec(func)

    names = list(argspec.args)
    # `defaults` is None, not an empty tuple, when nothing has a default.
    defaults = argspec.defaults or ()

    params = dict.fromkeys(names)
    # Defaults always belong to the trailing positional parameters.
    params.update(zip(names[len(names) - len(defaults):], defaults))

    if names and names[0] == 'self':  # Remove self pointer.
        params.pop('self')
        names = names[1:]

    if argspec.kwonlydefaults:
        params.update(argspec.kwonlydefaults)

    # zip() stops at the shorter sequence, so surplus positionals are skipped
    # instead of raising IndexError.
    params.update(zip(names, args))
    params.update(kwargs)

    return params


def stats_wrapper(executor_func):
    """Decorate an executor method so that each call is reported as a stat.

    Reporting is best-effort: any exception raised while collecting or
    sending the statistics is suppressed (and logged at DEBUG level), and
    ``executor_func`` is then called with the original arguments.

    Args:
        executor_func: The method to wrap; its first parameter is ``self``.

    Returns:
        A wrapper that keeps the name, docstring and signature metadata of
        ``executor_func``.
    """
    # Imported here so this block does not depend on the module's import list.
    import functools
    import logging

    @functools.wraps(executor_func)
    def _wrapper(self, *args, **kwargs):
        try:
            _note_one_stat(
                type(self).__name__,
                _parse_args(executor_func, *args, **kwargs))
        except Exception:
            # Statistics must never break the actual inference call.
            logging.getLogger(__name__).debug(
                'usage statistics skipped', exc_info=True)
        return executor_func(self, *args, **kwargs)

    return _wrapper