diff --git a/paddlespeech/cli/asr/infer.py b/paddlespeech/cli/asr/infer.py index db1e8341f4f25cd871265b51b86cbbc7e68f112e..8de96476818d749ab8c164543c59a8aafc320c75 100644 --- a/paddlespeech/cli/asr/infer.py +++ b/paddlespeech/cli/asr/infer.py @@ -31,6 +31,7 @@ from ..log import logger from ..utils import cli_register from ..utils import download_and_decompress from ..utils import MODEL_HOME +from ..utils import stats_wrapper from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer from paddlespeech.s2t.transform.transformation import Transformation from paddlespeech.s2t.utils.dynamic_import import dynamic_import @@ -425,6 +426,7 @@ class ASRExecutor(BaseExecutor): logger.exception(e) return False + @stats_wrapper def __call__(self, audio_file: os.PathLike, model: str='conformer_wenetspeech', diff --git a/paddlespeech/cli/cls/infer.py b/paddlespeech/cli/cls/infer.py index c31ad36105e599596fc023b87496f37d993b399c..52bc1972df9b726c068f3a804ef7b0c37be88aca 100644 --- a/paddlespeech/cli/cls/infer.py +++ b/paddlespeech/cli/cls/infer.py @@ -26,6 +26,7 @@ from ..log import logger from ..utils import cli_register from ..utils import download_and_decompress from ..utils import MODEL_HOME +from ..utils import stats_wrapper from paddleaudio import load from paddleaudio.features import LogMelSpectrogram from paddlespeech.s2t.utils.dynamic_import import dynamic_import @@ -245,6 +246,7 @@ class CLSExecutor(BaseExecutor): logger.exception(e) return False + @stats_wrapper def __call__(self, audio_file: os.PathLike, model: str='panns_cnn14', diff --git a/paddlespeech/cli/st/infer.py b/paddlespeech/cli/st/infer.py index 553b025f022b064e936a9312624249106c3c95bb..d6bd6304dfeed14bcb6b94496d381d6f407d2de6 100644 --- a/paddlespeech/cli/st/infer.py +++ b/paddlespeech/cli/st/infer.py @@ -30,6 +30,7 @@ from ..log import logger from ..utils import cli_register from ..utils import download_and_decompress from ..utils import MODEL_HOME +from ..utils import stats_wrapper 
from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer from paddlespeech.s2t.utils.dynamic_import import dynamic_import from paddlespeech.s2t.utils.utility import UpdateConfig @@ -334,6 +335,7 @@ class STExecutor(BaseExecutor): logger.exception(e) return False + @stats_wrapper def __call__(self, audio_file: os.PathLike, model: str='fat_st_ted', diff --git a/paddlespeech/cli/text/infer.py b/paddlespeech/cli/text/infer.py index da9c5fe05710d9262e8ab8461020820720ed481d..1cef8fcfd2919f1dc9ede0baa22b2f1e80eb9d2c 100644 --- a/paddlespeech/cli/text/infer.py +++ b/paddlespeech/cli/text/infer.py @@ -26,6 +26,7 @@ from ..log import logger from ..utils import cli_register from ..utils import download_and_decompress from ..utils import MODEL_HOME +from ..utils import stats_wrapper __all__ = ['TextExecutor'] @@ -272,6 +273,7 @@ class TextExecutor(BaseExecutor): logger.exception(e) return False + @stats_wrapper def __call__( self, text: str, diff --git a/paddlespeech/cli/tts/infer.py b/paddlespeech/cli/tts/infer.py index 75470e89768f4168c37757c0f363f71fb3bf0d6a..d66bc30d90a8289640f524c01c5d1c008793c765 100644 --- a/paddlespeech/cli/tts/infer.py +++ b/paddlespeech/cli/tts/infer.py @@ -29,6 +29,7 @@ from ..log import logger from ..utils import cli_register from ..utils import download_and_decompress from ..utils import MODEL_HOME +from ..utils import stats_wrapper from paddlespeech.s2t.utils.dynamic_import import dynamic_import from paddlespeech.t2s.frontend import English from paddlespeech.t2s.frontend.zh_frontend import Frontend @@ -645,6 +646,7 @@ class TTSExecutor(BaseExecutor): logger.exception(e) return False + @stats_wrapper def __call__(self, text: str, am: str='fastspeech2_csmsc', diff --git a/paddlespeech/cli/utils.py b/paddlespeech/cli/utils.py index ee31b771bd7a0e4cbbb2e5d29fa70ebc8e8a2de4..63b670c863111719c3158d82dc4517bf0dc29d6a 100644 --- a/paddlespeech/cli/utils.py +++ b/paddlespeech/cli/utils.py @@ -11,22 +11,36 @@ # WITHOUT WARRANTIES OR 
def _md5(text: str) -> str:
    '''Return the hexadecimal MD5 digest of ``text`` (encoded with the default codec).'''
    return hashlib.md5(text.encode()).hexdigest()


class ConfigCache:
    '''YAML-backed key/value cache stored at ``<CONF_HOME>/cache.yaml``.

    On construction a fresh default ``cache_info`` value is generated; if a
    cache file already exists and parses to a mapping, its values override the
    defaults. If the file is missing, unreadable as YAML, or does not contain
    a mapping, the current (default) values are written back to disk.
    '''

    def __init__(self):
        self._data = {}
        self._initialize()
        self.file = os.path.join(CONF_HOME, 'cache.yaml')
        if not os.path.exists(self.file):
            self.flush()
            return

        with open(self.file, 'r') as file:
            try:
                # NOTE(review): FullLoader is kept from the original code. The
                # file is written by flush() below, but if it could ever come
                # from an untrusted source, yaml.safe_load would be the safer
                # choice.
                cfg = yaml.load(file, Loader=yaml.FullLoader)
            except Exception:
                # Any parse failure means the cache is corrupt; fall through
                # and regenerate it. (Was a bare ``except:``, which would also
                # have swallowed KeyboardInterrupt / SystemExit.)
                cfg = None

        # The file is closed at this point, so rewriting it is safe.
        if isinstance(cfg, dict):
            self._data.update(cfg)
        else:
            # Empty file or non-mapping document: overwrite with defaults.
            self.flush()

    @property
    def cache_info(self):
        '''The ``cache_info`` identifier string held by this cache.'''
        return self._data['cache_info']

    def _initialize(self):
        '''Populate ``self._data`` with default values.

        ``cache_info`` is the MD5 of the last 12 characters of a ``uuid1``
        string, followed by ``-`` and the current Unix time in whole seconds.
        '''
        cache_info = _md5(str(uuid.uuid1())[-12:]) + "-" + str(int(time.time()))
        self._data['cache_info'] = cache_info

    def flush(self):
        '''Write the current configuration to ``self.file`` as YAML.'''
        with open(self.file, 'w') as file:
            # Round-trip through JSON so only plain built-in types are dumped.
            cfg = json.loads(json.dumps(self._data))
            yaml.dump(cfg, file)


stats_api = "http://paddlepaddle.org.cn/paddlehub/stat"
cache_info = ConfigCache().cache_info


class StatsWorker(threading.Thread):
    '''Background thread that reports one usage record to ``stats_api``.

    Args:
        task: Task name sent as the ``task`` query parameter.
        model: Optional model name; only sent when truthy.
        version: Version string sent as the ``version`` query parameter.
        extra_info: Optional mapping of additional fields. It is copied, so
            the caller's dict is never modified. It is JSON-encoded (together
            with the module-level ``cache_info``) into the ``extra`` parameter.
    '''

    # Seconds to wait for the stats endpoint. Without a timeout a stalled
    # connection would keep this (non-daemon) thread, and therefore the
    # interpreter, alive indefinitely.
    _REQUEST_TIMEOUT = 10

    def __init__(self,
                 task="asr",
                 model=None,
                 version=__version__,
                 extra_info=None):
        super().__init__()
        self._task = task
        self._model = model
        self._version = version
        # Copy instead of sharing: the previous ``extra_info={}`` default was
        # mutated in run(), leaking state between instances.
        self._extra_info = dict(extra_info) if extra_info else {}

    def run(self):
        '''Send the record with a single HTTP GET; all errors are ignored.'''
        params = {
            'task': self._task,
            'version': self._version,
            'from': 'ppspeech'
        }
        if self._model:
            params['model'] = self._model

        extra = {**self._extra_info, 'cache_info': cache_info}
        params['extra'] = json.dumps(extra)

        try:
            requests.get(stats_api, params, timeout=self._REQUEST_TIMEOUT)
        except Exception:
            # Deliberately best-effort: reporting must never surface an error.
            pass


def _note_one_stat(cls_name, params=None):
    '''Build the usage record for one executor call and send it asynchronously.

    Args:
        cls_name: Executor class name such as ``'ASRExecutor'``; the task is
            derived by removing ``'Executor'`` and lower-casing the rest.
        params: Mapping of the executor call's argument names to values, as
            produced by ``_parse_args``. Defaults to an empty mapping.

    Nothing is sent when the derived task is not one of ``asr``, ``st``,
    ``tts``, ``cls`` or ``text``. A missing required key raises ``KeyError``.
    '''
    params = {} if params is None else params
    task = cls_name.replace('Executor', '').lower()  # XXExecutor
    extra_info = {
        'paddle_version': paddle.__version__,
    }

    model = params.get('model')

    # -1 marks an unknown input sample rate. Initialising it here also covers
    # calls without ``audio_file``, where ``sr`` used to be unbound.
    sr = -1
    if 'audio_file' in params:
        try:
            # The second element of the result is used as the sample rate.
            # NOTE(review): this presumably loads the whole file just to read
            # its sample rate — confirm whether a cheaper query exists.
            _, sr = paddleaudio.load(params['audio_file'])
        except Exception:
            sr = -1

    if task == 'asr':
        extra_info.update({
            'lang': params['lang'],
            'inp_sr': sr,
            'model_sr': params['sample_rate'],
        })
    elif task == 'st':
        extra_info.update({
            'lang': params['src_lang'] + '-' + params['tgt_lang'],
            'inp_sr': sr,
            'model_sr': params['sample_rate'],
        })
    elif task == 'tts':
        # For TTS the acoustic-model argument is reported as the model.
        model = params['am']
        extra_info.update({
            'lang': params['lang'],
            'vocoder': params['voc'],
        })
    elif task == 'cls':
        extra_info.update({
            'inp_sr': sr,
        })
    elif task == 'text':
        extra_info.update({
            'sub_task': params['task'],
            'lang': params['lang'],
        })
    else:
        return

    StatsWorker(
        task=task,
        model=model,
        version=__version__,
        extra_info=extra_info, ).start()


def _parse_args(func, *args, **kwargs):
    '''Map a call's arguments onto the parameter names of ``func``.

    Args:
        func: The function whose signature is inspected. A leading ``self``
            parameter is dropped, so ``args`` must not include the instance.
        *args: Positional arguments of the call (without ``self``).
        **kwargs: Keyword arguments of the call.

    Returns:
        A dict from parameter name to value: declared defaults first (``None``
        where a parameter has no default), then overridden by ``args`` in
        order, then by ``kwargs``. Positional arguments beyond the named
        parameters are ignored.
    '''
    # FullArgSpec(args, varargs, varkw, defaults, kwonlyargs, kwonlydefaults, annotations)
    argspec = inspect.getfullargspec(func)

    names = list(argspec.args)
    defaults = argspec.defaults or ()  # ``defaults`` is None when there are none.

    params = dict.fromkeys(names)
    if defaults:
        # Defaults belong to the trailing positional parameters. Align them
        # before ``self`` is removed so the offsets stay correct.
        params.update(zip(names[-len(defaults):], defaults))

    if names and names[0] == 'self':  # Remove self pointer.
        params.pop('self')
        names = names[1:]

    # Keyword-only parameters and their defaults, if any.
    params.update(dict.fromkeys(argspec.kwonlyargs))
    params.update(argspec.kwonlydefaults or {})

    # zip() stops at the shorter sequence, so surplus positionals (``*varargs``)
    # cannot raise IndexError.
    params.update(zip(names, args))
    params.update(kwargs)

    return params


def stats_wrapper(executor_func):
    '''Decorate an executor method so that each call is reported as a usage stat.

    The wrapped method is always called with the original arguments and its
    result returned unchanged; any exception raised while collecting or
    sending the stat is suppressed.
    '''
    import functools  # Local import: only needed here.

    @functools.wraps(executor_func)  # Keep the name, docstring and signature.
    def _wrapper(self, *args, **kwargs):
        try:
            _note_one_stat(
                type(self).__name__,
                _parse_args(executor_func, *args, **kwargs))
        except Exception:
            # Statistics are best-effort and must never break the real call.
            pass
        return executor_func(self, *args, **kwargs)

    return _wrapper
a/paddlespeech/s2t/io/dataloader.py +++ b/paddlespeech/s2t/io/dataloader.py @@ -78,7 +78,8 @@ class BatchDataLoader(): load_aux_input: bool=False, load_aux_output: bool=False, num_encs: int=1, - dist_sampler: bool=False): + dist_sampler: bool=False, + shortest_first: bool=False): self.json_file = json_file self.train_mode = train_mode self.use_sortagrad = sortagrad == -1 or sortagrad > 0 @@ -97,6 +98,7 @@ class BatchDataLoader(): self.load_aux_input = load_aux_input self.load_aux_output = load_aux_output self.dist_sampler = dist_sampler + self.shortest_first = shortest_first # read json data with jsonlines.open(json_file, 'r') as reader: @@ -113,7 +115,7 @@ class BatchDataLoader(): maxlen_out, minibatches, # for debug min_batch_size=mini_batch_size, - shortest_first=self.use_sortagrad, + shortest_first=self.shortest_first or self.use_sortagrad, count=batch_count, batch_bins=batch_bins, batch_frames_in=batch_frames_in, @@ -149,13 +151,13 @@ class BatchDataLoader(): self.reader) if self.dist_sampler: - self.sampler = DistributedBatchSampler( + self.batch_sampler = DistributedBatchSampler( dataset=self.dataset, batch_size=1, shuffle=not self.use_sortagrad if self.train_mode else False, drop_last=False, ) else: - self.sampler = BatchSampler( + self.batch_sampler = BatchSampler( dataset=self.dataset, batch_size=1, shuffle=not self.use_sortagrad if self.train_mode else False, @@ -163,7 +165,7 @@ class BatchDataLoader(): self.dataloader = DataLoader( dataset=self.dataset, - batch_sampler=self.sampler, + batch_sampler=self.batch_sampler, collate_fn=batch_collate, num_workers=self.n_iter_processes, ) @@ -194,5 +196,6 @@ class BatchDataLoader(): echo += f"load_aux_input: {self.load_aux_input}, " echo += f"load_aux_output: {self.load_aux_output}, " echo += f"dist_sampler: {self.dist_sampler}, " + echo += f"shortest_first: {self.shortest_first}, " echo += f"file: {self.json_file}" return echo diff --git a/paddlespeech/s2t/modules/ctc.py b/paddlespeech/s2t/modules/ctc.py index 
ffc9f038736d89224abb0275d3fe24ceb4a3ed71..6e9655799c42f5c2deb41bed873dd88774261ec4 100644 --- a/paddlespeech/s2t/modules/ctc.py +++ b/paddlespeech/s2t/modules/ctc.py @@ -39,10 +39,6 @@ except ImportError: except Exception as e: logger.info("paddlespeech_ctcdecoders not installed!") -#try: -#except Exception as e: -# logger.info("ctcdecoder not installed!") - __all__ = ['CTCDecoder'] diff --git a/paddlespeech/s2t/training/scheduler.py b/paddlespeech/s2t/training/scheduler.py index 0222246e8649bd2b934cc60483e35c9c49224fef..b22f7ef85081032e0fdb370c5883e775b2c64693 100644 --- a/paddlespeech/s2t/training/scheduler.py +++ b/paddlespeech/s2t/training/scheduler.py @@ -67,18 +67,19 @@ class WarmupLR(LRScheduler): super().__init__(learning_rate, last_epoch, verbose) def __repr__(self): - return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps})" + return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps}, lr={self.base_lr}, last_epoch={self.last_epoch})" def get_lr(self): + # self.last_epoch start from zero step_num = self.last_epoch + 1 return self.base_lr * self.warmup_steps**0.5 * min( step_num**-0.5, step_num * self.warmup_steps**-1.5) def set_step(self, step: int=None): ''' - It will update the learning rate in optimizer according to current ``epoch`` . + It will update the learning rate in optimizer according to current ``epoch`` . The new learning rate will take effect on next ``optimizer.step`` . - + Args: step (int, None): specify current epoch. Default: None. Auto-increment from last_epoch=-1. Returns: @@ -94,7 +95,7 @@ class ConstantLR(LRScheduler): learning_rate (float): The initial learning rate. It is a python float number. last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` . - + Returns: ``ConstantLR`` instance to schedule learning rate. 
""" diff --git a/paddlespeech/s2t/training/trainer.py b/paddlespeech/s2t/training/trainer.py index 4b2011eca8ad5e240d4067c43a74a5ab0c2da2ad..cac5e5704421f5ada788fe46c4764e8242d92339 100644 --- a/paddlespeech/s2t/training/trainer.py +++ b/paddlespeech/s2t/training/trainer.py @@ -222,7 +222,7 @@ class Trainer(): batch_sampler = self.train_loader.batch_sampler if isinstance(batch_sampler, paddle.io.DistributedBatchSampler): logger.debug( - f"train_loader.batch_sample set epoch: {self.epoch}") + f"train_loader.batch_sample.set_epoch: {self.epoch}") batch_sampler.set_epoch(self.epoch) def before_train(self):