diff --git a/paddlespeech/cli/README.md b/paddlespeech/cli/README.md index 4cea85b1484ebb499e812423e6ab61f78a06f18f..bd6572f194edb685ddb20f0cad5cb7687591f98a 100644 --- a/paddlespeech/cli/README.md +++ b/paddlespeech/cli/README.md @@ -5,5 +5,5 @@ ## Help `paddlespeech help` - ## S2T - `paddlespeech s2t --config ./s2t.yaml --input ./zh.wav --device gpu` + ## ASR + `paddlespeech asr --input ./test_audio.wav --device gpu` diff --git a/paddlespeech/cli/__init__.py b/paddlespeech/cli/__init__.py index 1cc7e27f58099501282fa6d6ea4373b745b788f7..7e03290412396e68c12aa98a0cdb6696bcccd2bd 100644 --- a/paddlespeech/cli/__init__.py +++ b/paddlespeech/cli/__init__.py @@ -11,6 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from .asr import ASRExecutor from .base_commands import BaseCommand from .base_commands import HelpCommand -from .s2t import S2TExecutor diff --git a/paddlespeech/cli/s2t/__init__.py b/paddlespeech/cli/asr/__init__.py similarity index 95% rename from paddlespeech/cli/s2t/__init__.py rename to paddlespeech/cli/asr/__init__.py index 57e814b9eb792d014108f3c29aad204f98382c99..8ab0991fcda4c5eb9e5bc0c58de0e417c113f4b4 100644 --- a/paddlespeech/cli/s2t/__init__.py +++ b/paddlespeech/cli/asr/__init__.py @@ -11,4 +11,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .infer import S2TExecutor +from .infer import ASRExecutor diff --git a/paddlespeech/cli/s2t/infer.py b/paddlespeech/cli/asr/infer.py similarity index 95% rename from paddlespeech/cli/s2t/infer.py rename to paddlespeech/cli/asr/infer.py index b3507cb60f750fa5b314242ec067c331749e873b..605163803a7e5b3e3b816060d7d37fd804d6a3a8 100644 --- a/paddlespeech/cli/s2t/infer.py +++ b/paddlespeech/cli/asr/infer.py @@ -33,7 +33,7 @@ from paddlespeech.s2t.transform.transformation import Transformation from paddlespeech.s2t.utils.dynamic_import import dynamic_import from paddlespeech.s2t.utils.utility import UpdateConfig -__all__ = ['S2TExecutor'] +__all__ = ['ASRExecutor'] pretrained_models = { "wenetspeech_zh": { @@ -58,13 +58,15 @@ model_alias = { @cli_register( - name='paddlespeech.s2t', description='Speech to text infer command.') -class S2TExecutor(BaseExecutor): + name='paddlespeech.asr', description='Speech to text infer command.') +class ASRExecutor(BaseExecutor): def __init__(self): - super(S2TExecutor, self).__init__() + super(ASRExecutor, self).__init__() self.parser = argparse.ArgumentParser( - prog='paddlespeech.s2t', add_help=True) + prog='paddlespeech.asr', add_help=True) + self.parser.add_argument( + '--input', type=str, required=True, help='Audio file to recognize.') self.parser.add_argument( '--model', type=str, @@ -76,16 +78,12 @@ class S2TExecutor(BaseExecutor): '--config', type=str, default=None, - help='Config of s2t task. Use deault config when it is None.') + help='Config of asr task. Use deault config when it is None.') self.parser.add_argument( '--ckpt_path', type=str, default=None, help='Checkpoint file of model.') - self.parser.add_argument( - '--input', - type=str, - help='Audio file to recognize.') self.parser.add_argument( '--device', type=str, @@ -178,13 +176,12 @@ class S2TExecutor(BaseExecutor): def preprocess(self, input: Union[str, os.PathLike]): """ Input preprocess and return paddle.Tensor stored in self.input. - Input content can be a text(t2s), a file(s2t, cls) or a streaming(not supported yet). + Input content can be a text(tts), a file(asr, cls) or a streaming(not supported yet). """ parser_args = self.parser_args config = self.config audio_file = input - #print("audio_file", audio_file) logger.info("audio_file" + audio_file) self.sr = config.collator.target_sample_rate @@ -290,7 +287,6 @@ class S2TExecutor(BaseExecutor): Command line entry. """ self.parser_args = self.parser.parse_args(argv) - print(self.parser_args) model = self.parser_args.model lang = self.parser_args.lang @@ -301,7 +297,7 @@ class S2TExecutor(BaseExecutor): try: res = self(model, lang, config, ckpt_path, audio_file, device) - print(res) + logger.info('ASR Result: {}'.format(res)) return True except Exception as e: print(e) @@ -314,6 +310,6 @@ class S2TExecutor(BaseExecutor): self._init_from_path(model, lang, config, ckpt_path) self.preprocess(audio_file) self.infer() - res = self.postprocess() # Retrieve result of s2t. + res = self.postprocess() # Retrieve result of asr. return res diff --git a/paddlespeech/cli/entry.py b/paddlespeech/cli/entry.py index 726cff1afd6832ef36c4a3ad7e9d197063e562e3..32123ece750457dac8ca90aff1a8731fea569188 100644 --- a/paddlespeech/cli/entry.py +++ b/paddlespeech/cli/entry.py @@ -23,9 +23,12 @@ def _CommandDict(): def _execute(): com = commands - for idx, _argv in enumerate(['paddlespeech'] + sys.argv[1:]): + + idx = 0 + for _argv in (['paddlespeech'] + sys.argv[1:]): if _argv not in com: break + idx += 1 com = com[_argv] # The method 'execute' of a command instance returns 'True' for a success diff --git a/paddlespeech/cli/executor.py b/paddlespeech/cli/executor.py index 2314bd6d3ad41d88c7e182fc79d2cefc81bbc02b..e307a287b7fa127c747c7d8b23bffbd30a44bb98 100644 --- a/paddlespeech/cli/executor.py +++ b/paddlespeech/cli/executor.py @@ -47,7 +47,7 @@ class BaseExecutor(ABC): def preprocess(self, input: Union[str, os.PathLike]): """ Input preprocess and return paddle.Tensor stored in self.input. - Input content can be a text(t2s), a file(s2t, cls) or a streaming(not supported yet). + Input content can be a text(tts), a file(asr, cls) or a streaming(not supported yet). """ pass diff --git a/paddlespeech/cli/t2s/__init.__py b/paddlespeech/cli/tts/__init.__py similarity index 100% rename from paddlespeech/cli/t2s/__init.__py rename to paddlespeech/cli/tts/__init.__py