diff --git a/paddlespeech/cli/README.md b/paddlespeech/cli/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4cea85b1484ebb499e812423e6ab61f78a06f18f --- /dev/null +++ b/paddlespeech/cli/README.md @@ -0,0 +1,9 @@ +# PaddleSpeech Command Line + + The simplest approach to use PaddleSpeech models. + + ## Help + `paddlespeech help` + + ## S2T + `paddlespeech s2t --config ./s2t.yaml --input ./zh.wav --device gpu` diff --git a/paddlespeech/cli/__init__.py b/paddlespeech/cli/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1cc7e27f58099501282fa6d6ea4373b745b788f7 --- /dev/null +++ b/paddlespeech/cli/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .base_commands import BaseCommand +from .base_commands import HelpCommand +from .s2t import S2TExecutor diff --git a/paddlespeech/cli/base_commands.py b/paddlespeech/cli/base_commands.py new file mode 100644 index 0000000000000000000000000000000000000000..97d5cd7fa3ad30ee2338b50cfd5123fe4cd99d05 --- /dev/null +++ b/paddlespeech/cli/base_commands.py @@ -0,0 +1,49 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
@cli_register(name='paddlespeech')
class BaseCommand:
    """
    Command registered under the bare ``paddlespeech`` name.

    It has no behavior of its own: it looks up the registered help command
    and forwards the arguments to it.
    """

    def execute(self, argv: List[str]) -> bool:
        """
        Run the help command with ``argv`` and return its result.
        """
        help_command = get_command('paddlespeech.help')
        return help_command().execute(argv)


@cli_register(name='paddlespeech.help', description='Show help for commands.')
class HelpCommand:
    """
    Prints a usage summary listing every described sub-command.
    """

    def execute(self, argv: List[str]) -> bool:
        """
        Print the usage text; ``argv`` is accepted but not inspected.

        Registry keys starting with ``_`` are metadata slots, and entries
        without a ``_description`` are not listed.

        Returns:
            Always ``True``.
        """
        pieces = ['Usage:\n', ' paddlespeech \n\n', 'Commands:\n']
        for command, detail in commands['paddlespeech'].items():
            # The short-circuit keeps the membership test away from metadata
            # values (e.g. the class stored under '_entry').
            if command.startswith('_') or '_description' not in detail:
                continue
            pieces.append(' {:<15} {}\n'.format(command,
                                                detail['_description']))

        print(''.join(pieces))
        return True
def _CommandDict():
    """Return a nested ``defaultdict`` whose missing keys become new sub-trees.

    The command registry is a tree of these: each dotted name component is a
    key, and the ``_entry`` / ``_description`` keys hold per-command metadata.
    """
    return defaultdict(_CommandDict)


def _execute():
    """Console-script entry point: dispatch ``sys.argv`` to a command.

    The leading tokens of ``['paddlespeech'] + sys.argv[1:]`` are matched
    against the registry tree for as long as they name registered commands.
    The deepest matched command is instantiated and its ``execute`` receives
    only the arguments that were NOT consumed by the match.

    Returns:
        int: shell exit status, 0 when the command's ``execute`` returns a
        truthy value and 1 otherwise (including when no executable command is
        registered at the matched position).
    """
    node = commands
    # Count matched tokens explicitly. Reusing the loop index is off by one
    # when the loop finishes without ``break`` (every token matched), which
    # would leak the sub-command name itself into the command's arguments.
    consumed = 0
    for token in ['paddlespeech'] + sys.argv[1:]:
        # Keys starting with '_' are metadata slots, never sub-commands.
        if token.startswith('_') or token not in node:
            break
        node = node[token]
        consumed += 1

    # ``get`` avoids auto-creating (and then calling) an empty sub-tree.
    entry = node.get('_entry')
    if entry is None:
        print('paddlespeech: no executable command found.', file=sys.stderr)
        return 1

    # The method 'execute' of a command instance returns 'True' for a success
    # while 'False' for a failure. Here converts this result into a exit status
    # in bash: 0 for a success and 1 for a failure.
    status = 0 if entry().execute(sys.argv[consumed:]) else 1
    return status


commands = _CommandDict()
class BaseExecutor(ABC):
    """
    Abstract interface shared by paddlespeech task executors.

    Subclasses provide the five hooks declared below. ``self.input`` and
    ``self.output`` both start as ``None``; according to the hook contracts
    they are meant to hold the preprocessed input and the inference result.
    """

    def __init__(self):
        self.output = None
        self.input = None

    @abstractmethod
    def _get_default_cfg_path(self):
        """
        Return the path of the default config file for the current task.
        """

    @abstractmethod
    def _init_from_cfg(self, cfg_path: Optional[os.PathLike] = None):
        """
        Initialise the model from the config file at ``cfg_path``.
        """

    @abstractmethod
    def preprocess(self, input: Union[str, os.PathLike]):
        """
        Turn the raw input into a paddle.Tensor kept in ``self.input``.

        The input may be a text (t2s) or a file (s2t, cls); streaming input
        is not supported yet.
        """

    @paddle.no_grad()
    @abstractmethod
    def infer(self, device: str):
        """
        Run model inference and keep the result in ``self.output``.
        """

    @abstractmethod
    def postprocess(self) -> Union[str, os.PathLike]:
        """
        Convert the output into a human-readable result, such as a text or
        an audio file, and return it.
        """
@cli_register(
    name='paddlespeech.s2t', description='Speech to text infer command.')
class S2TExecutor(BaseExecutor):
    """
    Command-line executor of the speech-to-text (s2t) task.

    ``execute`` parses the command-line arguments and drives the
    init -> preprocess -> infer -> postprocess pipeline. The pipeline hooks
    themselves are still placeholders that do nothing.
    """

    def __init__(self):
        super().__init__()

        self.parser = argparse.ArgumentParser(
            prog='paddlespeech.s2t', add_help=True)
        self.parser.add_argument(
            '--config',
            type=str,
            default=None,
            help='Config of s2t task. Use default config when it is None.')
        # The audio file is the only mandatory piece of information; let
        # argparse reject a missing value instead of passing None downstream.
        self.parser.add_argument(
            '--input',
            type=str,
            required=True,
            help='Audio file to recognize.')
        self.parser.add_argument(
            '--device',
            type=str,
            default='cpu',
            help='Choose device to execute model inference.')

    def _get_default_cfg_path(self):
        """
        Returns a default config file path of current task.

        Placeholder: currently returns None.
        """
        pass

    def _init_from_cfg(self, cfg_path: Optional[os.PathLike]=None):
        """
        Init model from a specific config file.

        Placeholder: currently does nothing.
        """
        pass

    def preprocess(self, input: Union[str, os.PathLike]):
        """
        Input preprocess and return paddle.Tensor stored in self.input.
        For s2t the input is an audio file; streaming is not supported yet.

        Placeholder: currently does nothing.
        """
        pass

    @paddle.no_grad()
    def infer(self, device: str='cpu'):
        """
        Model inference and result stored in self.output.

        Args:
            device: Device to run inference on. The parameter mirrors the
                abstract ``BaseExecutor.infer(device)``; it defaults to
                'cpu' so existing ``infer()`` calls keep working.

        Placeholder: currently does nothing.
        """
        pass

    def postprocess(self) -> Union[str, os.PathLike]:
        """
        Output postprocess and return human-readable results such as texts and audio files.

        Placeholder: currently returns None.
        """
        pass

    def execute(self, argv: List[str]) -> bool:
        """
        Parse ``argv`` and run the s2t pipeline.

        Args:
            argv: Command-line arguments following ``paddlespeech s2t``.

        Returns:
            True when the pipeline completed, False when the config file is
            invalid or any pipeline step raised.
        """
        parser_args = self.parser.parse_args(argv)
        print(parser_args)

        config = parser_args.config
        audio_file = parser_args.input
        device = parser_args.device

        if config is None:
            config = self._get_default_cfg_path()
        elif not os.path.isfile(config):
            # Report through the return value (the documented contract)
            # rather than an assert, which is stripped under ``python -O``.
            print('Config file is not valid.')
            return False

        # Top-level CLI boundary: any failure becomes a message plus False so
        # the caller can turn it into an exit status.
        try:
            self._init_from_cfg(config)
            self.preprocess(audio_file)
            self.infer(device)
            res = self.postprocess()  # Retrieve result of s2t.
            print(res)
            return True
        except Exception as e:
            print(e)
            return False
def download_and_decompress(archives: List[Dict[str, str]], path: str):
    """
    Download archives and decompress to specific path.

    Args:
        archives: Dictionaries that must each provide the keys 'url' and
            'md5'.
        path: Target directory; created when it does not exist.

    Raises:
        AssertionError: If an archive lacks 'url' or 'md5'. All archives are
            validated before anything is downloaded.
    """
    # Validate everything up front so a malformed entry cannot fail the call
    # after earlier archives were already fetched. Raised explicitly (same
    # exception type as before) so the check survives ``python -O``.
    for archive in archives:
        if 'url' not in archive or 'md5' not in archive:
            raise AssertionError(
                'Dictionary keys of "url" and "md5" are required in the '
                'archive, but got: {}'.format(list(archive.keys())))

    os.makedirs(path, exist_ok=True)

    for archive in archives:
        download.get_path_from_url(archive['url'], path, archive['md5'])


def load_state_dict_from_url(url: str, path: str, md5: str=None):
    """
    Download and load a state dict from url

    Args:
        url: Location of the state dict file.
        path: Directory the file is downloaded into; created when missing.
        md5: Optional checksum forwarded to the downloader.

    Returns:
        The object loaded from ``path`` joined with the basename of ``url``.
    """
    os.makedirs(path, exist_ok=True)

    download.get_path_from_url(url, path, md5)
    return load(os.path.join(path, os.path.basename(url)))