diff --git a/demos/speech_server/README.md b/demos/speech_server/README.md index 515abaf66975ebf660500c7e7c535a63ef31054b..a2f6f221320430aa9f5c0ede5265a836033060db 100644 --- a/demos/speech_server/README.md +++ b/demos/speech_server/README.md @@ -11,21 +11,14 @@ This demo is an implementation of starting the voice service and accessing the s see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). It is recommended to use **paddlepaddle 2.2.1** or above. -You can choose one way from easy, meduim and hard to install paddlespeech. +You can choose one way from medium and hard to install paddlespeech. ### 2. Prepare config File -The configuration file contains the service-related configuration files and the model configuration related to the voice tasks contained in the service. They are all under the `conf` folder. +The configuration file can be found in `conf/application.yaml` . +Among them, `engine_list` indicates the speech engine that will be included in the service to be started, in the format of `<speech task>_<engine type>`. +At present, the speech tasks integrated by the service include: asr (speech recognition) and tts (speech synthesis). +Currently the engine type supports two forms: python and inference (Paddle Inference) -**Note: The configuration of `engine_backend` in `application.yaml` represents all speech tasks included in the started service.** -If the service you want to start contains only a certain speech task, then you need to comment out the speech tasks that do not need to be included. 
For example, if you only want to use the speech recognition (ASR) service, then you can comment out the speech synthesis (TTS) service, as in the following example: -```bash -engine_backend: - asr: 'conf/asr/asr.yaml' - #tts: 'conf/tts/tts.yaml' -``` - -**Note: The configuration file of `engine_backend` in `application.yaml` needs to match the configuration type of `engine_type`.** -When the configuration file of `engine_backend` is `XXX.yaml`, the configuration type of `engine_type` needs to be set to `python`; when the configuration file of `engine_backend` is `XXX_pd.yaml`, the configuration of `engine_type` needs to be set type is `inference`; The input of ASR client demo should be a WAV file(`.wav`), and the sample rate must be the same as the model. diff --git a/demos/speech_server/README_cn.md b/demos/speech_server/README_cn.md index da05b686e04e059358579b7582645b798e3514e7..762248a117f26786f44d062aef751d5a9b0549f3 100644 --- a/demos/speech_server/README_cn.md +++ b/demos/speech_server/README_cn.md @@ -11,20 +11,15 @@ 请看 [安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). 推荐使用 **paddlepaddle 2.2.1** 或以上版本。 -你可以从 easy,medium,hard 三中方式中选择一种方式安装 PaddleSpeech。 +你可以从 medium,hard 两种方式中选择一种方式安装 PaddleSpeech。 ### 2. 
准备配置文件 -配置文件包含服务相关的配置文件和服务中包含的语音任务相关的模型配置。 它们都在 `conf` 文件夹下。 -**注意:`application.yaml` 中 `engine_backend` 的配置表示启动的服务中包含的所有语音任务。** -如果你想启动的服务中只包含某项语音任务,那么你需要注释掉不需要包含的语音任务。例如你只想使用语音识别(ASR)服务,那么你可以将语音合成(TTS)服务注释掉,如下示例: -```bash -engine_backend: - asr: 'conf/asr/asr.yaml' - #tts: 'conf/tts/tts.yaml' -``` -**注意:`application.yaml` 中 `engine_backend` 的配置文件需要和 `engine_type` 的配置类型匹配。** -当`engine_backend` 的配置文件为`XXX.yaml`时,需要设置`engine_type`的配置类型为`python`;当`engine_backend` 的配置文件为`XXX_pd.yaml`时,需要设置`engine_type`的配置类型为`inference`; +配置文件可参见 `conf/application.yaml` 。 +其中,`engine_list`表示即将启动的服务将会包含的语音引擎,格式为 <语音任务>_<引擎类型>。 +目前服务集成的语音任务有: asr(语音识别)、tts(语音合成)。 +目前引擎类型支持两种形式:python 及 inference (Paddle Inference) + 这个 ASR client 的输入应该是一个 WAV 文件(`.wav`),并且采样率必须与模型的采样率相同。 diff --git a/demos/speech_server/conf/application.yaml b/demos/speech_server/conf/application.yaml index aba33a514366267f7bd97c8caff4fb3f69fe4030..6048450b7ba5ed928c39ad7ebad20354d7dd8442 100644 --- a/demos/speech_server/conf/application.yaml +++ b/demos/speech_server/conf/application.yaml @@ -1,27 +1,107 @@ # This is the parameter configuration file for PaddleSpeech Serving. -################################################################## -# SERVER SETTING # -################################################################## +################################################################################# +# SERVER SETTING # +################################################################################# host: 127.0.0.1 port: 8090 -################################################################## -# CONFIG FILE # -################################################################## -# add engine backend type (Options: asr, tts) and config file here. -# Adding a speech task to engine_backend means starting the service. -engine_backend: - asr: 'conf/asr/asr.yaml' - tts: 'conf/tts/tts.yaml' - -# The engine_type of speech task needs to keep the same type as the config file of speech task. 
-# E.g: The engine_type of asr is 'python', the engine_backend of asr is 'XX/asr.yaml' -# E.g: The engine_type of asr is 'inference', the engine_backend of asr is 'XX/asr_pd.yaml' -# -# add engine type (Options: python, inference) -engine_type: - asr: 'python' - tts: 'python' +# The task format in the engine_list is: <speech task>_<engine type> +# task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference'] +engine_list: ['asr_python', 'tts_python'] + + +################################################################################# +# ENGINE CONFIG # +################################################################################# +################### speech task: asr; engine_type: python ####################### +asr_python: + model: 'conformer_wenetspeech' + lang: 'zh' + sample_rate: 16000 + cfg_path: # [optional] + ckpt_path: # [optional] + decode_method: 'attention_rescoring' + force_yes: True + device: # set 'gpu:id' or 'cpu' + + +################### speech task: asr; engine_type: inference ####################### +asr_inference: + # model_type choices=['deepspeech2offline_aishell'] + model_type: 'deepspeech2offline_aishell' + am_model: # the pdmodel file of am static model [optional] + am_params: # the pdiparams file of am static model [optional] + lang: 'zh' + sample_rate: 16000 + cfg_path: + decode_method: + force_yes: True + + am_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + +################### speech task: tts; engine_type: python ####################### +tts_python: + # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc', + # 'fastspeech2_ljspeech', 'fastspeech2_aishell3', + # 'fastspeech2_vctk'] + am: 'fastspeech2_csmsc' + am_config: + am_ckpt: + am_stat: + phones_dict: + tones_dict: + speaker_dict: + spk_id: 0 + + # voc (vocoder) choices=['pwgan_csmsc', 'pwgan_ljspeech', 
'pwgan_aishell3', + # 'pwgan_vctk', 
'mb_melgan_csmsc'] + voc: 'pwgan_csmsc' + voc_config: + voc_ckpt: + voc_stat: + + # others + lang: 'zh' + device: # set 'gpu:id' or 'cpu' + + +################### speech task: tts; engine_type: inference ####################### +tts_inference: + # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc'] + am: 'fastspeech2_csmsc' + am_model: # the pdmodel file of your am static model (XX.pdmodel) + am_params: # the pdiparams file of your am static model (XX.pdipparams) + am_sample_rate: 24000 + phones_dict: + tones_dict: + speaker_dict: + spk_id: 0 + + am_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + # voc (vocoder) choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc'] + voc: 'pwgan_csmsc' + voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel) + voc_params: # the pdiparams file of your vocoder static model (XX.pdipparams) + voc_sample_rate: 24000 + + voc_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + # others + lang: 'zh' diff --git a/demos/speech_server/conf/asr/asr.yaml b/demos/speech_server/conf/asr/asr.yaml deleted file mode 100644 index a6743b77513e504f2bcd374ea8235d8e39a7c98c..0000000000000000000000000000000000000000 --- a/demos/speech_server/conf/asr/asr.yaml +++ /dev/null @@ -1,8 +0,0 @@ -model: 'conformer_wenetspeech' -lang: 'zh' -sample_rate: 16000 -cfg_path: # [optional] -ckpt_path: # [optional] -decode_method: 'attention_rescoring' -force_yes: True -device: # set 'gpu:id' or 'cpu' diff --git a/demos/speech_server/conf/asr/asr_pd.yaml b/demos/speech_server/conf/asr/asr_pd.yaml deleted file mode 100644 index 4c415ac791edeab2d9832e8db2e9a66411aaed06..0000000000000000000000000000000000000000 --- a/demos/speech_server/conf/asr/asr_pd.yaml +++ /dev/null @@ -1,26 +0,0 @@ 
-# This is the parameter configuration file for ASR server. -# These are the static models that support paddle inference. - -################################################################## -# ACOUSTIC MODEL SETTING # -# am choices=['deepspeech2offline_aishell'] TODO -################################################################## -model_type: 'deepspeech2offline_aishell' -am_model: # the pdmodel file of am static model [optional] -am_params: # the pdiparams file of am static model [optional] -lang: 'zh' -sample_rate: 16000 -cfg_path: -decode_method: -force_yes: True - -am_predictor_conf: - device: # set 'gpu:id' or 'cpu' - switch_ir_optim: True - glog_info: False # True -> print glog - summary: True # False -> do not show predictor config - - -################################################################## -# OTHERS # -################################################################## diff --git a/demos/speech_server/conf/tts/tts.yaml b/demos/speech_server/conf/tts/tts.yaml deleted file mode 100644 index 19207f0b03579a906c80ba6eff356792974eeefd..0000000000000000000000000000000000000000 --- a/demos/speech_server/conf/tts/tts.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# This is the parameter configuration file for TTS server. 
- -################################################################## -# ACOUSTIC MODEL SETTING # -# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc', -# 'fastspeech2_ljspeech', 'fastspeech2_aishell3', -# 'fastspeech2_vctk'] -################################################################## -am: 'fastspeech2_csmsc' -am_config: -am_ckpt: -am_stat: -phones_dict: -tones_dict: -speaker_dict: -spk_id: 0 - -################################################################## -# VOCODER SETTING # -# voc choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3', -# 'pwgan_vctk', 'mb_melgan_csmsc'] -################################################################## -voc: 'pwgan_csmsc' -voc_config: -voc_ckpt: -voc_stat: - -################################################################## -# OTHERS # -################################################################## -lang: 'zh' -device: # set 'gpu:id' or 'cpu' diff --git a/demos/speech_server/conf/tts/tts_pd.yaml b/demos/speech_server/conf/tts/tts_pd.yaml deleted file mode 100644 index e27b9665bbe1ee8b5d5c39fd3e5f87d841dd64de..0000000000000000000000000000000000000000 --- a/demos/speech_server/conf/tts/tts_pd.yaml +++ /dev/null @@ -1,42 +0,0 @@ -# This is the parameter configuration file for TTS server. -# These are the static models that support paddle inference. 
- -################################################################## -# ACOUSTIC MODEL SETTING # -# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc'] -################################################################## -am: 'fastspeech2_csmsc' -am_model: # the pdmodel file of your am static model (XX.pdmodel) -am_params: # the pdiparams file of your am static model (XX.pdipparams) -am_sample_rate: 24000 -phones_dict: -tones_dict: -speaker_dict: -spk_id: 0 - -am_predictor_conf: - device: # set 'gpu:id' or 'cpu' - switch_ir_optim: True - glog_info: False # True -> print glog - summary: True # False -> do not show predictor config - - -################################################################## -# VOCODER SETTING # -# voc choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc'] -################################################################## -voc: 'pwgan_csmsc' -voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel) -voc_params: # the pdiparams file of your vocoder static model (XX.pdipparams) -voc_sample_rate: 24000 - -voc_predictor_conf: - device: # set 'gpu:id' or 'cpu' - switch_ir_optim: True - glog_info: False # True -> print glog - summary: True # False -> do not show predictor config - -################################################################## -# OTHERS # -################################################################## -lang: 'zh' diff --git a/demos/speech_server/server.sh b/demos/speech_server/server.sh index d9367ec063c79a0c45f2e05c6f53abf05b479f62..e5961286ba459ba6bdf8ee46de4e512cc7e25640 100644 --- a/demos/speech_server/server.sh +++ b/demos/speech_server/server.sh @@ -1,3 +1,3 @@ #!/bin/bash -paddlespeech_server start --config_file ./conf/application.yaml \ No newline at end of file +paddlespeech_server start --config_file ./conf/application.yaml diff --git a/paddlespeech/server/bin/main.py b/paddlespeech/server/bin/main.py index 360d295ef583a4d490a76392ff9a362c40ee4656..de52829930262f5b89a5c85a270bb0f33ec8dea2 
100644 --- a/paddlespeech/server/bin/main.py +++ b/paddlespeech/server/bin/main.py @@ -34,7 +34,7 @@ def init(config): bool: """ # init api - api_list = list(config.engine_backend) + api_list = list(engine.split("_")[0] for engine in config.engine_list) api_router = setup_router(api_list) app.include_router(api_router) diff --git a/paddlespeech/server/bin/paddlespeech_server.py b/paddlespeech/server/bin/paddlespeech_server.py index 21fc5c65e965a87c483046d66e45036d1b091b5d..3d71f091b3d3a41e209bc0e0e5c96c9ad2cb78f3 100644 --- a/paddlespeech/server/bin/paddlespeech_server.py +++ b/paddlespeech/server/bin/paddlespeech_server.py @@ -62,7 +62,7 @@ class ServerExecutor(BaseExecutor): bool: """ # init api - api_list = list(config.engine_backend) + api_list = list(engine.split("_")[0] for engine in config.engine_list) api_router = setup_router(api_list) app.include_router(api_router) diff --git a/paddlespeech/server/conf/application.yaml b/paddlespeech/server/conf/application.yaml index aba33a514366267f7bd97c8caff4fb3f69fe4030..6048450b7ba5ed928c39ad7ebad20354d7dd8442 100644 --- a/paddlespeech/server/conf/application.yaml +++ b/paddlespeech/server/conf/application.yaml @@ -1,27 +1,107 @@ # This is the parameter configuration file for PaddleSpeech Serving. -################################################################## -# SERVER SETTING # -################################################################## +################################################################################# +# SERVER SETTING # +################################################################################# host: 127.0.0.1 port: 8090 -################################################################## -# CONFIG FILE # -################################################################## -# add engine backend type (Options: asr, tts) and config file here. -# Adding a speech task to engine_backend means starting the service. 
-engine_backend: - asr: 'conf/asr/asr.yaml' - tts: 'conf/tts/tts.yaml' - -# The engine_type of speech task needs to keep the same type as the config file of speech task. -# E.g: The engine_type of asr is 'python', the engine_backend of asr is 'XX/asr.yaml' -# E.g: The engine_type of asr is 'inference', the engine_backend of asr is 'XX/asr_pd.yaml' -# -# add engine type (Options: python, inference) -engine_type: - asr: 'python' - tts: 'python' +# The task format in the engine_list is: <speech task>_<engine type> +# task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference'] +engine_list: ['asr_python', 'tts_python'] + + +################################################################################# +# ENGINE CONFIG # +################################################################################# +################### speech task: asr; engine_type: python ####################### +asr_python: + model: 'conformer_wenetspeech' + lang: 'zh' + sample_rate: 16000 + cfg_path: # [optional] + ckpt_path: # [optional] + decode_method: 'attention_rescoring' + force_yes: True + device: # set 'gpu:id' or 'cpu' + + +################### speech task: asr; engine_type: inference ####################### +asr_inference: + # model_type choices=['deepspeech2offline_aishell'] + model_type: 'deepspeech2offline_aishell' + am_model: # the pdmodel file of am static model [optional] + am_params: # the pdiparams file of am static model [optional] + lang: 'zh' + sample_rate: 16000 + cfg_path: + decode_method: + force_yes: True + + am_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + +################### speech task: tts; engine_type: python ####################### +tts_python: + # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc', + # 'fastspeech2_ljspeech', 'fastspeech2_aishell3', + # 'fastspeech2_vctk'] + am: 
'fastspeech2_csmsc' + am_config: + am_ckpt: + 
am_stat: + phones_dict: + tones_dict: + speaker_dict: + spk_id: 0 + + # voc (vocoder) choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3', + # 'pwgan_vctk', 'mb_melgan_csmsc'] + voc: 'pwgan_csmsc' + voc_config: + voc_ckpt: + voc_stat: + + # others + lang: 'zh' + device: # set 'gpu:id' or 'cpu' + + +################### speech task: tts; engine_type: inference ####################### +tts_inference: + # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc'] + am: 'fastspeech2_csmsc' + am_model: # the pdmodel file of your am static model (XX.pdmodel) + am_params: # the pdiparams file of your am static model (XX.pdipparams) + am_sample_rate: 24000 + phones_dict: + tones_dict: + speaker_dict: + spk_id: 0 + + am_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + # voc (vocoder) choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc'] + voc: 'pwgan_csmsc' + voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel) + voc_params: # the pdiparams file of your vocoder static model (XX.pdipparams) + voc_sample_rate: 24000 + + voc_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + # others + lang: 'zh' diff --git a/paddlespeech/server/conf/asr/asr.yaml b/paddlespeech/server/conf/asr/asr.yaml deleted file mode 100644 index a6743b77513e504f2bcd374ea8235d8e39a7c98c..0000000000000000000000000000000000000000 --- a/paddlespeech/server/conf/asr/asr.yaml +++ /dev/null @@ -1,8 +0,0 @@ -model: 'conformer_wenetspeech' -lang: 'zh' -sample_rate: 16000 -cfg_path: # [optional] -ckpt_path: # [optional] -decode_method: 'attention_rescoring' -force_yes: True -device: # set 'gpu:id' or 'cpu' diff --git a/paddlespeech/server/conf/asr/asr_pd.yaml b/paddlespeech/server/conf/asr/asr_pd.yaml deleted file mode 100644 
index 4c415ac791edeab2d9832e8db2e9a66411aaed06..0000000000000000000000000000000000000000 --- a/paddlespeech/server/conf/asr/asr_pd.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# This is the parameter configuration file for ASR server. -# These are the static models that support paddle inference. - -################################################################## -# ACOUSTIC MODEL SETTING # -# am choices=['deepspeech2offline_aishell'] TODO -################################################################## -model_type: 'deepspeech2offline_aishell' -am_model: # the pdmodel file of am static model [optional] -am_params: # the pdiparams file of am static model [optional] -lang: 'zh' -sample_rate: 16000 -cfg_path: -decode_method: -force_yes: True - -am_predictor_conf: - device: # set 'gpu:id' or 'cpu' - switch_ir_optim: True - glog_info: False # True -> print glog - summary: True # False -> do not show predictor config - - -################################################################## -# OTHERS # -################################################################## diff --git a/paddlespeech/server/conf/tts/tts.yaml b/paddlespeech/server/conf/tts/tts.yaml deleted file mode 100644 index 19207f0b03579a906c80ba6eff356792974eeefd..0000000000000000000000000000000000000000 --- a/paddlespeech/server/conf/tts/tts.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# This is the parameter configuration file for TTS server. 
- -################################################################## -# ACOUSTIC MODEL SETTING # -# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc', -# 'fastspeech2_ljspeech', 'fastspeech2_aishell3', -# 'fastspeech2_vctk'] -################################################################## -am: 'fastspeech2_csmsc' -am_config: -am_ckpt: -am_stat: -phones_dict: -tones_dict: -speaker_dict: -spk_id: 0 - -################################################################## -# VOCODER SETTING # -# voc choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3', -# 'pwgan_vctk', 'mb_melgan_csmsc'] -################################################################## -voc: 'pwgan_csmsc' -voc_config: -voc_ckpt: -voc_stat: - -################################################################## -# OTHERS # -################################################################## -lang: 'zh' -device: # set 'gpu:id' or 'cpu' diff --git a/paddlespeech/server/conf/tts/tts_pd.yaml b/paddlespeech/server/conf/tts/tts_pd.yaml deleted file mode 100644 index e27b9665bbe1ee8b5d5c39fd3e5f87d841dd64de..0000000000000000000000000000000000000000 --- a/paddlespeech/server/conf/tts/tts_pd.yaml +++ /dev/null @@ -1,42 +0,0 @@ -# This is the parameter configuration file for TTS server. -# These are the static models that support paddle inference. 
- -################################################################## -# ACOUSTIC MODEL SETTING # -# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc'] -################################################################## -am: 'fastspeech2_csmsc' -am_model: # the pdmodel file of your am static model (XX.pdmodel) -am_params: # the pdiparams file of your am static model (XX.pdipparams) -am_sample_rate: 24000 -phones_dict: -tones_dict: -speaker_dict: -spk_id: 0 - -am_predictor_conf: - device: # set 'gpu:id' or 'cpu' - switch_ir_optim: True - glog_info: False # True -> print glog - summary: True # False -> do not show predictor config - - -################################################################## -# VOCODER SETTING # -# voc choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc'] -################################################################## -voc: 'pwgan_csmsc' -voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel) -voc_params: # the pdiparams file of your vocoder static model (XX.pdipparams) -voc_sample_rate: 24000 - -voc_predictor_conf: - device: # set 'gpu:id' or 'cpu' - switch_ir_optim: True - glog_info: False # True -> print glog - summary: True # False -> do not show predictor config - -################################################################## -# OTHERS # -################################################################## -lang: 'zh' diff --git a/paddlespeech/server/engine/asr/paddleinference/asr_engine.py b/paddlespeech/server/engine/asr/paddleinference/asr_engine.py index cb973e924efb5bcd7de440f97a27c0d29fda29c0..1925bf1d623613d073bb028133a348842b591127 100644 --- a/paddlespeech/server/engine/asr/paddleinference/asr_engine.py +++ b/paddlespeech/server/engine/asr/paddleinference/asr_engine.py @@ -26,7 +26,6 @@ from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer from paddlespeech.s2t.modules.ctc import CTCDecoder from paddlespeech.s2t.utils.utility import UpdateConfig from 
paddlespeech.server.engine.base_engine import BaseEngine -from paddlespeech.server.utils.config import get_config from paddlespeech.server.utils.paddle_predictor import init_predictor from paddlespeech.server.utils.paddle_predictor import run_model @@ -184,7 +183,7 @@ class ASREngine(BaseEngine): def __init__(self): super(ASREngine, self).__init__() - def init(self, config_file: str) -> bool: + def init(self, config: dict) -> bool: """init engine resource Args: @@ -196,7 +195,7 @@ class ASREngine(BaseEngine): self.input = None self.output = None self.executor = ASRServerExecutor() - self.config = get_config(config_file) + self.config = config self.executor._init_from_path( model_type=self.config.model_type, diff --git a/paddlespeech/server/engine/asr/python/asr_engine.py b/paddlespeech/server/engine/asr/python/asr_engine.py index 1e2c5cc270dab1f82caa9c0810411211c8cdbe2e..e76c49a79a66be505f239f9f04b5fdd050701fda 100644 --- a/paddlespeech/server/engine/asr/python/asr_engine.py +++ b/paddlespeech/server/engine/asr/python/asr_engine.py @@ -19,7 +19,6 @@ import paddle from paddlespeech.cli.asr.infer import ASRExecutor from paddlespeech.cli.log import logger from paddlespeech.server.engine.base_engine import BaseEngine -from paddlespeech.server.utils.config import get_config __all__ = ['ASREngine'] @@ -40,7 +39,7 @@ class ASREngine(BaseEngine): def __init__(self): super(ASREngine, self).__init__() - def init(self, config_file: str) -> bool: + def init(self, config: dict) -> bool: """init engine resource Args: @@ -52,8 +51,7 @@ class ASREngine(BaseEngine): self.input = None self.output = None self.executor = ASRServerExecutor() - - self.config = get_config(config_file) + self.config = config try: if self.config.device: self.device = self.config.device diff --git a/paddlespeech/server/engine/engine_pool.py b/paddlespeech/server/engine/engine_pool.py index f6a4d2aab2c894149efae75afacf6a275a5dd6b0..9de73567e47c8150a7b2807d4bf1cc299e0e1b40 100644 --- 
a/paddlespeech/server/engine/engine_pool.py +++ b/paddlespeech/server/engine/engine_pool.py @@ -28,11 +28,13 @@ def init_engine_pool(config) -> bool: """ Init engine pool """ global ENGINE_POOL - for engine in config.engine_backend: + + for engine_and_type in config.engine_list: + engine = engine_and_type.split("_")[0] + engine_type = engine_and_type.split("_")[1] ENGINE_POOL[engine] = EngineFactory.get_engine( - engine_name=engine, engine_type=config.engine_type[engine]) - if not ENGINE_POOL[engine].init( - config_file=config.engine_backend[engine]): + engine_name=engine, engine_type=engine_type) + if not ENGINE_POOL[engine].init(config=config[engine_and_type]): return False return True diff --git a/paddlespeech/server/engine/tts/paddleinference/tts_engine.py b/paddlespeech/server/engine/tts/paddleinference/tts_engine.py index 5955c1a216a304629c4896a0f9462d39d9121715..1bbbe0ea3e1135ab89d9704923d1d3d77baa0212 100644 --- a/paddlespeech/server/engine/tts/paddleinference/tts_engine.py +++ b/paddlespeech/server/engine/tts/paddleinference/tts_engine.py @@ -29,7 +29,6 @@ from paddlespeech.cli.utils import download_and_decompress from paddlespeech.cli.utils import MODEL_HOME from paddlespeech.server.engine.base_engine import BaseEngine from paddlespeech.server.utils.audio_process import change_speed -from paddlespeech.server.utils.config import get_config from paddlespeech.server.utils.errors import ErrorCode from paddlespeech.server.utils.exception import ServerBaseException from paddlespeech.server.utils.paddle_predictor import init_predictor @@ -357,11 +356,11 @@ class TTSEngine(BaseEngine): """ super(TTSEngine, self).__init__() - def init(self, config_file: str) -> bool: + def init(self, config: dict) -> bool: self.executor = TTSServerExecutor() try: - self.config = get_config(config_file) + self.config = config self.executor._init_from_path( am=self.config.am, am_model=self.config.am_model, diff --git a/paddlespeech/server/engine/tts/python/tts_engine.py 
b/paddlespeech/server/engine/tts/python/tts_engine.py index 7dd576699d02c2ecef8b0993a0273f9826c08a6b..8d6c7fd17e54b578070697d8360c592f191e7c46 100644 --- a/paddlespeech/server/engine/tts/python/tts_engine.py +++ b/paddlespeech/server/engine/tts/python/tts_engine.py @@ -25,7 +25,6 @@ from paddlespeech.cli.log import logger from paddlespeech.cli.tts.infer import TTSExecutor from paddlespeech.server.engine.base_engine import BaseEngine from paddlespeech.server.utils.audio_process import change_speed -from paddlespeech.server.utils.config import get_config from paddlespeech.server.utils.errors import ErrorCode from paddlespeech.server.utils.exception import ServerBaseException @@ -50,11 +49,11 @@ class TTSEngine(BaseEngine): """ super(TTSEngine, self).__init__() - def init(self, config_file: str) -> bool: + def init(self, config: dict) -> bool: self.executor = TTSServerExecutor() try: - self.config = get_config(config_file) + self.config = config if self.config.device: self.device = self.config.device else: diff --git a/tests/unit/server/change_yaml.py b/tests/unit/server/change_yaml.py index 5a5d9ae0146bce5d2ab1dbb6c5ca6dc65a2636f2..1f063d8f50996184d6c3e82c17d95e78807d34f5 100644 --- a/tests/unit/server/change_yaml.py +++ b/tests/unit/server/change_yaml.py @@ -5,7 +5,7 @@ import os import yaml -def change_speech_yaml(yaml_name: str, device: str): +def change_device(yamlfile: str, engine: str, device: str): """Change the settings of the device under the voice task configuration file Args: @@ -13,68 +13,54 @@ def change_speech_yaml(yaml_name: str, device: str): cpu (bool): True means set device to "cpu" model_type (dict): change model type """ - if "asr" in yaml_name: - dirpath = "./conf/asr/" - elif 'tts' in yaml_name: - dirpath = "./conf/tts/" - yamlfile = dirpath + yaml_name + ".yaml" - tmp_yamlfile = dirpath + yaml_name + "_tmp.yaml" + tmp_yamlfile = yamlfile.split(".yaml")[0] + "_tmp.yaml" os.system("cp %s %s" % (yamlfile, tmp_yamlfile)) + if device == 'cpu': + 
set_device = 'cpu' + elif device == 'gpu': + set_device = 'gpu:0' + else: + print("Please set correct device: cpu or gpu.") + with open(tmp_yamlfile) as f, open(yamlfile, "w+", encoding="utf-8") as fw: y = yaml.safe_load(f) - if device == 'cpu': - print("Set device: cpu") - if yaml_name == 'asr': - y['device'] = 'cpu' - elif yaml_name == 'asr_pd': - y['am_predictor_conf']['device'] = 'cpu' - elif yaml_name == 'tts': - y['device'] = 'cpu' - elif yaml_name == 'tts_pd': - y['am_predictor_conf']['device'] = 'cpu' - y['voc_predictor_conf']['device'] = 'cpu' - elif device == 'gpu': - print("Set device: gpu") - if yaml_name == 'asr': - y['device'] = 'gpu:0' - elif yaml_name == 'asr_pd': - y['am_predictor_conf']['device'] = 'gpu:0' - elif yaml_name == 'tts': - y['device'] = 'gpu:0' - elif yaml_name == 'tts_pd': - y['am_predictor_conf']['device'] = 'gpu:0' - y['voc_predictor_conf']['device'] = 'gpu:0' + if engine == 'asr_python' or engine == 'tts_python': + y[engine]['device'] = set_device + elif engine == 'asr_inference': + y[engine]['am_predictor_conf']['device'] = set_device + elif engine == 'tts_inference': + y[engine]['am_predictor_conf']['device'] = set_device + y[engine]['voc_predictor_conf']['device'] = set_device else: - print("Please set correct device: cpu or gpu.") + print( + "Please set correct engine: asr_python, tts_python, asr_inference, tts_inference." + ) - print("The content of '%s': " % (yamlfile)) print(yaml.dump(y, default_flow_style=False, sort_keys=False)) yaml.dump(y, fw, allow_unicode=True) os.system("rm %s" % (tmp_yamlfile)) print("Change %s successfully." 
% (yamlfile)) -def change_app_yaml(task: str, engine_type: str): +def change_engine_type(yamlfile: str, engine_type): """Change the engine type and corresponding configuration file of the speech task in application.yaml Args: task (str): asr or tts """ - yamlfile = "./conf/application.yaml" - tmp_yamlfile = "./conf/application_tmp.yaml" + tmp_yamlfile = yamlfile.split(".yaml")[0] + "_tmp.yaml" os.system("cp %s %s" % (yamlfile, tmp_yamlfile)) + speech_task = engine_type.split("_")[0] + with open(tmp_yamlfile) as f, open(yamlfile, "w+", encoding="utf-8") as fw: y = yaml.safe_load(f) - y['engine_type'][task] = engine_type - path_list = ["./conf/", task, "/", task] - if engine_type == 'python': - path_list.append(".yaml") - - elif engine_type == 'inference': - path_list.append("_pd.yaml") - y['engine_backend'][task] = ''.join(path_list) - print("The content of './conf/application.yaml': ") + engine_list = y['engine_list'] + for engine in engine_list: + if speech_task in engine: + engine_list.remove(engine) + engine_list.append(engine_type) + y['engine_list'] = engine_list print(yaml.dump(y, default_flow_style=False, sort_keys=False)) yaml.dump(y, fw, allow_unicode=True) os.system("rm %s" % (tmp_yamlfile)) @@ -83,32 +69,37 @@ def change_app_yaml(task: str, engine_type: str): if __name__ == "__main__": parser = argparse.ArgumentParser() + parser.add_argument( + '--config_file', + type=str, + default='./conf/application.yaml', + help='server yaml file.') parser.add_argument( '--change_task', type=str, default=None, help='Change task', choices=[ - 'app-asr-python', - 'app-asr-inference', - 'app-tts-python', - 'app-tts-inference', - 'speech-asr-cpu', - 'speech-asr-gpu', - 'speech-asr_pd-cpu', - 'speech-asr_pd-gpu', - 'speech-tts-cpu', - 'speech-tts-gpu', - 'speech-tts_pd-cpu', - 'speech-tts_pd-gpu', + 'enginetype-asr_python', + 'enginetype-asr_inference', + 'enginetype-tts_python', + 'enginetype-tts_inference', + 'device-asr_python-cpu', + 'device-asr_python-gpu', + 
'device-asr_inference-cpu', + 'device-asr_inference-gpu', + 'device-tts_python-cpu', + 'device-tts_python-gpu', + 'device-tts_inference-cpu', + 'device-tts_inference-gpu', ], required=True) args = parser.parse_args() types = args.change_task.split("-") - if types[0] == "app": - change_app_yaml(types[1], types[2]) - elif types[0] == "speech": - change_speech_yaml(types[1], types[2]) + if types[0] == "enginetype": + change_engine_type(args.config_file, types[1]) + elif types[0] == "device": + change_device(args.config_file, types[1], types[2]) else: print("Error change task, please check change_task.") diff --git a/tests/unit/server/conf/application.yaml b/tests/unit/server/conf/application.yaml index aba33a514366267f7bd97c8caff4fb3f69fe4030..6048450b7ba5ed928c39ad7ebad20354d7dd8442 100644 --- a/tests/unit/server/conf/application.yaml +++ b/tests/unit/server/conf/application.yaml @@ -1,27 +1,107 @@ # This is the parameter configuration file for PaddleSpeech Serving. -################################################################## -# SERVER SETTING # -################################################################## +################################################################################# +# SERVER SETTING # +################################################################################# host: 127.0.0.1 port: 8090 -################################################################## -# CONFIG FILE # -################################################################## -# add engine backend type (Options: asr, tts) and config file here. -# Adding a speech task to engine_backend means starting the service. -engine_backend: - asr: 'conf/asr/asr.yaml' - tts: 'conf/tts/tts.yaml' - -# The engine_type of speech task needs to keep the same type as the config file of speech task. 
-# E.g: The engine_type of asr is 'python', the engine_backend of asr is 'XX/asr.yaml' -# E.g: The engine_type of asr is 'inference', the engine_backend of asr is 'XX/asr_pd.yaml' -# -# add engine type (Options: python, inference) -engine_type: - asr: 'python' - tts: 'python' +# The task format in the engine_list is: <speech task>_<engine type> +# task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference'] +engine_list: ['asr_python', 'tts_python'] + + +################################################################################# +# ENGINE CONFIG # +################################################################################# +################### speech task: asr; engine_type: python ####################### +asr_python: + model: 'conformer_wenetspeech' + lang: 'zh' + sample_rate: 16000 + cfg_path: # [optional] + ckpt_path: # [optional] + decode_method: 'attention_rescoring' + force_yes: True + device: # set 'gpu:id' or 'cpu' + + +################### speech task: asr; engine_type: inference ####################### +asr_inference: + # model_type choices=['deepspeech2offline_aishell'] + model_type: 'deepspeech2offline_aishell' + am_model: # the pdmodel file of am static model [optional] + am_params: # the pdiparams file of am static model [optional] + lang: 'zh' + sample_rate: 16000 + cfg_path: + decode_method: + force_yes: True + + am_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + +################### speech task: tts; engine_type: python ####################### +tts_python: + # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc', + # 'fastspeech2_ljspeech', 'fastspeech2_aishell3', + # 'fastspeech2_vctk'] + am: 'fastspeech2_csmsc' + am_config: + am_ckpt: + am_stat: + phones_dict: + tones_dict: + speaker_dict: + spk_id: 0 + + # voc (vocoder) choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3', + # 'pwgan_vctk', 
'mb_melgan_csmsc'] + voc: 'pwgan_csmsc' + voc_config: + voc_ckpt: + voc_stat: + + # others + lang: 'zh' + device: # set 'gpu:id' or 'cpu' + + +################### speech task: tts; engine_type: inference ####################### +tts_inference: + # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc'] + am: 'fastspeech2_csmsc' + am_model: # the pdmodel file of your am static model (XX.pdmodel) + am_params: # the pdiparams file of your am static model (XX.pdiparams) + am_sample_rate: 24000 + phones_dict: + tones_dict: + speaker_dict: + spk_id: 0 + + am_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + # voc (vocoder) choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc'] + voc: 'pwgan_csmsc' + voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel) + voc_params: # the pdiparams file of your vocoder static model (XX.pdiparams) + voc_sample_rate: 24000 + + voc_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + # others + lang: 'zh' diff --git a/tests/unit/server/conf/asr/asr.yaml b/tests/unit/server/conf/asr/asr.yaml deleted file mode 100644 index a6743b77513e504f2bcd374ea8235d8e39a7c98c..0000000000000000000000000000000000000000 --- a/tests/unit/server/conf/asr/asr.yaml +++ /dev/null @@ -1,8 +0,0 @@ -model: 'conformer_wenetspeech' -lang: 'zh' -sample_rate: 16000 -cfg_path: # [optional] -ckpt_path: # [optional] -decode_method: 'attention_rescoring' -force_yes: True -device: # set 'gpu:id' or 'cpu' diff --git a/tests/unit/server/conf/asr/asr_pd.yaml b/tests/unit/server/conf/asr/asr_pd.yaml deleted file mode 100644 index 4c415ac791edeab2d9832e8db2e9a66411aaed06..0000000000000000000000000000000000000000 --- a/tests/unit/server/conf/asr/asr_pd.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# This is 
the parameter configuration file for ASR server. -# These are the static models that support paddle inference. - -################################################################## -# ACOUSTIC MODEL SETTING # -# am choices=['deepspeech2offline_aishell'] TODO -################################################################## -model_type: 'deepspeech2offline_aishell' -am_model: # the pdmodel file of am static model [optional] -am_params: # the pdiparams file of am static model [optional] -lang: 'zh' -sample_rate: 16000 -cfg_path: -decode_method: -force_yes: True - -am_predictor_conf: - device: # set 'gpu:id' or 'cpu' - switch_ir_optim: True - glog_info: False # True -> print glog - summary: True # False -> do not show predictor config - - -################################################################## -# OTHERS # -################################################################## diff --git a/tests/unit/server/conf/tts/tts.yaml b/tests/unit/server/conf/tts/tts.yaml deleted file mode 100644 index 19207f0b03579a906c80ba6eff356792974eeefd..0000000000000000000000000000000000000000 --- a/tests/unit/server/conf/tts/tts.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# This is the parameter configuration file for TTS server. 
- -################################################################## -# ACOUSTIC MODEL SETTING # -# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc', -# 'fastspeech2_ljspeech', 'fastspeech2_aishell3', -# 'fastspeech2_vctk'] -################################################################## -am: 'fastspeech2_csmsc' -am_config: -am_ckpt: -am_stat: -phones_dict: -tones_dict: -speaker_dict: -spk_id: 0 - -################################################################## -# VOCODER SETTING # -# voc choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3', -# 'pwgan_vctk', 'mb_melgan_csmsc'] -################################################################## -voc: 'pwgan_csmsc' -voc_config: -voc_ckpt: -voc_stat: - -################################################################## -# OTHERS # -################################################################## -lang: 'zh' -device: # set 'gpu:id' or 'cpu' diff --git a/tests/unit/server/conf/tts/tts_pd.yaml b/tests/unit/server/conf/tts/tts_pd.yaml deleted file mode 100644 index e27b9665bbe1ee8b5d5c39fd3e5f87d841dd64de..0000000000000000000000000000000000000000 --- a/tests/unit/server/conf/tts/tts_pd.yaml +++ /dev/null @@ -1,42 +0,0 @@ -# This is the parameter configuration file for TTS server. -# These are the static models that support paddle inference. 
- -################################################################## -# ACOUSTIC MODEL SETTING # -# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc'] -################################################################## -am: 'fastspeech2_csmsc' -am_model: # the pdmodel file of your am static model (XX.pdmodel) -am_params: # the pdiparams file of your am static model (XX.pdipparams) -am_sample_rate: 24000 -phones_dict: -tones_dict: -speaker_dict: -spk_id: 0 - -am_predictor_conf: - device: # set 'gpu:id' or 'cpu' - switch_ir_optim: True - glog_info: False # True -> print glog - summary: True # False -> do not show predictor config - - -################################################################## -# VOCODER SETTING # -# voc choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc'] -################################################################## -voc: 'pwgan_csmsc' -voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel) -voc_params: # the pdiparams file of your vocoder static model (XX.pdipparams) -voc_sample_rate: 24000 - -voc_predictor_conf: - device: # set 'gpu:id' or 'cpu' - switch_ir_optim: True - glog_info: False # True -> print glog - summary: True # False -> do not show predictor config - -################################################################## -# OTHERS # -################################################################## -lang: 'zh' diff --git a/tests/unit/server/test_server_client.sh b/tests/unit/server/test_server_client.sh index 795a23e0136a4a68cd079b8a872fda76488b478f..b48e7111b870566a47aa81acc2f0a97b99536313 100644 --- a/tests/unit/server/test_server_client.sh +++ b/tests/unit/server/test_server_client.sh @@ -99,8 +99,8 @@ echo "************************************************************************** # start server: asr engine type: python; tts engine type: python; device: cpu -python change_yaml.py --change_task speech-asr-cpu # change asr.yaml device: cpu -python change_yaml.py --change_task speech-tts-cpu # 
change tts.yaml device: cpu +python change_yaml.py --change_task device-asr_python-cpu # change application.yaml, asr_python device: cpu +python change_yaml.py --change_task device-tts_python-cpu # change application.yaml, tts_python device: cpu echo "Start the service: asr engine type: python; tts engine type: python; device: cpu" | tee -a ./log/test_result.log ((target_start_num+=1)) @@ -125,8 +125,8 @@ echo "************************************************************************** # start server: asr engine type: inference; tts engine type: inference; device: gpu -python change_yaml.py --change_task app-asr-inference # change application.yaml, asr engine_type: inference; asr engine_backend: asr_pd.yaml -python change_yaml.py --change_task app-tts-inference # change application.yaml, tts engine_type: inference; tts engine_backend: tts_pd.yaml +python change_yaml.py --change_task enginetype-asr_inference # change application.yaml engine_list: replace the asr engine with asr_inference +python change_yaml.py --change_task enginetype-tts_inference # change application.yaml engine_list: replace the tts engine with tts_inference echo "Start the service: asr engine type: inference; tts engine type: inference; device: gpu" | tee -a ./log/test_result.log ((target_start_num+=1)) @@ -151,8 +151,8 @@ echo "************************************************************************** # start server: asr engine type: inference; tts engine type: inference; device: cpu -python change_yaml.py --change_task speech-asr_pd-cpu # change asr_pd.yaml device: cpu -python change_yaml.py --change_task speech-tts_pd-cpu # change tts_pd.yaml device: cpu +python change_yaml.py --change_task device-asr_inference-cpu # change application.yaml, asr_inference am_predictor_conf device: cpu +python change_yaml.py --change_task device-tts_inference-cpu # change application.yaml, tts_inference am_predictor_conf and voc_predictor_conf device: cpu echo "start the service: asr engine type: inference; tts engine type: inference; device: cpu" | tee -a ./log/test_result.log ((target_start_num+=1)) @@ -182,4 +182,5 @@ echo 
"***************** Here are all the test results ********************" cat ./log/test_result.log # Restoring conf is the same as demos/speech_server +rm -rf ./conf cp ../../../demos/speech_server/conf/ ./ -rf \ No newline at end of file