提交 6b2dd168 编写于 作者: L lym0302

update server cli, test=doc

上级 fe350ddd
......@@ -9,9 +9,17 @@ port: 8090
##################################################################
# CONFIG FILE #
##################################################################
# add engine type (Options: asr, tts) and config file here.
# The engine_type of a speech task must match the type of that task's config file.
# E.g., if the engine_type of asr is 'python', the engine_backend of asr should be 'XX/asr.yaml'.
# E.g., if the engine_type of asr is 'inference', the engine_backend of asr should be 'XX/asr_pd.yaml'.
#
# add engine type (Options: python, inference)
engine_type:
asr: 'inference'
tts: 'inference'
# add engine backend type (Options: asr, tts) and config file here.
# Adding a speech task to engine_backend means starting the service.
engine_backend:
asr: 'conf/asr/asr.yaml'
tts: 'conf/tts/tts.yaml'
asr: 'conf/asr/asr_pd.yaml'
tts: 'conf/tts/tts_pd.yaml'
model: 'conformer_wenetspeech'
lang: 'zh'
sample_rate: 16000
cfg_path:
ckpt_path:
cfg_path: # [optional]
ckpt_path: # [optional]
decode_method: 'attention_rescoring'
force_yes: False
force_yes: True
device: 'gpu:3' # set 'gpu:id' or 'cpu'
# This is the parameter configuration file for ASR server.
# These are the static models that support paddle inference.
##################################################################
# ACOUSTIC MODEL SETTING #
# am choices=['deepspeech2offline_aishell'] TODO
##################################################################
model_type: 'deepspeech2offline_aishell'
am_model: # the pdmodel file of am static model [optional]
am_params: # the pdiparams file of am static model [optional]
lang: 'zh'
sample_rate: 16000
cfg_path:
decode_method:
force_yes: True
am_predictor_conf:
device: 'gpu:3' # set 'gpu:id' or 'cpu'
enable_mkldnn: True
switch_ir_optim: True
##################################################################
# OTHERS #
##################################################################
......@@ -29,4 +29,4 @@ voc_stat:
# OTHERS #
##################################################################
lang: 'zh'
device: 'gpu:2'
device: 'gpu:3' # set 'gpu:id' or 'cpu'
......@@ -6,8 +6,8 @@
# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc']
##################################################################
am: 'fastspeech2_csmsc'
am_model: # the pdmodel file of am static model
am_params: # the pdiparams file of am static model
am_model: # the pdmodel file of your am static model (XX.pdmodel)
am_params: # the pdiparams file of your am static model (XX.pdiparams)
am_sample_rate: 24000
phones_dict:
tones_dict:
......@@ -15,9 +15,9 @@ speaker_dict:
spk_id: 0
am_predictor_conf:
use_gpu: True
enable_mkldnn: True
switch_ir_optim: True
device: 'gpu:3' # set 'gpu:id' or 'cpu'
enable_mkldnn: False
switch_ir_optim: False
##################################################################
......@@ -25,17 +25,16 @@ am_predictor_conf:
# voc choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc']
##################################################################
voc: 'pwgan_csmsc'
voc_model: # the pdmodel file of vocoder static model
voc_params: # the pdiparams file of vocoder static model
voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel)
voc_params: # the pdiparams file of your vocoder static model (XX.pdiparams)
voc_sample_rate: 24000
voc_predictor_conf:
use_gpu: True
enable_mkldnn: True
switch_ir_optim: True
device: 'gpu:3' # set 'gpu:id' or 'cpu'
enable_mkldnn: False
switch_ir_optim: False
##################################################################
# OTHERS #
##################################################################
lang: 'zh'
device: paddle.get_device()
......@@ -20,7 +20,7 @@ from fastapi import FastAPI
from ..executor import BaseExecutor
from ..util import cli_server_register
from ..util import stats_wrapper
from paddlespeech.server.engine.engine_factory import EngineFactory
from paddlespeech.server.engine.engine_pool import init_engine_pool
from paddlespeech.server.restful.api import setup_router
from paddlespeech.server.utils.config import get_config
......@@ -51,8 +51,10 @@ class ServerExecutor(BaseExecutor):
def init(self, config) -> bool:
"""system initialization
Args:
config (CfgNode): config object
Returns:
bool:
"""
......@@ -61,13 +63,8 @@ class ServerExecutor(BaseExecutor):
api_router = setup_router(api_list)
app.include_router(api_router)
# init engine
engine_pool = []
for engine in config.engine_backend:
engine_pool.append(EngineFactory.get_engine(engine_name=engine))
if not engine_pool[-1].init(
config_file=config.engine_backend[engine]):
return False
if not init_engine_pool(config):
return False
return True
......
......@@ -9,12 +9,17 @@ port: 8090
##################################################################
# CONFIG FILE #
##################################################################
# The engine_type of a speech task must match the type of that task's config file.
# E.g., if the engine_type of asr is 'python', the engine_backend of asr should be 'XX/asr.yaml'.
# E.g., if the engine_type of asr is 'inference', the engine_backend of asr should be 'XX/asr_pd.yaml'.
#
# add engine type (Options: python, inference)
engine_type:
asr: 'inference'
# tts: 'inference'
asr: 'python'
tts: 'python'
# add engine backend type (Options: asr, tts) and config file here.
# Adding a speech task to engine_backend means starting the service.
engine_backend:
asr: 'conf/asr/asr_pd.yaml'
#tts: 'conf/tts/tts_pd.yaml'
asr: 'conf/asr/asr.yaml'
tts: 'conf/tts/tts.yaml'
......@@ -5,3 +5,4 @@ cfg_path: # [optional]
ckpt_path: # [optional]
decode_method: 'attention_rescoring'
force_yes: True
device: 'gpu:3' # set 'gpu:id' or 'cpu'
......@@ -15,7 +15,7 @@ decode_method:
force_yes: True
am_predictor_conf:
use_gpu: True
device: 'gpu:3' # set 'gpu:id' or 'cpu'
enable_mkldnn: True
switch_ir_optim: True
......
......@@ -29,4 +29,4 @@ voc_stat:
# OTHERS #
##################################################################
lang: 'zh'
device: paddle.get_device()
\ No newline at end of file
device: 'gpu:3' # set 'gpu:id' or 'cpu'
......@@ -6,18 +6,18 @@
# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc']
##################################################################
am: 'fastspeech2_csmsc'
am_model: # the pdmodel file of am static model
am_params: # the pdiparams file of am static model
am_sample_rate: 24000
am_model: # the pdmodel file of your am static model (XX.pdmodel)
am_params: # the pdiparams file of your am static model (XX.pdiparams)
am_sample_rate: 24000 # must match the model
phones_dict:
tones_dict:
speaker_dict:
spk_id: 0
am_predictor_conf:
use_gpu: True
enable_mkldnn: True
switch_ir_optim: True
device: 'gpu:3' # set 'gpu:id' or 'cpu'
enable_mkldnn: False
switch_ir_optim: False
##################################################################
......@@ -25,17 +25,16 @@ am_predictor_conf:
# voc choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc']
##################################################################
voc: 'pwgan_csmsc'
voc_model: # the pdmodel file of vocoder static model
voc_params: # the pdiparams file of vocoder static model
voc_sample_rate: 24000
voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel)
voc_params: # the pdiparams file of your vocoder static model (XX.pdiparams)
voc_sample_rate: 24000 # must match the model
voc_predictor_conf:
use_gpu: True
enable_mkldnn: True
switch_ir_optim: True
device: 'gpu:3' # set 'gpu:id' or 'cpu'
enable_mkldnn: False
switch_ir_optim: False
##################################################################
# OTHERS #
##################################################################
lang: 'zh'
device: paddle.get_device()
......@@ -12,21 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import os
from typing import List
from typing import Optional
from typing import Union
import librosa
import paddle
import soundfile
from paddlespeech.cli.asr.infer import ASRExecutor
from paddlespeech.cli.log import logger
from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.transform.transformation import Transformation
from paddlespeech.s2t.utils.dynamic_import import dynamic_import
from paddlespeech.s2t.utils.utility import UpdateConfig
from paddlespeech.server.engine.base_engine import BaseEngine
from paddlespeech.server.utils.config import get_config
......@@ -63,7 +53,7 @@ class ASREngine(BaseEngine):
self.executor = ASRServerExecutor()
self.config = get_config(config_file)
paddle.set_device(paddle.get_device())
paddle.set_device(self.config.device)
self.executor._init_from_path(
self.config.model, self.config.lang, self.config.sample_rate,
self.config.cfg_path, self.config.decode_method,
......
......@@ -344,7 +344,6 @@ class TTSEngine(BaseEngine):
try:
self.config = get_config(config_file)
self.executor._init_from_path(
am=self.config.am,
am_model=self.config.am_model,
......
......@@ -16,7 +16,7 @@ from typing import Union
from fastapi import APIRouter
from paddlespeech.server.engine.tts.paddleinference.tts_engine import TTSEngine
from paddlespeech.server.engine.engine_pool import get_engine_pool
from paddlespeech.server.restful.request import TTSRequest
from paddlespeech.server.restful.response import ErrorResponse
from paddlespeech.server.restful.response import TTSResponse
......@@ -60,28 +60,41 @@ def tts(request_body: TTSRequest):
Returns:
json: [description]
"""
# json to dict
item_dict = request_body.dict()
sentence = item_dict['text']
spk_id = item_dict['spk_id']
speed = item_dict['speed']
volume = item_dict['volume']
sample_rate = item_dict['sample_rate']
save_path = item_dict['save_path']
# get params
text = request_body.text
spk_id = request_body.spk_id
speed = request_body.speed
volume = request_body.volume
sample_rate = request_body.sample_rate
save_path = request_body.save_path
# Check parameters
if speed <=0 or speed > 3 or volume <=0 or volume > 3 or \
sample_rate not in [0, 16000, 8000] or \
(save_path is not None and not save_path.endswith("pcm") and not save_path.endswith("wav")):
return failed_response(ErrorCode.SERVER_PARAM_ERR)
# single
tts_engine = TTSEngine()
if speed <= 0 or speed > 3:
return failed_response(
ErrorCode.SERVER_PARAM_ERR,
"invalid speed value, the value should be between 0 and 3.")
if volume <= 0 or volume > 3:
return failed_response(
ErrorCode.SERVER_PARAM_ERR,
"invalid volume value, the value should be between 0 and 3.")
if sample_rate not in [0, 16000, 8000]:
return failed_response(
ErrorCode.SERVER_PARAM_ERR,
"invalid sample_rate value, the choice of value is 0, 8000, 16000.")
if save_path is not None and not save_path.endswith(
"pcm") and not save_path.endswith("wav"):
return failed_response(
ErrorCode.SERVER_PARAM_ERR,
"invalid save_path, saved audio formats support pcm and wav")
# run
try:
# get single engine from engine pool
engine_pool = get_engine_pool()
tts_engine = engine_pool['tts']
lang, target_sample_rate, wav_base64 = tts_engine.run(
sentence, spk_id, speed, volume, sample_rate, save_path)
text, spk_id, speed, volume, sample_rate, save_path)
response = {
"success": True,
......
......@@ -41,8 +41,9 @@ def init_predictor(model_dir: Optional[os.PathLike]=None,
config = Config(model_file, params_file)
config.enable_memory_optim()
if predictor_conf["use_gpu"]:
config.enable_use_gpu(1000, 0)
if "gpu" in predictor_conf["device"]:
gpu_id = predictor_conf["device"].split(":")[-1]
config.enable_use_gpu(1000, int(gpu_id))
if predictor_conf["enable_mkldnn"]:
config.enable_mkldnn()
if predictor_conf["switch_ir_optim"]:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册