提交 4e9db4ff 编写于 作者: L lym0302

add onnx tts engine, test=doc

上级 9e41ac85
......@@ -7,7 +7,7 @@ host: 127.0.0.1
port: 8092
# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['asr_online', 'tts_online']
# task choices = ['tts_online', 'tts_online-onnx']
# protocol = ['websocket', 'http'] (only one can be selected).
protocol: 'http'
engine_list: ['tts_online']
......@@ -20,8 +20,8 @@ engine_list: ['tts_online']
################################### TTS #########################################
################### speech task: tts; engine_type: online #######################
tts_online:
# am (acoustic model) choices=['fastspeech2_csmsc']
am: 'fastspeech2_csmsc'
# am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']
am: 'fastspeech2_cnndecoder_csmsc'
am_config:
am_ckpt:
am_stat:
......@@ -30,7 +30,7 @@ tts_online:
speaker_dict:
spk_id: 0
# voc (vocoder) choices=['mb_melgan_csmsc']
# voc (vocoder) choices=['mb_melgan_csmsc', 'hifigan_csmsc']
voc: 'mb_melgan_csmsc'
voc_config:
voc_ckpt:
......@@ -38,9 +38,51 @@ tts_online:
# others
lang: 'zh'
device: # set 'gpu:id' or 'cpu'
device: 'cpu' # set 'gpu:id' or 'cpu'
am_block: 42
am_pad: 12
voc_block: 14
voc_pad: 14
#################################################################################
# ENGINE CONFIG #
#################################################################################
################################### TTS #########################################
################### speech task: tts; engine_type: online-onnx #######################
tts_online-onnx:
# am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']
am: 'fastspeech2_cnndecoder_csmsc_onnx'
# am_ckpt is a list, if am is fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder model, decoder model, postnet model];
# if am is fastspeech2_csmsc_onnx, am_ckpt = [ckpt model];
am_ckpt: # list
am_stat:
phones_dict:
tones_dict:
speaker_dict:
spk_id: 0
am_sample_rate: 24000
am_sess_conf:
device: "cpu" # set 'gpu:id' or 'cpu'
use_trt: False
cpu_threads: 1
# voc (vocoder) choices=['mb_melgan_csmsc_onnx', 'hifigan_csmsc_onnx']
voc: 'mb_melgan_csmsc_onnx'
voc_ckpt:
voc_sample_rate: 24000
voc_sess_conf:
device: "cpu" # set 'gpu:id' or 'cpu'
use_trt: False
cpu_threads: 1
# others
lang: 'zh'
am_block: 42
am_pad: 12
voc_block: 14
voc_pad: 14
voc_upsample: 300
......@@ -35,7 +35,10 @@ class EngineFactory(object):
from paddlespeech.server.engine.tts.python.tts_engine import TTSEngine
return TTSEngine()
elif engine_name == 'tts' and engine_type == 'online':
from paddlespeech.server.engine.tts.online.tts_engine import TTSEngine
from paddlespeech.server.engine.tts.online.python.tts_engine import TTSEngine
return TTSEngine()
elif engine_name == 'tts' and engine_type == 'online-onnx':
from paddlespeech.server.engine.tts.online.onnx.tts_engine import TTSEngine
return TTSEngine()
elif engine_name == 'cls' and engine_type == 'inference':
from paddlespeech.server.engine.cls.paddleinference.cls_engine import CLSEngine
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
......@@ -202,6 +202,7 @@ class TTSServerExecutor(TTSExecutor):
"""
Init model and other resources from a specific path.
"""
#import pdb;pdb.set_trace()
if hasattr(self, 'am_inference') and hasattr(self, 'voc_inference'):
logger.info('Models had been initialized.')
return
......@@ -302,23 +303,6 @@ class TTSServerExecutor(TTSExecutor):
self.voc_inference.eval()
print("voc done!")
def get_phone(self, sentence, lang, merge_sentences, get_tone_ids):
tone_ids = None
if lang == 'zh':
input_ids = self.frontend.get_input_ids(
sentence,
merge_sentences=merge_sentences,
get_tone_ids=get_tone_ids)
phone_ids = input_ids["phone_ids"]
if get_tone_ids:
tone_ids = input_ids["tone_ids"]
elif lang == 'en':
input_ids = self.frontend.get_input_ids(
sentence, merge_sentences=merge_sentences)
phone_ids = input_ids["phone_ids"]
else:
print("lang should in {'zh', 'en'}!")
def depadding(self, data, chunk_num, chunk_id, block, pad, upsample):
"""
Streaming inference removes the result of pad inference
......@@ -479,6 +463,7 @@ class TTSEngine(BaseEngine):
def __init__(self, name=None):
"""Initialize TTS server engine
"""
#super(TTSEngine, self).__init__()
super().__init__()
def init(self, config: dict) -> bool:
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from typing import Optional
import onnxruntime as ort
def get_sess(model_path: Optional[os.PathLike]=None, sess_conf: dict=None):
    """Create an ONNX Runtime inference session for ``model_path``.

    Args:
        model_path: Path of the ONNX model file to load.
        sess_conf: Session settings. Keys read here are ``device`` ("cpu",
            or a string containing "gpu" such as "gpu:0"), ``use_trt``
            (only read for GPU devices) and ``cpu_threads``.

    Returns:
        The ``onnxruntime.InferenceSession`` created for the model.

    Raises:
        ValueError: If ``sess_conf`` is None, or ``device`` is not a string
            naming a CPU or GPU device.
    """
    # Validate the configuration before creating any runtime object so that a
    # bad config fails with a clear message instead of a TypeError or an
    # unbound ``providers`` variable.
    if sess_conf is None:
        raise ValueError("sess_conf must be a dict with 'device', "
                         "'use_trt' and 'cpu_threads'.")
    device = sess_conf["device"]
    if not isinstance(device, str):
        raise ValueError(
            f"sess_conf['device'] must be 'cpu' or 'gpu:id', got {device!r}.")

    if "gpu" in device:
        # fastspeech2/mb_melgan can't use trt now!
        if sess_conf["use_trt"]:
            providers = ['TensorrtExecutionProvider']
        else:
            providers = ['CUDAExecutionProvider']
    elif device == "cpu":
        providers = ['CPUExecutionProvider']
    else:
        raise ValueError(
            f"Unsupported device {device!r}, please set 'gpu:id' or 'cpu'.")

    sess_options = ort.SessionOptions()
    sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    sess_options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
    # NOTE(review): the reviewed text had lost its indentation; the thread
    # count is applied for every device here - confirm against the repository.
    sess_options.intra_op_num_threads = sess_conf["cpu_threads"]
    sess = ort.InferenceSession(
        model_path, providers=providers, sess_options=sess_options)
    return sess
......@@ -51,7 +51,6 @@ async def websocket_endpoint(websocket: WebSocket):
tts_results = next(wav_generator)
resp = {"status": 1, "audio": tts_results}
await websocket.send_json(resp)
logger.info("streaming audio...")
except StopIteration as e:
resp = {"status": 2, "audio": ''}
await websocket.send_json(resp)
......
......@@ -42,6 +42,7 @@ base = [
"loguru",
"matplotlib",
"nara_wpe",
"onnxruntime",
"pandas",
"paddleaudio",
"paddlenlp",
......@@ -64,12 +65,16 @@ base = [
"webrtcvad",
"yacs~=0.1.8",
"prettytable",
"zhon",
]
server = [
"fastapi",
"uvicorn",
"pattern_singleton",
"websockets",
"websocket",
"websocket-client",
]
requirements = {
......@@ -90,7 +95,6 @@ requirements = {
"unidecode",
"yq",
"pre-commit",
"zhon",
]
}
......
#!/usr/bin/python
import argparse
import os
import yaml
"""
def change_value1(yamlfile: str, target_key: str, target_value: str, engine: str="tts_online"):
tmp_yamlfile = yamlfile.split(".yaml")[0] + "_tmp.yaml"
os.system("cp %s %s" % (yamlfile, tmp_yamlfile))
with open(tmp_yamlfile) as f, open(yamlfile, "w+", encoding="utf-8") as fw:
y = yaml.safe_load(f)
y[engine][target_key] = target_value
print(yaml.dump(y, default_flow_style=False, sort_keys=False))
yaml.dump(y, fw, allow_unicode=True)
os.system("rm %s" % (tmp_yamlfile))
print(f"Change key: {target_key} to value: {target_value} successfully.")
def change_protocol(yamlfile: str, target_key: str, target_value: str):
tmp_yamlfile = yamlfile.split(".yaml")[0] + "_tmp.yaml"
os.system("cp %s %s" % (yamlfile, tmp_yamlfile))
with open(tmp_yamlfile) as f, open(yamlfile, "w+", encoding="utf-8") as fw:
y = yaml.safe_load(f)
y[target_key] = target_value
print(yaml.dump(y, default_flow_style=False, sort_keys=False))
yaml.dump(y, fw, allow_unicode=True)
os.system("rm %s" % (tmp_yamlfile))
print(f"Change key: {target_key} to value: {target_value} successfully.")
def change_engine_type(yamlfile: str, target_key: str, target_value: str):
tmp_yamlfile = yamlfile.split(".yaml")[0] + "_tmp.yaml"
os.system("cp %s %s" % (yamlfile, tmp_yamlfile))
with open(tmp_yamlfile) as f, open(yamlfile, "w+", encoding="utf-8") as fw:
y = yaml.safe_load(f)
y[target_key] = [target_value]
print(yaml.dump(y, default_flow_style=False, sort_keys=False))
yaml.dump(y, fw, allow_unicode=True)
os.system("rm %s" % (tmp_yamlfile))
print(f"Change key: {target_key} to value: {target_value} successfully.")
"""
def change_value(args):
    """Apply one edit to the server YAML config named by ``args.config_file``.

    The config is fully loaded and modified in memory before the file is
    rewritten, so a rejected request never leaves a truncated config behind.

    Args:
        args: Namespace with ``config_file``, ``change_type`` ("model",
            "protocol", "engine_type" or "device"), ``engine_type``,
            ``target_key`` and ``target_value``.

    Raises:
        ValueError: If ``target_key``/``target_value`` do not fit the
            requested ``change_type``, if ``change_type`` is unknown, or if
            a device change is requested while the first entry of
            ``engine_list`` is not a TTS online engine.
    """
    yamlfile = args.config_file
    change_type = args.change_type
    engine_type = args.engine_type
    target_key = args.target_key
    target_value = args.target_value

    # Reject inconsistent arguments before the config file is touched.
    if change_type == "protocol":
        if target_key != "protocol" or target_value not in ("http",
                                                            "websocket"):
            raise ValueError(
                "if change_type is protocol, target_key must be set protocol.")
    elif change_type == "engine_type":
        if target_key != "engine_list":
            raise ValueError(
                "if change_type is engine_type, target_key must be set engine_list."
            )
    elif change_type == "device":
        if target_key != "device":
            raise ValueError(
                "if change_type is device, target_key must be set device.")
    elif change_type != "model":
        raise ValueError("Error change_type, please set correct change_type.")

    with open(yamlfile, encoding="utf-8") as f:
        y = yaml.safe_load(f)

    if change_type == "model":
        # ONNX model names carry an "_onnx" suffix in the config.
        if engine_type == "tts_online-onnx":
            target_value = target_value + "_onnx"
        y[engine_type][target_key] = target_value
    elif change_type == "protocol":
        y[target_key] = target_value
    elif change_type == "engine_type":
        y[target_key] = [target_value]
    else:  # change_type == "device"
        # The device lives in a different place for each engine; the engine
        # currently listed first in engine_list decides which one is edited.
        active_engine = y["engine_list"][0]
        if active_engine == "tts_online":
            y["tts_online"]["device"] = target_value
        elif active_engine == "tts_online-onnx":
            y["tts_online-onnx"]["am_sess_conf"]["device"] = target_value
            y["tts_online-onnx"]["voc_sess_conf"]["device"] = target_value
        else:
            raise ValueError(
                "Error engine_list, please set tts_online or tts_online-onnx")

    print(yaml.dump(y, default_flow_style=False, sort_keys=False))
    with open(yamlfile, "w", encoding="utf-8") as fw:
        yaml.dump(y, fw, allow_unicode=True)
    print(f"Change key: {target_key} to value: {target_value} successfully.")
if __name__ == "__main__":
    # Command-line entry point: parse the requested change and apply it to
    # the config file through change_value().
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--config_file',
        type=str,
        default='./conf/application.yaml',
        help='server yaml file.')
    parser.add_argument(
        '--change_type',
        type=str,
        default="model",
        choices=["model", "protocol", "engine_type", "device"],
        help='kind of change to apply: model, protocol, engine_type or device',
    )
    parser.add_argument(
        '--engine_type',
        type=str,
        default="tts_online",
        help='engine type',
        choices=["tts_online", "tts_online-onnx"])
    # required=True already guarantees a value, so no default is given.
    parser.add_argument(
        '--target_key', type=str, help='Change key', required=True)
    parser.add_argument(
        '--target_value', type=str, help='target value', required=True)
    args = parser.parse_args()
    change_value(args)
# This is the parameter configuration file for PaddleSpeech Serving.
#################################################################################
# SERVER SETTING #
#################################################################################
host: 127.0.0.1
port: 8092
# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['tts_online', 'tts_online-onnx']
# protocol = ['websocket', 'http'] (only one can be selected).
protocol: 'http'
engine_list: ['tts_online']
#################################################################################
# ENGINE CONFIG #
#################################################################################
################################### TTS #########################################
################### speech task: tts; engine_type: online #######################
tts_online:
# am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']
am: 'fastspeech2_cnndecoder_csmsc'
am_config:
am_ckpt:
am_stat:
phones_dict:
tones_dict:
speaker_dict:
spk_id: 0
# voc (vocoder) choices=['mb_melgan_csmsc', 'hifigan_csmsc']
voc: 'mb_melgan_csmsc'
voc_config:
voc_ckpt:
voc_stat:
# others
lang: 'zh'
device: 'cpu' # set 'gpu:id' or 'cpu'
am_block: 42
am_pad: 12
voc_block: 14
voc_pad: 14
#################################################################################
# ENGINE CONFIG #
#################################################################################
################################### TTS #########################################
################### speech task: tts; engine_type: online-onnx #######################
tts_online-onnx:
# am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']
am: 'fastspeech2_cnndecoder_csmsc_onnx'
# am_ckpt is a list, if am is fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder model, decoder model, postnet model];
# if am is fastspeech2_csmsc_onnx, am_ckpt = [ckpt model];
am_ckpt: # list
am_stat:
phones_dict:
tones_dict:
speaker_dict:
spk_id: 0
am_sample_rate: 24000
am_sess_conf:
device: "cpu" # set 'gpu:id' or 'cpu'
use_trt: False
cpu_threads: 1
# voc (vocoder) choices=['mb_melgan_csmsc_onnx', 'hifigan_csmsc_onnx']
voc: 'mb_melgan_csmsc_onnx'
voc_ckpt:
voc_sample_rate: 24000
voc_sess_conf:
device: "cpu" # set 'gpu:id' or 'cpu'
use_trt: False
cpu_threads: 1
# others
lang: 'zh'
am_block: 42
am_pad: 12
voc_block: 14
voc_pad: 14
voc_upsample: 300
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import base64
import json
import os
import time
import requests
from paddlespeech.server.utils.audio_process import pcm2wav
def save_audio(buffer, audio_path) -> bool:
    """Save raw PCM bytes to ``audio_path`` as a ``pcm`` or ``wav`` file.

    Args:
        buffer: Raw PCM audio bytes.
        audio_path: Destination path; its suffix selects the format.

    Returns:
        True when the audio was written, False when the suffix is neither
        "pcm" nor "wav".
    """
    # Local import keeps this block self-contained; only the wav branch
    # needs it.
    import tempfile

    # Use the audio_path argument rather than the global CLI namespace, so
    # the function also works when called from other code.
    if audio_path.endswith("pcm"):
        with open(audio_path, "wb") as f:
            f.write(buffer)
    elif audio_path.endswith("wav"):
        # pcm2wav converts from a file, so stage the bytes in a unique
        # temporary file and always remove it afterwards.
        tmp = tempfile.NamedTemporaryFile(suffix=".pcm", delete=False)
        try:
            with tmp:
                tmp.write(buffer)
            pcm2wav(
                tmp.name, audio_path, channels=1, bits=16, sample_rate=24000)
        finally:
            os.remove(tmp.name)
    else:
        print("Only supports saved audio format is pcm or wav")
        return False
    return True
def test(args):
    """Request streaming synthesis over HTTP and report timing figures.

    Posts the synthesis parameters to the streaming TTS endpoint, decodes
    every received chunk from base64, prints the first-packet and
    last-packet latencies, the audio duration and the real-time factor, and
    saves the audio when ``args.save_path`` is set.

    Args:
        args: Namespace with ``text``, ``spk_id``, ``speed``, ``volume``,
            ``sample_rate``, ``server``, ``port`` and ``save_path``.
    """
    params = {
        "text": args.text,
        "spk_id": args.spk_id,
        "speed": args.speed,
        "volume": args.volume,
        "sample_rate": args.sample_rate,
        "save_path": ''
    }
    url = "http://" + str(args.server) + ":" + str(
        args.port) + "/paddlespeech/streaming/tts"

    pieces = []  # decoded chunks, joined once at the end
    waiting_for_first = True
    st = time.time()
    html = requests.post(url, json.dumps(params), stream=True)
    # NOTE(review): each 1024-byte chunk is decoded on its own; this assumes
    # chunk boundaries line up with the server's base64 framing - confirm.
    for chunk in html.iter_content(chunk_size=1024):
        chunk = base64.b64decode(chunk)  # bytes
        if waiting_for_first:
            first_response = time.time() - st
            print(f"首包响应:{first_response} s")
            waiting_for_first = False
        pieces.append(chunk)
    buffer = b''.join(pieces)

    final_response = time.time() - st
    # The byte count is divided by 2 and then by 24000 to get seconds.
    duration = len(buffer) / 2.0 / 24000

    print(f"尾包响应:{final_response} s")
    print(f"音频时长:{duration} s")
    if duration > 0:
        print(f"RTF: {final_response / duration}")
    else:
        # No audio came back; the ratio is undefined.
        print("RTF: N/A (no audio received)")

    if args.save_path is not None:
        if save_audio(buffer, args.save_path):
            print("音频保存至:", args.save_path)
if __name__ == "__main__":
    # Command-line options, listed in the order they are registered.
    cli_options = [
        ('--text', dict(
            type=str,
            default="您好,欢迎使用语音合成服务。",
            help='A sentence to be synthesized')),
        ('--spk_id', dict(type=int, default=0, help='Speaker id')),
        ('--speed', dict(type=float, default=1.0, help='Audio speed')),
        ('--volume', dict(type=float, default=1.0, help='Audio volume')),
        ('--sample_rate', dict(
            type=int,
            default=0,
            help='Sampling rate, the default is the same as the model')),
        ("--server", dict(type=str, help="server ip", default="127.0.0.1")),
        ("--port", dict(type=int, help="server port", default=8092)),
        ("--save_path", dict(
            type=str, help="save audio path", default=None)),
    ]

    parser = argparse.ArgumentParser()
    for option_name, option_settings in cli_options:
        parser.add_argument(option_name, **option_settings)

    # Kept as a module-level name: other code in this script reads it.
    args = parser.parse_args()
    test(args)
#!/bin/bash
# bash test.sh
StartService(){
    # Start the server in the background and wait until it is up or has failed.
    # Reads:  $config_file, $log, $target_start_num, $error_time
    # Writes: ./pid (server pid), $start_num, $flag ("normal" / "unnormal"), $error_time
    paddlespeech_server start --config_file $config_file 1>>$log/server.log 2>>$log/server.log.wf &
    echo $! > pid

    start_num=$(cat $log/server.log.wf | grep "INFO: Uvicorn running on http://" -c)
    flag="normal"
    while [[ $start_num -lt $target_start_num && $flag == "normal" ]]
    do
        start_num=$(cat $log/server.log.wf | grep "INFO: Uvicorn running on http://" -c)
        # start service failed
        if [ $(cat $log/server.log.wf | grep -i "Failed to warm up on tts engine." -c) -gt $error_time ];then
            echo "Service started failed." | tee -a $log/test_result.log
            error_time=$(cat $log/server.log.wf | grep -i "Failed to warm up on tts engine." -c)
            flag="unnormal"
        elif [ $(cat $log/server.log.wf | grep -i "AssertionError" -c) -gt $error_time ];then
            echo "Service started failed." | tee -a $log/test_result.log
            # Plain assignment: "+=" on a shell variable appends text ("0" + "1" -> "01")
            # instead of adding numbers.
            error_time=$(cat $log/server.log.wf | grep -i "AssertionError" -c)
            flag="unnormal"
        else
            # Nothing new in the log yet; wait instead of spinning on the file.
            sleep 1
        fi
    done
}
ClientTest_http(){
    # Run the HTTP client three times, counting each run in $http_test_times.
    i=1
    while (( i <= 3 ))
    do
        python http_client.py --save_path ./out_http.wav
        ((http_test_times+=1))
        ((i++))
    done
}
ClientTest_ws(){
    # Run the websocket client three times, counting each run in $ws_test_times.
    i=1
    while (( i <= 3 ))
    do
        python ws_client.py
        ((ws_test_times+=1))
        ((i++))
    done
}
GetTestResult_http() {
    # Compare the number of "200 OK" lines in the server log with the number of
    # HTTP client runs, report the verdict, then resync the run counter.
    local verdict
    http_response_success_time=$(cat $log/server.log | grep "200 OK" -c)
    if (( $http_response_success_time == $http_test_times )) ; then
        verdict="Testing successfully."
    else
        verdict="Testing failed."
    fi
    echo "$verdict $info" | tee -a $log/test_result.log
    http_test_times=$http_response_success_time
}
GetTestResult_ws() {
    # Compare the number of completed audio streams in the server log with the
    # number of websocket client runs, report the verdict, then resync the counter.
    local verdict
    ws_response_success_time=$(cat $log/server.log.wf | grep "Complete the transmission of audio streams" -c)
    if (( $ws_response_success_time == $ws_test_times )) ; then
        verdict="Testing successfully."
    else
        verdict="Testing failed."
    fi
    echo "$verdict $info" | tee -a $log/test_result.log
    ws_test_times=$ws_response_success_time
}
# Usage: bash test.sh <engine_type> <log_dir>
if [ $# -lt 2 ]; then
    # Without a log dir the cleanup below would operate on paths under "/".
    echo "Usage: bash test.sh <engine_type> <log_dir>"
    exit 1
fi

engine_type=$1
log=$2
mkdir -p $log
rm -rf $log/server.log.wf
rm -rf $log/server.log
rm -rf $log/test_result.log

config_file=./conf/application.yaml
server_ip=$(cat $config_file | grep "host" | awk -F " " '{print $2}')
port=$(cat $config_file | grep "port" | awk '/port:/ {print $2}')

echo "Sevice ip: $server_ip" | tee $log/test_result.log
echo "Sevice port: $port" | tee -a $log/test_result.log

# whether a process is listening on $port
pid=`lsof -i :"$port"|grep -v "PID" | awk '{print $2}'`
if [ "$pid" != "" ]; then
    echo "The port: $port is occupied, please change another port"
    exit 1
fi

target_start_num=0 # the number of start service
test_times=0 # The number of client test
error_time=0 # The number of error occurrences in the startup failure server.log.wf file

RunTestRound(){
    # Run one test round against the current config: start the server, run the
    # client for the given protocol three times, record the result, stop the server.
    # $1: protocol (http / websocket), $2: am name, $3: voc name (names are for the log only).
    local protocol=$1
    info="start server: engine: $engine_type, protocol: $protocol, am: $2, voc: $3."
    echo "$info" | tee -a $log/test_result.log

    ((target_start_num+=1))
    StartService

    if [[ $start_num -eq $target_start_num && $flag == "normal" ]]; then
        echo "Service started successfully." | tee -a $log/test_result.log
        if [ "$protocol" == "http" ]; then
            ClientTest_http
        else
            ClientTest_ws
        fi
        echo "This round of testing is over." | tee -a $log/test_result.log
        if [ "$protocol" == "http" ]; then
            GetTestResult_http
        else
            GetTestResult_ws
        fi
    else
        echo "Service failed to start, no client test."
        target_start_num=$start_num
    fi

    kill -9 `cat pid`
    rm -rf pid
    sleep 2s
    echo "**************************************************************************************" | tee -a $log/test_result.log
}

# ---------------------------------- http ----------------------------------
RunTestRound http fastspeech2_cnndecoder_csmsc mb_melgan_csmsc

python change_yaml.py --engine_type $engine_type --target_key voc --target_value hifigan_csmsc # change voc: mb_melgan_csmsc -> hifigan_csmsc
RunTestRound http fastspeech2_cnndecoder_csmsc hifigan_csmsc

python change_yaml.py --engine_type $engine_type --target_key am --target_value fastspeech2_csmsc # change am: fastspeech2_cnndecoder_csmsc -> fastspeech2_csmsc
RunTestRound http fastspeech2_csmsc hifigan_csmsc

python change_yaml.py --engine_type $engine_type --target_key voc --target_value mb_melgan_csmsc # change voc: hifigan_csmsc -> mb_melgan_csmsc
RunTestRound http fastspeech2_csmsc mb_melgan_csmsc

# -------------------------------- websocket --------------------------------
echo "********************************************* websocket **********************************************************"
python change_yaml.py --engine_type $engine_type --change_type protocol --target_key protocol --target_value websocket
RunTestRound websocket fastspeech2_csmsc mb_melgan_csmsc

python change_yaml.py --engine_type $engine_type --target_key voc --target_value hifigan_csmsc # change voc: mb_melgan_csmsc -> hifigan_csmsc
RunTestRound websocket fastspeech2_csmsc hifigan_csmsc

python change_yaml.py --engine_type $engine_type --target_key am --target_value fastspeech2_cnndecoder_csmsc # change am: fastspeech2_csmsc -> fastspeech2_cnndecoder_csmsc
RunTestRound websocket fastspeech2_cnndecoder_csmsc hifigan_csmsc

python change_yaml.py --engine_type $engine_type --target_key voc --target_value mb_melgan_csmsc # change voc: hifigan_csmsc -> mb_melgan_csmsc
RunTestRound websocket fastspeech2_cnndecoder_csmsc mb_melgan_csmsc

echo "All tests completed." | tee -a $log/test_result.log

# show all the test results
echo "***************** Here are all the test results ********************"
cat $log/test_result.log

# Restoring conf is the same as demos/speech_server
cp ./tts_online_application.yaml ./conf/application.yaml -rf
sleep 2s
\ No newline at end of file
#!/bin/bash
# bash test_all.sh
# Runs test.sh for both TTS engines, first on CPU and then on GPU, and prints
# every result log at the end.

log_all_dir=./log

# CPU: python engine, then the onnx engine.
bash test.sh tts_online $log_all_dir/log_tts_online_cpu

python change_yaml.py --change_type engine_type --target_key engine_list --target_value tts_online-onnx
bash test.sh tts_online-onnx $log_all_dir/log_tts_online-onnx_cpu

# GPU: python engine, then the onnx engine.
python change_yaml.py --change_type device --target_key device --target_value gpu:3
bash test.sh tts_online $log_all_dir/log_tts_online_gpu

python change_yaml.py --change_type engine_type --target_key engine_list --target_value tts_online-onnx
python change_yaml.py --change_type device --target_key device --target_value gpu:3
bash test.sh tts_online-onnx $log_all_dir/log_tts_online-onnx_gpu

echo "************************************** show all test results ****************************************"
for run_name in log_tts_online_cpu log_tts_online-onnx_cpu log_tts_online_gpu log_tts_online-onnx_gpu
do
    cat $log_all_dir/$run_name/test_result.log
done
# This is the parameter configuration file for PaddleSpeech Serving.
#################################################################################
# SERVER SETTING #
#################################################################################
host: 127.0.0.1
port: 8092
# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['tts_online', 'tts_online-onnx']
# protocol = ['websocket', 'http'] (only one can be selected).
protocol: 'http'
engine_list: ['tts_online']
#################################################################################
# ENGINE CONFIG #
#################################################################################
################################### TTS #########################################
################### speech task: tts; engine_type: online #######################
tts_online:
# am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']
am: 'fastspeech2_cnndecoder_csmsc'
am_config:
am_ckpt:
am_stat:
phones_dict:
tones_dict:
speaker_dict:
spk_id: 0
# voc (vocoder) choices=['mb_melgan_csmsc', 'hifigan_csmsc']
voc: 'mb_melgan_csmsc'
voc_config:
voc_ckpt:
voc_stat:
# others
lang: 'zh'
device: 'cpu' # set 'gpu:id' or 'cpu'
am_block: 42
am_pad: 12
voc_block: 14
voc_pad: 14
#################################################################################
# ENGINE CONFIG #
#################################################################################
################################### TTS #########################################
################### speech task: tts; engine_type: online-onnx #######################
tts_online-onnx:
# am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']
am: 'fastspeech2_cnndecoder_csmsc_onnx'
# am_ckpt is a list, if am is fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder model, decoder model, postnet model];
# if am is fastspeech2_csmsc_onnx, am_ckpt = [ckpt model];
am_ckpt: # list
am_stat:
phones_dict:
tones_dict:
speaker_dict:
spk_id: 0
am_sample_rate: 24000
am_sess_conf:
device: "cpu" # set 'gpu:id' or 'cpu'
use_trt: False
cpu_threads: 1
# voc (vocoder) choices=['mb_melgan_csmsc_onnx', 'hifigan_csmsc_onnx']
voc: 'mb_melgan_csmsc_onnx'
voc_ckpt:
voc_sample_rate: 24000
voc_sess_conf:
device: "cpu" # set 'gpu:id' or 'cpu'
use_trt: False
cpu_threads: 1
# others
lang: 'zh'
am_block: 42
am_pad: 12
voc_block: 14
voc_pad: 14
voc_upsample: 300
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import _thread as thread
import argparse
import base64
import json
import ssl
import time
import websocket
# Module-level state shared by the websocket callbacks below.
# Truthy until on_message has printed the first-packet latency.
flag = 1
# time.time() taken just before the request text is sent (set in on_open).
st = 0.0
# Decoded audio bytes accumulated from every received message.
all_bytes = b''
class WsParam(object):
    """Holds the text to synthesize and the websocket URL of the TTS server."""

    def __init__(self, text, server="127.0.0.1", port=8090):
        """Store the request text and build the endpoint URL from server and port."""
        self.text = text
        self.server = server
        self.port = port
        self.url = "".join(
            ("ws://", self.server, ":", str(self.port), "/ws/tts"))

    def create_url(self):
        """Return the websocket URL built in the constructor."""
        return self.url
def on_message(ws, message):
    """Handle one JSON message from the TTS websocket server.

    The message's base64 ``audio`` field is decoded and appended to the
    module-level ``all_bytes``. ``status`` 0 acknowledges creation, 1 marks
    a streamed packet (first-packet latency is printed once), and 2 ends
    the stream: statistics are printed, the audio is written to
    ``./out.pcm`` and the connection is closed.

    Args:
        ws: The websocket connection; closed after the final packet.
        message: JSON text with ``audio`` and ``status`` fields.
    """
    global flag
    global st
    global all_bytes

    try:
        message = json.loads(message)
        audio = message["audio"]
        audio = base64.b64decode(audio)  # bytes
        status = message["status"]
        all_bytes += audio

        if status == 0:
            print("create successfully.")
        elif status == 1:
            if flag:
                print(f"首包响应:{time.time() - st} s")
                flag = 0
        elif status == 2:
            final_response = time.time() - st
            # The byte count is divided by 2 and then by 24000 to get seconds.
            duration = len(all_bytes) / 2.0 / 24000
            print(f"尾包响应:{final_response} s")
            print(f"音频时长:{duration} s")
            if duration > 0:
                print(f"RTF: {final_response / duration}")
            else:
                # With no audio the ratio is undefined. Skipping it keeps the
                # file write and the close below from being bypassed by a
                # ZeroDivisionError.
                print("RTF: N/A (no audio received)")
            with open("./out.pcm", "wb") as f:
                f.write(all_bytes)
            print("ws is closed")
            ws.close()
        else:
            print("infer error")

    except Exception as e:
        print("receive msg,but parse exception:", e)
def on_error(ws, error):
    """Print an error reported for the websocket connection."""
    label = "### error:"
    print(label, error)
def on_close(ws, close_status_code=None, close_msg=None):
    """Print a notice when the websocket connection is closed.

    The two optional arguments make the callback usable with
    websocket-client versions that pass the close status code and message
    as well as with versions that pass only ``ws``.
    """
    print("### closed ###")
def on_open(ws):
    """Send the synthesis request in a new thread once the connection is open."""

    def run(*args):
        global st
        # The text is sent base64-encoded inside a JSON object.
        encoded_text = base64.b64encode((wsParam.text).encode('utf-8'))
        payload = json.dumps({"text": str(encoded_text, "UTF8")})
        print("Start sending text data")
        # Start of the latency measurement read by on_message.
        st = time.time()
        ws.send(payload)

    thread.start_new_thread(run, ())
if __name__ == "__main__":
    # Parse the command line, show the settings, then run the websocket
    # client until the connection is closed.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--text",
        type=str,
        help="A sentence to be synthesized",
        default="您好,欢迎使用语音合成服务。")
    parser.add_argument(
        "--server", type=str, help="server ip", default="127.0.0.1")
    parser.add_argument("--port", type=int, help="server port", default=8092)
    args = parser.parse_args()

    banner = "***************************************"
    print(banner)
    for label, value in (("Server ip: ", args.server),
                         ("Server port: ", args.port),
                         ("Sentence to be synthesized: ", args.text)):
        print(label, value)
    print(banner)

    # wsParam stays a module-level name: on_open reads the text from it.
    wsParam = WsParam(text=args.text, server=args.server, port=args.port)

    websocket.enableTrace(False)
    wsUrl = wsParam.create_url()
    callbacks = dict(
        on_message=on_message, on_error=on_error, on_close=on_close)
    ws = websocket.WebSocketApp(wsUrl, **callbacks)
    ws.on_open = on_open
    ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册