add onnx tts engine, test=doc

4e9db4ff · lym0302 · 9e41ac85 · 4e9db4ff · 4e9db4ff · 4e9db4ff
19 changed file
--- a/paddlespeech/server/conf/tts_online_application.yaml
+++ b/paddlespeech/server/conf/tts_online_application.yaml
@@ -7,7 +7,7 @@ host: 127.0.0.1
 port: 8092

 # The task format in the engin_list is: <speech task>_<engine type>
-# task choices = ['asr_online', 'tts_online']
+# task choices = ['tts_online', 'tts_online-onnx']
 # protocol = ['websocket', 'http'] (only one can be selected).
 protocol: 'http'
 engine_list: ['tts_online']
@@ -20,8 +20,8 @@ engine_list: ['tts_online']
 ################################### TTS #########################################
 ################### speech task: tts; engine_type: online #######################
 tts_online: 
-    # am (acoustic model) choices=['fastspeech2_csmsc']        
-    am: 'fastspeech2_csmsc'   
+    # am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']        
+    am: 'fastspeech2_cnndecoder_csmsc'   
    am_config: 
    am_ckpt: 
    am_stat: 
@@ -30,7 +30,7 @@ tts_online:
    speaker_dict: 
    spk_id: 0

-    # voc (vocoder) choices=['mb_melgan_csmsc']
+    # voc (vocoder) choices=['mb_melgan_csmsc', 'hifigan_csmsc']
    voc: 'mb_melgan_csmsc'
    voc_config: 
    voc_ckpt: 
@@ -38,9 +38,51 @@ tts_online:

    # others
    lang: 'zh'
-    device:  # set 'gpu:id' or 'cpu'
+    device: 'cpu' # set 'gpu:id' or 'cpu'
    am_block: 42
    am_pad: 12
    voc_block: 14
    voc_pad: 14
    
+
+
+#################################################################################
+#                                ENGINE CONFIG                                  #
+#################################################################################
+
+################################### TTS #########################################
+################### speech task: tts; engine_type: online-onnx #######################
+tts_online-onnx: 
+    # am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']        
+    am: 'fastspeech2_cnndecoder_csmsc_onnx' 
+    # am_ckpt is a list, if am is fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder model, decoder model, postnet model];
+    # if am is fastspeech2_csmsc_onnx, am_ckpt = [ckpt model];
+    am_ckpt:   # list
+    am_stat: 
+    phones_dict: 
+    tones_dict: 
+    speaker_dict: 
+    spk_id: 0
+    am_sample_rate: 24000
+    am_sess_conf:
+        device: "cpu" # set 'gpu:id' or 'cpu'
+        use_trt: False
+        cpu_threads: 1
+
+    # voc (vocoder) choices=['mb_melgan_csmsc_onnx', 'hifigan_csmsc_onnx']
+    voc: 'mb_melgan_csmsc_onnx'
+    voc_ckpt: 
+    voc_sample_rate: 24000
+    voc_sess_conf:
+        device: "cpu" # set 'gpu:id' or 'cpu'
+        use_trt: False
+        cpu_threads: 1
+
+    # others
+    lang: 'zh'
+    am_block: 42
+    am_pad: 12
+    voc_block: 14
+    voc_pad: 14
+    voc_upsample: 300
+    
--- a/paddlespeech/server/engine/engine_factory.py
+++ b/paddlespeech/server/engine/engine_factory.py
@@ -35,7 +35,10 @@ class EngineFactory(object):
            from paddlespeech.server.engine.tts.python.tts_engine import TTSEngine
            return TTSEngine()
        elif engine_name == 'tts' and engine_type == 'online':
-            from paddlespeech.server.engine.tts.online.tts_engine import TTSEngine
+            from paddlespeech.server.engine.tts.online.python.tts_engine import TTSEngine
+            return TTSEngine()
+        elif engine_name == 'tts' and engine_type == 'online-onnx':
+            from paddlespeech.server.engine.tts.online.onnx.tts_engine import TTSEngine
            return TTSEngine()
        elif engine_name == 'cls' and engine_type == 'inference':
            from paddlespeech.server.engine.cls.paddleinference.cls_engine import CLSEngine

--- a/paddlespeech/server/engine/tts/online/onnx/__init__.py
+++ b/paddlespeech/server/engine/tts/online/onnx/__init__.py
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/paddlespeech/server/engine/tts/online/onnx/tts_engine.py
+++ b/paddlespeech/server/engine/tts/online/onnx/tts_engine.py
--- a/paddlespeech/server/engine/tts/online/python/__init__.py
+++ b/paddlespeech/server/engine/tts/online/python/__init__.py
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/paddlespeech/server/engine/tts/online/tts_engine.py
+++ b/paddlespeech/server/engine/tts/online/tts_engine.py
@@ -202,6 +202,7 @@ class TTSServerExecutor(TTSExecutor):
        """
        Init model and other resources from a specific path.
        """
+        #import pdb;pdb.set_trace()
        if hasattr(self, 'am_inference') and hasattr(self, 'voc_inference'):
            logger.info('Models had been initialized.')
            return
@@ -302,23 +303,6 @@ class TTSServerExecutor(TTSExecutor):
        self.voc_inference.eval()
        print("voc done!")

-    def get_phone(self, sentence, lang, merge_sentences, get_tone_ids):
-        tone_ids = None
-        if lang == 'zh':
-            input_ids = self.frontend.get_input_ids(
-                sentence,
-                merge_sentences=merge_sentences,
-                get_tone_ids=get_tone_ids)
-            phone_ids = input_ids["phone_ids"]
-            if get_tone_ids:
-                tone_ids = input_ids["tone_ids"]
-        elif lang == 'en':
-            input_ids = self.frontend.get_input_ids(
-                sentence, merge_sentences=merge_sentences)
-            phone_ids = input_ids["phone_ids"]
-        else:
-            print("lang should in {'zh', 'en'}!")
-
    def depadding(self, data, chunk_num, chunk_id, block, pad, upsample):
        """ 
        Streaming inference removes the result of pad inference
@@ -479,6 +463,7 @@ class TTSEngine(BaseEngine):
    def __init__(self, name=None):
        """Initialize TTS server engine
        """
+        #super(TTSEngine, self).__init__()
        super().__init__()

    def init(self, config: dict) -> bool:

--- a/paddlespeech/server/utils/onnx_infer.py
+++ b/paddlespeech/server/utils/onnx_infer.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+from typing import Optional
+
+import onnxruntime as ort
+
+
+def get_sess(model_path: Optional[os.PathLike]=None, sess_conf: dict=None):
+    sess_options = ort.SessionOptions()
+    sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
+    sess_options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
+
+    if "gpu" in sess_conf["device"]:
+        # fastspeech2/mb_melgan can't use trt now!
+        if sess_conf["use_trt"]:
+            providers = ['TensorrtExecutionProvider']
+        else:
+            providers = ['CUDAExecutionProvider']
+    elif sess_conf["device"] == "cpu":
+        providers = ['CPUExecutionProvider']
+    sess_options.intra_op_num_threads = sess_conf["cpu_threads"]
+    sess = ort.InferenceSession(
+        model_path, providers=providers, sess_options=sess_options)
+    return sess
--- a/paddlespeech/server/ws/tts_socket.py
+++ b/paddlespeech/server/ws/tts_socket.py
@@ -51,7 +51,6 @@ async def websocket_endpoint(websocket: WebSocket):
                tts_results = next(wav_generator)
                resp = {"status": 1, "audio": tts_results}
                await websocket.send_json(resp)
-                logger.info("streaming audio...")
            except StopIteration as e:
                resp = {"status": 2, "audio": ''}
                await websocket.send_json(resp)

--- a/setup.py
+++ b/setup.py
@@ -42,6 +42,7 @@ base = [
    "loguru",
    "matplotlib",
    "nara_wpe",
+    "onnxruntime",
    "pandas",
    "paddleaudio",
    "paddlenlp",
@@ -64,12 +65,16 @@ base = [
    "webrtcvad",
    "yacs~=0.1.8",
    "prettytable",
+    "zhon",
 ]

 server = [
    "fastapi",
    "uvicorn",
    "pattern_singleton",
+    "websockets",
+    "websocket",
+    "websocket-client",
 ]

 requirements = {
@@ -90,7 +95,6 @@ requirements = {
        "unidecode",
        "yq",
        "pre-commit",
-        "zhon",
    ]
 }


--- a/tests/unit/server/change_yaml.py
+++ b/tests/unit/server/change_yaml.py
--- a/tests/unit/server/conf/application.yaml
+++ b/tests/unit/server/conf/application.yaml
--- a/tests/unit/server/test_server_client.sh
+++ b/tests/unit/server/test_server_client.sh
--- a/tests/unit/server/online/tts/change_yaml.py
+++ b/tests/unit/server/online/tts/change_yaml.py
+#!/usr/bin/python
+import argparse
+import os
+
+import yaml
+"""
+def change_value1(yamlfile: str, target_key: str, target_value: str, engine: str="tts_online"):
+    tmp_yamlfile = yamlfile.split(".yaml")[0] + "_tmp.yaml"
+    os.system("cp %s %s" % (yamlfile, tmp_yamlfile))
+
+    with open(tmp_yamlfile) as f, open(yamlfile, "w+", encoding="utf-8") as fw:
+        y = yaml.safe_load(f)
+        y[engine][target_key] = target_value
+
+        print(yaml.dump(y, default_flow_style=False, sort_keys=False))
+        yaml.dump(y, fw, allow_unicode=True)
+    os.system("rm %s" % (tmp_yamlfile))
+    print(f"Change key: {target_key} to value: {target_value} successfully.")
+
+def change_protocol(yamlfile: str, target_key: str, target_value: str):
+    tmp_yamlfile = yamlfile.split(".yaml")[0] + "_tmp.yaml"
+    os.system("cp %s %s" % (yamlfile, tmp_yamlfile))
+
+    with open(tmp_yamlfile) as f, open(yamlfile, "w+", encoding="utf-8") as fw:
+        y = yaml.safe_load(f)
+        y[target_key] = target_value
+
+        print(yaml.dump(y, default_flow_style=False, sort_keys=False))
+        yaml.dump(y, fw, allow_unicode=True)
+    os.system("rm %s" % (tmp_yamlfile))
+    print(f"Change key: {target_key} to value: {target_value} successfully.")
+
+def change_engine_type(yamlfile: str, target_key: str, target_value: str):
+    tmp_yamlfile = yamlfile.split(".yaml")[0] + "_tmp.yaml"
+    os.system("cp %s %s" % (yamlfile, tmp_yamlfile))
+
+    with open(tmp_yamlfile) as f, open(yamlfile, "w+", encoding="utf-8") as fw:
+        y = yaml.safe_load(f)
+        y[target_key] = [target_value]
+
+        print(yaml.dump(y, default_flow_style=False, sort_keys=False))
+        yaml.dump(y, fw, allow_unicode=True)
+    os.system("rm %s" % (tmp_yamlfile))
+    print(f"Change key: {target_key} to value: {target_value} successfully.")
+"""
+
+
+def change_value(args):
+    yamlfile = args.config_file
+    change_type = args.change_type
+    engine_type = args.engine_type
+    target_key = args.target_key
+    target_value = args.target_value
+
+    tmp_yamlfile = yamlfile.split(".yaml")[0] + "_tmp.yaml"
+    os.system("cp %s %s" % (yamlfile, tmp_yamlfile))
+
+    with open(tmp_yamlfile) as f, open(yamlfile, "w+", encoding="utf-8") as fw:
+        y = yaml.safe_load(f)
+
+        if change_type == "model":
+            if engine_type == "tts_online-onnx":
+                target_value = target_value + "_onnx"
+            y[engine_type][target_key] = target_value
+        elif change_type == "protocol":
+            assert (target_key == "protocol" and (
+                target_value == "http" or target_value == "websocket"
+            )), "if change_type is protocol, target_key must be set protocol."
+            y[target_key] = target_value
+        elif change_type == "engine_type":
+            assert (
+                target_key == "engine_list"
+            ), "if change_type is engine_type, target_key must be set engine_list."
+            y[target_key] = [target_value]
+        elif change_type == "device":
+            assert (
+                target_key == "device"
+            ), "if change_type is device, target_key must be set device."
+            if y["engine_list"][0] == "tts_online":
+                y["tts_online"]["device"] = target_value
+            elif y["engine_list"][0] == "tts_online-onnx":
+                y["tts_online-onnx"]["am_sess_conf"]["device"] = target_value
+                y["tts_online-onnx"]["voc_sess_conf"]["device"] = target_value
+            else:
+                print(
+                    "Error engine_list, please set tts_online or tts_online-onnx"
+                )
+
+        else:
+            print("Error change_type, please set correct change_type.")
+
+        print(yaml.dump(y, default_flow_style=False, sort_keys=False))
+        yaml.dump(y, fw, allow_unicode=True)
+    os.system("rm %s" % (tmp_yamlfile))
+    print(f"Change key: {target_key} to value: {target_value} successfully.")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--config_file',
+        type=str,
+        default='./conf/application.yaml',
+        help='server yaml file.')
+    parser.add_argument(
+        '--change_type',
+        type=str,
+        default="model",
+        choices=["model", "protocol", "engine_type", "device"],
+        help='change protocol', )
+    parser.add_argument(
+        '--engine_type',
+        type=str,
+        default="tts_online",
+        help='engine type',
+        choices=["tts_online", "tts_online-onnx"])
+    parser.add_argument(
+        '--target_key',
+        type=str,
+        default=None,
+        help='Change key',
+        required=True)
+    parser.add_argument(
+        '--target_value',
+        type=str,
+        default=None,
+        help='target value',
+        required=True)
+
+    args = parser.parse_args()
+
+    change_value(args)
+    """
+    if args.change_type == "model":
+        change_value(args.config_file, args.target_key, args.target_value, args.engine)
+    elif args.change_type == "protocol":
+        change_protocol(args.config_file, args.target_key, args.target_value)
+    else:
+        print("Please set correct change type, model or protocol")
+    """
--- a/tests/unit/server/online/tts/conf/application.yaml
+++ b/tests/unit/server/online/tts/conf/application.yaml
+# This is the parameter configuration file for PaddleSpeech Serving.
+
+#################################################################################
+#                             SERVER SETTING                                    #
+#################################################################################
+host: 127.0.0.1
+port: 8092
+
+# The task format in the engin_list is: <speech task>_<engine type>
+# task choices = ['tts_online', 'tts_online-onnx']
+# protocol = ['websocket', 'http'] (only one can be selected).
+protocol: 'http'
+engine_list: ['tts_online']
+
+
+#################################################################################
+#                                ENGINE CONFIG                                  #
+#################################################################################
+
+################################### TTS #########################################
+################### speech task: tts; engine_type: online #######################
+tts_online: 
+    # am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']        
+    am: 'fastspeech2_cnndecoder_csmsc'   
+    am_config: 
+    am_ckpt: 
+    am_stat: 
+    phones_dict: 
+    tones_dict: 
+    speaker_dict: 
+    spk_id: 0
+
+    # voc (vocoder) choices=['mb_melgan_csmsc', 'hifigan_csmsc']
+    voc: 'mb_melgan_csmsc'
+    voc_config: 
+    voc_ckpt: 
+    voc_stat: 
+
+    # others
+    lang: 'zh'
+    device: 'cpu' # set 'gpu:id' or 'cpu'
+    am_block: 42
+    am_pad: 12
+    voc_block: 14
+    voc_pad: 14
+    
+
+
+#################################################################################
+#                                ENGINE CONFIG                                  #
+#################################################################################
+
+################################### TTS #########################################
+################### speech task: tts; engine_type: online-onnx #######################
+tts_online-onnx: 
+    # am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']        
+    am: 'fastspeech2_cnndecoder_csmsc_onnx' 
+    # am_ckpt is a list, if am is fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder model, decoder model, postnet model];
+    # if am is fastspeech2_csmsc_onnx, am_ckpt = [ckpt model];
+    am_ckpt:   # list
+    am_stat: 
+    phones_dict: 
+    tones_dict: 
+    speaker_dict: 
+    spk_id: 0
+    am_sample_rate: 24000
+    am_sess_conf:
+        device: "cpu" # set 'gpu:id' or 'cpu'
+        use_trt: False
+        cpu_threads: 1
+
+    # voc (vocoder) choices=['mb_melgan_csmsc_onnx', 'hifigan_csmsc_onnx']
+    voc: 'mb_melgan_csmsc_onnx'
+    voc_ckpt: 
+    voc_sample_rate: 24000
+    voc_sess_conf:
+        device: "cpu" # set 'gpu:id' or 'cpu'
+        use_trt: False
+        cpu_threads: 1
+
+    # others
+    lang: 'zh'
+    am_block: 42
+    am_pad: 12
+    voc_block: 14
+    voc_pad: 14
+    voc_upsample: 300
+    
--- a/tests/unit/server/online/tts/http_client.py
+++ b/tests/unit/server/online/tts/http_client.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+import base64
+import json
+import os
+import time
+
+import requests
+
+from paddlespeech.server.utils.audio_process import pcm2wav
+
+
+def save_audio(buffer, audio_path) -> bool:
+    if args.save_path.endswith("pcm"):
+        with open(args.save_path, "wb") as f:
+            f.write(buffer)
+    elif args.save_path.endswith("wav"):
+        with open("./tmp.pcm", "wb") as f:
+            f.write(buffer)
+        pcm2wav("./tmp.pcm", audio_path, channels=1, bits=16, sample_rate=24000)
+        os.system("rm ./tmp.pcm")
+    else:
+        print("Only supports saved audio format is pcm or wav")
+        return False
+
+    return True
+
+
+def test(args):
+    params = {
+        "text": args.text,
+        "spk_id": args.spk_id,
+        "speed": args.speed,
+        "volume": args.volume,
+        "sample_rate": args.sample_rate,
+        "save_path": ''
+    }
+
+    buffer = b''
+    flag = 1
+    url = "http://" + str(args.server) + ":" + str(
+        args.port) + "/paddlespeech/streaming/tts"
+    st = time.time()
+    html = requests.post(url, json.dumps(params), stream=True)
+    for chunk in html.iter_content(chunk_size=1024):
+        chunk = base64.b64decode(chunk)  # bytes
+        if flag:
+            first_response = time.time() - st
+            print(f"首包响应：{first_response} s")
+            flag = 0
+        buffer += chunk
+
+    final_response = time.time() - st
+    duration = len(buffer) / 2.0 / 24000
+
+    print(f"尾包响应：{final_response} s")
+    print(f"音频时长：{duration} s")
+    print(f"RTF: {final_response / duration}")
+
+    if args.save_path is not None:
+        if save_audio(buffer, args.save_path):
+            print("音频保存至：", args.save_path)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--text',
+        type=str,
+        default="您好，欢迎使用语音合成服务。",
+        help='A sentence to be synthesized')
+    parser.add_argument('--spk_id', type=int, default=0, help='Speaker id')
+    parser.add_argument('--speed', type=float, default=1.0, help='Audio speed')
+    parser.add_argument(
+        '--volume', type=float, default=1.0, help='Audio volume')
+    parser.add_argument(
+        '--sample_rate',
+        type=int,
+        default=0,
+        help='Sampling rate, the default is the same as the model')
+    parser.add_argument(
+        "--server", type=str, help="server ip", default="127.0.0.1")
+    parser.add_argument("--port", type=int, help="server port", default=8092)
+    parser.add_argument(
+        "--save_path", type=str, help="save audio path", default=None)
+
+    args = parser.parse_args()
+    test(args)
--- a/tests/unit/server/online/tts/test.sh
+++ b/tests/unit/server/online/tts/test.sh
+#!/bin/bash
+# bash test.sh
+
+StartService(){
+    # Start the server in the background and wait until it is up or failed.
+    #   reads:  $config_file, $log, $target_start_num, $error_time
+    #   writes: ./pid, $start_num, $flag ("normal" / "unnormal"),
+    #           $error_time, $assertion_error_time
+    # The log files are appended to across rounds, so every check compares a
+    # running total against the total seen in earlier rounds.
+    paddlespeech_server start --config_file $config_file 1>>$log/server.log 2>>$log/server.log.wf &
+    echo $! > pid
+
+    local warm_up_failures assertion_errors
+    start_num=$(cat $log/server.log.wf | grep "INFO:     Uvicorn running on http://" -c)
+    flag="normal"
+    while [[ $start_num -lt $target_start_num && $flag == "normal" ]]
+    do
+        start_num=$(cat $log/server.log.wf | grep "INFO:     Uvicorn running on http://" -c)
+        warm_up_failures=$(cat $log/server.log.wf | grep -i "Failed to warm up on tts engine." -c)
+        assertion_errors=$(cat $log/server.log.wf | grep -i "AssertionError" -c)
+
+        # start service failed
+        if [ $warm_up_failures -gt $error_time ];then
+            echo "Service started failed."  | tee -a $log/test_result.log
+            error_time=$warm_up_failures
+            flag="unnormal"
+
+        # AssertionError gets its own mark: sharing $error_time with the
+        # warm-up counter could hide a new AssertionError and loop forever.
+        # It is assigned, not "+=", which concatenates strings in bash.
+        elif [ $assertion_errors -gt ${assertion_error_time:-0} ];then
+            echo "Service started failed."  | tee -a $log/test_result.log
+            assertion_error_time=$assertion_errors
+            flag="unnormal"
+        else
+            # Poll once per second instead of spinning on grep.
+            sleep 1
+        fi
+    done
+}
+
+ClientTest_http(){
+    # Run the HTTP client three times, counting every run in $http_test_times.
+    i=1
+    while (( i <= 3 ))
+    do
+        python http_client.py --save_path ./out_http.wav
+        http_test_times=$((http_test_times + 1))
+        i=$((i + 1))
+    done
+}
+
+ClientTest_ws(){
+    # Run the websocket client three times, counting every run in $ws_test_times.
+    i=1
+    while (( i <= 3 ))
+    do
+        python ws_client.py
+        ws_test_times=$((ws_test_times + 1))
+        i=$((i + 1))
+    done
+}
+
+GetTestResult_http() {
+    # Compare the number of "200 OK" lines in server.log with the number of
+    # HTTP client runs so far and log the verdict for the current $info.
+    http_response_success_time=$(cat $log/server.log | grep -c "200 OK")
+    local verdict="Testing failed. $info"
+    if (( $http_response_success_time == $http_test_times )) ; then
+        verdict="Testing successfully. $info"
+    fi
+    echo "$verdict" | tee -a $log/test_result.log
+    # Re-base the run counter on the successes seen so far.
+    http_test_times=$http_response_success_time
+}
+
+GetTestResult_ws() {
+    # Compare the number of completed audio streams logged in server.log.wf
+    # with the number of websocket client runs so far and log the verdict
+    # for the current $info.
+    ws_response_success_time=$(cat $log/server.log.wf | grep -c "Complete the transmission of audio streams")
+    local verdict="Testing failed. $info"
+    if (( $ws_response_success_time == $ws_test_times )) ; then
+        verdict="Testing successfully. $info"
+    fi
+    echo "$verdict" | tee -a $log/test_result.log
+    # Re-base the run counter on the successes seen so far.
+    ws_test_times=$ws_response_success_time
+}
+
+
+engine_type=$1
+log=$2
+mkdir -p $log
+rm -rf $log/server.log.wf 
+rm -rf $log/server.log
+rm -rf $log/test_result.log
+
+config_file=./conf/application.yaml
+server_ip=$(cat $config_file | grep "host" | awk -F " " '{print $2}')
+port=$(cat $config_file | grep "port" | awk '/port:/ {print $2}')
+
+echo "Sevice ip: $server_ip" | tee $log/test_result.log
+echo "Sevice port: $port" | tee -a $log/test_result.log
+
+# whether a process is listening on $port
+pid=`lsof -i :"$port"|grep -v "PID" | awk '{print $2}'`
+if [ "$pid" != "" ]; then
+    echo "The port: $port is occupied, please change another port"
+    exit
+fi
+
+
+
+target_start_num=0  # the number of start service
+test_times=0  # The number of client test
+error_time=0  # The number of error occurrences in the startup failure server.log.wf file
+
+# start server: engine: tts_online, protocol: http, am: fastspeech2_cnndecoder_csmsc, voc: mb_melgan_csmsc
+info="start server: engine: $engine_type, protocol: http, am: fastspeech2_cnndecoder_csmsc, voc: mb_melgan_csmsc."
+echo "$info"  | tee -a $log/test_result.log
+((target_start_num+=1))
+StartService
+
+if [[ $start_num -eq $target_start_num && $flag == "normal" ]]; then
+    echo "Service started successfully."  | tee -a $log/test_result.log
+    ClientTest_http
+    echo "This round of testing is over."  | tee -a $log/test_result.log
+
+    GetTestResult_http
+else
+    echo "Service failed to start, no client test."
+    target_start_num=$start_num  
+
+fi
+
+kill -9 `cat pid`
+rm -rf pid
+sleep 2s
+echo "**************************************************************************************" | tee -a $log/test_result.log
+
+
+
+
+python change_yaml.py --engine_type $engine_type --target_key voc --target_value hifigan_csmsc    # change voc: mb_melgan_csmsc -> hifigan_csmsc
+# start server: engine: tts_online, protocol: http, am: fastspeech2_cnndecoder_csmsc, voc: hifigan_csmsc
+info="start server: engine: $engine_type, protocol: http, am: fastspeech2_cnndecoder_csmsc, voc: hifigan_csmsc."
+
+echo "$info"  | tee -a $log/test_result.log
+((target_start_num+=1))
+StartService
+
+if [[ $start_num -eq $target_start_num && $flag == "normal" ]]; then
+    echo "Service started successfully."  | tee -a $log/test_result.log
+    ClientTest_http
+    echo "This round of testing is over."  | tee -a $log/test_result.log
+
+    GetTestResult_http
+else
+    echo "Service failed to start, no client test."
+    target_start_num=$start_num  
+
+fi
+
+kill -9 `cat pid`
+rm -rf pid
+sleep 2s
+echo "**************************************************************************************" | tee -a $log/test_result.log
+
+
+
+python change_yaml.py --engine_type $engine_type --target_key am --target_value fastspeech2_csmsc    # change am: fastspeech2_cnndecoder_csmsc -> fastspeech2_csmsc
+# start server: engine: tts_online, protocol: http, am: fastspeech2_csmsc, voc: hifigan_csmsc
+info="start server: engine: $engine_type, protocol: http, am: fastspeech2_csmsc, voc: hifigan_csmsc."
+
+echo "$info"  | tee -a $log/test_result.log
+((target_start_num+=1))
+StartService
+
+if [[ $start_num -eq $target_start_num && $flag == "normal" ]]; then
+    echo "Service started successfully."  | tee -a $log/test_result.log
+    ClientTest_http
+    echo "This round of testing is over."  | tee -a $log/test_result.log
+
+    GetTestResult_http
+else
+    echo "Service failed to start, no client test."
+    target_start_num=$start_num  
+
+fi
+
+kill -9 `cat pid`
+rm -rf pid
+sleep 2s
+echo "**************************************************************************************" | tee -a $log/test_result.log
+
+
+python change_yaml.py --engine_type $engine_type  --target_key voc --target_value mb_melgan_csmsc    # change voc: hifigan_csmsc -> mb_melgan_csmsc
+# start server: engine: tts_online, protocol: http, am: fastspeech2_csmsc, voc: mb_melgan_csmsc
+info="start server: engine: $engine_type, protocol: http, am: fastspeech2_csmsc, voc: mb_melgan_csmsc."
+
+echo "$info"  | tee -a $log/test_result.log
+((target_start_num+=1))
+StartService
+
+if [[ $start_num -eq $target_start_num && $flag == "normal" ]]; then
+    echo "Service started successfully."  | tee -a $log/test_result.log
+    ClientTest_http
+    echo "This round of testing is over."  | tee -a $log/test_result.log
+
+    GetTestResult_http
+else
+    echo "Service failed to start, no client test."
+    target_start_num=$start_num  
+    
+fi
+
+kill -9 `cat pid`
+rm -rf pid
+sleep 2s
+echo "**************************************************************************************" | tee -a $log/test_result.log
+
+
+echo "********************************************* websocket **********************************************************"
+
+python change_yaml.py --engine_type $engine_type --change_type protocol --target_key protocol --target_value websocket
+# start server: engine: tts_online, protocol: websocket, am: fastspeech2_csmsc, voc: mb_melgan_csmsc
+info="start server: engine: $engine_type, protocol: websocket, am: fastspeech2_csmsc, voc: mb_melgan_csmsc."
+
+echo "$info"  | tee -a $log/test_result.log
+((target_start_num+=1))
+StartService
+
+if [[ $start_num -eq $target_start_num && $flag == "normal" ]]; then
+    echo "Service started successfully."  | tee -a $log/test_result.log
+    ClientTest_ws
+    echo "This round of testing is over."  | tee -a $log/test_result.log
+
+    GetTestResult_ws
+else
+    echo "Service failed to start, no client test."
+    target_start_num=$start_num  
+    
+fi
+
+kill -9 `cat pid`
+rm -rf pid
+sleep 2s
+echo "**************************************************************************************" | tee -a $log/test_result.log
+
+python change_yaml.py --engine_type $engine_type --target_key voc --target_value hifigan_csmsc    # change voc: mb_melgan_csmsc -> hifigan_csmsc
+# start server: engine: tts_online, protocol: websocket, am: fastspeech2_csmsc, voc: hifigan_csmsc
+info="start server: engine: $engine_type, protocol: websocket, am: fastspeech2_csmsc, voc: hifigan_csmsc."
+
+echo "$info"  | tee -a $log/test_result.log
+((target_start_num+=1))
+StartService
+
+if [[ $start_num -eq $target_start_num && $flag == "normal" ]]; then
+    echo "Service started successfully."  | tee -a $log/test_result.log
+    ClientTest_ws
+    echo "This round of testing is over."  | tee -a $log/test_result.log
+
+    GetTestResult_ws
+else
+    echo "Service failed to start, no client test."
+    target_start_num=$start_num  
+
+fi
+
+kill -9 `cat pid`
+rm -rf pid
+sleep 2s
+echo "**************************************************************************************" | tee -a $log/test_result.log
+
+
+python change_yaml.py --engine_type $engine_type --target_key am --target_value fastspeech2_cnndecoder_csmsc    # change am: fastspeech2_csmsc -> fastspeech2_cnndecoder_csmsc
+# start server: engine: tts_online, protocol: websocket, am: fastspeech2_cnndecoder_csmsc, voc: hifigan_csmsc
+info="start server: engine: $engine_type, protocol: websocket, am: fastspeech2_cnndecoder_csmsc, voc: hifigan_csmsc."
+
+echo "$info"  | tee -a $log/test_result.log
+((target_start_num+=1))
+StartService
+
+if [[ $start_num -eq $target_start_num && $flag == "normal" ]]; then
+    echo "Service started successfully."  | tee -a $log/test_result.log
+    ClientTest_ws
+    echo "This round of testing is over."  | tee -a $log/test_result.log
+
+    GetTestResult_ws
+else
+    echo "Service failed to start, no client test."
+    target_start_num=$start_num  
+
+fi
+
+kill -9 `cat pid`
+rm -rf pid
+sleep 2s
+echo "**************************************************************************************" | tee -a $log/test_result.log
+
+
+
+python change_yaml.py --engine_type $engine_type --target_key voc --target_value mb_melgan_csmsc    # change voc: hifigan_csmsc -> mb_melgan_csmsc
+# Round: engine: $engine_type, protocol: websocket, am: fastspeech2_cnndecoder_csmsc, voc: mb_melgan_csmsc
+info="start server: engine: $engine_type, protocol: websocket, am: fastspeech2_cnndecoder_csmsc, voc: mb_melgan_csmsc."
+
+# Record the scenario in the result log, then raise the number of server
+# starts we expect to have seen before trying to start the service.
+echo "$info"  | tee -a $log/test_result.log
+((target_start_num+=1))
+StartService
+
+# Run the websocket client test only when the actual start counter matches
+# the expected one and $flag is "normal" (both presumably set by
+# StartService, which is defined outside this section -- TODO confirm).
+if [[ $start_num -eq $target_start_num && $flag == "normal" ]]; then
+    echo "Service started successfully."  | tee -a $log/test_result.log
+    ClientTest_ws
+    echo "This round of testing is over."  | tee -a $log/test_result.log
+
+    GetTestResult_ws
+else
+    # NOTE(review): this message goes to stdout only, not to test_result.log.
+    echo "Service failed to start, no client test."
+    # Re-align the expected counter with the actual one so that the next
+    # round is compared against the correct baseline.
+    target_start_num=$start_num  
+
+fi
+
+# Stop the process whose PID is stored in ./pid, drop the PID file and give
+# the system a moment before continuing.
+kill -9 `cat pid`
+rm -rf pid
+sleep 2s
+echo "**************************************************************************************" | tee -a $log/test_result.log
+
+
+
+echo "All tests completed."  | tee -a $log/test_result.log
+
+
+# sohw all the test results
+echo "***************** Here are all the test results ********************"
+cat $log/test_result.log
+
+# Restoring conf is the same as demos/speech_server
+cp ./tts_online_application.yaml ./conf/application.yaml -rf
+sleep 2s
\ No newline at end of file
--- a/tests/unit/server/online/tts/test_all.sh
+++ b/tests/unit/server/online/tts/test_all.sh
+#!/bin/bash
+# bash test_all.sh
+
+log_all_dir=./log
+
+bash test.sh tts_online $log_all_dir/log_tts_online_cpu
+
+python change_yaml.py --change_type engine_type --target_key engine_list --target_value tts_online-onnx
+bash test.sh tts_online-onnx $log_all_dir/log_tts_online-onnx_cpu
+
+
+python change_yaml.py --change_type device --target_key device --target_value gpu:3
+bash test.sh tts_online $log_all_dir/log_tts_online_gpu
+
+python change_yaml.py --change_type engine_type --target_key engine_list --target_value tts_online-onnx
+python change_yaml.py --change_type device --target_key device --target_value gpu:3
+bash test.sh tts_online-onnx $log_all_dir/log_tts_online-onnx_gpu 
+
+echo "************************************** show all test results ****************************************"
+cat $log_all_dir/log_tts_online_cpu/test_result.log
+cat $log_all_dir/log_tts_online-onnx_cpu/test_result.log
+cat $log_all_dir/log_tts_online_gpu/test_result.log
+cat $log_all_dir/log_tts_online-onnx_gpu/test_result.log
--- a/tests/unit/server/online/tts/tts_online_application.yaml
+++ b/tests/unit/server/online/tts/tts_online_application.yaml
+# This is the parameter configuration file for PaddleSpeech Serving.
+
+#################################################################################
+#                             SERVER SETTING                                    #
+#################################################################################
+host: 127.0.0.1
+port: 8092
+
+# The task format in the engine_list is: <speech task>_<engine type>
+# task choices = ['tts_online', 'tts_online-onnx']
+# protocol = ['websocket', 'http'] (only one can be selected).
+protocol: 'http'
+engine_list: ['tts_online']
+
+
+#################################################################################
+#                                ENGINE CONFIG                                  #
+#################################################################################
+
+################################### TTS #########################################
+################### speech task: tts; engine_type: online #######################
+tts_online: 
+    # am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']        
+    am: 'fastspeech2_cnndecoder_csmsc'   
+    am_config: 
+    am_ckpt: 
+    am_stat: 
+    phones_dict: 
+    tones_dict: 
+    speaker_dict: 
+    spk_id: 0
+
+    # voc (vocoder) choices=['mb_melgan_csmsc', 'hifigan_csmsc']
+    voc: 'mb_melgan_csmsc'
+    voc_config: 
+    voc_ckpt: 
+    voc_stat: 
+
+    # others
+    lang: 'zh'
+    device: 'cpu' # set 'gpu:id' or 'cpu'
+    am_block: 42
+    am_pad: 12
+    voc_block: 14
+    voc_pad: 14
+    
+
+
+#################################################################################
+#                                ENGINE CONFIG                                  #
+#################################################################################
+
+################################### TTS #########################################
+################### speech task: tts; engine_type: online-onnx #######################
+tts_online-onnx: 
+    # am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']        
+    am: 'fastspeech2_cnndecoder_csmsc_onnx' 
+    # am_ckpt is a list, if am is fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder model, decoder model, postnet model];
+    # if am is fastspeech2_csmsc_onnx, am_ckpt = [ckpt model];
+    am_ckpt:   # list
+    am_stat: 
+    phones_dict: 
+    tones_dict: 
+    speaker_dict: 
+    spk_id: 0
+    am_sample_rate: 24000
+    am_sess_conf:
+        device: "cpu" # set 'gpu:id' or 'cpu'
+        use_trt: False
+        cpu_threads: 1
+
+    # voc (vocoder) choices=['mb_melgan_csmsc_onnx', 'hifigan_csmsc_onnx']
+    voc: 'mb_melgan_csmsc_onnx'
+    voc_ckpt: 
+    voc_sample_rate: 24000
+    voc_sess_conf:
+        device: "cpu" # set 'gpu:id' or 'cpu'
+        use_trt: False
+        cpu_threads: 1
+
+    # others
+    lang: 'zh'
+    am_block: 42
+    am_pad: 12
+    voc_block: 14
+    voc_pad: 14
+    voc_upsample: 300
+    
--- a/tests/unit/server/online/tts/ws_client.py
+++ b/tests/unit/server/online/tts/ws_client.py
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import _thread as thread
+import argparse
+import base64
+import json
+import ssl
+import time
+
+import websocket
+
+flag = 1
+st = 0.0
+all_bytes = b''
+
+
+class WsParam(object):
+    # 初始化
+    def __init__(self, text, server="127.0.0.1", port=8090):
+        self.server = server
+        self.port = port
+        self.url = "ws://" + self.server + ":" + str(self.port) + "/ws/tts"
+        self.text = text
+
+    # 生成url
+    def create_url(self):
+        return self.url
+
+
+def on_message(ws, message):
+    global flag
+    global st
+    global all_bytes
+
+    try:
+        message = json.loads(message)
+        audio = message["audio"]
+        audio = base64.b64decode(audio)  # bytes
+        status = message["status"]
+        all_bytes += audio
+
+        if status == 0:
+            print("create successfully.")
+        elif status == 1:
+            if flag:
+                print(f"首包响应：{time.time() - st} s")
+                flag = 0
+        elif status == 2:
+            final_response = time.time() - st
+            duration = len(all_bytes) / 2.0 / 24000
+            print(f"尾包响应：{final_response} s")
+            print(f"音频时长：{duration} s")
+            print(f"RTF: {final_response / duration}")
+            with open("./out.pcm", "wb") as f:
+                f.write(all_bytes)
+            print("ws is closed")
+            ws.close()
+        else:
+            print("infer error")
+
+    except Exception as e:
+        print("receive msg,but parse exception:", e)
+
+
+# 收到websocket错误的处理
+def on_error(ws, error):
+    print("### error:", error)
+
+
+# 收到websocket关闭的处理
+def on_close(ws):
+    print("### closed ###")
+
+
+# 收到websocket连接建立的处理
+def on_open(ws):
+    def run(*args):
+        global st
+        text_base64 = str(
+            base64.b64encode((wsParam.text).encode('utf-8')), "UTF8")
+        d = {"text": text_base64}
+        d = json.dumps(d)
+        print("Start sending text data")
+        st = time.time()
+        ws.send(d)
+
+    thread.start_new_thread(run, ())
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--text",
+        type=str,
+        help="A sentence to be synthesized",
+        default="您好，欢迎使用语音合成服务。")
+    parser.add_argument(
+        "--server", type=str, help="server ip", default="127.0.0.1")
+    parser.add_argument("--port", type=int, help="server port", default=8092)
+    args = parser.parse_args()
+
+    print("***************************************")
+    print("Server ip: ", args.server)
+    print("Server port: ", args.port)
+    print("Sentence to be synthesized: ", args.text)
+    print("***************************************")
+
+    wsParam = WsParam(text=args.text, server=args.server, port=args.port)
+
+    websocket.enableTrace(False)
+    wsUrl = wsParam.create_url()
+    ws = websocket.WebSocketApp(
+        wsUrl, on_message=on_message, on_error=on_error, on_close=on_close)
+    ws.on_open = on_open
+    ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})