PaddlePaddle / DeepSpeech
Commit 4e9db4ff
Authored on Apr 21, 2022 by lym0302

    add onnx tts engine, test=doc

Parent: 9e41ac85

Showing 19 changed files with 1582 additions and 25 deletions (+1582, -25)
paddlespeech/server/conf/tts_online_application.yaml           +47    -5
paddlespeech/server/engine/engine_factory.py                    +4    -1
paddlespeech/server/engine/tts/online/onnx/__init__.py         +13    -0
paddlespeech/server/engine/tts/online/onnx/tts_engine.py      +582    -0
paddlespeech/server/engine/tts/online/python/__init__.py       +13    -0
paddlespeech/server/engine/tts/online/python/tts_engine.py      +2   -17
paddlespeech/server/utils/onnx_infer.py                        +36    -0
paddlespeech/server/ws/tts_socket.py                            +0    -1
setup.py                                                        +5    -1
tests/unit/server/offline/change_yaml.py                        +0    -0
tests/unit/server/offline/conf/application.yaml                 +0    -0
tests/unit/server/offline/test_server_client.sh                 +0    -0
tests/unit/server/online/tts/change_yaml.py                   +140    -0
tests/unit/server/online/tts/conf/application.yaml             +88    -0
tests/unit/server/online/tts/http_client.py                   +100    -0
tests/unit/server/online/tts/test.sh                          +315    -0
tests/unit/server/online/tts/test_all.sh                       +23    -0
tests/unit/server/online/tts/tts_online_application.yaml       +88    -0
tests/unit/server/online/tts/ws_client.py                     +126    -0
paddlespeech/server/conf/tts_online_application.yaml

@@ -7,7 +7,7 @@ host: 127.0.0.1
 port: 8092

 # The task format in the engin_list is: <speech task>_<engine type>
-# task choices = ['asr_online', 'tts_online']
+# task choices = ['tts_online', 'tts_online-onnx']
 # protocol = ['websocket', 'http'] (only one can be selected).
 protocol: 'http'
 engine_list: ['tts_online']
@@ -20,8 +20,8 @@ engine_list: ['tts_online']
 ################################### TTS #########################################
 ################### speech task: tts; engine_type: online #######################
 tts_online:
-    # am (acoustic model) choices=['fastspeech2_csmsc']
-    am: 'fastspeech2_csmsc'
+    # am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']
+    am: 'fastspeech2_cnndecoder_csmsc'
     am_config:
     am_ckpt:
     am_stat:
@@ -30,7 +30,7 @@ tts_online:
     speaker_dict:
     spk_id: 0

-    # voc (vocoder) choices=['mb_melgan_csmsc']
+    # voc (vocoder) choices=['mb_melgan_csmsc, hifigan_csmsc']
     voc: 'mb_melgan_csmsc'
     voc_config:
     voc_ckpt:
@@ -38,9 +38,51 @@ tts_online:
     # others
     lang: 'zh'
-    device:             # set 'gpu:id' or 'cpu'
+    device: 'cpu'       # set 'gpu:id' or 'cpu'
     am_block: 42
     am_pad: 12
     voc_block: 14
     voc_pad: 14
+
+
+#################################################################################
+#                                ENGINE CONFIG                                  #
+#################################################################################
+################################### TTS #########################################
+################### speech task: tts; engine_type: online-onnx #######################
+tts_online-onnx:
+    # am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']
+    am: 'fastspeech2_cnndecoder_csmsc_onnx'
+    # am_ckpt is a list, if am is fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder model, decoder model, postnet model];
+    # if am is fastspeech2_csmsc_onnx, am_ckpt = [ckpt model];
+    am_ckpt:            # list
+    am_stat:
+    phones_dict:
+    tones_dict:
+    speaker_dict:
+    spk_id: 0
+    am_sample_rate: 24000
+    am_sess_conf:
+        device: "cpu"   # set 'gpu:id' or 'cpu'
+        use_trt: False
+        cpu_threads: 1
+
+    # voc (vocoder) choices=['mb_melgan_csmsc_onnx, hifigan_csmsc_onnx']
+    voc: 'mb_melgan_csmsc_onnx'
+    voc_ckpt:
+    voc_sample_rate: 24000
+    voc_sess_conf:
+        device: "cpu"   # set 'gpu:id' or 'cpu'
+        use_trt: False
+        cpu_threads: 1
+
+    # others
+    lang: 'zh'
+    am_block: 42
+    am_pad: 12
+    voc_block: 14
+    voc_pad: 14
+    voc_upsample: 300
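
The new tts_online-onnx block above is what the ONNX engine reads at startup. As a hedged sketch (plain PyYAML, not the server's own loading code; the path assumes the repository root), the two ONNX Runtime session configs can be pulled out like this:

import yaml

# Hedged sketch: read the tts_online-onnx section and pick out the two
# ONNX Runtime session configs. Not the server's actual loading code.
with open("paddlespeech/server/conf/tts_online_application.yaml") as f:
    conf = yaml.safe_load(f)

onnx_conf = conf["tts_online-onnx"]
am_sess_conf = onnx_conf["am_sess_conf"]     # {'device': 'cpu', 'use_trt': False, 'cpu_threads': 1}
voc_sess_conf = onnx_conf["voc_sess_conf"]
print(onnx_conf["am"], onnx_conf["voc"], am_sess_conf, voc_sess_conf)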
paddlespeech/server/engine/engine_factory.py

@@ -35,7 +35,10 @@ class EngineFactory(object):
             from paddlespeech.server.engine.tts.python.tts_engine import TTSEngine
             return TTSEngine()
         elif engine_name == 'tts' and engine_type == 'online':
-            from paddlespeech.server.engine.tts.online.tts_engine import TTSEngine
+            from paddlespeech.server.engine.tts.online.python.tts_engine import TTSEngine
             return TTSEngine()
+        elif engine_name == 'tts' and engine_type == 'online-onnx':
+            from paddlespeech.server.engine.tts.online.onnx.tts_engine import TTSEngine
+            return TTSEngine()
         elif engine_name == 'cls' and engine_type == 'inference':
             from paddlespeech.server.engine.cls.paddleinference.cls_engine import CLSEngine
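
The factory dispatches on the pair (engine_name, engine_type), which comes from the '<speech task>_<engine type>' keys in engine_list (e.g. 'tts_online-onnx'). A minimal sketch of that naming convention (illustrative only, not a PaddleSpeech API):

# Illustrative helper: split an engine_list key into the (task, engine_type)
# pair that EngineFactory branches on. Not part of the commit.
def split_engine_key(key: str):
    task, _, engine_type = key.partition("_")
    return task, engine_type

assert split_engine_key("tts_online") == ("tts", "online")
assert split_engine_key("tts_online-onnx") == ("tts", "online-onnx")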
paddlespeech/server/engine/tts/online/onnx/__init__.py
0 → 100644
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
paddlespeech/server/engine/tts/online/onnx/tts_engine.py
0 → 100644
This diff is collapsed.
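
Since the 582-line engine file is collapsed in this view, the following is only a rough sketch of how such an engine could build its inference sessions from the tts_online-onnx config above, using the get_sess() helper added later in this commit; build_sessions is a hypothetical name and the real file is organized differently.

# Hedged sketch, not the code of onnx/tts_engine.py: open the acoustic-model
# parts (encoder, decoder, postnet) and the vocoder as ONNX Runtime sessions.
from paddlespeech.server.utils.onnx_infer import get_sess

def build_sessions(conf: dict):
    # For fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder, decoder, postnet]
    am_encoder, am_decoder, am_postnet = [
        get_sess(model_path=path, sess_conf=conf["am_sess_conf"])
        for path in conf["am_ckpt"]
    ]
    voc_sess = get_sess(model_path=conf["voc_ckpt"], sess_conf=conf["voc_sess_conf"])
    return am_encoder, am_decoder, am_postnet, voc_sess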
paddlespeech/server/engine/tts/online/python/__init__.py
0 → 100644
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
paddlespeech/server/engine/tts/online/tts_engine.py → paddlespeech/server/engine/tts/online/python/tts_engine.py

@@ -202,6 +202,7 @@ class TTSServerExecutor(TTSExecutor):
         """
         Init model and other resources from a specific path.
         """
+        #import pdb;pdb.set_trace()
         if hasattr(self, 'am_inference') and hasattr(self, 'voc_inference'):
             logger.info('Models had been initialized.')
             return
@@ -302,23 +303,6 @@ class TTSServerExecutor(TTSExecutor):
         self.voc_inference.eval()
         print("voc done!")

-    def get_phone(self, sentence, lang, merge_sentences, get_tone_ids):
-        tone_ids = None
-        if lang == 'zh':
-            input_ids = self.frontend.get_input_ids(
-                sentence, merge_sentences=merge_sentences, get_tone_ids=get_tone_ids)
-            phone_ids = input_ids["phone_ids"]
-            if get_tone_ids:
-                tone_ids = input_ids["tone_ids"]
-        elif lang == 'en':
-            input_ids = self.frontend.get_input_ids(
-                sentence, merge_sentences=merge_sentences)
-            phone_ids = input_ids["phone_ids"]
-        else:
-            print("lang should in {'zh', 'en'}!")
-
     def depadding(self, data, chunk_num, chunk_id, block, pad, upsample):
         """
         Streaming inference removes the result of pad inference
@@ -479,6 +463,7 @@ class TTSEngine(BaseEngine):
     def __init__(self, name=None):
         """Initialize TTS server engine
         """
+        #super(TTSEngine, self).__init__()
         super().__init__()

     def init(self, config: dict) -> bool:
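
The retained depadding() method (its signature and docstring are visible above) is what makes the am_block/am_pad and voc_block/voc_pad settings work: each chunk is inferred with pad frames of extra context, and the extra samples are cut away before the chunks are concatenated. A hedged illustration of that idea, not a verbatim copy of the method:

# Hedged illustration of block/pad depadding for streaming inference;
# not copied from tts_engine.py.
def depadding(data, chunk_num, chunk_id, block, pad, upsample):
    front_pad = min(chunk_id * block, pad)      # left context actually available
    if chunk_id == 0:                           # first chunk: only the right pad was added
        return data[:block * upsample]
    if chunk_id == chunk_num - 1:               # last chunk: keep everything after the left pad
        return data[front_pad * upsample:]
    return data[front_pad * upsample:(front_pad + block) * upsample]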
paddlespeech/server/utils/onnx_infer.py
0 → 100644
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from typing import Optional

import onnxruntime as ort


def get_sess(model_path: Optional[os.PathLike]=None, sess_conf: dict=None):
    sess_options = ort.SessionOptions()
    sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    sess_options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL

    if "gpu" in sess_conf["device"]:
        # fastspeech2/mb_melgan can't use trt now!
        if sess_conf["use_trt"]:
            providers = ['TensorrtExecutionProvider']
        else:
            providers = ['CUDAExecutionProvider']
    elif sess_conf["device"] == "cpu":
        providers = ['CPUExecutionProvider']
    sess_options.intra_op_num_threads = sess_conf["cpu_threads"]

    sess = ort.InferenceSession(
        model_path, providers=providers, sess_options=sess_options)

    return sess
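
One practical note on the provider selection above: onnxruntime falls back silently if the requested execution provider is not compiled into the installed wheel, so it can be worth checking availability before filling in sess_conf. A small hedged sketch (plain onnxruntime, not part of this commit):

# Hedged sketch: pick a device for sess_conf based on the providers actually
# available in this onnxruntime build. Not part of the commit.
import onnxruntime as ort

available = ort.get_available_providers()
sess_conf = {
    "device": "gpu:0" if "CUDAExecutionProvider" in available else "cpu",
    "use_trt": False,
    "cpu_threads": 1,
}
print(available, sess_conf)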
paddlespeech/server/ws/tts_socket.py

@@ -51,7 +51,6 @@ async def websocket_endpoint(websocket: WebSocket):
                 tts_results = next(wav_generator)
                 resp = {"status": 1, "audio": tts_results}
                 await websocket.send_json(resp)
-                logger.info("streaming audio...")
             except StopIteration as e:
                 resp = {"status": 2, "audio": ''}
                 await websocket.send_json(resp)
setup.py

@@ -42,6 +42,7 @@ base = [
     "loguru",
     "matplotlib",
     "nara_wpe",
+    "onnxruntime",
     "pandas",
     "paddleaudio",
     "paddlenlp",
@@ -64,12 +65,16 @@ base = [
     "webrtcvad",
     "yacs~=0.1.8",
     "prettytable",
     "zhon",
 ]

 server = [
     "fastapi",
     "uvicorn",
     "pattern_singleton",
     "websockets",
     "websocket",
     "websocket-client",
 ]

 requirements = {
@@ -90,7 +95,6 @@ requirements = {
     "unidecode",
     "yq",
     "pre-commit",
-    "zhon",
 ]
 }
tests/unit/server/change_yaml.py → tests/unit/server/offline/change_yaml.py
File moved.

tests/unit/server/conf/application.yaml → tests/unit/server/offline/conf/application.yaml
File moved.

tests/unit/server/test_server_client.sh → tests/unit/server/offline/test_server_client.sh
File moved.
tests/unit/server/online/tts/change_yaml.py
0 → 100644
#!/usr/bin/python
import argparse
import os

import yaml
"""
def change_value1(yamlfile: str, target_key: str, target_value: str, engine: str="tts_online"):
tmp_yamlfile = yamlfile.split(".yaml")[0] + "_tmp.yaml"
os.system("cp %s %s" % (yamlfile, tmp_yamlfile))
with open(tmp_yamlfile) as f, open(yamlfile, "w+", encoding="utf-8") as fw:
y = yaml.safe_load(f)
y[engine][target_key] = target_value
print(yaml.dump(y, default_flow_style=False, sort_keys=False))
yaml.dump(y, fw, allow_unicode=True)
os.system("rm %s" % (tmp_yamlfile))
print(f"Change key: {target_key} to value: {target_value} successfully.")
def change_protocol(yamlfile: str, target_key: str, target_value: str):
tmp_yamlfile = yamlfile.split(".yaml")[0] + "_tmp.yaml"
os.system("cp %s %s" % (yamlfile, tmp_yamlfile))
with open(tmp_yamlfile) as f, open(yamlfile, "w+", encoding="utf-8") as fw:
y = yaml.safe_load(f)
y[target_key] = target_value
print(yaml.dump(y, default_flow_style=False, sort_keys=False))
yaml.dump(y, fw, allow_unicode=True)
os.system("rm %s" % (tmp_yamlfile))
print(f"Change key: {target_key} to value: {target_value} successfully.")
def change_engine_type(yamlfile: str, target_key: str, target_value: str):
tmp_yamlfile = yamlfile.split(".yaml")[0] + "_tmp.yaml"
os.system("cp %s %s" % (yamlfile, tmp_yamlfile))
with open(tmp_yamlfile) as f, open(yamlfile, "w+", encoding="utf-8") as fw:
y = yaml.safe_load(f)
y[target_key] = [target_value]
print(yaml.dump(y, default_flow_style=False, sort_keys=False))
yaml.dump(y, fw, allow_unicode=True)
os.system("rm %s" % (tmp_yamlfile))
print(f"Change key: {target_key} to value: {target_value} successfully.")
"""
def change_value(args):
    yamlfile = args.config_file
    change_type = args.change_type
    engine_type = args.engine_type
    target_key = args.target_key
    target_value = args.target_value

    tmp_yamlfile = yamlfile.split(".yaml")[0] + "_tmp.yaml"
    os.system("cp %s %s" % (yamlfile, tmp_yamlfile))

    with open(tmp_yamlfile) as f, open(yamlfile, "w+", encoding="utf-8") as fw:
        y = yaml.safe_load(f)
        if change_type == "model":
            if engine_type == "tts_online-onnx":
                target_value = target_value + "_onnx"
            y[engine_type][target_key] = target_value
        elif change_type == "protocol":
            assert (target_key == "protocol" and
                    (target_value == "http" or target_value == "websocket")
                    ), "if change_type is protocol, target_key must be set protocol."
            y[target_key] = target_value
        elif change_type == "engine_type":
            assert (target_key == "engine_list"
                    ), "if change_type is engine_type, target_key must be set engine_list."
            y[target_key] = [target_value]
        elif change_type == "device":
            assert (target_key == "device"
                    ), "if change_type is device, target_key must be set device."
            if y["engine_list"][0] == "tts_online":
                y["tts_online"]["device"] = target_value
            elif y["engine_list"][0] == "tts_online-onnx":
                y["tts_online-onnx"]["am_sess_conf"]["device"] = target_value
                y["tts_online-onnx"]["voc_sess_conf"]["device"] = target_value
            else:
                print("Error engine_list, please set tts_online or tts_online-onnx")
        else:
            print("Error change_type, please set correct change_type.")

        print(yaml.dump(y, default_flow_style=False, sort_keys=False))
        yaml.dump(y, fw, allow_unicode=True)
    os.system("rm %s" % (tmp_yamlfile))
    print(f"Change key: {target_key} to value: {target_value} successfully.")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--config_file',
        type=str,
        default='./conf/application.yaml',
        help='server yaml file.')
    parser.add_argument(
        '--change_type',
        type=str,
        default="model",
        choices=["model", "protocol", "engine_type", "device"],
        help='change protocol', )
    parser.add_argument(
        '--engine_type',
        type=str,
        default="tts_online",
        help='engine type',
        choices=["tts_online", "tts_online-onnx"])
    parser.add_argument(
        '--target_key', type=str, default=None, help='Change key', required=True)
    parser.add_argument(
        '--target_value', type=str, default=None, help='target value', required=True)

    args = parser.parse_args()

    change_value(args)
"""
if args.change_type == "model":
change_value(args.config_file, args.target_key, args.target_value, args.engine)
elif args.change_type == "protocol":
change_protocol(args.config_file, args.target_key, args.target_value)
else:
print("Please set correct change type, model or protocol")
"""
tests/unit/server/online/tts/conf/application.yaml
0 → 100644
# This is the parameter configuration file for PaddleSpeech Serving.

#################################################################################
#                               SERVER SETTING                                  #
#################################################################################
host: 127.0.0.1
port: 8092

# The task format in the engin_list is: <speech task>_<engine type>
# task choices = ['tts_online', 'tts_online-onnx']
# protocol = ['websocket', 'http'] (only one can be selected).
protocol: 'http'
engine_list: ['tts_online']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### TTS #########################################
################### speech task: tts; engine_type: online #######################
tts_online:
    # am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']
    am: 'fastspeech2_cnndecoder_csmsc'
    am_config:
    am_ckpt:
    am_stat:
    phones_dict:
    tones_dict:
    speaker_dict:
    spk_id: 0

    # voc (vocoder) choices=['mb_melgan_csmsc', 'hifigan_csmsc']
    voc: 'mb_melgan_csmsc'
    voc_config:
    voc_ckpt:
    voc_stat:

    # others
    lang: 'zh'
    device: 'cpu'       # set 'gpu:id' or 'cpu'
    am_block: 42
    am_pad: 12
    voc_block: 14
    voc_pad: 14


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### TTS #########################################
################### speech task: tts; engine_type: online-onnx #######################
tts_online-onnx:
    # am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']
    am: 'fastspeech2_cnndecoder_csmsc_onnx'
    # am_ckpt is a list, if am is fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder model, decoder model, postnet model];
    # if am is fastspeech2_csmsc_onnx, am_ckpt = [ckpt model];
    am_ckpt:            # list
    am_stat:
    phones_dict:
    tones_dict:
    speaker_dict:
    spk_id: 0
    am_sample_rate: 24000
    am_sess_conf:
        device: "cpu"   # set 'gpu:id' or 'cpu'
        use_trt: False
        cpu_threads: 1

    # voc (vocoder) choices=['mb_melgan_csmsc_onnx', 'hifigan_csmsc_onnx']
    voc: 'mb_melgan_csmsc_onnx'
    voc_ckpt:
    voc_sample_rate: 24000
    voc_sess_conf:
        device: "cpu"   # set 'gpu:id' or 'cpu'
        use_trt: False
        cpu_threads: 1

    # others
    lang: 'zh'
    am_block: 42
    am_pad: 12
    voc_block: 14
    voc_pad: 14
    voc_upsample: 300
tests/unit/server/online/tts/http_client.py
0 → 100644
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import base64
import json
import os
import time

import requests

from paddlespeech.server.utils.audio_process import pcm2wav


def save_audio(buffer, audio_path) -> bool:
    if args.save_path.endswith("pcm"):
        with open(args.save_path, "wb") as f:
            f.write(buffer)
    elif args.save_path.endswith("wav"):
        with open("./tmp.pcm", "wb") as f:
            f.write(buffer)
        pcm2wav("./tmp.pcm", audio_path, channels=1, bits=16, sample_rate=24000)
        os.system("rm ./tmp.pcm")
    else:
        print("Only supports saved audio format is pcm or wav")
        return False

    return True


def test(args):
    params = {
        "text": args.text,
        "spk_id": args.spk_id,
        "speed": args.speed,
        "volume": args.volume,
        "sample_rate": args.sample_rate,
        "save_path": ''
    }

    buffer = b''
    flag = 1
    url = "http://" + str(args.server) + ":" + str(args.port) + "/paddlespeech/streaming/tts"
    st = time.time()
    html = requests.post(url, json.dumps(params), stream=True)
    for chunk in html.iter_content(chunk_size=1024):
        chunk = base64.b64decode(chunk)  # bytes
        if flag:
            first_response = time.time() - st
            print(f"首包响应：{first_response} s")
            flag = 0
        buffer += chunk

    final_response = time.time() - st
    duration = len(buffer) / 2.0 / 24000

    print(f"尾包响应：{final_response} s")
    print(f"音频时长：{duration} s")
    print(f"RTF: {final_response / duration}")

    if args.save_path is not None:
        if save_audio(buffer, args.save_path):
            print("音频保存至：", args.save_path)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--text',
        type=str,
        default="您好，欢迎使用语音合成服务。",
        help='A sentence to be synthesized')
    parser.add_argument('--spk_id', type=int, default=0, help='Speaker id')
    parser.add_argument('--speed', type=float, default=1.0, help='Audio speed')
    parser.add_argument('--volume', type=float, default=1.0, help='Audio volume')
    parser.add_argument(
        '--sample_rate',
        type=int,
        default=0,
        help='Sampling rate, the default is the same as the model')
    parser.add_argument(
        "--server", type=str, help="server ip", default="127.0.0.1")
    parser.add_argument("--port", type=int, help="server port", default=8092)
    parser.add_argument(
        "--save_path", type=str, help="save audio path", default=None)

    args = parser.parse_args()
    test(args)
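
For reference, the timing math in test() assumes 16-bit mono PCM at 24 kHz: duration = len(buffer) / 2 / 24000 seconds, and RTF is the total latency divided by that duration (RTF < 1 means faster than real time). A worked example with made-up numbers:

# Worked example of the client's RTF computation (values are made up).
buffer_len = 480000                 # bytes of 16-bit mono PCM at 24 kHz -> 10 s of audio
final_response = 2.5                # seconds from request to last chunk
duration = buffer_len / 2.0 / 24000
print(duration, final_response / duration)   # 10.0  0.25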
tests/unit/server/online/tts/test.sh
0 → 100644
#!/bin/bash
# bash test.sh

StartService(){
    # Start service
    paddlespeech_server start --config_file $config_file 1>>$log/server.log 2>>$log/server.log.wf &
    echo $! > pid
    start_num=$(cat $log/server.log.wf | grep "INFO: Uvicorn running on http://" -c)
    flag="normal"
    while [[ $start_num -lt $target_start_num && $flag == "normal" ]]
    do
        start_num=$(cat $log/server.log.wf | grep "INFO: Uvicorn running on http://" -c)
        # start service failed
        if [ $(cat $log/server.log.wf | grep -i "Failed to warm up on tts engine." -c) -gt $error_time ];then
            echo "Service started failed."  | tee -a $log/test_result.log
            error_time=$(cat $log/server.log.wf | grep -i "Failed to warm up on tts engine." -c)
            flag="unnormal"
        elif [ $(cat $log/server.log.wf | grep -i "AssertionError" -c) -gt $error_time ];then
            echo "Service started failed."  | tee -a $log/test_result.log
            error_time+=$(cat $log/server.log.wf | grep -i "AssertionError" -c)
            flag="unnormal"
        fi
    done
}

ClientTest_http(){
    for ((i=1; i<=3;i++))
    do
        python http_client.py --save_path ./out_http.wav
        ((http_test_times+=1))
    done
}

ClientTest_ws(){
    for ((i=1; i<=3;i++))
    do
        python ws_client.py
        ((ws_test_times+=1))
    done
}

GetTestResult_http() {
    # Determine if the test was successful
    http_response_success_time=$(cat $log/server.log | grep "200 OK" -c)
    if (( $http_response_success_time == $http_test_times )) ; then
        echo "Testing successfully. $info"  | tee -a $log/test_result.log
    else
        echo "Testing failed. $info" | tee -a $log/test_result.log
    fi
    http_test_times=$http_response_success_time
}

GetTestResult_ws() {
    # Determine if the test was successful
    ws_response_success_time=$(cat $log/server.log.wf | grep "Complete the transmission of audio streams" -c)
    if (( $ws_response_success_time == $ws_test_times )) ; then
        echo "Testing successfully. $info"  | tee -a $log/test_result.log
    else
        echo "Testing failed. $info" | tee -a $log/test_result.log
    fi
    ws_test_times=$ws_response_success_time
}


engine_type=$1
log=$2

mkdir -p $log
rm -rf $log/server.log.wf
rm -rf $log/server.log
rm -rf $log/test_result.log

config_file=./conf/application.yaml
server_ip=$(cat $config_file | grep "host" | awk -F " " '{print $2}')
port=$(cat $config_file | grep "port" | awk '/port:/ {print $2}')

echo "Sevice ip: $server_ip" | tee $log/test_result.log
echo "Sevice port: $port" | tee -a $log/test_result.log

# whether a process is listening on $port
pid=`lsof -i :"$port"|grep -v "PID" | awk '{print $2}'`
if [ "$pid" != "" ]; then
    echo "The port: $port is occupied, please change another port"
    exit
fi

target_start_num=0    # the number of start service
test_times=0          # The number of client test
error_time=0          # The number of error occurrences in the startup failure server.log.wf file

# start server: engine: tts_online, protocol: http, am: fastspeech2_cnndecoder_csmsc, voc: mb_melgan_csmsc
info="start server: engine: $engine_type, protocol: http, am: fastspeech2_cnndecoder_csmsc, voc: mb_melgan_csmsc."
echo "$info" | tee -a $log/test_result.log
((target_start_num+=1))
StartService

if [[ $start_num -eq $target_start_num && $flag == "normal" ]]; then
    echo "Service started successfully."  | tee -a $log/test_result.log
    ClientTest_http
    echo "This round of testing is over."  | tee -a $log/test_result.log
    GetTestResult_http
else
    echo "Service failed to start, no client test."
    target_start_num=$start_num
fi

kill -9 `cat pid`
rm -rf pid
sleep 2s
echo "**************************************************************************************" | tee -a $log/test_result.log


python change_yaml.py --engine_type $engine_type --target_key voc --target_value hifigan_csmsc    # change voc: mb_melgan_csmsc -> hifigan_csmsc
# start server: engine: tts_online, protocol: http, am: fastspeech2_cnndecoder_csmsc, voc: hifigan_csmsc
info="start server: engine: $engine_type, protocol: http, am: fastspeech2_cnndecoder_csmsc, voc: hifigan_csmsc."
echo "$info" | tee -a $log/test_result.log
((target_start_num+=1))
StartService

if [[ $start_num -eq $target_start_num && $flag == "normal" ]]; then
    echo "Service started successfully."  | tee -a $log/test_result.log
    ClientTest_http
    echo "This round of testing is over."  | tee -a $log/test_result.log
    GetTestResult_http
else
    echo "Service failed to start, no client test."
    target_start_num=$start_num
fi

kill -9 `cat pid`
rm -rf pid
sleep 2s
echo "**************************************************************************************" | tee -a $log/test_result.log


python change_yaml.py --engine_type $engine_type --target_key am --target_value fastspeech2_csmsc    # change am: fastspeech2_cnndecoder_csmsc -> fastspeech2_csmsc
# start server: engine: tts_online, protocol: http, am: fastspeech2_csmsc, voc: hifigan_csmsc
info="start server: engine: $engine_type, protocol: http, am: fastspeech2_csmsc, voc: hifigan_csmsc."
echo "$info" | tee -a $log/test_result.log
((target_start_num+=1))
StartService

if [[ $start_num -eq $target_start_num && $flag == "normal" ]]; then
    echo "Service started successfully."  | tee -a $log/test_result.log
    ClientTest_http
    echo "This round of testing is over."  | tee -a $log/test_result.log
    GetTestResult_http
else
    echo "Service failed to start, no client test."
    target_start_num=$start_num
fi

kill -9 `cat pid`
rm -rf pid
sleep 2s
echo "**************************************************************************************" | tee -a $log/test_result.log


python change_yaml.py --engine_type $engine_type --target_key voc --target_value mb_melgan_csmsc    # change voc: hifigan_csmsc -> mb_melgan_csmsc
# start server: engine: tts_online, protocol: http, am: fastspeech2_csmsc, voc: mb_melgan_csmsc
info="start server: engine: $engine_type, protocol: http, am: fastspeech2_csmsc, voc: mb_melgan_csmsc."
echo "$info" | tee -a $log/test_result.log
((target_start_num+=1))
StartService

if [[ $start_num -eq $target_start_num && $flag == "normal" ]]; then
    echo "Service started successfully."  | tee -a $log/test_result.log
    ClientTest_http
    echo "This round of testing is over."  | tee -a $log/test_result.log
    GetTestResult_http
else
    echo "Service failed to start, no client test."
    target_start_num=$start_num
fi

kill -9 `cat pid`
rm -rf pid
sleep 2s
echo "**************************************************************************************" | tee -a $log/test_result.log


echo "********************************************* websocket **********************************************************"
python change_yaml.py --engine_type $engine_type --change_type protocol --target_key protocol --target_value websocket
# start server: engine: tts_online, protocol: websocket, am: fastspeech2_csmsc, voc: mb_melgan_csmsc
info="start server: engine: $engine_type, protocol: websocket, am: fastspeech2_csmsc, voc: mb_melgan_csmsc."
echo "$info" | tee -a $log/test_result.log
((target_start_num+=1))
StartService

if [[ $start_num -eq $target_start_num && $flag == "normal" ]]; then
    echo "Service started successfully."  | tee -a $log/test_result.log
    ClientTest_ws
    echo "This round of testing is over."  | tee -a $log/test_result.log
    GetTestResult_ws
else
    echo "Service failed to start, no client test."
    target_start_num=$start_num
fi

kill -9 `cat pid`
rm -rf pid
sleep 2s
echo "**************************************************************************************" | tee -a $log/test_result.log


python change_yaml.py --engine_type $engine_type --target_key voc --target_value hifigan_csmsc    # change voc: mb_melgan_csmsc -> hifigan_csmsc
# start server: engine: tts_online, protocol: websocket, am: fastspeech2_csmsc, voc: hifigan_csmsc
info="start server: engine: $engine_type, protocol: websocket, am: fastspeech2_csmsc, voc: hifigan_csmsc."
echo "$info" | tee -a $log/test_result.log
((target_start_num+=1))
StartService

if [[ $start_num -eq $target_start_num && $flag == "normal" ]]; then
    echo "Service started successfully."  | tee -a $log/test_result.log
    ClientTest_ws
    echo "This round of testing is over."  | tee -a $log/test_result.log
    GetTestResult_ws
else
    echo "Service failed to start, no client test."
    target_start_num=$start_num
fi

kill -9 `cat pid`
rm -rf pid
sleep 2s
echo "**************************************************************************************" | tee -a $log/test_result.log


python change_yaml.py --engine_type $engine_type --target_key am --target_value fastspeech2_cnndecoder_csmsc    # change am: fastspeech2_csmsc -> fastspeech2_cnndecoder_csmsc
# start server: engine: tts_online, protocol: websocket, am: fastspeech2_cnndecoder_csmsc, voc: hifigan_csmsc
info="start server: engine: $engine_type, protocol: websocket, am: fastspeech2_cnndecoder_csmsc, voc: hifigan_csmsc."
echo "$info" | tee -a $log/test_result.log
((target_start_num+=1))
StartService

if [[ $start_num -eq $target_start_num && $flag == "normal" ]]; then
    echo "Service started successfully."  | tee -a $log/test_result.log
    ClientTest_ws
    echo "This round of testing is over."  | tee -a $log/test_result.log
    GetTestResult_ws
else
    echo "Service failed to start, no client test."
    target_start_num=$start_num
fi

kill -9 `cat pid`
rm -rf pid
sleep 2s
echo "**************************************************************************************" | tee -a $log/test_result.log


python change_yaml.py --engine_type $engine_type --target_key voc --target_value mb_melgan_csmsc    # change am: hifigan_csmsc -> mb_melgan_csmsc
# start server: engine: tts_online, protocol: websocket, am: fastspeech2_cnndecoder_csmsc, voc: mb_melgan_csmsc
info="start server: engine: $engine_type, protocol: websocket, am: fastspeech2_cnndecoder_csmsc, voc: mb_melgan_csmsc."
echo "$info" | tee -a $log/test_result.log
((target_start_num+=1))
StartService

if [[ $start_num -eq $target_start_num && $flag == "normal" ]]; then
    echo "Service started successfully."  | tee -a $log/test_result.log
    ClientTest_ws
    echo "This round of testing is over."  | tee -a $log/test_result.log
    GetTestResult_ws
else
    echo "Service failed to start, no client test."
    target_start_num=$start_num
fi

kill -9 `cat pid`
rm -rf pid
sleep 2s
echo "**************************************************************************************" | tee -a $log/test_result.log


echo "All tests completed."  | tee -a $log/test_result.log

# sohw all the test results
echo "***************** Here are all the test results ********************"
cat $log/test_result.log

# Restoring conf is the same as demos/speech_server
cp ./tts_online_application.yaml ./conf/application.yaml -rf

sleep 2s
\ No newline at end of file
tests/unit/server/online/tts/test_all.sh
0 → 100644
#!/bin/bash
# bash test_all.sh

log_all_dir=./log

bash test.sh tts_online $log_all_dir/log_tts_online_cpu

python change_yaml.py --change_type engine_type --target_key engine_list --target_value tts_online-onnx
bash test.sh tts_online-onnx $log_all_dir/log_tts_online-onnx_cpu

python change_yaml.py --change_type device --target_key device --target_value gpu:3
bash test.sh tts_online $log_all_dir/log_tts_online_gpu

python change_yaml.py --change_type engine_type --target_key engine_list --target_value tts_online-onnx
python change_yaml.py --change_type device --target_key device --target_value gpu:3
bash test.sh tts_online-onnx $log_all_dir/log_tts_online-onnx_gpu


echo "************************************** show all test results ****************************************"
cat $log_all_dir/log_tts_online_cpu/test_result.log
cat $log_all_dir/log_tts_online-onnx_cpu/test_result.log
cat $log_all_dir/log_tts_online_gpu/test_result.log
cat $log_all_dir/log_tts_online-onnx_gpu/test_result.log
tests/unit/server/online/tts/tts_online_application.yaml
0 → 100644
# This is the parameter configuration file for PaddleSpeech Serving.

#################################################################################
#                               SERVER SETTING                                  #
#################################################################################
host: 127.0.0.1
port: 8092

# The task format in the engin_list is: <speech task>_<engine type>
# task choices = ['tts_online', 'tts_online-onnx']
# protocol = ['websocket', 'http'] (only one can be selected).
protocol: 'http'
engine_list: ['tts_online']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### TTS #########################################
################### speech task: tts; engine_type: online #######################
tts_online:
    # am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']
    am: 'fastspeech2_cnndecoder_csmsc'
    am_config:
    am_ckpt:
    am_stat:
    phones_dict:
    tones_dict:
    speaker_dict:
    spk_id: 0

    # voc (vocoder) choices=['mb_melgan_csmsc', 'hifigan_csmsc']
    voc: 'mb_melgan_csmsc'
    voc_config:
    voc_ckpt:
    voc_stat:

    # others
    lang: 'zh'
    device: 'cpu'       # set 'gpu:id' or 'cpu'
    am_block: 42
    am_pad: 12
    voc_block: 14
    voc_pad: 14


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### TTS #########################################
################### speech task: tts; engine_type: online-onnx #######################
tts_online-onnx:
    # am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']
    am: 'fastspeech2_cnndecoder_csmsc_onnx'
    # am_ckpt is a list, if am is fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder model, decoder model, postnet model];
    # if am is fastspeech2_csmsc_onnx, am_ckpt = [ckpt model];
    am_ckpt:            # list
    am_stat:
    phones_dict:
    tones_dict:
    speaker_dict:
    spk_id: 0
    am_sample_rate: 24000
    am_sess_conf:
        device: "cpu"   # set 'gpu:id' or 'cpu'
        use_trt: False
        cpu_threads: 1

    # voc (vocoder) choices=['mb_melgan_csmsc_onnx', 'hifigan_csmsc_onnx']
    voc: 'mb_melgan_csmsc_onnx'
    voc_ckpt:
    voc_sample_rate: 24000
    voc_sess_conf:
        device: "cpu"   # set 'gpu:id' or 'cpu'
        use_trt: False
        cpu_threads: 1

    # others
    lang: 'zh'
    am_block: 42
    am_pad: 12
    voc_block: 14
    voc_pad: 14
    voc_upsample: 300
tests/unit/server/online/tts/ws_client.py
0 → 100644
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import _thread as thread
import argparse
import base64
import json
import ssl
import time

import websocket

flag = 1
st = 0.0
all_bytes = b''


class WsParam(object):
    # initialization
    def __init__(self, text, server="127.0.0.1", port=8090):
        self.server = server
        self.port = port
        self.url = "ws://" + self.server + ":" + str(self.port) + "/ws/tts"
        self.text = text

    # generate the url
    def create_url(self):
        return self.url


def on_message(ws, message):
    global flag
    global st
    global all_bytes

    try:
        message = json.loads(message)
        audio = message["audio"]
        audio = base64.b64decode(audio)  # bytes
        status = message["status"]
        all_bytes += audio

        if status == 0:
            print("create successfully.")
        elif status == 1:
            if flag:
                print(f"首包响应：{time.time() - st} s")
                flag = 0
        elif status == 2:
            final_response = time.time() - st
            duration = len(all_bytes) / 2.0 / 24000
            print(f"尾包响应：{final_response} s")
            print(f"音频时长：{duration} s")
            print(f"RTF: {final_response / duration}")
            with open("./out.pcm", "wb") as f:
                f.write(all_bytes)
            print("ws is closed")
            ws.close()
        else:
            print("infer error")

    except Exception as e:
        print("receive msg,but parse exception:", e)


# handle a websocket error
def on_error(ws, error):
    print("### error:", error)


# handle the websocket being closed
def on_close(ws):
    print("### closed ###")


# handle the websocket connection being established
def on_open(ws):
    def run(*args):
        global st
        text_base64 = str(base64.b64encode((wsParam.text).encode('utf-8')), "UTF8")
        d = {"text": text_base64}
        d = json.dumps(d)
        print("Start sending text data")
        st = time.time()
        ws.send(d)

    thread.start_new_thread(run, ())


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--text",
        type=str,
        help="A sentence to be synthesized",
        default="您好，欢迎使用语音合成服务。")
    parser.add_argument(
        "--server", type=str, help="server ip", default="127.0.0.1")
    parser.add_argument("--port", type=int, help="server port", default=8092)
    args = parser.parse_args()

    print("***************************************")
    print("Server ip: ", args.server)
    print("Server port: ", args.port)
    print("Sentence to be synthesized: ", args.text)
    print("***************************************")

    wsParam = WsParam(text=args.text, server=args.server, port=args.port)

    websocket.enableTrace(False)
    wsUrl = wsParam.create_url()
    ws = websocket.WebSocketApp(
        wsUrl, on_message=on_message, on_error=on_error, on_close=on_close)
    ws.on_open = on_open
    ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
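
For clarity, the messages exchanged above have a simple shape: the client sends the sentence base64-encoded in a JSON "text" field, and the server replies with a status code (0 connection created, 1 streaming chunk, 2 finished) plus a base64 "audio" payload. A hedged illustration with dummy bytes:

# Hedged illustration of the request/response shapes used by ws_client.py.
import base64
import json

request = json.dumps({"text": base64.b64encode("您好".encode("utf-8")).decode("utf-8")})
reply_chunk = {"status": 1, "audio": base64.b64encode(b"\x00\x01").decode("utf-8")}  # streaming chunk
reply_done = {"status": 2, "audio": ""}                                              # end of stream
print(request, reply_chunk, reply_done)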