Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
5b06b76e
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
207
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
5b06b76e
编写于
7月 07, 2022
作者:
L
lym0302
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
change sr, test=doc
上级
a0d1888c
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
65 addition
and
96 deletion
+65
-96
paddlespeech/server/bin/paddlespeech_client.py
paddlespeech/server/bin/paddlespeech_client.py
+3
-28
paddlespeech/server/engine/engine_factory.py
paddlespeech/server/engine/engine_factory.py
+1
-1
paddlespeech/server/engine/tts/online/onnx/tts_engine.py
paddlespeech/server/engine/tts/online/onnx/tts_engine.py
+12
-21
paddlespeech/server/engine/tts/online/python/tts_engine.py
paddlespeech/server/engine/tts/online/python/tts_engine.py
+10
-20
paddlespeech/server/utils/audio_handler.py
paddlespeech/server/utils/audio_handler.py
+38
-25
paddlespeech/server/utils/onnx_infer.py
paddlespeech/server/utils/onnx_infer.py
+1
-1
未找到文件。
paddlespeech/server/bin/paddlespeech_client.py
浏览文件 @
5b06b76e
...
...
@@ -192,23 +192,7 @@ class TTSOnlineClientExecutor(BaseExecutor):
self
.
parser
.
add_argument
(
'--spk_id'
,
type
=
int
,
default
=
0
,
help
=
'Speaker id'
)
self
.
parser
.
add_argument
(
'--speed'
,
type
=
float
,
default
=
1.0
,
help
=
'Audio speed, the value should be set between 0 and 3'
)
self
.
parser
.
add_argument
(
'--volume'
,
type
=
float
,
default
=
1.0
,
help
=
'Audio volume, the value should be set between 0 and 3'
)
self
.
parser
.
add_argument
(
'--sample_rate'
,
type
=
int
,
default
=
0
,
choices
=
[
0
,
8000
,
16000
],
help
=
'Sampling rate, the default is the same as the model'
)
self
.
parser
.
add_argument
(
'--output'
,
type
=
str
,
default
=
None
,
help
=
'Synthesized audio file'
)
'--output'
,
type
=
str
,
default
=
None
,
help
=
'Client saves synthesized audio'
)
self
.
parser
.
add_argument
(
"--play"
,
type
=
bool
,
help
=
"whether to play audio"
,
default
=
False
)
...
...
@@ -219,9 +203,6 @@ class TTSOnlineClientExecutor(BaseExecutor):
port
=
args
.
port
protocol
=
args
.
protocol
spk_id
=
args
.
spk_id
speed
=
args
.
speed
volume
=
args
.
volume
sample_rate
=
args
.
sample_rate
output
=
args
.
output
play
=
args
.
play
...
...
@@ -232,9 +213,6 @@ class TTSOnlineClientExecutor(BaseExecutor):
port
=
port
,
protocol
=
protocol
,
spk_id
=
spk_id
,
speed
=
speed
,
volume
=
volume
,
sample_rate
=
sample_rate
,
output
=
output
,
play
=
play
)
return
True
...
...
@@ -250,9 +228,6 @@ class TTSOnlineClientExecutor(BaseExecutor):
port
:
int
=
8092
,
protocol
:
str
=
"http"
,
spk_id
:
int
=
0
,
speed
:
float
=
1.0
,
volume
:
float
=
1.0
,
sample_rate
:
int
=
0
,
output
:
str
=
None
,
play
:
bool
=
False
):
"""
...
...
@@ -264,7 +239,7 @@ class TTSOnlineClientExecutor(BaseExecutor):
from
paddlespeech.server.utils.audio_handler
import
TTSHttpHandler
handler
=
TTSHttpHandler
(
server_ip
,
port
,
play
)
first_response
,
final_response
,
duration
,
save_audio_success
,
receive_time_list
,
chunk_duration_list
=
handler
.
run
(
input
,
spk_id
,
speed
,
volume
,
sample_rate
,
output
)
input
,
spk_id
,
output
)
delay_time_list
=
compute_delay
(
receive_time_list
,
chunk_duration_list
)
...
...
@@ -274,7 +249,7 @@ class TTSOnlineClientExecutor(BaseExecutor):
handler
=
TTSWsHandler
(
server_ip
,
port
,
play
)
loop
=
asyncio
.
get_event_loop
()
first_response
,
final_response
,
duration
,
save_audio_success
,
receive_time_list
,
chunk_duration_list
=
loop
.
run_until_complete
(
handler
.
run
(
input
,
output
))
handler
.
run
(
input
,
spk_id
,
output
))
delay_time_list
=
compute_delay
(
receive_time_list
,
chunk_duration_list
)
...
...
paddlespeech/server/engine/engine_factory.py
浏览文件 @
5b06b76e
...
...
@@ -13,7 +13,7 @@
# limitations under the License.
from
typing
import
Text
from
..utils
.log
import
logger
from
paddlespeech.cli
.log
import
logger
__all__
=
[
'EngineFactory'
]
...
...
paddlespeech/server/engine/tts/online/onnx/tts_engine.py
浏览文件 @
5b06b76e
...
...
@@ -19,6 +19,8 @@ from typing import Optional
import
numpy
as
np
import
paddle
import
librosa
from
scipy
import
signal
from
paddlespeech.cli.log
import
logger
from
paddlespeech.cli.tts.infer
import
TTSExecutor
...
...
@@ -30,6 +32,8 @@ from paddlespeech.server.utils.util import denorm
from
paddlespeech.server.utils.util
import
get_chunks
from
paddlespeech.t2s.frontend
import
English
from
paddlespeech.t2s.frontend.zh_frontend
import
Frontend
from
paddlespeech.server.utils.audio_process
import
change_speed
from
paddlespeech.server.utils.exception
import
ServerBaseException
__all__
=
[
'TTSEngine'
,
'PaddleTTSConnectionHandler'
]
...
...
@@ -64,6 +68,8 @@ class TTSServerExecutor(TTSExecutor):
self
,
'am_postnet_sess'
)))
and
hasattr
(
self
,
'voc_inference'
):
logger
.
info
(
'Models had been initialized.'
)
return
# am
am_tag
=
am
+
'-'
+
lang
if
am
==
"fastspeech2_csmsc_onnx"
:
...
...
@@ -213,6 +219,8 @@ class TTSEngine(BaseEngine):
self
.
config
.
voc_sample_rate
==
self
.
config
.
am_sample_rate
),
"The sample rate of AM and Vocoder model are different, please check model."
self
.
sample_rate
=
self
.
config
.
voc_sample_rate
try
:
if
self
.
config
.
am_sess_conf
.
device
is
not
None
:
self
.
device
=
self
.
config
.
am_sess_conf
.
device
...
...
@@ -441,33 +449,16 @@ class PaddleTTSConnectionHandler:
self
.
final_response_time
=
time
.
time
()
-
frontend_st
def
preprocess
(
self
,
text_bese64
:
str
=
None
,
text_bytes
:
bytes
=
None
):
# Convert byte to text
if
text_bese64
:
text_bytes
=
base64
.
b64decode
(
text_bese64
)
# base64 to bytes
text
=
text_bytes
.
decode
(
'utf-8'
)
# bytes to text
return
text
def
run
(
self
,
sentence
:
str
,
spk_id
:
int
=
0
,
speed
:
float
=
1.0
,
volume
:
float
=
1.0
,
sample_rate
:
int
=
0
,
save_path
:
str
=
None
):
spk_id
:
int
=
0
):
""" run include inference and postprocess.
Args:
sentence (str): text to be synthesized
spk_id (int, optional): speaker id for multi-speaker speech synthesis. Defaults to 0.
speed (float, optional): speed. Defaults to 1.0.
volume (float, optional): volume. Defaults to 1.0.
sample_rate (int, optional): target sample rate for synthesized audio,
0 means the same as the model sampling rate. Defaults to 0.
save_path (str, optional): The save path of the synthesized audio.
None means do not save audio. Defaults to None.
Returns:
wav_base64: The base64 format of the synthesized audio.
"""
...
...
@@ -488,7 +479,7 @@ class PaddleTTSConnectionHandler:
yield
wav_base64
wav_all
=
np
.
concatenate
(
wav_list
,
axis
=
0
)
duration
=
len
(
wav_all
)
/
self
.
config
.
voc_
sample_rate
duration
=
len
(
wav_all
)
/
self
.
tts_engine
.
sample_rate
logger
.
info
(
f
"sentence:
{
sentence
}
"
)
logger
.
info
(
f
"The durations of audio is:
{
duration
}
s"
)
logger
.
info
(
f
"first response time:
{
self
.
first_response_time
}
s"
)
...
...
@@ -496,4 +487,4 @@ class PaddleTTSConnectionHandler:
logger
.
info
(
f
"RTF:
{
self
.
final_response_time
/
duration
}
"
)
logger
.
info
(
f
"Other info: front time:
{
self
.
frontend_time
}
s, first am infer time:
{
self
.
first_am_infer
}
s, first voc infer time:
{
self
.
first_voc_infer
}
s,"
)
)
\ No newline at end of file
paddlespeech/server/engine/tts/online/python/tts_engine.py
浏览文件 @
5b06b76e
...
...
@@ -276,6 +276,13 @@ class TTSEngine(BaseEngine):
logger
.
error
(
e
)
return
False
assert
(
self
.
executor
.
am_config
.
fs
==
self
.
executor
.
voc_config
.
fs
),
"The sample rate of AM and Vocoder model are different, please check model."
self
.
sample_rate
=
self
.
executor
.
am_config
.
fs
self
.
am_block
=
self
.
config
.
am_block
self
.
am_pad
=
self
.
config
.
am_pad
self
.
voc_block
=
self
.
config
.
voc_block
...
...
@@ -458,33 +465,16 @@ class PaddleTTSConnectionHandler:
)
self
.
final_response_time
=
time
.
time
()
-
frontend_st
def
preprocess
(
self
,
text_bese64
:
str
=
None
,
text_bytes
:
bytes
=
None
):
# Convert byte to text
if
text_bese64
:
text_bytes
=
base64
.
b64decode
(
text_bese64
)
# base64 to bytes
text
=
text_bytes
.
decode
(
'utf-8'
)
# bytes to text
return
text
def
run
(
self
,
sentence
:
str
,
spk_id
:
int
=
0
,
speed
:
float
=
1.0
,
volume
:
float
=
1.0
,
sample_rate
:
int
=
0
,
save_path
:
str
=
None
):
spk_id
:
int
=
0
,):
""" run include inference and postprocess.
Args:
sentence (str): text to be synthesized
spk_id (int, optional): speaker id for multi-speaker speech synthesis. Defaults to 0.
speed (float, optional): speed. Defaults to 1.0.
volume (float, optional): volume. Defaults to 1.0.
sample_rate (int, optional): target sample rate for synthesized audio,
0 means the same as the model sampling rate. Defaults to 0.
save_path (str, optional): The save path of the synthesized audio.
None means do not save audio. Defaults to None.
Returns:
wav_base64: The base64 format of the synthesized audio.
...
...
@@ -507,7 +497,7 @@ class PaddleTTSConnectionHandler:
yield
wav_base64
wav_all
=
np
.
concatenate
(
wav_list
,
axis
=
0
)
duration
=
len
(
wav_all
)
/
self
.
executor
.
am_config
.
fs
duration
=
len
(
wav_all
)
/
self
.
tts_engine
.
sample_rate
logger
.
info
(
f
"sentence:
{
sentence
}
"
)
logger
.
info
(
f
"The durations of audio is:
{
duration
}
s"
)
...
...
paddlespeech/server/utils/audio_handler.py
浏览文件 @
5b06b76e
...
...
@@ -266,6 +266,12 @@ class TTSWsHandler:
self
.
url
=
"ws://"
+
self
.
server
+
":"
+
str
(
self
.
port
)
+
"/paddlespeech/tts/streaming"
self
.
play
=
play
# get model sample rate
self
.
url_get_sr
=
"http://"
+
str
(
self
.
server
)
+
":"
+
str
(
self
.
port
)
+
"/paddlespeech/tts/streaming/samplerate"
self
.
sample_rate
=
requests
.
get
(
self
.
url_get_sr
).
json
()[
"sample_rate"
]
if
self
.
play
:
import
pyaudio
self
.
buffer
=
b
''
...
...
@@ -273,7 +279,7 @@ class TTSWsHandler:
self
.
stream
=
self
.
p
.
open
(
format
=
self
.
p
.
get_format_from_width
(
2
),
channels
=
1
,
rate
=
24000
,
rate
=
self
.
sample_rate
,
output
=
True
)
self
.
mutex
=
threading
.
Lock
()
self
.
start_play
=
True
...
...
@@ -293,12 +299,16 @@ class TTSWsHandler:
self
.
buffer
=
b
''
self
.
mutex
.
release
()
async
def
run
(
self
,
text
:
str
,
output
:
str
=
None
):
async
def
run
(
self
,
text
:
str
,
spk_id
=
0
,
output
:
str
=
None
):
"""Send a text to online server
Args:
text (str): sentence to be synthesized
output (str): save audio path
spk_id (int, optional): speaker id. Defaults to 0.
output (str, optional): client save audio path. Defaults to None.
"""
all_bytes
=
b
''
receive_time_list
=
[]
...
...
@@ -315,8 +325,13 @@ class TTSWsHandler:
session
=
msg
[
"session"
]
# 3. send speech synthesis request
text_base64
=
str
(
base64
.
b64encode
((
text
).
encode
(
'utf-8'
)),
"UTF8"
)
request
=
json
.
dumps
({
"text"
:
text_base64
})
#text_base64 = str(base64.b64encode((text).encode('utf-8')), "UTF8")
params
=
{
"text"
:
text
,
"spk_id"
:
spk_id
,
}
request
=
json
.
dumps
(
params
)
st
=
time
.
time
()
await
ws
.
send
(
request
)
logging
.
info
(
"send a message to the server"
)
...
...
@@ -341,10 +356,10 @@ class TTSWsHandler:
# Rerutn last packet normally, no audio information
elif
status
==
2
:
final_response
=
time
.
time
()
-
st
duration
=
len
(
all_bytes
)
/
2.0
/
24000
duration
=
len
(
all_bytes
)
/
2.0
/
self
.
sample_rate
if
output
is
not
None
:
save_audio_success
=
save_audio
(
all_bytes
,
output
)
save_audio_success
=
save_audio
(
all_bytes
,
output
,
self
.
sample_rate
)
else
:
save_audio_success
=
False
...
...
@@ -362,7 +377,7 @@ class TTSWsHandler:
receive_time_list
.
append
(
time
.
time
())
audio
=
message
[
"audio"
]
audio
=
base64
.
b64decode
(
audio
)
# bytes
chunk_duration_list
.
append
(
len
(
audio
)
/
2.0
/
24000
)
chunk_duration_list
.
append
(
len
(
audio
)
/
2.0
/
self
.
sample_rate
)
all_bytes
+=
audio
if
self
.
play
:
self
.
mutex
.
acquire
()
...
...
@@ -403,19 +418,26 @@ class TTSHttpHandler:
self
.
port
)
+
"/paddlespeech/tts/streaming"
self
.
play
=
play
# get model sample rate
self
.
url_get_sr
=
"http://"
+
str
(
self
.
server
)
+
":"
+
str
(
self
.
port
)
+
"/paddlespeech/tts/streaming/samplerate"
self
.
sample_rate
=
requests
.
get
(
self
.
url_get_sr
).
json
()[
"sample_rate"
]
if
self
.
play
:
import
pyaudio
self
.
buffer
=
b
''
self
.
p
=
pyaudio
.
PyAudio
()
self
.
start_play
=
True
self
.
max_fail
=
50
self
.
stream
=
self
.
p
.
open
(
format
=
self
.
p
.
get_format_from_width
(
2
),
channels
=
1
,
rate
=
24000
,
rate
=
self
.
sample_rate
,
output
=
True
)
self
.
mutex
=
threading
.
Lock
()
self
.
start_play
=
True
self
.
t
=
threading
.
Thread
(
target
=
self
.
play_audio
)
self
.
max_fail
=
50
logger
.
info
(
f
"endpoint:
{
self
.
url
}
"
)
def
play_audio
(
self
):
...
...
@@ -433,28 +455,19 @@ class TTSHttpHandler:
def
run
(
self
,
text
:
str
,
spk_id
=
0
,
speed
=
1.0
,
volume
=
1.0
,
sample_rate
=
0
,
output
:
str
=
None
):
"""Send a text to tts online server
Args:
text (str): sentence to be synthesized.
spk_id (int, optional): speaker id. Defaults to 0.
speed (float, optional): audio speed. Defaults to 1.0.
volume (float, optional): audio volume. Defaults to 1.0.
sample_rate (int, optional): audio sample rate, 0 means the same as model. Defaults to 0.
output (str, optional): save audio path. Defaults to None.
output (str, optional): client save audio path. Defaults to None.
"""
# 1. Create request
params
=
{
"text"
:
text
,
"spk_id"
:
spk_id
,
"speed"
:
speed
,
"volume"
:
volume
,
"sample_rate"
:
sample_rate
,
"save_path"
:
output
}
all_bytes
=
b
''
...
...
@@ -482,14 +495,14 @@ class TTSHttpHandler:
self
.
t
.
start
()
self
.
start_play
=
False
all_bytes
+=
audio
chunk_duration_list
.
append
(
len
(
audio
)
/
2.0
/
24000
)
chunk_duration_list
.
append
(
len
(
audio
)
/
2.0
/
self
.
sample_rate
)
final_response
=
time
.
time
()
-
st
duration
=
len
(
all_bytes
)
/
2.0
/
24000
duration
=
len
(
all_bytes
)
/
2.0
/
self
.
sample_rate
html
.
close
()
# when stream=True
if
output
is
not
None
:
save_audio_success
=
save_audio
(
all_bytes
,
output
)
save_audio_success
=
save_audio
(
all_bytes
,
output
,
self
.
sample_rate
)
else
:
save_audio_success
=
False
...
...
paddlespeech/server/utils/onnx_infer.py
浏览文件 @
5b06b76e
...
...
@@ -16,7 +16,7 @@ from typing import Optional
import
onnxruntime
as
ort
from
.log
import
logger
from
paddlespeech.cli
.log
import
logger
def
get_sess
(
model_path
:
Optional
[
os
.
PathLike
]
=
None
,
sess_conf
:
dict
=
None
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录