Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
5b06b76e
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5b06b76e
编写于
7月 07, 2022
作者:
L
lym0302
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
change sr, test=doc
上级
a0d1888c
变更
6
显示空白变更内容
内联
并排
Showing
6 changed file
with
65 addition
and
96 deletion
+65
-96
paddlespeech/server/bin/paddlespeech_client.py
paddlespeech/server/bin/paddlespeech_client.py
+3
-28
paddlespeech/server/engine/engine_factory.py
paddlespeech/server/engine/engine_factory.py
+1
-1
paddlespeech/server/engine/tts/online/onnx/tts_engine.py
paddlespeech/server/engine/tts/online/onnx/tts_engine.py
+12
-21
paddlespeech/server/engine/tts/online/python/tts_engine.py
paddlespeech/server/engine/tts/online/python/tts_engine.py
+10
-20
paddlespeech/server/utils/audio_handler.py
paddlespeech/server/utils/audio_handler.py
+38
-25
paddlespeech/server/utils/onnx_infer.py
paddlespeech/server/utils/onnx_infer.py
+1
-1
未找到文件。
paddlespeech/server/bin/paddlespeech_client.py
浏览文件 @
5b06b76e
...
@@ -192,23 +192,7 @@ class TTSOnlineClientExecutor(BaseExecutor):
...
@@ -192,23 +192,7 @@ class TTSOnlineClientExecutor(BaseExecutor):
self
.
parser
.
add_argument
(
self
.
parser
.
add_argument
(
'--spk_id'
,
type
=
int
,
default
=
0
,
help
=
'Speaker id'
)
'--spk_id'
,
type
=
int
,
default
=
0
,
help
=
'Speaker id'
)
self
.
parser
.
add_argument
(
self
.
parser
.
add_argument
(
'--speed'
,
'--output'
,
type
=
str
,
default
=
None
,
help
=
'Client saves synthesized audio'
)
type
=
float
,
default
=
1.0
,
help
=
'Audio speed, the value should be set between 0 and 3'
)
self
.
parser
.
add_argument
(
'--volume'
,
type
=
float
,
default
=
1.0
,
help
=
'Audio volume, the value should be set between 0 and 3'
)
self
.
parser
.
add_argument
(
'--sample_rate'
,
type
=
int
,
default
=
0
,
choices
=
[
0
,
8000
,
16000
],
help
=
'Sampling rate, the default is the same as the model'
)
self
.
parser
.
add_argument
(
'--output'
,
type
=
str
,
default
=
None
,
help
=
'Synthesized audio file'
)
self
.
parser
.
add_argument
(
self
.
parser
.
add_argument
(
"--play"
,
type
=
bool
,
help
=
"whether to play audio"
,
default
=
False
)
"--play"
,
type
=
bool
,
help
=
"whether to play audio"
,
default
=
False
)
...
@@ -219,9 +203,6 @@ class TTSOnlineClientExecutor(BaseExecutor):
...
@@ -219,9 +203,6 @@ class TTSOnlineClientExecutor(BaseExecutor):
port
=
args
.
port
port
=
args
.
port
protocol
=
args
.
protocol
protocol
=
args
.
protocol
spk_id
=
args
.
spk_id
spk_id
=
args
.
spk_id
speed
=
args
.
speed
volume
=
args
.
volume
sample_rate
=
args
.
sample_rate
output
=
args
.
output
output
=
args
.
output
play
=
args
.
play
play
=
args
.
play
...
@@ -232,9 +213,6 @@ class TTSOnlineClientExecutor(BaseExecutor):
...
@@ -232,9 +213,6 @@ class TTSOnlineClientExecutor(BaseExecutor):
port
=
port
,
port
=
port
,
protocol
=
protocol
,
protocol
=
protocol
,
spk_id
=
spk_id
,
spk_id
=
spk_id
,
speed
=
speed
,
volume
=
volume
,
sample_rate
=
sample_rate
,
output
=
output
,
output
=
output
,
play
=
play
)
play
=
play
)
return
True
return
True
...
@@ -250,9 +228,6 @@ class TTSOnlineClientExecutor(BaseExecutor):
...
@@ -250,9 +228,6 @@ class TTSOnlineClientExecutor(BaseExecutor):
port
:
int
=
8092
,
port
:
int
=
8092
,
protocol
:
str
=
"http"
,
protocol
:
str
=
"http"
,
spk_id
:
int
=
0
,
spk_id
:
int
=
0
,
speed
:
float
=
1.0
,
volume
:
float
=
1.0
,
sample_rate
:
int
=
0
,
output
:
str
=
None
,
output
:
str
=
None
,
play
:
bool
=
False
):
play
:
bool
=
False
):
"""
"""
...
@@ -264,7 +239,7 @@ class TTSOnlineClientExecutor(BaseExecutor):
...
@@ -264,7 +239,7 @@ class TTSOnlineClientExecutor(BaseExecutor):
from
paddlespeech.server.utils.audio_handler
import
TTSHttpHandler
from
paddlespeech.server.utils.audio_handler
import
TTSHttpHandler
handler
=
TTSHttpHandler
(
server_ip
,
port
,
play
)
handler
=
TTSHttpHandler
(
server_ip
,
port
,
play
)
first_response
,
final_response
,
duration
,
save_audio_success
,
receive_time_list
,
chunk_duration_list
=
handler
.
run
(
first_response
,
final_response
,
duration
,
save_audio_success
,
receive_time_list
,
chunk_duration_list
=
handler
.
run
(
input
,
spk_id
,
speed
,
volume
,
sample_rate
,
output
)
input
,
spk_id
,
output
)
delay_time_list
=
compute_delay
(
receive_time_list
,
delay_time_list
=
compute_delay
(
receive_time_list
,
chunk_duration_list
)
chunk_duration_list
)
...
@@ -274,7 +249,7 @@ class TTSOnlineClientExecutor(BaseExecutor):
...
@@ -274,7 +249,7 @@ class TTSOnlineClientExecutor(BaseExecutor):
handler
=
TTSWsHandler
(
server_ip
,
port
,
play
)
handler
=
TTSWsHandler
(
server_ip
,
port
,
play
)
loop
=
asyncio
.
get_event_loop
()
loop
=
asyncio
.
get_event_loop
()
first_response
,
final_response
,
duration
,
save_audio_success
,
receive_time_list
,
chunk_duration_list
=
loop
.
run_until_complete
(
first_response
,
final_response
,
duration
,
save_audio_success
,
receive_time_list
,
chunk_duration_list
=
loop
.
run_until_complete
(
handler
.
run
(
input
,
output
))
handler
.
run
(
input
,
spk_id
,
output
))
delay_time_list
=
compute_delay
(
receive_time_list
,
delay_time_list
=
compute_delay
(
receive_time_list
,
chunk_duration_list
)
chunk_duration_list
)
...
...
paddlespeech/server/engine/engine_factory.py
浏览文件 @
5b06b76e
...
@@ -13,7 +13,7 @@
...
@@ -13,7 +13,7 @@
# limitations under the License.
# limitations under the License.
from
typing
import
Text
from
typing
import
Text
from
..utils
.log
import
logger
from
paddlespeech.cli
.log
import
logger
__all__
=
[
'EngineFactory'
]
__all__
=
[
'EngineFactory'
]
...
...
paddlespeech/server/engine/tts/online/onnx/tts_engine.py
浏览文件 @
5b06b76e
...
@@ -19,6 +19,8 @@ from typing import Optional
...
@@ -19,6 +19,8 @@ from typing import Optional
import
numpy
as
np
import
numpy
as
np
import
paddle
import
paddle
import
librosa
from
scipy
import
signal
from
paddlespeech.cli.log
import
logger
from
paddlespeech.cli.log
import
logger
from
paddlespeech.cli.tts.infer
import
TTSExecutor
from
paddlespeech.cli.tts.infer
import
TTSExecutor
...
@@ -30,6 +32,8 @@ from paddlespeech.server.utils.util import denorm
...
@@ -30,6 +32,8 @@ from paddlespeech.server.utils.util import denorm
from
paddlespeech.server.utils.util
import
get_chunks
from
paddlespeech.server.utils.util
import
get_chunks
from
paddlespeech.t2s.frontend
import
English
from
paddlespeech.t2s.frontend
import
English
from
paddlespeech.t2s.frontend.zh_frontend
import
Frontend
from
paddlespeech.t2s.frontend.zh_frontend
import
Frontend
from
paddlespeech.server.utils.audio_process
import
change_speed
from
paddlespeech.server.utils.exception
import
ServerBaseException
__all__
=
[
'TTSEngine'
,
'PaddleTTSConnectionHandler'
]
__all__
=
[
'TTSEngine'
,
'PaddleTTSConnectionHandler'
]
...
@@ -64,6 +68,8 @@ class TTSServerExecutor(TTSExecutor):
...
@@ -64,6 +68,8 @@ class TTSServerExecutor(TTSExecutor):
self
,
'am_postnet_sess'
)))
and
hasattr
(
self
,
'voc_inference'
):
self
,
'am_postnet_sess'
)))
and
hasattr
(
self
,
'voc_inference'
):
logger
.
info
(
'Models had been initialized.'
)
logger
.
info
(
'Models had been initialized.'
)
return
return
# am
# am
am_tag
=
am
+
'-'
+
lang
am_tag
=
am
+
'-'
+
lang
if
am
==
"fastspeech2_csmsc_onnx"
:
if
am
==
"fastspeech2_csmsc_onnx"
:
...
@@ -213,6 +219,8 @@ class TTSEngine(BaseEngine):
...
@@ -213,6 +219,8 @@ class TTSEngine(BaseEngine):
self
.
config
.
voc_sample_rate
==
self
.
config
.
am_sample_rate
self
.
config
.
voc_sample_rate
==
self
.
config
.
am_sample_rate
),
"The sample rate of AM and Vocoder model are different, please check model."
),
"The sample rate of AM and Vocoder model are different, please check model."
self
.
sample_rate
=
self
.
config
.
voc_sample_rate
try
:
try
:
if
self
.
config
.
am_sess_conf
.
device
is
not
None
:
if
self
.
config
.
am_sess_conf
.
device
is
not
None
:
self
.
device
=
self
.
config
.
am_sess_conf
.
device
self
.
device
=
self
.
config
.
am_sess_conf
.
device
...
@@ -441,32 +449,15 @@ class PaddleTTSConnectionHandler:
...
@@ -441,32 +449,15 @@ class PaddleTTSConnectionHandler:
self
.
final_response_time
=
time
.
time
()
-
frontend_st
self
.
final_response_time
=
time
.
time
()
-
frontend_st
def
preprocess
(
self
,
text_bese64
:
str
=
None
,
text_bytes
:
bytes
=
None
):
# Convert byte to text
if
text_bese64
:
text_bytes
=
base64
.
b64decode
(
text_bese64
)
# base64 to bytes
text
=
text_bytes
.
decode
(
'utf-8'
)
# bytes to text
return
text
def
run
(
self
,
def
run
(
self
,
sentence
:
str
,
sentence
:
str
,
spk_id
:
int
=
0
,
spk_id
:
int
=
0
):
speed
:
float
=
1.0
,
volume
:
float
=
1.0
,
sample_rate
:
int
=
0
,
save_path
:
str
=
None
):
""" run include inference and postprocess.
""" run include inference and postprocess.
Args:
Args:
sentence (str): text to be synthesized
sentence (str): text to be synthesized
spk_id (int, optional): speaker id for multi-speaker speech synthesis. Defaults to 0.
spk_id (int, optional): speaker id for multi-speaker speech synthesis. Defaults to 0.
speed (float, optional): speed. Defaults to 1.0.
volume (float, optional): volume. Defaults to 1.0.
sample_rate (int, optional): target sample rate for synthesized audio,
0 means the same as the model sampling rate. Defaults to 0.
save_path (str, optional): The save path of the synthesized audio.
None means do not save audio. Defaults to None.
Returns:
Returns:
wav_base64: The base64 format of the synthesized audio.
wav_base64: The base64 format of the synthesized audio.
...
@@ -488,7 +479,7 @@ class PaddleTTSConnectionHandler:
...
@@ -488,7 +479,7 @@ class PaddleTTSConnectionHandler:
yield
wav_base64
yield
wav_base64
wav_all
=
np
.
concatenate
(
wav_list
,
axis
=
0
)
wav_all
=
np
.
concatenate
(
wav_list
,
axis
=
0
)
duration
=
len
(
wav_all
)
/
self
.
config
.
voc_
sample_rate
duration
=
len
(
wav_all
)
/
self
.
tts_engine
.
sample_rate
logger
.
info
(
f
"sentence:
{
sentence
}
"
)
logger
.
info
(
f
"sentence:
{
sentence
}
"
)
logger
.
info
(
f
"The durations of audio is:
{
duration
}
s"
)
logger
.
info
(
f
"The durations of audio is:
{
duration
}
s"
)
logger
.
info
(
f
"first response time:
{
self
.
first_response_time
}
s"
)
logger
.
info
(
f
"first response time:
{
self
.
first_response_time
}
s"
)
...
...
paddlespeech/server/engine/tts/online/python/tts_engine.py
浏览文件 @
5b06b76e
...
@@ -276,6 +276,13 @@ class TTSEngine(BaseEngine):
...
@@ -276,6 +276,13 @@ class TTSEngine(BaseEngine):
logger
.
error
(
e
)
logger
.
error
(
e
)
return
False
return
False
assert
(
self
.
executor
.
am_config
.
fs
==
self
.
executor
.
voc_config
.
fs
),
"The sample rate of AM and Vocoder model are different, please check model."
self
.
sample_rate
=
self
.
executor
.
am_config
.
fs
self
.
am_block
=
self
.
config
.
am_block
self
.
am_block
=
self
.
config
.
am_block
self
.
am_pad
=
self
.
config
.
am_pad
self
.
am_pad
=
self
.
config
.
am_pad
self
.
voc_block
=
self
.
config
.
voc_block
self
.
voc_block
=
self
.
config
.
voc_block
...
@@ -459,32 +466,15 @@ class PaddleTTSConnectionHandler:
...
@@ -459,32 +466,15 @@ class PaddleTTSConnectionHandler:
self
.
final_response_time
=
time
.
time
()
-
frontend_st
self
.
final_response_time
=
time
.
time
()
-
frontend_st
def
preprocess
(
self
,
text_bese64
:
str
=
None
,
text_bytes
:
bytes
=
None
):
# Convert byte to text
if
text_bese64
:
text_bytes
=
base64
.
b64decode
(
text_bese64
)
# base64 to bytes
text
=
text_bytes
.
decode
(
'utf-8'
)
# bytes to text
return
text
def
run
(
self
,
def
run
(
self
,
sentence
:
str
,
sentence
:
str
,
spk_id
:
int
=
0
,
spk_id
:
int
=
0
,):
speed
:
float
=
1.0
,
volume
:
float
=
1.0
,
sample_rate
:
int
=
0
,
save_path
:
str
=
None
):
""" run include inference and postprocess.
""" run include inference and postprocess.
Args:
Args:
sentence (str): text to be synthesized
sentence (str): text to be synthesized
spk_id (int, optional): speaker id for multi-speaker speech synthesis. Defaults to 0.
spk_id (int, optional): speaker id for multi-speaker speech synthesis. Defaults to 0.
speed (float, optional): speed. Defaults to 1.0.
volume (float, optional): volume. Defaults to 1.0.
sample_rate (int, optional): target sample rate for synthesized audio,
0 means the same as the model sampling rate. Defaults to 0.
save_path (str, optional): The save path of the synthesized audio.
None means do not save audio. Defaults to None.
Returns:
Returns:
wav_base64: The base64 format of the synthesized audio.
wav_base64: The base64 format of the synthesized audio.
...
@@ -507,7 +497,7 @@ class PaddleTTSConnectionHandler:
...
@@ -507,7 +497,7 @@ class PaddleTTSConnectionHandler:
yield
wav_base64
yield
wav_base64
wav_all
=
np
.
concatenate
(
wav_list
,
axis
=
0
)
wav_all
=
np
.
concatenate
(
wav_list
,
axis
=
0
)
duration
=
len
(
wav_all
)
/
self
.
executor
.
am_config
.
fs
duration
=
len
(
wav_all
)
/
self
.
tts_engine
.
sample_rate
logger
.
info
(
f
"sentence:
{
sentence
}
"
)
logger
.
info
(
f
"sentence:
{
sentence
}
"
)
logger
.
info
(
f
"The durations of audio is:
{
duration
}
s"
)
logger
.
info
(
f
"The durations of audio is:
{
duration
}
s"
)
...
...
paddlespeech/server/utils/audio_handler.py
浏览文件 @
5b06b76e
...
@@ -266,6 +266,12 @@ class TTSWsHandler:
...
@@ -266,6 +266,12 @@ class TTSWsHandler:
self
.
url
=
"ws://"
+
self
.
server
+
":"
+
str
(
self
.
url
=
"ws://"
+
self
.
server
+
":"
+
str
(
self
.
port
)
+
"/paddlespeech/tts/streaming"
self
.
port
)
+
"/paddlespeech/tts/streaming"
self
.
play
=
play
self
.
play
=
play
# get model sample rate
self
.
url_get_sr
=
"http://"
+
str
(
self
.
server
)
+
":"
+
str
(
self
.
port
)
+
"/paddlespeech/tts/streaming/samplerate"
self
.
sample_rate
=
requests
.
get
(
self
.
url_get_sr
).
json
()[
"sample_rate"
]
if
self
.
play
:
if
self
.
play
:
import
pyaudio
import
pyaudio
self
.
buffer
=
b
''
self
.
buffer
=
b
''
...
@@ -273,7 +279,7 @@ class TTSWsHandler:
...
@@ -273,7 +279,7 @@ class TTSWsHandler:
self
.
stream
=
self
.
p
.
open
(
self
.
stream
=
self
.
p
.
open
(
format
=
self
.
p
.
get_format_from_width
(
2
),
format
=
self
.
p
.
get_format_from_width
(
2
),
channels
=
1
,
channels
=
1
,
rate
=
24000
,
rate
=
self
.
sample_rate
,
output
=
True
)
output
=
True
)
self
.
mutex
=
threading
.
Lock
()
self
.
mutex
=
threading
.
Lock
()
self
.
start_play
=
True
self
.
start_play
=
True
...
@@ -293,12 +299,16 @@ class TTSWsHandler:
...
@@ -293,12 +299,16 @@ class TTSWsHandler:
self
.
buffer
=
b
''
self
.
buffer
=
b
''
self
.
mutex
.
release
()
self
.
mutex
.
release
()
async
def
run
(
self
,
text
:
str
,
output
:
str
=
None
):
async
def
run
(
self
,
text
:
str
,
spk_id
=
0
,
output
:
str
=
None
):
"""Send a text to online server
"""Send a text to online server
Args:
Args:
text (str): sentence to be synthesized
text (str): sentence to be synthesized
output (str): save audio path
spk_id (int, optional): speaker id. Defaults to 0.
output (str, optional): client save audio path. Defaults to None.
"""
"""
all_bytes
=
b
''
all_bytes
=
b
''
receive_time_list
=
[]
receive_time_list
=
[]
...
@@ -315,8 +325,13 @@ class TTSWsHandler:
...
@@ -315,8 +325,13 @@ class TTSWsHandler:
session
=
msg
[
"session"
]
session
=
msg
[
"session"
]
# 3. send speech synthesis request
# 3. send speech synthesis request
text_base64
=
str
(
base64
.
b64encode
((
text
).
encode
(
'utf-8'
)),
"UTF8"
)
#text_base64 = str(base64.b64encode((text).encode('utf-8')), "UTF8")
request
=
json
.
dumps
({
"text"
:
text_base64
})
params
=
{
"text"
:
text
,
"spk_id"
:
spk_id
,
}
request
=
json
.
dumps
(
params
)
st
=
time
.
time
()
st
=
time
.
time
()
await
ws
.
send
(
request
)
await
ws
.
send
(
request
)
logging
.
info
(
"send a message to the server"
)
logging
.
info
(
"send a message to the server"
)
...
@@ -341,10 +356,10 @@ class TTSWsHandler:
...
@@ -341,10 +356,10 @@ class TTSWsHandler:
# Rerutn last packet normally, no audio information
# Rerutn last packet normally, no audio information
elif
status
==
2
:
elif
status
==
2
:
final_response
=
time
.
time
()
-
st
final_response
=
time
.
time
()
-
st
duration
=
len
(
all_bytes
)
/
2.0
/
24000
duration
=
len
(
all_bytes
)
/
2.0
/
self
.
sample_rate
if
output
is
not
None
:
if
output
is
not
None
:
save_audio_success
=
save_audio
(
all_bytes
,
output
)
save_audio_success
=
save_audio
(
all_bytes
,
output
,
self
.
sample_rate
)
else
:
else
:
save_audio_success
=
False
save_audio_success
=
False
...
@@ -362,7 +377,7 @@ class TTSWsHandler:
...
@@ -362,7 +377,7 @@ class TTSWsHandler:
receive_time_list
.
append
(
time
.
time
())
receive_time_list
.
append
(
time
.
time
())
audio
=
message
[
"audio"
]
audio
=
message
[
"audio"
]
audio
=
base64
.
b64decode
(
audio
)
# bytes
audio
=
base64
.
b64decode
(
audio
)
# bytes
chunk_duration_list
.
append
(
len
(
audio
)
/
2.0
/
24000
)
chunk_duration_list
.
append
(
len
(
audio
)
/
2.0
/
self
.
sample_rate
)
all_bytes
+=
audio
all_bytes
+=
audio
if
self
.
play
:
if
self
.
play
:
self
.
mutex
.
acquire
()
self
.
mutex
.
acquire
()
...
@@ -403,19 +418,26 @@ class TTSHttpHandler:
...
@@ -403,19 +418,26 @@ class TTSHttpHandler:
self
.
port
)
+
"/paddlespeech/tts/streaming"
self
.
port
)
+
"/paddlespeech/tts/streaming"
self
.
play
=
play
self
.
play
=
play
# get model sample rate
self
.
url_get_sr
=
"http://"
+
str
(
self
.
server
)
+
":"
+
str
(
self
.
port
)
+
"/paddlespeech/tts/streaming/samplerate"
self
.
sample_rate
=
requests
.
get
(
self
.
url_get_sr
).
json
()[
"sample_rate"
]
if
self
.
play
:
if
self
.
play
:
import
pyaudio
import
pyaudio
self
.
buffer
=
b
''
self
.
buffer
=
b
''
self
.
p
=
pyaudio
.
PyAudio
()
self
.
p
=
pyaudio
.
PyAudio
()
self
.
start_play
=
True
self
.
max_fail
=
50
self
.
stream
=
self
.
p
.
open
(
self
.
stream
=
self
.
p
.
open
(
format
=
self
.
p
.
get_format_from_width
(
2
),
format
=
self
.
p
.
get_format_from_width
(
2
),
channels
=
1
,
channels
=
1
,
rate
=
24000
,
rate
=
self
.
sample_rate
,
output
=
True
)
output
=
True
)
self
.
mutex
=
threading
.
Lock
()
self
.
mutex
=
threading
.
Lock
()
self
.
start_play
=
True
self
.
t
=
threading
.
Thread
(
target
=
self
.
play_audio
)
self
.
t
=
threading
.
Thread
(
target
=
self
.
play_audio
)
self
.
max_fail
=
50
logger
.
info
(
f
"endpoint:
{
self
.
url
}
"
)
logger
.
info
(
f
"endpoint:
{
self
.
url
}
"
)
def
play_audio
(
self
):
def
play_audio
(
self
):
...
@@ -433,28 +455,19 @@ class TTSHttpHandler:
...
@@ -433,28 +455,19 @@ class TTSHttpHandler:
def
run
(
self
,
def
run
(
self
,
text
:
str
,
text
:
str
,
spk_id
=
0
,
spk_id
=
0
,
speed
=
1.0
,
volume
=
1.0
,
sample_rate
=
0
,
output
:
str
=
None
):
output
:
str
=
None
):
"""Send a text to tts online server
"""Send a text to tts online server
Args:
Args:
text (str): sentence to be synthesized.
text (str): sentence to be synthesized.
spk_id (int, optional): speaker id. Defaults to 0.
spk_id (int, optional): speaker id. Defaults to 0.
speed (float, optional): audio speed. Defaults to 1.0.
output (str, optional): client save audio path. Defaults to None.
volume (float, optional): audio volume. Defaults to 1.0.
sample_rate (int, optional): audio sample rate, 0 means the same as model. Defaults to 0.
output (str, optional): save audio path. Defaults to None.
"""
"""
# 1. Create request
# 1. Create request
params
=
{
params
=
{
"text"
:
text
,
"text"
:
text
,
"spk_id"
:
spk_id
,
"spk_id"
:
spk_id
,
"speed"
:
speed
,
"volume"
:
volume
,
"sample_rate"
:
sample_rate
,
"save_path"
:
output
}
}
all_bytes
=
b
''
all_bytes
=
b
''
...
@@ -482,14 +495,14 @@ class TTSHttpHandler:
...
@@ -482,14 +495,14 @@ class TTSHttpHandler:
self
.
t
.
start
()
self
.
t
.
start
()
self
.
start_play
=
False
self
.
start_play
=
False
all_bytes
+=
audio
all_bytes
+=
audio
chunk_duration_list
.
append
(
len
(
audio
)
/
2.0
/
24000
)
chunk_duration_list
.
append
(
len
(
audio
)
/
2.0
/
self
.
sample_rate
)
final_response
=
time
.
time
()
-
st
final_response
=
time
.
time
()
-
st
duration
=
len
(
all_bytes
)
/
2.0
/
24000
duration
=
len
(
all_bytes
)
/
2.0
/
self
.
sample_rate
html
.
close
()
# when stream=True
html
.
close
()
# when stream=True
if
output
is
not
None
:
if
output
is
not
None
:
save_audio_success
=
save_audio
(
all_bytes
,
output
)
save_audio_success
=
save_audio
(
all_bytes
,
output
,
self
.
sample_rate
)
else
:
else
:
save_audio_success
=
False
save_audio_success
=
False
...
...
paddlespeech/server/utils/onnx_infer.py
浏览文件 @
5b06b76e
...
@@ -16,7 +16,7 @@ from typing import Optional
...
@@ -16,7 +16,7 @@ from typing import Optional
import
onnxruntime
as
ort
import
onnxruntime
as
ort
from
.log
import
logger
from
paddlespeech.cli
.log
import
logger
def
get_sess
(
model_path
:
Optional
[
os
.
PathLike
]
=
None
,
sess_conf
:
dict
=
None
):
def
get_sess
(
model_path
:
Optional
[
os
.
PathLike
]
=
None
,
sess_conf
:
dict
=
None
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录