Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
22b67ed0
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
22b67ed0
编写于
5月 11, 2022
作者:
L
liangym
提交者:
GitHub
5月 11, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1882 from lym0302/streaming_tts_server
[server] improve code
上级
f8631789
be21aed0
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
161 addition
and
78 deletion
+161
-78
paddlespeech/server/bin/paddlespeech_client.py
paddlespeech/server/bin/paddlespeech_client.py
+8
-2
paddlespeech/server/bin/paddlespeech_server.py
paddlespeech/server/bin/paddlespeech_server.py
+7
-4
paddlespeech/server/tests/tts/offline/http_client.py
paddlespeech/server/tests/tts/offline/http_client.py
+1
-1
paddlespeech/server/utils/audio_handler.py
paddlespeech/server/utils/audio_handler.py
+64
-35
paddlespeech/server/utils/audio_process.py
paddlespeech/server/utils/audio_process.py
+1
-1
paddlespeech/server/ws/tts_api.py
paddlespeech/server/ws/tts_api.py
+70
-27
tests/unit/server/offline/change_yaml.py
tests/unit/server/offline/change_yaml.py
+5
-4
tests/unit/server/online/tts/check_server/change_yaml.py
tests/unit/server/online/tts/check_server/change_yaml.py
+3
-2
tests/unit/server/online/tts/test_server/test_http_client.py
tests/unit/server/online/tts/test_server/test_http_client.py
+2
-2
未找到文件。
paddlespeech/server/bin/paddlespeech_client.py
浏览文件 @
22b67ed0
...
...
@@ -18,6 +18,7 @@ import io
import
json
import
os
import
random
import
sys
import
time
from
typing
import
List
...
...
@@ -91,7 +92,7 @@ class TTSClientExecutor(BaseExecutor):
temp_wav
=
str
(
random
.
getrandbits
(
128
))
+
".wav"
soundfile
.
write
(
temp_wav
,
samples
,
sample_rate
)
wav2pcm
(
temp_wav
,
outfile
,
data_type
=
np
.
int16
)
os
.
system
(
"rm %s"
%
(
temp_wav
)
)
os
.
remove
(
temp_wav
)
else
:
logger
.
error
(
"The format for saving audio only supports wav or pcm"
)
...
...
@@ -128,6 +129,7 @@ class TTSClientExecutor(BaseExecutor):
return
True
except
Exception
as
e
:
logger
.
error
(
"Failed to synthesized audio."
)
logger
.
error
(
e
)
return
False
@
stats_wrapper
...
...
@@ -236,6 +238,7 @@ class TTSOnlineClientExecutor(BaseExecutor):
return
True
except
Exception
as
e
:
logger
.
error
(
"Failed to synthesized audio."
)
logger
.
error
(
e
)
return
False
@
stats_wrapper
...
...
@@ -275,7 +278,7 @@ class TTSOnlineClientExecutor(BaseExecutor):
else
:
logger
.
error
(
"Please set correct protocol, http or websocket"
)
return
False
sys
.
exit
(
-
1
)
logger
.
info
(
f
"sentence:
{
input
}
"
)
logger
.
info
(
f
"duration:
{
duration
}
s"
)
...
...
@@ -503,6 +506,7 @@ class ASROnlineClientExecutor(BaseExecutor):
Returns:
str: the audio text
"""
logger
.
info
(
"asr websocket client start"
)
handler
=
ASRWsAudioHandler
(
server_ip
,
...
...
@@ -555,6 +559,7 @@ class CLSClientExecutor(BaseExecutor):
return
True
except
Exception
as
e
:
logger
.
error
(
"Failed to speech classification."
)
logger
.
error
(
e
)
return
False
@
stats_wrapper
...
...
@@ -728,6 +733,7 @@ class VectorClientExecutor(BaseExecutor):
Returns:
str: the audio embedding or score between enroll and test audio
"""
if
task
==
"spk"
:
from
paddlespeech.server.utils.audio_handler
import
VectorHttpHandler
logger
.
info
(
"vector http client start"
)
...
...
paddlespeech/server/bin/paddlespeech_server.py
浏览文件 @
22b67ed0
...
...
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
argparse
import
sys
from
typing
import
List
import
uvicorn
...
...
@@ -79,10 +80,12 @@ class ServerExecutor(BaseExecutor):
def
execute
(
self
,
argv
:
List
[
str
])
->
bool
:
args
=
self
.
parser
.
parse_args
(
argv
)
config
=
get_config
(
args
.
config_file
)
if
self
.
init
(
config
):
uvicorn
.
run
(
app
,
host
=
config
.
host
,
port
=
config
.
port
,
debug
=
True
)
try
:
self
(
args
.
config_file
,
args
.
log_file
)
except
Exception
as
e
:
logger
.
error
(
"Failed to start server."
)
logger
.
error
(
e
)
sys
.
exit
(
-
1
)
@
stats_wrapper
def
__call__
(
self
,
...
...
paddlespeech/server/tests/tts/offline/http_client.py
浏览文件 @
22b67ed0
...
...
@@ -61,7 +61,7 @@ def tts_client(args):
temp_wav
=
str
(
random
.
getrandbits
(
128
))
+
".wav"
soundfile
.
write
(
temp_wav
,
samples
,
sample_rate
)
wav2pcm
(
temp_wav
,
outfile
,
data_type
=
np
.
int16
)
os
.
system
(
"rm %s"
%
(
temp_wav
)
)
os
.
remove
(
temp_wav
)
else
:
print
(
"The format for saving audio only supports wav or pcm"
)
...
...
paddlespeech/server/utils/audio_handler.py
浏览文件 @
22b67ed0
...
...
@@ -304,52 +304,80 @@ class TTSWsHandler:
receive_time_list
=
[]
chunk_duration_list
=
[]
# 1. Send websocket handshake
protocol
# 1. Send websocket handshake
request
async
with
websockets
.
connect
(
self
.
url
)
as
ws
:
# 2. Server has already received handshake protocol
# send text to engine
# 2. Server has already received handshake response, send start request
start_request
=
json
.
dumps
({
"task"
:
"tts"
,
"signal"
:
"start"
})
await
ws
.
send
(
start_request
)
msg
=
await
ws
.
recv
()
logger
.
info
(
f
"client receive msg=
{
msg
}
"
)
msg
=
json
.
loads
(
msg
)
session
=
msg
[
"session"
]
# 3. send speech synthesis request
text_base64
=
str
(
base64
.
b64encode
((
text
).
encode
(
'utf-8'
)),
"UTF8"
)
d
=
{
"text"
:
text_base64
}
d
=
json
.
dumps
(
d
)
request
=
json
.
dumps
({
"text"
:
text_base64
})
st
=
time
.
time
()
await
ws
.
send
(
d
)
await
ws
.
send
(
request
)
logging
.
info
(
"send a message to the server"
)
#
3. Process the received response
#
4. Process the received response
message
=
await
ws
.
recv
()
first_response
=
time
.
time
()
-
st
message
=
json
.
loads
(
message
)
status
=
message
[
"status"
]
while
True
:
# When an exception is thrown
if
status
==
-
1
:
# send end request
end_request
=
json
.
dumps
({
"task"
:
"tts"
,
"signal"
:
"end"
,
"session"
:
session
})
await
ws
.
send
(
end_request
)
break
# Return last packet normally, no audio information
elif
status
==
2
:
final_response
=
time
.
time
()
-
st
duration
=
len
(
all_bytes
)
/
2.0
/
24000
if
output
is
not
None
:
save_audio_success
=
save_audio
(
all_bytes
,
output
)
else
:
save_audio_success
=
False
# send end request
end_request
=
json
.
dumps
({
"task"
:
"tts"
,
"signal"
:
"end"
,
"session"
:
session
})
await
ws
.
send
(
end_request
)
break
# Return the audio stream normally
elif
status
==
1
:
receive_time_list
.
append
(
time
.
time
())
audio
=
message
[
"audio"
]
audio
=
base64
.
b64decode
(
audio
)
# bytes
chunk_duration_list
.
append
(
len
(
audio
)
/
2.0
/
24000
)
all_bytes
+=
audio
if
self
.
play
:
self
.
mutex
.
acquire
()
self
.
buffer
+=
audio
self
.
mutex
.
release
()
if
self
.
start_play
:
self
.
t
.
start
()
self
.
start_play
=
False
message
=
await
ws
.
recv
()
message
=
json
.
loads
(
message
)
status
=
message
[
"status"
]
while
(
status
==
1
):
receive_time_list
.
append
(
time
.
time
())
audio
=
message
[
"audio"
]
audio
=
base64
.
b64decode
(
audio
)
# bytes
chunk_duration_list
.
append
(
len
(
audio
)
/
2.0
/
24000
)
all_bytes
+=
audio
if
self
.
play
:
self
.
mutex
.
acquire
()
self
.
buffer
+=
audio
self
.
mutex
.
release
()
if
self
.
start_play
:
self
.
t
.
start
()
self
.
start_play
=
False
message
=
await
ws
.
recv
()
message
=
json
.
loads
(
message
)
status
=
message
[
"status"
]
# 4. Last packet, no audio information
if
status
==
2
:
final_response
=
time
.
time
()
-
st
duration
=
len
(
all_bytes
)
/
2.0
/
24000
if
output
is
not
None
:
save_audio_success
=
save_audio
(
all_bytes
,
output
)
else
:
save_audio_success
=
False
else
:
logger
.
error
(
"infer error"
)
logger
.
error
(
"infer error, return status is invalid."
)
if
self
.
play
:
self
.
t
.
join
()
...
...
@@ -458,6 +486,7 @@ class TTSHttpHandler:
final_response
=
time
.
time
()
-
st
duration
=
len
(
all_bytes
)
/
2.0
/
24000
html
.
close
()
# when stream=True
if
output
is
not
None
:
save_audio_success
=
save_audio
(
all_bytes
,
output
)
...
...
paddlespeech/server/utils/audio_process.py
浏览文件 @
22b67ed0
...
...
@@ -167,7 +167,7 @@ def save_audio(bytes_data, audio_path, sample_rate: int=24000) -> bool:
channels
=
1
,
bits
=
16
,
sample_rate
=
sample_rate
)
os
.
system
(
"rm
./tmp.pcm"
)
os
.
remove
(
"
./tmp.pcm"
)
else
:
print
(
"Only supports saved audio format is pcm or wav"
)
return
False
...
...
paddlespeech/server/ws/tts_api.py
浏览文件 @
22b67ed0
...
...
@@ -12,10 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
json
import
uuid
from
fastapi
import
APIRouter
from
fastapi
import
WebSocket
from
fastapi
import
WebSocketDisconnect
from
starlette.websockets
import
WebSocketState
as
WebSocketState
from
paddlespeech.cli.log
import
logger
...
...
@@ -26,36 +26,79 @@ router = APIRouter()
@
router
.
websocket
(
'/paddlespeech/tts/streaming'
)
async
def
websocket_endpoint
(
websocket
:
WebSocket
):
"""PaddleSpeech Online TTS Server api
Args:
websocket (WebSocket): the websocket instance
"""
#1. the interface waits to accept the websocket protocol header
# and only once we receive the header does it establish the connection with a specific thread
await
websocket
.
accept
()
#2. if we accept the websocket headers, we will get the online tts engine instance
engine_pool
=
get_engine_pool
()
tts_engine
=
engine_pool
[
'tts'
]
try
:
# careful here, changed the source code from starlette.websockets
assert
websocket
.
application_state
==
WebSocketState
.
CONNECTED
message
=
await
websocket
.
receive
()
websocket
.
_raise_on_disconnect
(
message
)
while
True
:
# careful here, changed the source code from starlette.websockets
assert
websocket
.
application_state
==
WebSocketState
.
CONNECTED
message
=
await
websocket
.
receive
()
websocket
.
_raise_on_disconnect
(
message
)
message
=
json
.
loads
(
message
[
"text"
])
# get engine
engine_pool
=
get_engine_pool
()
tts_engine
=
engine_pool
[
'tts'
]
if
'signal'
in
message
:
# start request
if
message
[
'signal'
]
==
'start'
:
session
=
uuid
.
uuid1
().
hex
resp
=
{
"status"
:
0
,
"signal"
:
"server ready"
,
"session"
:
session
}
await
websocket
.
send_json
(
resp
)
# 获取 message 并转文本
message
=
json
.
loads
(
message
[
"text"
])
text_bese64
=
message
[
"text"
]
sentence
=
tts_engine
.
preprocess
(
text_bese64
=
text_bese64
)
# end request
elif
message
[
'signal'
]
==
'end'
:
resp
=
{
"status"
:
0
,
"signal"
:
"connection will be closed"
,
"session"
:
session
}
await
websocket
.
send_json
(
resp
)
break
else
:
resp
=
{
"status"
:
0
,
"signal"
:
"no valid json data"
}
await
websocket
.
send_json
(
resp
)
# run
wav_generator
=
tts_engine
.
run
(
sentence
)
# speech synthesis request
elif
'text'
in
message
:
text_bese64
=
message
[
"text"
]
sentence
=
tts_engine
.
preprocess
(
text_bese64
=
text_bese64
)
while
True
:
try
:
tts_results
=
next
(
wav_generator
)
resp
=
{
"status"
:
1
,
"audio"
:
tts_results
}
await
websocket
.
send_json
(
resp
)
except
StopIteration
as
e
:
resp
=
{
"status"
:
2
,
"audio"
:
''
}
await
websocket
.
send_json
(
resp
)
logger
.
info
(
"Complete the transmission of audio streams"
)
break
except
WebSocketDisconnect
:
pass
\ No newline at end of file
# run
wav_generator
=
tts_engine
.
run
(
sentence
)
while
True
:
try
:
tts_results
=
next
(
wav_generator
)
resp
=
{
"status"
:
1
,
"audio"
:
tts_results
}
await
websocket
.
send_json
(
resp
)
except
StopIteration
as
e
:
resp
=
{
"status"
:
2
,
"audio"
:
''
}
await
websocket
.
send_json
(
resp
)
logger
.
info
(
"Complete the synthesis of the audio streams"
)
break
except
Exception
as
e
:
resp
=
{
"status"
:
-
1
,
"audio"
:
''
}
await
websocket
.
send_json
(
resp
)
break
else
:
logger
.
error
(
"Invalid request, please check if the request is correct."
)
except
Exception
as
e
:
logger
.
error
(
e
)
tests/unit/server/offline/change_yaml.py
浏览文件 @
22b67ed0
#!/usr/bin/python
import
argparse
import
os
import
shutil
import
yaml
...
...
@@ -14,7 +15,7 @@ def change_device(yamlfile: str, engine: str, device: str):
model_type (dict): change model type
"""
tmp_yamlfile
=
yamlfile
.
split
(
".yaml"
)[
0
]
+
"_tmp.yaml"
os
.
system
(
"cp %s %s"
%
(
yamlfile
,
tmp_yamlfile
)
)
shutil
.
copyfile
(
yamlfile
,
tmp_yamlfile
)
if
device
==
'cpu'
:
set_device
=
'cpu'
...
...
@@ -41,7 +42,7 @@ def change_device(yamlfile: str, engine: str, device: str):
print
(
yaml
.
dump
(
y
,
default_flow_style
=
False
,
sort_keys
=
False
))
yaml
.
dump
(
y
,
fw
,
allow_unicode
=
True
)
os
.
system
(
"rm %s"
%
(
tmp_yamlfile
)
)
os
.
remove
(
tmp_yamlfile
)
print
(
"Change %s successfully."
%
(
yamlfile
))
...
...
@@ -52,7 +53,7 @@ def change_engine_type(yamlfile: str, engine_type):
task (str): asr or tts
"""
tmp_yamlfile
=
yamlfile
.
split
(
".yaml"
)[
0
]
+
"_tmp.yaml"
os
.
system
(
"cp %s %s"
%
(
yamlfile
,
tmp_yamlfile
)
)
shutil
.
copyfile
(
yamlfile
,
tmp_yamlfile
)
speech_task
=
engine_type
.
split
(
"_"
)[
0
]
with
open
(
tmp_yamlfile
)
as
f
,
open
(
yamlfile
,
"w+"
,
encoding
=
"utf-8"
)
as
fw
:
...
...
@@ -65,7 +66,7 @@ def change_engine_type(yamlfile: str, engine_type):
y
[
'engine_list'
]
=
engine_list
print
(
yaml
.
dump
(
y
,
default_flow_style
=
False
,
sort_keys
=
False
))
yaml
.
dump
(
y
,
fw
,
allow_unicode
=
True
)
os
.
system
(
"rm %s"
%
(
tmp_yamlfile
)
)
os
.
remove
(
tmp_yamlfile
)
print
(
"Change %s successfully."
%
(
yamlfile
))
...
...
tests/unit/server/online/tts/check_server/change_yaml.py
浏览文件 @
22b67ed0
#!/usr/bin/python
import
argparse
import
os
import
shutil
import
yaml
...
...
@@ -13,7 +14,7 @@ def change_value(args):
target_value
=
args
.
target_value
tmp_yamlfile
=
yamlfile
.
split
(
".yaml"
)[
0
]
+
"_tmp.yaml"
os
.
system
(
"cp %s %s"
%
(
yamlfile
,
tmp_yamlfile
)
)
shutil
.
copyfile
(
yamlfile
,
tmp_yamlfile
)
with
open
(
tmp_yamlfile
)
as
f
,
open
(
yamlfile
,
"w+"
,
encoding
=
"utf-8"
)
as
fw
:
y
=
yaml
.
safe_load
(
f
)
...
...
@@ -51,7 +52,7 @@ def change_value(args):
print
(
yaml
.
dump
(
y
,
default_flow_style
=
False
,
sort_keys
=
False
))
yaml
.
dump
(
y
,
fw
,
allow_unicode
=
True
)
os
.
system
(
"rm %s"
%
(
tmp_yamlfile
)
)
os
.
remove
(
tmp_yamlfile
)
print
(
f
"Change key:
{
target_key
}
to value:
{
target_value
}
successfully."
)
...
...
tests/unit/server/online/tts/test_server/test_http_client.py
浏览文件 @
22b67ed0
...
...
@@ -75,8 +75,8 @@ if __name__ == "__main__":
args
=
parser
.
parse_args
()
os
.
system
(
"rm -rf %s"
%
(
args
.
output_dir
))
os
.
mkdir
(
args
.
output_dir
)
if
not
os
.
path
.
exists
(
args
.
output_dir
):
os
.
makedirs
(
args
.
output_dir
)
first_response_list
=
[]
final_response_list
=
[]
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录