Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
7007b0ec
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
7007b0ec
编写于
4月 25, 2022
作者:
X
xiongxinlei
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update the asr server api, test=doc
上级
833900a8
变更
6
显示空白变更内容
内联
并排
Showing
6 changed file
with
107 addition
and
34 deletion
+107
-34
demos/streaming_asr_server/websocket_client.py
demos/streaming_asr_server/websocket_client.py
+24
-4
paddlespeech/cli/cls/infer.py
paddlespeech/cli/cls/infer.py
+2
-2
paddlespeech/server/bin/paddlespeech_client.py
paddlespeech/server/bin/paddlespeech_client.py
+4
-6
paddlespeech/server/tests/asr/online/microphone_client.py
paddlespeech/server/tests/asr/online/microphone_client.py
+2
-2
paddlespeech/server/utils/audio_handler.py
paddlespeech/server/utils/audio_handler.py
+72
-17
paddlespeech/server/ws/asr_socket.py
paddlespeech/server/ws/asr_socket.py
+3
-3
未找到文件。
demos/streaming_asr_server/websocket_client.py
浏览文件 @
7007b0ec
...
...
@@ -20,19 +20,23 @@ import logging
import
os
from
paddlespeech.cli.log
import
logger
from
paddlespeech.server.utils.audio_handler
import
ASRAudioHandler
from
paddlespeech.server.utils.audio_handler
import
ASR
Ws
AudioHandler
def
main
(
args
):
logger
.
info
(
"asr websocket client start"
)
handler
=
ASRAudioHandler
(
"127.0.0.1"
,
8090
)
handler
=
ASRWsAudioHandler
(
args
.
server_ip
,
args
.
port
,
punc_server_ip
=
args
.
punc_server_ip
,
punc_server_port
=
args
.
punc_server_port
)
loop
=
asyncio
.
get_event_loop
()
# support to process single audio file
if
args
.
wavfile
and
os
.
path
.
exists
(
args
.
wavfile
):
logger
.
info
(
f
"start to process the wavscp:
{
args
.
wavfile
}
"
)
result
=
loop
.
run_until_complete
(
handler
.
run
(
args
.
wavfile
))
result
=
result
[
"
asr_results
"
]
result
=
result
[
"
final_result
"
]
logger
.
info
(
f
"asr websocket client finished :
{
result
}
"
)
# support to process batch audios from wav.scp
...
...
@@ -43,13 +47,29 @@ def main(args):
for
line
in
f
:
utt_name
,
utt_path
=
line
.
strip
().
split
()
result
=
loop
.
run_until_complete
(
handler
.
run
(
utt_path
))
result
=
result
[
"
asr_results
"
]
result
=
result
[
"
final_result
"
]
w
.
write
(
f
"
{
utt_name
}
{
result
}
\n
"
)
if
__name__
==
"__main__"
:
logger
.
info
(
"Start to do streaming asr client"
)
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
'--server_ip'
,
type
=
str
,
default
=
'127.0.0.1'
,
help
=
'server ip'
)
parser
.
add_argument
(
'--port'
,
type
=
int
,
default
=
8090
,
help
=
'server port'
)
parser
.
add_argument
(
'--punc.server_ip'
,
type
=
str
,
default
=
None
,
dest
=
"punc_server_ip"
,
help
=
'Punctuation server ip'
)
parser
.
add_argument
(
'--punc.port'
,
type
=
int
,
default
=
8091
,
dest
=
"punc_server_port"
,
help
=
'Punctuation server port'
)
parser
.
add_argument
(
"--wavfile"
,
action
=
"store"
,
...
...
paddlespeech/cli/cls/infer.py
浏览文件 @
7007b0ec
...
...
@@ -21,8 +21,6 @@ from typing import Union
import
numpy
as
np
import
paddle
import
yaml
from
paddleaudio
import
load
from
paddleaudio.features
import
LogMelSpectrogram
from
..executor
import
BaseExecutor
from
..log
import
logger
...
...
@@ -30,6 +28,8 @@ from ..utils import cli_register
from
..utils
import
stats_wrapper
from
.pretrained_models
import
model_alias
from
.pretrained_models
import
pretrained_models
from
paddleaudio
import
load
from
paddleaudio.features
import
LogMelSpectrogram
from
paddlespeech.s2t.utils.dynamic_import
import
dynamic_import
__all__
=
[
'CLSExecutor'
]
...
...
paddlespeech/server/bin/paddlespeech_client.py
浏览文件 @
7007b0ec
...
...
@@ -29,7 +29,7 @@ from ..executor import BaseExecutor
from
..util
import
cli_client_register
from
..util
import
stats_wrapper
from
paddlespeech.cli.log
import
logger
from
paddlespeech.server.utils.audio_handler
import
ASRAudioHandler
from
paddlespeech.server.utils.audio_handler
import
ASR
Ws
AudioHandler
from
paddlespeech.server.utils.audio_process
import
wav2pcm
from
paddlespeech.server.utils.util
import
wav2base64
...
...
@@ -369,7 +369,7 @@ class ASRClientExecutor(BaseExecutor):
Returns:
str: The ASR results
"""
#
1. Firstly,
we use the asr server to recognize the audio text content
# we use the asr server to recognize the audio text content
if
protocol
.
lower
()
==
"http"
:
from
paddlespeech.server.utils.audio_handler
import
ASRHttpHandler
logger
.
info
(
"asr http client start"
)
...
...
@@ -380,22 +380,20 @@ class ASRClientExecutor(BaseExecutor):
elif
protocol
.
lower
()
==
"websocket"
:
logger
.
info
(
"asr websocket client start"
)
handler
=
ASRAudioHandler
(
handler
=
ASR
Ws
AudioHandler
(
server_ip
,
port
,
punc_server_ip
=
punc_server_ip
,
punc_server_port
=
punc_server_port
)
loop
=
asyncio
.
get_event_loop
()
res
=
loop
.
run_until_complete
(
handler
.
run
(
input
))
res
=
res
[
'
asr_results
'
]
res
=
res
[
'
final_result
'
]
logger
.
info
(
"asr websocket client finished"
)
else
:
logger
.
error
(
f
"Sorry, we have not support protocol:
{
protocol
}
,"
"please use http or websocket protocol"
)
sys
.
exit
(
-
1
)
# 2. Secondly, we use the punctuation server to do post process for text
return
res
...
...
paddlespeech/server/tests/asr/online/microphone_client.py
浏览文件 @
7007b0ec
...
...
@@ -26,7 +26,7 @@ import pyaudio
import
websockets
class
ASRAudioHandler
(
threading
.
Thread
):
class
ASR
Ws
AudioHandler
(
threading
.
Thread
):
def
__init__
(
self
,
url
=
"127.0.0.1"
,
port
=
8091
):
threading
.
Thread
.
__init__
(
self
)
self
.
url
=
url
...
...
@@ -148,7 +148,7 @@ if __name__ == "__main__":
logging
.
basicConfig
(
level
=
logging
.
INFO
)
logging
.
info
(
"asr websocket client start"
)
handler
=
ASRAudioHandler
(
"127.0.0.1"
,
8091
)
handler
=
ASR
Ws
AudioHandler
(
"127.0.0.1"
,
8091
)
loop
=
asyncio
.
get_event_loop
()
main_task
=
asyncio
.
ensure_future
(
handler
.
run
())
for
signal
in
[
SIGINT
,
SIGTERM
]:
...
...
paddlespeech/server/utils/audio_handler.py
浏览文件 @
7007b0ec
...
...
@@ -29,13 +29,30 @@ from paddlespeech.server.utils.util import wav2base64
class
TextHttpHandler
:
def
__init__
(
self
,
server_ip
=
"127.0.0.1"
,
port
=
8090
):
"""Text http client request
Args:
server_ip (str, optional): the text server ip. Defaults to "127.0.0.1".
port (int, optional): the text server port. Defaults to 8090.
"""
super
().
__init__
()
self
.
server_ip
=
server_ip
self
.
port
=
port
if
server_ip
is
None
or
port
is
None
:
self
.
url
=
None
else
:
self
.
url
=
'http://'
+
self
.
server_ip
+
":"
+
str
(
self
.
port
)
+
'/paddlespeech/text'
def
run
(
self
,
text
):
"""Call the text server to process the specific text
Args:
text (str): the text to be processed
Returns:
str: punctuation text
"""
if
self
.
server_ip
is
None
or
self
.
port
is
None
:
logger
.
warning
(
"No punctuation server, please input valid ip and port"
)
...
...
@@ -55,24 +72,29 @@ class TextHttpHandler:
return
punc_text
class
ASRAudioHandler
:
class
ASR
Ws
AudioHandler
:
def
__init__
(
self
,
url
=
"127.0.0.1"
,
port
=
8090
,
punc_server_ip
=
"127.0.0.1"
,
punc_server_port
=
"8091"
):
url
=
None
,
port
=
None
,
endpoint
=
"/paddlespeech/asr/streaming"
,
punc_server_ip
=
None
,
punc_server_port
=
None
):
"""PaddleSpeech Online ASR Server Client audio handler
Online asr server use the websocket protocal
Args:
url (str, optional): the server ip. Defaults to "127.0.0.1".
port (int, optional): the server port. Defaults to 8090.
url (str, optional): the server ip. Defaults to None.
port (int, optional): the server port. Defaults to None.
endpoint(str, optional): to compatiable with python server and c++ server.
punc_server_ip(str, optional): the punctuation server ip. Defaults to None.
punc_server_port(int, optional): the punctuation port. Defaults to None
"""
self
.
url
=
url
self
.
port
=
port
self
.
url
=
"ws://"
+
self
.
url
+
":"
+
str
(
self
.
port
)
+
"/ws/asr"
if
url
is
None
or
port
is
None
or
endpoint
is
None
:
self
.
url
=
None
else
:
self
.
url
=
"ws://"
+
self
.
url
+
":"
+
str
(
self
.
port
)
+
endpoint
self
.
punc_server
=
TextHttpHandler
(
punc_server_ip
,
punc_server_port
)
def
read_wave
(
self
,
wavfile_path
:
str
):
...
...
@@ -117,6 +139,11 @@ class ASRAudioHandler:
"""
logging
.
info
(
"send a message to the server"
)
if
self
.
url
is
None
:
logger
.
error
(
"No punctuation server, please input valid ip and port"
)
return
""
# 1. send websocket handshake protocal
async
with
websockets
.
connect
(
self
.
url
)
as
ws
:
# 2. server has already received handshake protocal
...
...
@@ -125,7 +152,7 @@ class ASRAudioHandler:
{
"name"
:
"test.wav"
,
"signal"
:
"start"
,
"nbest"
:
5
"nbest"
:
1
},
sort_keys
=
True
,
indent
=
4
,
...
...
@@ -139,7 +166,9 @@ class ASRAudioHandler:
await
ws
.
send
(
chunk_data
.
tobytes
())
msg
=
await
ws
.
recv
()
msg
=
json
.
loads
(
msg
)
msg
[
"asr_results"
]
=
self
.
punc_server
.
run
(
msg
[
"asr_results"
])
if
self
.
punc_server
and
len
(
msg
[
"partial_result"
])
>
0
:
msg
[
"partial_result"
]
=
self
.
punc_server
.
run
(
msg
[
"partial_result"
])
logger
.
info
(
"receive msg={}"
.
format
(
msg
))
# 4. we must send finished signal to the server
...
...
@@ -157,7 +186,8 @@ class ASRAudioHandler:
# 5. decode the bytes to str
msg
=
json
.
loads
(
msg
)
msg
[
"asr_results"
]
=
self
.
punc_server
.
run
(
msg
[
"asr_results"
])
if
self
.
punc_server
:
msg
[
"final_result"
]
=
self
.
punc_server
.
run
(
msg
[
"final_result"
])
logger
.
info
(
"final receive msg={}"
.
format
(
msg
))
result
=
msg
...
...
@@ -165,14 +195,39 @@ class ASRAudioHandler:
class
ASRHttpHandler
:
def
__init__
(
self
,
server_ip
=
"127.0.0.1"
,
port
=
8090
):
def
__init__
(
self
,
server_ip
=
None
,
port
=
None
):
"""The ASR client http request
Args:
server_ip (str, optional): the http asr server ip. Defaults to "127.0.0.1".
port (int, optional): the http asr server port. Defaults to 8090.
"""
super
().
__init__
()
self
.
server_ip
=
server_ip
self
.
port
=
port
if
server_ip
is
None
or
port
is
None
:
self
.
url
=
None
else
:
self
.
url
=
'http://'
+
self
.
server_ip
+
":"
+
str
(
self
.
port
)
+
'/paddlespeech/asr'
def
run
(
self
,
input
,
audio_format
,
sample_rate
,
lang
):
"""Call the http asr to process the audio
Args:
input (str): the audio file path
audio_format (str): the audio format
sample_rate (str): the audio sample rate
lang (str): the audio language type
Returns:
str: the final asr result
"""
if
self
.
url
is
None
:
logger
.
error
(
"No punctuation server, please input valid ip and port"
)
return
""
audio
=
wav2base64
(
input
)
data
=
{
"audio"
:
audio
,
...
...
paddlespeech/server/ws/asr_socket.py
浏览文件 @
7007b0ec
...
...
@@ -24,7 +24,7 @@ from paddlespeech.server.engine.engine_pool import get_engine_pool
router
=
APIRouter
()
@
router
.
websocket
(
'/
ws/asr
'
)
@
router
.
websocket
(
'/
paddlespeech/asr/streaming
'
)
async
def
websocket_endpoint
(
websocket
:
WebSocket
):
"""PaddleSpeech Online ASR Server api
...
...
@@ -83,7 +83,7 @@ async def websocket_endpoint(websocket: WebSocket):
resp
=
{
"status"
:
"ok"
,
"signal"
:
"finished"
,
'
asr_results
'
:
asr_results
'
final_result
'
:
asr_results
}
await
websocket
.
send_json
(
resp
)
break
...
...
@@ -102,7 +102,7 @@ async def websocket_endpoint(websocket: WebSocket):
# return the current period result
# if the engine create the vad instance, this connection will have many period results
resp
=
{
'
asr_results
'
:
asr_results
}
resp
=
{
'
partial_result
'
:
asr_results
}
await
websocket
.
send_json
(
resp
)
except
WebSocketDisconnect
:
pass
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录