Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
b6d0db0d
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
b6d0db0d
编写于
4月 25, 2022
作者:
H
Hui Zhang
提交者:
GitHub
4月 25, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1784 from Honei/v0.3
[asr][server]asr client add punctuatjion server
上级
cea7b5eb
f72cbc9b
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
222 addition
and
109 deletion
+222
-109
demos/streaming_asr_server/web/templates/index.html
demos/streaming_asr_server/web/templates/index.html
+2
-2
demos/streaming_asr_server/websocket_client.py
demos/streaming_asr_server/websocket_client.py
+24
-4
paddlespeech/cli/cls/infer.py
paddlespeech/cli/cls/infer.py
+2
-2
paddlespeech/server/bin/paddlespeech_client.py
paddlespeech/server/bin/paddlespeech_client.py
+66
-88
paddlespeech/server/tests/asr/online/microphone_client.py
paddlespeech/server/tests/asr/online/microphone_client.py
+2
-2
paddlespeech/server/utils/audio_handler.py
paddlespeech/server/utils/audio_handler.py
+123
-8
paddlespeech/server/ws/asr_socket.py
paddlespeech/server/ws/asr_socket.py
+3
-3
未找到文件。
demos/streaming_asr_server/web/templates/index.html
浏览文件 @
b6d0db0d
...
...
@@ -93,7 +93,7 @@
function
parseResult
(
data
)
{
var
data
=
JSON
.
parse
(
data
)
var
result
=
data
.
asr_results
var
result
=
data
.
result
console
.
log
(
result
)
$
(
"
#resultPanel
"
).
html
(
result
)
}
...
...
@@ -152,4 +152,4 @@
</script>
</body>
</html>
\ No newline at end of file
</html>
demos/streaming_asr_server/websocket_client.py
浏览文件 @
b6d0db0d
...
...
@@ -20,19 +20,23 @@ import logging
import
os
from
paddlespeech.cli.log
import
logger
from
paddlespeech.server.utils.audio_handler
import
ASRAudioHandler
from
paddlespeech.server.utils.audio_handler
import
ASR
Ws
AudioHandler
def
main
(
args
):
logger
.
info
(
"asr websocket client start"
)
handler
=
ASRAudioHandler
(
"127.0.0.1"
,
8090
)
handler
=
ASRWsAudioHandler
(
args
.
server_ip
,
args
.
port
,
punc_server_ip
=
args
.
punc_server_ip
,
punc_server_port
=
args
.
punc_server_port
)
loop
=
asyncio
.
get_event_loop
()
# support to process single audio file
if
args
.
wavfile
and
os
.
path
.
exists
(
args
.
wavfile
):
logger
.
info
(
f
"start to process the wavscp:
{
args
.
wavfile
}
"
)
result
=
loop
.
run_until_complete
(
handler
.
run
(
args
.
wavfile
))
result
=
result
[
"
asr_results
"
]
result
=
result
[
"
final_result
"
]
logger
.
info
(
f
"asr websocket client finished :
{
result
}
"
)
# support to process batch audios from wav.scp
...
...
@@ -43,13 +47,29 @@ def main(args):
for
line
in
f
:
utt_name
,
utt_path
=
line
.
strip
().
split
()
result
=
loop
.
run_until_complete
(
handler
.
run
(
utt_path
))
result
=
result
[
"
asr_results
"
]
result
=
result
[
"
final_result
"
]
w
.
write
(
f
"
{
utt_name
}
{
result
}
\n
"
)
if
__name__
==
"__main__"
:
logger
.
info
(
"Start to do streaming asr client"
)
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
'--server_ip'
,
type
=
str
,
default
=
'127.0.0.1'
,
help
=
'server ip'
)
parser
.
add_argument
(
'--port'
,
type
=
int
,
default
=
8090
,
help
=
'server port'
)
parser
.
add_argument
(
'--punc.server_ip'
,
type
=
str
,
default
=
None
,
dest
=
"punc_server_ip"
,
help
=
'Punctuation server ip'
)
parser
.
add_argument
(
'--punc.port'
,
type
=
int
,
default
=
8091
,
dest
=
"punc_server_port"
,
help
=
'Punctuation server port'
)
parser
.
add_argument
(
"--wavfile"
,
action
=
"store"
,
...
...
paddlespeech/cli/cls/infer.py
浏览文件 @
b6d0db0d
...
...
@@ -21,8 +21,6 @@ from typing import Union
import
numpy
as
np
import
paddle
import
yaml
from
paddleaudio
import
load
from
paddleaudio.features
import
LogMelSpectrogram
from
..executor
import
BaseExecutor
from
..log
import
logger
...
...
@@ -30,6 +28,8 @@ from ..utils import cli_register
from
..utils
import
stats_wrapper
from
.pretrained_models
import
model_alias
from
.pretrained_models
import
pretrained_models
from
paddleaudio
import
load
from
paddleaudio.features
import
LogMelSpectrogram
from
paddlespeech.s2t.utils.dynamic_import
import
dynamic_import
__all__
=
[
'CLSExecutor'
]
...
...
paddlespeech/server/bin/paddlespeech_client.py
浏览文件 @
b6d0db0d
...
...
@@ -16,7 +16,6 @@ import asyncio
import
base64
import
io
import
json
import
logging
import
os
import
random
import
time
...
...
@@ -30,13 +29,13 @@ from ..executor import BaseExecutor
from
..util
import
cli_client_register
from
..util
import
stats_wrapper
from
paddlespeech.cli.log
import
logger
from
paddlespeech.server.utils.audio_handler
import
ASRAudioHandler
from
paddlespeech.server.utils.audio_handler
import
ASR
Ws
AudioHandler
from
paddlespeech.server.utils.audio_process
import
wav2pcm
from
paddlespeech.server.utils.util
import
wav2base64
__all__
=
[
'TTSClientExecutor'
,
'TTSOnlineClientExecutor'
,
'ASRClientExecutor'
,
'
ASROnlineClientExecutor'
,
'
CLSClientExecutor'
'CLSClientExecutor'
]
...
...
@@ -288,6 +287,12 @@ class ASRClientExecutor(BaseExecutor):
default
=
None
,
help
=
'Audio file to be recognized'
,
required
=
True
)
self
.
parser
.
add_argument
(
'--protocol'
,
type
=
str
,
default
=
"http"
,
choices
=
[
"http"
,
"websocket"
],
help
=
'server protocol'
)
self
.
parser
.
add_argument
(
'--sample_rate'
,
type
=
int
,
default
=
16000
,
help
=
'audio sample rate'
)
self
.
parser
.
add_argument
(
...
...
@@ -295,81 +300,18 @@ class ASRClientExecutor(BaseExecutor):
self
.
parser
.
add_argument
(
'--audio_format'
,
type
=
str
,
default
=
"wav"
,
help
=
'audio format'
)
def
execute
(
self
,
argv
:
List
[
str
])
->
bool
:
args
=
self
.
parser
.
parse_args
(
argv
)
input_
=
args
.
input
server_ip
=
args
.
server_ip
port
=
args
.
port
sample_rate
=
args
.
sample_rate
lang
=
args
.
lang
audio_format
=
args
.
audio_format
try
:
time_start
=
time
.
time
()
res
=
self
(
input
=
input_
,
server_ip
=
server_ip
,
port
=
port
,
sample_rate
=
sample_rate
,
lang
=
lang
,
audio_format
=
audio_format
)
time_end
=
time
.
time
()
logger
.
info
(
res
.
json
())
logger
.
info
(
"Response time %f s."
%
(
time_end
-
time_start
))
return
True
except
Exception
as
e
:
logger
.
error
(
"Failed to speech recognition."
)
return
False
@
stats_wrapper
def
__call__
(
self
,
input
:
str
,
server_ip
:
str
=
"127.0.0.1"
,
port
:
int
=
8090
,
sample_rate
:
int
=
16000
,
lang
:
str
=
"zh_cn"
,
audio_format
:
str
=
"wav"
):
"""
Python API to call an executor.
"""
url
=
'http://'
+
server_ip
+
":"
+
str
(
port
)
+
'/paddlespeech/asr'
audio
=
wav2base64
(
input
)
data
=
{
"audio"
:
audio
,
"audio_format"
:
audio_format
,
"sample_rate"
:
sample_rate
,
"lang"
:
lang
,
}
res
=
requests
.
post
(
url
=
url
,
data
=
json
.
dumps
(
data
))
return
res
@
cli_client_register
(
name
=
'paddlespeech_client.asr_online'
,
description
=
'visit asr online service'
)
class
ASROnlineClientExecutor
(
BaseExecutor
):
def
__init__
(
self
):
super
(
ASROnlineClientExecutor
,
self
).
__init__
()
self
.
parser
=
argparse
.
ArgumentParser
(
prog
=
'paddlespeech_client.asr_online'
,
add_help
=
True
)
self
.
parser
.
add_argument
(
'--server_ip'
,
type
=
str
,
default
=
'127.0.0.1'
,
help
=
'server ip'
)
self
.
parser
.
add_argument
(
'--port'
,
type
=
int
,
default
=
8091
,
help
=
'server port'
)
self
.
parser
.
add_argument
(
'--input'
,
'--punc.server_ip'
,
type
=
str
,
default
=
None
,
help
=
'Audio file to be recognized'
,
required
=
True
)
dest
=
"punc_server_ip"
,
help
=
'Punctuation server ip'
)
self
.
parser
.
add_argument
(
'--
sample_rate'
,
type
=
int
,
default
=
16000
,
help
=
'audio sample rate'
)
self
.
parser
.
add_argument
(
'--lang'
,
type
=
str
,
default
=
"zh_cn"
,
help
=
'language'
)
self
.
parser
.
add_argument
(
'--audio_format'
,
type
=
str
,
default
=
"wav"
,
help
=
'audio forma
t'
)
'--
punc.port'
,
type
=
int
,
default
=
8091
,
dest
=
"punc_server_port"
,
help
=
'Punctuation server por
t'
)
def
execute
(
self
,
argv
:
List
[
str
])
->
bool
:
args
=
self
.
parser
.
parse_args
(
argv
)
...
...
@@ -379,6 +321,7 @@ class ASROnlineClientExecutor(BaseExecutor):
sample_rate
=
args
.
sample_rate
lang
=
args
.
lang
audio_format
=
args
.
audio_format
protocol
=
args
.
protocol
try
:
time_start
=
time
.
time
()
...
...
@@ -388,9 +331,12 @@ class ASROnlineClientExecutor(BaseExecutor):
port
=
port
,
sample_rate
=
sample_rate
,
lang
=
lang
,
audio_format
=
audio_format
)
audio_format
=
audio_format
,
protocol
=
protocol
,
punc_server_ip
=
args
.
punc_server_ip
,
punc_server_port
=
args
.
punc_server_port
)
time_end
=
time
.
time
()
logger
.
info
(
res
)
logger
.
info
(
f
"ASR result:
{
res
}
"
)
logger
.
info
(
"Response time %f s."
%
(
time_end
-
time_start
))
return
True
except
Exception
as
e
:
...
...
@@ -402,21 +348,53 @@ class ASROnlineClientExecutor(BaseExecutor):
def
__call__
(
self
,
input
:
str
,
server_ip
:
str
=
"127.0.0.1"
,
port
:
int
=
809
1
,
port
:
int
=
809
0
,
sample_rate
:
int
=
16000
,
lang
:
str
=
"zh_cn"
,
audio_format
:
str
=
"wav"
):
"""
Python API to call an executor.
audio_format
:
str
=
"wav"
,
protocol
:
str
=
"http"
,
punc_server_ip
:
str
=
"127.0.0.1"
,
punc_server_port
:
int
=
8091
):
"""Python API to call an executor.
Args:
input (str): The input audio file path
server_ip (str, optional): The ASR server ip. Defaults to "127.0.0.1".
port (int, optional): The ASR server port. Defaults to 8090.
sample_rate (int, optional): The audio sample rate. Defaults to 16000.
lang (str, optional): The audio language type. Defaults to "zh_cn".
audio_format (str, optional): The audio format information. Defaults to "wav".
protocol (str, optional): The ASR server. Defaults to "http".
Returns:
str: The ASR results
"""
logging
.
basicConfig
(
level
=
logging
.
INFO
)
logging
.
info
(
"asr websocket client start"
)
handler
=
ASRAudioHandler
(
server_ip
,
port
)
loop
=
asyncio
.
get_event_loop
()
res
=
loop
.
run_until_complete
(
handler
.
run
(
input
))
logging
.
info
(
"asr websocket client finished"
)
return
res
[
'asr_results'
]
# we use the asr server to recognize the audio text content
if
protocol
.
lower
()
==
"http"
:
from
paddlespeech.server.utils.audio_handler
import
ASRHttpHandler
logger
.
info
(
"asr http client start"
)
handler
=
ASRHttpHandler
(
server_ip
=
server_ip
,
port
=
port
)
res
=
handler
.
run
(
input
,
audio_format
,
sample_rate
,
lang
)
res
=
res
[
'result'
][
'transcription'
]
logger
.
info
(
"asr http client finished"
)
elif
protocol
.
lower
()
==
"websocket"
:
logger
.
info
(
"asr websocket client start"
)
handler
=
ASRWsAudioHandler
(
server_ip
,
port
,
punc_server_ip
=
punc_server_ip
,
punc_server_port
=
punc_server_port
)
loop
=
asyncio
.
get_event_loop
()
res
=
loop
.
run_until_complete
(
handler
.
run
(
input
))
res
=
res
[
'result'
]
logger
.
info
(
"asr websocket client finished"
)
else
:
logger
.
error
(
f
"Sorry, we have not support protocol:
{
protocol
}
,"
"please use http or websocket protocol"
)
sys
.
exit
(
-
1
)
return
res
@
cli_client_register
(
...
...
paddlespeech/server/tests/asr/online/microphone_client.py
浏览文件 @
b6d0db0d
...
...
@@ -26,7 +26,7 @@ import pyaudio
import
websockets
class
ASRAudioHandler
(
threading
.
Thread
):
class
ASR
Ws
AudioHandler
(
threading
.
Thread
):
def
__init__
(
self
,
url
=
"127.0.0.1"
,
port
=
8091
):
threading
.
Thread
.
__init__
(
self
)
self
.
url
=
url
...
...
@@ -148,7 +148,7 @@ if __name__ == "__main__":
logging
.
basicConfig
(
level
=
logging
.
INFO
)
logging
.
info
(
"asr websocket client start"
)
handler
=
ASRAudioHandler
(
"127.0.0.1"
,
8091
)
handler
=
ASR
Ws
AudioHandler
(
"127.0.0.1"
,
8091
)
loop
=
asyncio
.
get_event_loop
()
main_task
=
asyncio
.
ensure_future
(
handler
.
run
())
for
signal
in
[
SIGINT
,
SIGTERM
]:
...
...
paddlespeech/server/utils/audio_handler.py
浏览文件 @
b6d0db0d
...
...
@@ -24,22 +24,76 @@ import websockets
from
paddlespeech.cli.log
import
logger
from
paddlespeech.server.utils.audio_process
import
save_audio
from
paddlespeech.server.utils.util
import
wav2base64
class
ASRAudioHandler
:
class
TextHttpHandler
:
def
__init__
(
self
,
server_ip
=
"127.0.0.1"
,
port
=
8090
):
"""Text http client request
Args:
server_ip (str, optional): the text server ip. Defaults to "127.0.0.1".
port (int, optional): the text server port. Defaults to 8090.
"""
super
().
__init__
()
self
.
server_ip
=
server_ip
self
.
port
=
port
if
server_ip
is
None
or
port
is
None
:
self
.
url
=
None
else
:
self
.
url
=
'http://'
+
self
.
server_ip
+
":"
+
str
(
self
.
port
)
+
'/paddlespeech/text'
def
run
(
self
,
text
):
"""Call the text server to process the specific text
Args:
text (str): the text to be processed
Returns:
str: punctuation text
"""
if
self
.
server_ip
is
None
or
self
.
port
is
None
:
return
text
request
=
{
"text"
:
text
,
}
try
:
res
=
requests
.
post
(
url
=
self
.
url
,
data
=
json
.
dumps
(
request
))
response_dict
=
res
.
json
()
punc_text
=
response_dict
[
"result"
][
"punc_text"
]
except
Exception
as
e
:
logger
.
error
(
f
"Call punctuation
{
self
.
url
}
occurs"
)
logger
.
error
(
e
)
punc_text
=
text
return
punc_text
class
ASRWsAudioHandler
:
def
__init__
(
self
,
url
=
"127.0.0.1"
,
port
=
8090
,
endopoint
=
'/paddlespeech/asr/streaming'
):
url
=
None
,
port
=
None
,
endpoint
=
"/paddlespeech/asr/streaming"
,
punc_server_ip
=
None
,
punc_server_port
=
None
):
"""PaddleSpeech Online ASR Server Client audio handler
Online asr server use the websocket protocal
Args:
url (str, optional): the server ip. Defaults to "127.0.0.1".
port (int, optional): the server port. Defaults to 8090.
url (str, optional): the server ip. Defaults to None.
port (int, optional): the server port. Defaults to None.
endpoint(str, optional): to compatiable with python server and c++ server.
punc_server_ip(str, optional): the punctuation server ip. Defaults to None.
punc_server_port(int, optional): the punctuation port. Defaults to None
"""
self
.
url
=
url
self
.
port
=
port
self
.
url
=
"ws://"
+
self
.
url
+
":"
+
str
(
self
.
port
)
+
endopoint
if
url
is
None
or
port
is
None
or
endpoint
is
None
:
self
.
url
=
None
else
:
self
.
url
=
"ws://"
+
self
.
url
+
":"
+
str
(
self
.
port
)
+
endpoint
self
.
punc_server
=
TextHttpHandler
(
punc_server_ip
,
punc_server_port
)
logger
.
info
(
f
"endpoint:
{
self
.
url
}
"
)
def
read_wave
(
self
,
wavfile_path
:
str
):
...
...
@@ -84,6 +138,11 @@ class ASRAudioHandler:
"""
logging
.
info
(
"send a message to the server"
)
if
self
.
url
is
None
:
logger
.
error
(
"No asr server, please input valid ip and port"
)
return
""
# 1. send websocket handshake protocal
async
with
websockets
.
connect
(
self
.
url
)
as
ws
:
# 2. server has already received handshake protocal
...
...
@@ -92,7 +151,7 @@ class ASRAudioHandler:
{
"name"
:
"test.wav"
,
"signal"
:
"start"
,
"nbest"
:
5
"nbest"
:
1
},
sort_keys
=
True
,
indent
=
4
,
...
...
@@ -106,6 +165,10 @@ class ASRAudioHandler:
await
ws
.
send
(
chunk_data
.
tobytes
())
msg
=
await
ws
.
recv
()
msg
=
json
.
loads
(
msg
)
if
self
.
punc_server
and
len
(
msg
[
"result"
])
>
0
:
msg
[
"result"
]
=
self
.
punc_server
.
run
(
msg
[
"result"
])
logger
.
info
(
"client receive msg={}"
.
format
(
msg
))
# 4. we must send finished signal to the server
...
...
@@ -123,11 +186,63 @@ class ASRAudioHandler:
# 5. decode the bytes to str
msg
=
json
.
loads
(
msg
)
if
self
.
punc_server
:
msg
[
"result"
]
=
self
.
punc_server
.
run
(
msg
[
"result"
])
logger
.
info
(
"client final receive msg={}"
.
format
(
msg
))
result
=
msg
return
result
class
ASRHttpHandler
:
def
__init__
(
self
,
server_ip
=
None
,
port
=
None
):
"""The ASR client http request
Args:
server_ip (str, optional): the http asr server ip. Defaults to "127.0.0.1".
port (int, optional): the http asr server port. Defaults to 8090.
"""
super
().
__init__
()
self
.
server_ip
=
server_ip
self
.
port
=
port
if
server_ip
is
None
or
port
is
None
:
self
.
url
=
None
else
:
self
.
url
=
'http://'
+
self
.
server_ip
+
":"
+
str
(
self
.
port
)
+
'/paddlespeech/asr'
def
run
(
self
,
input
,
audio_format
,
sample_rate
,
lang
):
"""Call the http asr to process the audio
Args:
input (str): the audio file path
audio_format (str): the audio format
sample_rate (str): the audio sample rate
lang (str): the audio language type
Returns:
str: the final asr result
"""
if
self
.
url
is
None
:
logger
.
error
(
"No punctuation server, please input valid ip and port"
)
return
""
audio
=
wav2base64
(
input
)
data
=
{
"audio"
:
audio
,
"audio_format"
:
audio_format
,
"sample_rate"
:
sample_rate
,
"lang"
:
lang
,
}
res
=
requests
.
post
(
url
=
self
.
url
,
data
=
json
.
dumps
(
data
))
return
res
.
json
()
class
TTSWsHandler
:
def
__init__
(
self
,
server
=
"127.0.0.1"
,
port
=
8092
,
play
:
bool
=
False
):
"""PaddleSpeech Online TTS Server Client audio handler
...
...
paddlespeech/server/ws/asr_socket.py
浏览文件 @
b6d0db0d
...
...
@@ -24,7 +24,7 @@ from paddlespeech.server.engine.engine_pool import get_engine_pool
router
=
APIRouter
()
@
router
.
websocket
(
'/
ws/asr
'
)
@
router
.
websocket
(
'/
paddlespeech/asr/streaming
'
)
async
def
websocket_endpoint
(
websocket
:
WebSocket
):
"""PaddleSpeech Online ASR Server api
...
...
@@ -83,7 +83,7 @@ async def websocket_endpoint(websocket: WebSocket):
resp
=
{
"status"
:
"ok"
,
"signal"
:
"finished"
,
'
asr_results
'
:
asr_results
'
result
'
:
asr_results
}
await
websocket
.
send_json
(
resp
)
break
...
...
@@ -102,7 +102,7 @@ async def websocket_endpoint(websocket: WebSocket):
# return the current period result
# if the engine create the vad instance, this connection will have many period results
resp
=
{
'
asr_results
'
:
asr_results
}
resp
=
{
'
result
'
:
asr_results
}
await
websocket
.
send_json
(
resp
)
except
WebSocketDisconnect
:
pass
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录