Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
833900a8
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
833900a8
编写于
4月 25, 2022
作者:
X
xiongxinlei
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
asr client add punctuation server, test=doc
上级
119143d0
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
130 addition
and
88 deletion
+130
-88
paddlespeech/server/bin/paddlespeech_client.py
paddlespeech/server/bin/paddlespeech_client.py
+67
-87
paddlespeech/server/utils/audio_handler.py
paddlespeech/server/utils/audio_handler.py
+63
-1
未找到文件。
paddlespeech/server/bin/paddlespeech_client.py
浏览文件 @
833900a8
...
...
@@ -16,7 +16,6 @@ import asyncio
import
base64
import
io
import
json
import
logging
import
os
import
random
import
time
...
...
@@ -36,7 +35,7 @@ from paddlespeech.server.utils.util import wav2base64
__all__
=
[
'TTSClientExecutor'
,
'TTSOnlineClientExecutor'
,
'ASRClientExecutor'
,
'
ASROnlineClientExecutor'
,
'
CLSClientExecutor'
'CLSClientExecutor'
]
...
...
@@ -288,6 +287,12 @@ class ASRClientExecutor(BaseExecutor):
default
=
None
,
help
=
'Audio file to be recognized'
,
required
=
True
)
self
.
parser
.
add_argument
(
'--protocol'
,
type
=
str
,
default
=
"http"
,
choices
=
[
"http"
,
"websocket"
],
help
=
'server protocol'
)
self
.
parser
.
add_argument
(
'--sample_rate'
,
type
=
int
,
default
=
16000
,
help
=
'audio sample rate'
)
self
.
parser
.
add_argument
(
...
...
@@ -295,81 +300,18 @@ class ASRClientExecutor(BaseExecutor):
self
.
parser
.
add_argument
(
'--audio_format'
,
type
=
str
,
default
=
"wav"
,
help
=
'audio format'
)
def execute(self, argv: List[str]) -> bool:
    """Command-line entry point: parse ``argv``, call the executor, log the reply.

    Args:
        argv: Command-line arguments handed to ``self.parser.parse_args``.

    Returns:
        True when the call finished and its JSON body was logged; False when
        any exception was raised while doing so.
    """
    args = self.parser.parse_args(argv)
    input_ = args.input
    server_ip = args.server_ip
    port = args.port
    sample_rate = args.sample_rate
    lang = args.lang
    audio_format = args.audio_format

    try:
        time_start = time.time()
        res = self(
            input=input_,
            server_ip=server_ip,
            port=port,
            sample_rate=sample_rate,
            lang=lang,
            audio_format=audio_format)
        time_end = time.time()
        logger.info(res.json())
        logger.info("Response time %f s." % (time_end - time_start))
        return True
    except Exception as e:
        logger.error("Failed to speech recognition.")
        # Also report the cause: the fixed message alone gives no hint whether
        # the file, the connection or the response decoding went wrong.
        logger.error(e)
        return False
@stats_wrapper
def __call__(self,
             input: str,
             server_ip: str="127.0.0.1",
             port: int=8090,
             sample_rate: int=16000,
             lang: str="zh_cn",
             audio_format: str="wav"):
    """Python API: POST one audio file to the ASR HTTP endpoint.

    Args:
        input: Value passed to ``wav2base64`` (presumably an audio file
            path -- confirm against that helper).
        server_ip: Host part of the request URL.
        port: Port part of the request URL.
        sample_rate: Sent as the ``sample_rate`` field of the request.
        lang: Sent as the ``lang`` field of the request.
        audio_format: Sent as the ``audio_format`` field of the request.

    Returns:
        The object returned by ``requests.post`` for this request.
    """
    # The endpoint is assembled before the audio is encoded, as before.
    endpoint = 'http://' + server_ip + ":" + str(port) + '/paddlespeech/asr'
    encoded_audio = wav2base64(input)

    payload = dict(
        audio=encoded_audio,
        audio_format=audio_format,
        sample_rate=sample_rate,
        lang=lang)
    body = json.dumps(payload)
    return requests.post(url=endpoint, data=body)
@
cli_client_register
(
name
=
'paddlespeech_client.asr_online'
,
description
=
'visit asr online service'
)
class
ASROnlineClientExecutor
(
BaseExecutor
):
def
__init__
(
self
):
super
(
ASROnlineClientExecutor
,
self
).
__init__
()
self
.
parser
=
argparse
.
ArgumentParser
(
prog
=
'paddlespeech_client.asr_online'
,
add_help
=
True
)
self
.
parser
.
add_argument
(
'--server_ip'
,
type
=
str
,
default
=
'127.0.0.1'
,
help
=
'server ip'
)
self
.
parser
.
add_argument
(
'--port'
,
type
=
int
,
default
=
8091
,
help
=
'server port'
)
self
.
parser
.
add_argument
(
'--input'
,
'--punc.server_ip'
,
type
=
str
,
default
=
None
,
help
=
'Audio file to be recognized'
,
required
=
True
)
self
.
parser
.
add_argument
(
'--sample_rate'
,
type
=
int
,
default
=
16000
,
help
=
'audio sample rate'
)
self
.
parser
.
add_argument
(
'--lang'
,
type
=
str
,
default
=
"zh_cn"
,
help
=
'language'
)
dest
=
"punc_server_ip"
,
help
=
'Punctuation server ip'
)
self
.
parser
.
add_argument
(
'--audio_format'
,
type
=
str
,
default
=
"wav"
,
help
=
'audio format'
)
'--punc.port'
,
type
=
int
,
default
=
8091
,
dest
=
"punc_server_port"
,
help
=
'Punctuation server port'
)
def
execute
(
self
,
argv
:
List
[
str
])
->
bool
:
args
=
self
.
parser
.
parse_args
(
argv
)
...
...
@@ -379,6 +321,7 @@ class ASROnlineClientExecutor(BaseExecutor):
sample_rate
=
args
.
sample_rate
lang
=
args
.
lang
audio_format
=
args
.
audio_format
protocol
=
args
.
protocol
try
:
time_start
=
time
.
time
()
...
...
@@ -388,9 +331,12 @@ class ASROnlineClientExecutor(BaseExecutor):
port
=
port
,
sample_rate
=
sample_rate
,
lang
=
lang
,
audio_format
=
audio_format
)
audio_format
=
audio_format
,
protocol
=
protocol
,
punc_server_ip
=
args
.
punc_server_ip
,
punc_server_port
=
args
.
punc_server_port
)
time_end
=
time
.
time
()
logger
.
info
(
res
)
logger
.
info
(
f
"ASR result:
{
res
}
"
)
logger
.
info
(
"Response time %f s."
%
(
time_end
-
time_start
))
return
True
except
Exception
as
e
:
...
...
@@ -402,21 +348,55 @@ class ASROnlineClientExecutor(BaseExecutor):
def
__call__
(
self
,
input
:
str
,
server_ip
:
str
=
"127.0.0.1"
,
port
:
int
=
809
1
,
port
:
int
=
809
0
,
sample_rate
:
int
=
16000
,
lang
:
str
=
"zh_cn"
,
audio_format
:
str
=
"wav"
):
"""
Python API to call an executor.
audio_format
:
str
=
"wav"
,
protocol
:
str
=
"http"
,
punc_server_ip
:
str
=
"127.0.0.1"
,
punc_server_port
:
int
=
8091
):
"""Python API to call an executor.
Args:
input (str): The input audio file path
server_ip (str, optional): The ASR server ip. Defaults to "127.0.0.1".
port (int, optional): The ASR server port. Defaults to 8090.
sample_rate (int, optional): The audio sample rate. Defaults to 16000.
lang (str, optional): The audio language type. Defaults to "zh_cn".
audio_format (str, optional): The audio format information. Defaults to "wav".
protocol (str, optional): The ASR server. Defaults to "http".
Returns:
str: The ASR results
"""
logging
.
basicConfig
(
level
=
logging
.
INFO
)
logging
.
info
(
"asr websocket client start"
)
handler
=
ASRAudioHandler
(
server_ip
,
port
)
loop
=
asyncio
.
get_event_loop
()
res
=
loop
.
run_until_complete
(
handler
.
run
(
input
))
logging
.
info
(
"asr websocket client finished"
)
return
res
[
'asr_results'
]
# 1. Firstly, we use the asr server to recognize the audio text content
if
protocol
.
lower
()
==
"http"
:
from
paddlespeech.server.utils.audio_handler
import
ASRHttpHandler
logger
.
info
(
"asr http client start"
)
handler
=
ASRHttpHandler
(
server_ip
=
server_ip
,
port
=
port
)
res
=
handler
.
run
(
input
,
audio_format
,
sample_rate
,
lang
)
res
=
res
[
'result'
][
'transcription'
]
logger
.
info
(
"asr http client finished"
)
elif
protocol
.
lower
()
==
"websocket"
:
logger
.
info
(
"asr websocket client start"
)
handler
=
ASRAudioHandler
(
server_ip
,
port
,
punc_server_ip
=
punc_server_ip
,
punc_server_port
=
punc_server_port
)
loop
=
asyncio
.
get_event_loop
()
res
=
loop
.
run_until_complete
(
handler
.
run
(
input
))
res
=
res
[
'asr_results'
]
logger
.
info
(
"asr websocket client finished"
)
else
:
logger
.
error
(
f
"Sorry, we have not support protocol:
{
protocol
}
,"
"please use http or websocket protocol"
)
sys
.
exit
(
-
1
)
# 2. Secondly, we use the punctuation server to do post process for text
return
res
@
cli_client_register
(
...
...
paddlespeech/server/utils/audio_handler.py
浏览文件 @
833900a8
...
...
@@ -24,20 +24,57 @@ import websockets
from
paddlespeech.cli.log
import
logger
from
paddlespeech.server.utils.audio_process
import
save_audio
from
paddlespeech.server.utils.util
import
wav2base64
class TextHttpHandler:
    """HTTP client that sends text to the ``/paddlespeech/text`` endpoint."""

    def __init__(self, server_ip="127.0.0.1", port=8090):
        """Store the server address and build the request URL.

        Args:
            server_ip: Host of the punctuation server, or None when no
                server is configured.
            port: Port of the punctuation server, or None when no server is
                configured.
        """
        super().__init__()
        self.server_ip = server_ip
        self.port = port
        if server_ip is None or port is None:
            # No server configured: skip URL construction instead of failing
            # on ``str + None``, so that run() can fall back to the raw text.
            self.url = None
        else:
            self.url = 'http://' + self.server_ip + ":" + str(
                self.port) + '/paddlespeech/text'

    def run(self, text):
        """Post ``text`` to the server and return the ``punc_text`` it replies with.

        Args:
            text: The text to send.

        Returns:
            ``result.punc_text`` from the JSON response, or ``text`` unchanged
            when no server is configured or the request/decoding fails.
        """
        if self.server_ip is None or self.port is None:
            logger.warning(
                "No punctuation server, please input valid ip and port")
            return text

        request = {
            "text": text,
        }
        try:
            res = requests.post(url=self.url, data=json.dumps(request))
            response_dict = res.json()
            punc_text = response_dict["result"]["punc_text"]
        except Exception as e:
            # Best effort: a failed call must not lose the caller's text.
            logger.error(f"Call punctuation {self.url} occurs")
            logger.error(e)
            punc_text = text

        return punc_text
class
ASRAudioHandler
:
def
__init__
(
self
,
url
=
"127.0.0.1"
,
port
=
8090
):
def __init__(self,
             url="127.0.0.1",
             port=8090,
             punc_server_ip="127.0.0.1",
             punc_server_port=8091):
    """PaddleSpeech Online ASR Server Client audio handler
    Online asr server use the websocket protocol

    Args:
        url (str, optional): the server ip. Defaults to "127.0.0.1".
        port (int, optional): the server port. Defaults to 8090.
        punc_server_ip(str, optional): the punctuation server ip. Defaults to "127.0.0.1".
        punc_server_port(int, optional): the punctuation port. Defaults to 8091.
    """
    self.url = url
    self.port = port
    # self.url first holds the bare host, then is replaced by the full
    # websocket endpoint built from it.
    self.url = "ws://" + self.url + ":" + str(self.port) + "/ws/asr"
    self.punc_server = TextHttpHandler(punc_server_ip, punc_server_port)
def
read_wave
(
self
,
wavfile_path
:
str
):
"""read the audio file from specific wavfile path
...
...
@@ -102,6 +139,7 @@ class ASRAudioHandler:
await
ws
.
send
(
chunk_data
.
tobytes
())
msg
=
await
ws
.
recv
()
msg
=
json
.
loads
(
msg
)
msg
[
"asr_results"
]
=
self
.
punc_server
.
run
(
msg
[
"asr_results"
])
logger
.
info
(
"receive msg={}"
.
format
(
msg
))
# 4. we must send finished signal to the server
...
...
@@ -119,11 +157,35 @@ class ASRAudioHandler:
# 5. decode the bytes to str
msg
=
json
.
loads
(
msg
)
msg
[
"asr_results"
]
=
self
.
punc_server
.
run
(
msg
[
"asr_results"
])
logger
.
info
(
"final receive msg={}"
.
format
(
msg
))
result
=
msg
return
result
class ASRHttpHandler:
    """HTTP client that posts audio to the ``/paddlespeech/asr`` endpoint."""

    def __init__(self, server_ip="127.0.0.1", port=8090, timeout=None):
        """Store the server address and build the request URL.

        Args:
            server_ip: Host of the ASR server.
            port: Port of the ASR server.
            timeout: Passed to ``requests.post`` as its ``timeout``. The
                default None keeps the previous behaviour (no limit).
        """
        super().__init__()
        self.server_ip = server_ip
        self.port = port
        self.timeout = timeout
        self.url = 'http://' + self.server_ip + ":" + str(
            self.port) + '/paddlespeech/asr'

    def run(self, input, audio_format, sample_rate, lang):
        """Send one recognition request and return the decoded JSON reply.

        Args:
            input: Value passed to ``wav2base64`` (presumably an audio file
                path -- confirm against that helper).
            audio_format: Sent as the ``audio_format`` field.
            sample_rate: Sent as the ``sample_rate`` field.
            lang: Sent as the ``lang`` field.

        Returns:
            The response body decoded with ``res.json()``.
        """
        audio = wav2base64(input)
        data = {
            "audio": audio,
            "audio_format": audio_format,
            "sample_rate": sample_rate,
            "lang": lang,
        }

        res = requests.post(
            url=self.url, data=json.dumps(data), timeout=self.timeout)

        return res.json()
class
TTSWsHandler
:
def
__init__
(
self
,
server
=
"127.0.0.1"
,
port
=
8092
,
play
:
bool
=
False
):
"""PaddleSpeech Online TTS Server Client audio handler
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录