Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
48fa84be
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
206
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
48fa84be
编写于
4月 19, 2022
作者:
X
xiongxinlei
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix the asr online client bug, return None, test=doc
上级
babac27a
变更
7
隐藏空白更改
内联
并排
Showing
7 changed files
with
40 additions
and
18 deletions
+40
-18
paddlespeech/s2t/modules/encoder.py
paddlespeech/s2t/modules/encoder.py
+0
-2
paddlespeech/server/README.md
paddlespeech/server/README.md
+13
-0
paddlespeech/server/README_cn.md
paddlespeech/server/README_cn.md
+14
-0
paddlespeech/server/bin/paddlespeech_client.py
paddlespeech/server/bin/paddlespeech_client.py
+4
-2
paddlespeech/server/engine/asr/online/asr_engine.py
paddlespeech/server/engine/asr/online/asr_engine.py
+2
-2
paddlespeech/server/engine/asr/online/ctc_search.py
paddlespeech/server/engine/asr/online/ctc_search.py
+3
-5
paddlespeech/server/tests/asr/online/websocket_client.py
paddlespeech/server/tests/asr/online/websocket_client.py
+4
-7
未找到文件。
paddlespeech/s2t/modules/encoder.py
浏览文件 @
48fa84be
...
...
@@ -317,8 +317,6 @@ class BaseEncoder(nn.Layer):
outputs
=
[]
offset
=
0
# Feed forward overlap input step by step
print
(
f
"context:
{
context
}
"
)
print
(
f
"stride:
{
stride
}
"
)
for
cur
in
range
(
0
,
num_frames
-
context
+
1
,
stride
):
end
=
min
(
cur
+
decoding_window
,
num_frames
)
chunk_xs
=
xs
[:,
cur
:
end
,
:]
...
...
paddlespeech/server/README.md
浏览文件 @
48fa84be
...
...
@@ -35,3 +35,16 @@
```
bash
paddlespeech_client cls
--server_ip
127.0.0.1
--port
8090
--input
input.wav
```
## Online ASR Server
### Launch online asr server
```
paddlespeech_server start --config_file conf/ws_conformer_application.yaml
```
### Access online asr server
```
paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --input input_16k.wav
```
\ No newline at end of file
paddlespeech/server/README_cn.md
浏览文件 @
48fa84be
...
...
@@ -35,3 +35,17 @@
```
bash
paddlespeech_client cls
--server_ip
127.0.0.1
--port
8090
--input
input.wav
```
## 流式ASR
### 启动流式语音识别服务
```
paddlespeech_server start --config_file conf/ws_conformer_application.yaml
```
### 访问流式语音识别服务
```
paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --input zh.wav
```
\ No newline at end of file
paddlespeech/server/bin/paddlespeech_client.py
浏览文件 @
48fa84be
...
...
@@ -277,11 +277,12 @@ class ASRClientExecutor(BaseExecutor):
lang
=
lang
,
audio_format
=
audio_format
)
time_end
=
time
.
time
()
logger
.
info
(
res
.
json
()
)
logger
.
info
(
res
)
logger
.
info
(
"Response time %f s."
%
(
time_end
-
time_start
))
return
True
except
Exception
as
e
:
logger
.
error
(
"Failed to speech recognition."
)
logger
.
error
(
e
)
return
False
@
stats_wrapper
...
...
@@ -299,9 +300,10 @@ class ASRClientExecutor(BaseExecutor):
logging
.
info
(
"asr websocket client start"
)
handler
=
ASRAudioHandler
(
server_ip
,
port
)
loop
=
asyncio
.
get_event_loop
()
loop
.
run_until_complete
(
handler
.
run
(
input
))
res
=
loop
.
run_until_complete
(
handler
.
run
(
input
))
logging
.
info
(
"asr websocket client finished"
)
return
res
[
'asr_results'
]
@
cli_client_register
(
name
=
'paddlespeech_client.cls'
,
description
=
'visit cls service'
)
...
...
paddlespeech/server/engine/asr/online/asr_engine.py
浏览文件 @
48fa84be
...
...
@@ -473,7 +473,7 @@ class PaddleASRConnectionHanddler:
ctc_probs
=
self
.
model
.
ctc
.
log_softmax
(
ys
)
# (1, maxlen, vocab_size)
ctc_probs
=
ctc_probs
.
squeeze
(
0
)
self
.
searcher
.
search
(
None
,
ctc_probs
,
self
.
cached_feat
.
place
)
self
.
searcher
.
search
(
ctc_probs
,
self
.
cached_feat
.
place
)
self
.
hyps
=
self
.
searcher
.
get_one_best_hyps
()
assert
self
.
cached_feat
.
shape
[
0
]
==
1
...
...
@@ -823,7 +823,7 @@ class ASRServerExecutor(ASRExecutor):
ctc_probs
=
self
.
model
.
ctc
.
log_softmax
(
encoder_out
)
# (1, maxlen, vocab_size)
ctc_probs
=
ctc_probs
.
squeeze
(
0
)
self
.
searcher
.
search
(
xs
,
ctc_probs
,
xs
.
place
)
self
.
searcher
.
search
(
ctc_probs
,
xs
.
place
)
# update the one best result
self
.
hyps
=
self
.
searcher
.
get_one_best_hyps
()
...
...
paddlespeech/server/engine/asr/online/ctc_search.py
浏览文件 @
48fa84be
...
...
@@ -24,19 +24,18 @@ class CTCPrefixBeamSearch:
"""Implement the ctc prefix beam search
Args:
config (
_type_
): _description_
config (
yacs.config.CfgNode
): _description_
"""
self
.
config
=
config
self
.
reset
()
def
search
(
self
,
xs
,
ctc_probs
,
device
,
blank_id
=
0
):
def
search
(
self
,
ctc_probs
,
device
,
blank_id
=
0
):
"""ctc prefix beam search method decode a chunk feature
Args:
xs (paddle.Tensor): feature data
ctc_probs (paddle.Tensor): the ctc probability of all the tokens
encoder_out (paddle.Tensor): _description_
encoder_mask (_type_): _description_
device (paddle.fluid.core_avx.Place): the feature host device, such as CUDAPlace(0).
blank_id (int, optional): the blank id in the vocab. Defaults to 0.
Returns:
...
...
@@ -45,7 +44,6 @@ class CTCPrefixBeamSearch:
# decode
logger
.
info
(
"start to ctc prefix search"
)
# device = xs.place
batch_size
=
1
beam_size
=
self
.
config
.
beam_size
maxlen
=
ctc_probs
.
shape
[
0
]
...
...
paddlespeech/server/tests/asr/online/websocket_client.py
浏览文件 @
48fa84be
...
...
@@ -34,10 +34,9 @@ class ASRAudioHandler:
def
read_wave
(
self
,
wavfile_path
:
str
):
samples
,
sample_rate
=
soundfile
.
read
(
wavfile_path
,
dtype
=
'int16'
)
x_len
=
len
(
samples
)
# chunk_stride = 40 * 16 #40ms, sample_rate = 16kHz
chunk_size
=
80
*
16
#80ms, sample_rate = 16kHz
if
x_len
%
chunk_size
!=
0
:
chunk_size
=
85
*
16
#80ms, sample_rate = 16kHz
if
x_len
%
chunk_size
!=
0
:
padding_len_x
=
chunk_size
-
x_len
%
chunk_size
else
:
padding_len_x
=
0
...
...
@@ -48,7 +47,6 @@ class ASRAudioHandler:
assert
(
x_len
+
padding_len_x
)
%
chunk_size
==
0
num_chunk
=
(
x_len
+
padding_len_x
)
/
chunk_size
num_chunk
=
int
(
num_chunk
)
for
i
in
range
(
0
,
num_chunk
):
start
=
i
*
chunk_size
end
=
start
+
chunk_size
...
...
@@ -82,7 +80,6 @@ class ASRAudioHandler:
msg
=
json
.
loads
(
msg
)
logging
.
info
(
"receive msg={}"
.
format
(
msg
))
result
=
msg
# finished
audio_info
=
json
.
dumps
(
{
...
...
@@ -98,8 +95,8 @@ class ASRAudioHandler:
# decode the bytes to str
msg
=
json
.
loads
(
msg
)
logging
.
info
(
"receive msg={}"
.
format
(
msg
))
logging
.
info
(
"
final
receive msg={}"
.
format
(
msg
))
result
=
msg
return
result
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录