Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
8f9b7bba
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
206
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
8f9b7bba
编写于
6月 07, 2022
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor asr online server
上级
f3132ce2
变更
7
展开全部
隐藏空白更改
内联
并排
Showing
7 changed file
with
356 addition
and
232 deletion
+356
-232
.pre-commit-config.yaml
.pre-commit-config.yaml
+6
-6
demos/streaming_asr_server/server.sh
demos/streaming_asr_server/server.sh
+2
-1
demos/streaming_asr_server/test.sh
demos/streaming_asr_server/test.sh
+2
-1
paddlespeech/__init__.py
paddlespeech/__init__.py
+4
-0
paddlespeech/server/engine/asr/online/asr_engine.py
paddlespeech/server/engine/asr/online/asr_engine.py
+199
-209
paddlespeech/server/engine/asr/online/ctc_endpoint.py
paddlespeech/server/engine/asr/online/ctc_endpoint.py
+108
-0
paddlespeech/server/engine/asr/online/ctc_search.py
paddlespeech/server/engine/asr/online/ctc_search.py
+35
-15
未找到文件。
.pre-commit-config.yaml
浏览文件 @
8f9b7bba
...
...
@@ -51,12 +51,12 @@ repos:
language
:
system
files
:
\.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$
exclude
:
(?=speechx/speechx/kaldi|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin).*(\.cpp|\.cc|\.h|\.py)$
-
id
:
copyright_checker
name
:
copyright_checker
entry
:
python .pre-commit-hooks/copyright-check.hook
language
:
system
files
:
\.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
exclude
:
(?=third_party|pypinyin|speechx/speechx/kaldi|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin).*(\.cpp|\.cc|\.h|\.py)$
#
- id: copyright_checker
#
name: copyright_checker
#
entry: python .pre-commit-hooks/copyright-check.hook
#
language: system
#
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
#
exclude: (?=third_party|pypinyin|speechx/speechx/kaldi|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin).*(\.cpp|\.cc|\.h|\.py)$
-
repo
:
https://github.com/asottile/reorder_python_imports
rev
:
v2.4.0
hooks
:
...
...
demos/streaming_asr_server/server.sh
浏览文件 @
8f9b7bba
...
...
@@ -5,4 +5,5 @@ export CUDA_VISIBLE_DEVICE=0,1,2,3
paddlespeech_server start
--config_file
conf/punc_application.yaml &> punc.log &
# nohup python3 streaming_asr_server.py --config_file conf/ws_conformer_application.yaml > streaming_asr.log 2>&1 &
paddlespeech_server start
--config_file
conf/ws_conformer_application.yaml &> streaming_asr.log &
\ No newline at end of file
paddlespeech_server start
--config_file
conf/ws_conformer_application.yaml &> streaming_asr.log &
demos/streaming_asr_server/test.sh
浏览文件 @
8f9b7bba
...
...
@@ -9,4 +9,5 @@ paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8290 --input ./zh.wa
# read the wav and call streaming and punc service
# If `127.0.0.1` is not accessible, you need to use the actual service IP address.
# python3 websocket_client.py --server_ip 127.0.0.1 --port 8290 --punc.server_ip 127.0.0.1 --punc.port 8190 --wavfile ./zh.wav
paddlespeech_client asr_online
--server_ip
127.0.0.1
--port
8290
--punc
.server_ip 127.0.0.1
--punc
.port 8190
--input
./zh.wav
\ No newline at end of file
paddlespeech_client asr_online
--server_ip
127.0.0.1
--port
8290
--punc
.server_ip 127.0.0.1
--punc
.port 8190
--input
./zh.wav
paddlespeech/__init__.py
浏览文件 @
8f9b7bba
...
...
@@ -14,3 +14,7 @@
import _locale


def _forced_default_locale(*args):
    """Return a fixed ``[language, encoding]`` pair, ignoring all arguments."""
    return ['en_US', 'utf8']


# Override the private locale probe so that every caller gets the fixed
# en_US / utf8 pair instead of whatever the platform would report.
_locale._getdefaultlocale = _forced_default_locale
paddlespeech/server/engine/asr/online/asr_engine.py
浏览文件 @
8f9b7bba
此差异已折叠。
点击以展开。
paddlespeech/server/engine/asr/online/ctc_endpoint.py
0 → 100644
浏览文件 @
8f9b7bba
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
from dataclasses import dataclass
from dataclasses import field
from typing import List

from paddlespeech.cli.log import logger
@dataclass
class OnlineCTCEndpointRule:
    """One endpointing rule; it fires only when all of its conditions hold."""
    # When True, the rule can fire only if the caller reports that something
    # has been decoded; when False, that requirement is waived.
    must_contain_nonsilence: bool = True
    # Minimum amount of trailing silence required, in milliseconds.
    min_trailing_silence: int = 1000
    # Minimum total utterance length required, in milliseconds.
    min_utterance_length: int = 0
@dataclass
class OnlineCTCEndpoingOpt:
    """Options for CTC-based endpoint detection.

    The rule fields are built with ``default_factory`` so that every options
    object owns its own ``OnlineCTCEndpointRule`` instances. A plain instance
    default is shared between all options objects and is rejected outright by
    the ``dataclasses`` module on Python 3.11+ because the rule class is
    unhashable.
    """
    # Duration of one decoded frame, in milliseconds.
    frame_shift_in_ms: int = 10

    # blank id, that we consider as silence for purposes of endpointing.
    blank: int = 0
    # A frame whose blank probability is above this threshold is silence.
    blank_threshold: float = 0.8

    # We support three rules. We terminate decoding if ANY of these rules
    # evaluates to "true". If you want to add more rules, do it by changing this
    # code. If you want to disable a rule, you can set the silence-timeout for
    # that rule to a very large number.

    # rule1 times out after 5 seconds of silence, even if we decoded nothing.
    rule1: OnlineCTCEndpointRule = field(
        default_factory=lambda: OnlineCTCEndpointRule(False, 5000, 0))
    # rule2 times out after 1.0 seconds of silence after decoding something.
    rule2: OnlineCTCEndpointRule = field(
        default_factory=lambda: OnlineCTCEndpointRule(True, 1000, 0))
    # rule3 times out after the utterance is 20 seconds long, regardless of
    # anything else.
    rule3: OnlineCTCEndpointRule = field(
        default_factory=lambda: OnlineCTCEndpointRule(False, 0, 20000))
class OnlineCTCEndpoint:
    """Endpoint detector driven by per-frame CTC blank probabilities.

    Frames whose blank probability exceeds ``opts.blank_threshold`` are
    counted as silence; the running frame counters are converted to
    milliseconds and checked against the three rules held in ``opts``.

    [END-TO-END AUTOMATIC SPEECH RECOGNITION INTEGRATED WITH CTC-BASED VOICE ACTIVITY DETECTION](https://arxiv.org/pdf/2002.00551.pdf)
    """

    def __init__(self, opts: OnlineCTCEndpoingOpt):
        """Store the options and start with zeroed frame counters.

        Args:
            opts: endpointing options (frame shift, blank id and threshold,
                and the three rules).
        """
        self.opts = opts
        logger.info(f"Endpont Opts: {opts}")
        self.frame_shift_in_ms = opts.frame_shift_in_ms

        # Total frames seen, and length of the current run of silent frames.
        self.num_frames_decoded = 0
        self.trailing_silence_frames = 0

        self.reset()

    def reset(self):
        """Zero both frame counters."""
        self.num_frames_decoded = 0
        self.trailing_silence_frames = 0

    def rule_activated(self,
                       rule: OnlineCTCEndpointRule,
                       rule_name: str,
                       decoding_something: bool,
                       trailine_silence: int,
                       utterance_length: int) -> bool:
        """Check whether a single rule fires.

        Args:
            rule: the rule to evaluate.
            rule_name: label used only in the log message.
            decoding_something: whether anything has been decoded so far.
            trailine_silence: trailing silence, compared against
                ``rule.min_trailing_silence``.
            utterance_length: utterance length, compared against
                ``rule.min_utterance_length``.

        Returns:
            True when every condition of the rule is satisfied.
        """
        # The non-silence requirement is met either by having decoded
        # something or by the rule not asking for it at all.
        nonsilence_ok = decoding_something or (not rule.must_contain_nonsilence)
        ans = (nonsilence_ok and
               trailine_silence >= rule.min_trailing_silence and
               utterance_length >= rule.min_utterance_length)
        if ans:
            logger.info(f"Endpoint Rule: {rule_name} activated: "
                        f"{decoding_something}, {trailine_silence}, "
                        f"{utterance_length}")
        return ans

    def endpoint_detected(self,
                          ctc_log_probs: List[List[float]],
                          decoding_something: bool) -> bool:
        """Consume a chunk of frames and report whether an endpoint is reached.

        Args:
            ctc_log_probs: one row of log-probabilities per frame; the entry
                at index ``opts.blank`` is the blank log-probability.
            decoding_something: whether anything has been decoded so far.

        Returns:
            True as soon as any of rule1, rule2 or rule3 fires, else False.
        """
        for logprob in ctc_log_probs:
            # The options dataclass names the blank id field ``blank``.
            blank_prob = math.exp(logprob[self.opts.blank])

            self.num_frames_decoded += 1
            if blank_prob > self.opts.blank_threshold:
                self.trailing_silence_frames += 1
            else:
                # Any non-silent frame restarts the trailing-silence run.
                self.trailing_silence_frames = 0

        assert self.num_frames_decoded >= self.trailing_silence_frames
        assert self.frame_shift_in_ms > 0

        # Convert frame counts to milliseconds, the unit used by the rules.
        utterance_length = self.num_frames_decoded * self.frame_shift_in_ms
        trailing_silence = self.trailing_silence_frames * self.frame_shift_in_ms

        # Rules are tried in order; the first one that fires wins.
        for rule_name in ('rule1', 'rule2', 'rule3'):
            if self.rule_activated(
                    getattr(self.opts, rule_name), rule_name,
                    decoding_something, trailing_silence, utterance_length):
                return True
        return False
paddlespeech/server/engine/asr/online/ctc_search.py
浏览文件 @
8f9b7bba
...
...
@@ -30,8 +30,29 @@ class CTCPrefixBeamSearch:
config (yacs.config.CfgNode): the ctc prefix beam search configuration
"""
self
.
config
=
config
# beam size
self
.
first_beam_size
=
self
.
config
.
beam_size
# TODO(support second beam size)
self
.
second_beam_size
=
int
(
self
.
first_beam_size
*
1.0
)
logger
.
info
(
f
"first and second beam size:
{
self
.
first_beam_size
}
,
{
self
.
second_beam_size
}
"
)
# state
self
.
cur_hyps
=
None
self
.
hyps
=
None
self
.
abs_time_step
=
0
self
.
reset
()
def reset(self):
    """Reset the cached search state.

    Sets ``cur_hyps`` and ``hyps`` back to ``None`` and rewinds
    ``abs_time_step`` to zero.
    """
    self.abs_time_step = 0
    self.hyps = None
    self.cur_hyps = None
@
paddle
.
no_grad
()
def
search
(
self
,
ctc_probs
,
device
,
blank_id
=
0
):
"""ctc prefix beam search method decode a chunk feature
...
...
@@ -47,12 +68,17 @@ class CTCPrefixBeamSearch:
"""
# decode
logger
.
info
(
"start to ctc prefix search"
)
assert
len
(
ctc_probs
.
shape
)
==
2
batch_size
=
1
beam_size
=
self
.
config
.
beam_size
maxlen
=
ctc_probs
.
shape
[
0
]
assert
len
(
ctc_probs
.
shape
)
==
2
vocab_size
=
ctc_probs
.
shape
[
1
]
first_beam_size
=
min
(
self
.
first_beam_size
,
vocab_size
)
second_beam_size
=
min
(
self
.
second_beam_size
,
vocab_size
)
logger
.
info
(
f
"effect first and second beam size:
{
self
.
first_beam_size
}
,
{
self
.
second_beam_size
}
"
)
maxlen
=
ctc_probs
.
shape
[
0
]
# cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score))
# 0. blank_ending_score,
...
...
@@ -75,7 +101,8 @@ class CTCPrefixBeamSearch:
# 2.1 First beam prune: select topk best
# do token passing process
top_k_logp
,
top_k_index
=
logp
.
topk
(
beam_size
)
# (beam_size,)
top_k_logp
,
top_k_index
=
logp
.
topk
(
first_beam_size
)
# (first_beam_size,)
for
s
in
top_k_index
:
s
=
s
.
item
()
ps
=
logp
[
s
].
item
()
...
...
@@ -148,7 +175,7 @@ class CTCPrefixBeamSearch:
next_hyps
.
items
(),
key
=
lambda
x
:
log_add
([
x
[
1
][
0
],
x
[
1
][
1
]]),
reverse
=
True
)
self
.
cur_hyps
=
next_hyps
[:
beam_size
]
self
.
cur_hyps
=
next_hyps
[:
second_
beam_size
]
# 2.3 update the absolute time step
self
.
abs_time_step
+=
1
...
...
@@ -163,7 +190,7 @@ class CTCPrefixBeamSearch:
"""Return the one best result
Returns:
list: the one best result
list: the one best result
, List[str]
"""
return
[
self
.
hyps
[
0
][
0
]]
...
...
@@ -171,17 +198,10 @@ class CTCPrefixBeamSearch:
"""Return the search hyps
Returns:
list: return the search hyps
list: return the search hyps
, List[Tuple[str, float, ...]]
"""
return
self
.
hyps
def reset(self):
    """Reset the cached search state.

    Sets ``cur_hyps`` and ``hyps`` back to ``None`` and rewinds
    ``abs_time_step`` to zero.
    """
    self.abs_time_step = 0
    self.hyps = None
    self.cur_hyps = None
def
finalize_search
(
self
):
"""do nothing in ctc_prefix_beam_search
"""
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录