Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
d8e6cc66
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
11 个月 前同步成功
通知
204
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
d8e6cc66
编写于
7月 11, 2022
作者:
J
Jackwaterveg
提交者:
GitHub
7月 11, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2133 from zh794390558/timestamp
[server][asr]rename time_s and time_ns to time_b and time_nb
上级
bfc44f7a
f8450c39
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
42 additions
and
39 deletions
+42
-39
paddlespeech/server/engine/acs/python/acs_engine.py
paddlespeech/server/engine/acs/python/acs_engine.py
+4
-1
paddlespeech/server/engine/asr/online/ctc_search.py
paddlespeech/server/engine/asr/online/ctc_search.py
+38
-38
未找到文件。
paddlespeech/server/engine/acs/python/acs_engine.py
浏览文件 @
d8e6cc66
...
...
@@ -192,12 +192,15 @@ class ACSEngine(BaseEngine):
# search for each word in self.word_list
offset
=
self
.
config
.
offset
# last time in time_stamp
max_ed
=
time_stamp
[
-
1
][
'ed'
]
for
w
in
self
.
word_list
:
# search the w in asr_result and the index in asr_result
# https://docs.python.org/3/library/re.html#re.finditer
for
m
in
re
.
finditer
(
w
,
asr_result
):
# match start and end char index in timestamp
# https://docs.python.org/3/library/re.html#re.Match.start
start
=
max
(
time_stamp
[
m
.
start
(
0
)][
'bg'
]
-
offset
,
0
)
end
=
min
(
time_stamp
[
m
.
end
(
0
)
-
1
][
'ed'
]
+
offset
,
max_ed
)
logger
.
debug
(
f
'start:
{
start
}
, end:
{
end
}
'
)
acs_result
.
append
({
'w'
:
w
,
'bg'
:
start
,
'ed'
:
end
})
...
...
paddlespeech/server/engine/asr/online/ctc_search.py
浏览文件 @
d8e6cc66
...
...
@@ -83,11 +83,11 @@ class CTCPrefixBeamSearch:
# cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score))
# 0. blank_ending_score,
# 1. none_blank_ending_score,
# 2. viterbi_blank ending,
# 3. viterbi_non_blank,
# 2. viterbi_blank ending
score
,
# 3. viterbi_non_blank
score
,
# 4. current_token_prob,
# 5. times_viterbi_blank,
# 6. times_viterbi_non_blank
# 5. times_viterbi_blank,
times_b
# 6. times_viterbi_non_blank
, times_nb
if
self
.
cur_hyps
is
None
:
self
.
cur_hyps
=
[(
tuple
(),
(
0.0
,
-
float
(
'inf'
),
0.0
,
0.0
,
-
float
(
'inf'
),
[],
[]))]
...
...
@@ -106,69 +106,69 @@ class CTCPrefixBeamSearch:
for
s
in
top_k_index
:
s
=
s
.
item
()
ps
=
logp
[
s
].
item
()
for
prefix
,
(
pb
,
pnb
,
v_b_s
,
v_nb_s
,
cur_token_prob
,
times_
s
,
times_n
s
)
in
self
.
cur_hyps
:
for
prefix
,
(
pb
,
pnb
,
v_b_s
,
v_nb_s
,
cur_token_prob
,
times_
b
,
times_n
b
)
in
self
.
cur_hyps
:
last
=
prefix
[
-
1
]
if
len
(
prefix
)
>
0
else
None
if
s
==
blank_id
:
# blank
n_pb
,
n_pnb
,
n_v_
s
,
n_v_ns
,
n_cur_token_prob
,
n_times_s
,
n_times_ns
=
next_hyps
[
n_pb
,
n_pnb
,
n_v_
b
,
n_v_nb
,
n_cur_token_prob
,
n_times_b
,
n_times_nb
=
next_hyps
[
prefix
]
n_pb
=
log_add
([
n_pb
,
pb
+
ps
,
pnb
+
ps
])
pre_times
=
times_
s
if
v_b_s
>
v_nb_s
else
times_ns
n_times_
s
=
copy
.
deepcopy
(
pre_times
)
pre_times
=
times_
b
if
v_b_s
>
v_nb_s
else
times_nb
n_times_
b
=
copy
.
deepcopy
(
pre_times
)
viterbi_score
=
v_b_s
if
v_b_s
>
v_nb_s
else
v_nb_s
n_v_
s
=
viterbi_score
+
ps
next_hyps
[
prefix
]
=
(
n_pb
,
n_pnb
,
n_v_
s
,
n_v_ns
,
n_cur_token_prob
,
n_times_
s
,
n_times_n
s
)
n_v_
b
=
viterbi_score
+
ps
next_hyps
[
prefix
]
=
(
n_pb
,
n_pnb
,
n_v_
b
,
n_v_nb
,
n_cur_token_prob
,
n_times_
b
,
n_times_n
b
)
elif
s
==
last
:
# Update *ss -> *s;
# case1: *a + a => *a
n_pb
,
n_pnb
,
n_v_
s
,
n_v_ns
,
n_cur_token_prob
,
n_times_s
,
n_times_ns
=
next_hyps
[
n_pb
,
n_pnb
,
n_v_
b
,
n_v_nb
,
n_cur_token_prob
,
n_times_b
,
n_times_nb
=
next_hyps
[
prefix
]
n_pnb
=
log_add
([
n_pnb
,
pnb
+
ps
])
if
n_v_n
s
<
v_nb_s
+
ps
:
n_v_n
s
=
v_nb_s
+
ps
if
n_v_n
b
<
v_nb_s
+
ps
:
n_v_n
b
=
v_nb_s
+
ps
if
n_cur_token_prob
<
ps
:
n_cur_token_prob
=
ps
n_times_n
s
=
copy
.
deepcopy
(
times_ns
)
n_times_n
s
[
n_times_n
b
=
copy
.
deepcopy
(
times_nb
)
n_times_n
b
[
-
1
]
=
self
.
abs_time_step
# 注意,这里要重新使用绝对时间
next_hyps
[
prefix
]
=
(
n_pb
,
n_pnb
,
n_v_
s
,
n_v_ns
,
n_cur_token_prob
,
n_times_
s
,
n_times_n
s
)
next_hyps
[
prefix
]
=
(
n_pb
,
n_pnb
,
n_v_
b
,
n_v_nb
,
n_cur_token_prob
,
n_times_
b
,
n_times_n
b
)
# Update *s-s -> *ss, - is for blank
# Case 2: *aε + a => *aa
n_prefix
=
prefix
+
(
s
,
)
n_pb
,
n_pnb
,
n_v_
s
,
n_v_ns
,
n_cur_token_prob
,
n_times_s
,
n_times_ns
=
next_hyps
[
n_pb
,
n_pnb
,
n_v_
b
,
n_v_nb
,
n_cur_token_prob
,
n_times_b
,
n_times_nb
=
next_hyps
[
n_prefix
]
if
n_v_n
s
<
v_b_s
+
ps
:
n_v_n
s
=
v_b_s
+
ps
if
n_v_n
b
<
v_b_s
+
ps
:
n_v_n
b
=
v_b_s
+
ps
n_cur_token_prob
=
ps
n_times_n
s
=
copy
.
deepcopy
(
times_s
)
n_times_n
s
.
append
(
self
.
abs_time_step
)
n_times_n
b
=
copy
.
deepcopy
(
times_b
)
n_times_n
b
.
append
(
self
.
abs_time_step
)
n_pnb
=
log_add
([
n_pnb
,
pb
+
ps
])
next_hyps
[
n_prefix
]
=
(
n_pb
,
n_pnb
,
n_v_
s
,
n_v_ns
,
n_cur_token_prob
,
n_times_
s
,
n_times_n
s
)
next_hyps
[
n_prefix
]
=
(
n_pb
,
n_pnb
,
n_v_
b
,
n_v_nb
,
n_cur_token_prob
,
n_times_
b
,
n_times_n
b
)
else
:
# Case 3: *a + b => *ab, *aε + b => *ab
n_prefix
=
prefix
+
(
s
,
)
n_pb
,
n_pnb
,
n_v_
s
,
n_v_ns
,
n_cur_token_prob
,
n_times_s
,
n_times_ns
=
next_hyps
[
n_pb
,
n_pnb
,
n_v_
b
,
n_v_nb
,
n_cur_token_prob
,
n_times_b
,
n_times_nb
=
next_hyps
[
n_prefix
]
viterbi_score
=
v_b_s
if
v_b_s
>
v_nb_s
else
v_nb_s
pre_times
=
times_
s
if
v_b_s
>
v_nb_s
else
times_ns
if
n_v_n
s
<
viterbi_score
+
ps
:
n_v_n
s
=
viterbi_score
+
ps
pre_times
=
times_
b
if
v_b_s
>
v_nb_s
else
times_nb
if
n_v_n
b
<
viterbi_score
+
ps
:
n_v_n
b
=
viterbi_score
+
ps
n_cur_token_prob
=
ps
n_times_n
s
=
copy
.
deepcopy
(
pre_times
)
n_times_n
s
.
append
(
self
.
abs_time_step
)
n_times_n
b
=
copy
.
deepcopy
(
pre_times
)
n_times_n
b
.
append
(
self
.
abs_time_step
)
n_pnb
=
log_add
([
n_pnb
,
pb
+
ps
,
pnb
+
ps
])
next_hyps
[
n_prefix
]
=
(
n_pb
,
n_pnb
,
n_v_
s
,
n_v_ns
,
n_cur_token_prob
,
n_times_
s
,
n_times_n
s
)
next_hyps
[
n_prefix
]
=
(
n_pb
,
n_pnb
,
n_v_
b
,
n_v_nb
,
n_cur_token_prob
,
n_times_
b
,
n_times_n
b
)
# 2.2 Second beam prune
next_hyps
=
sorted
(
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录