Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
10a2da68
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
10a2da68
编写于
10月 13, 2021
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
format code
上级
60e97906
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
191 addition
and
109 deletion
+191
-109
deepspeech/decoders/README.md
deepspeech/decoders/README.md
+1
-1
deepspeech/decoders/__init__.py
deepspeech/decoders/__init__.py
+14
-1
deepspeech/decoders/scores/__init__.py
deepspeech/decoders/scores/__init__.py
+13
-0
deepspeech/decoders/scores/ctc.py
deepspeech/decoders/scores/ctc.py
+17
-12
deepspeech/decoders/scores/ctc_prefix_score.py
deepspeech/decoders/scores/ctc_prefix_score.py
+56
-54
deepspeech/decoders/scores/length_bonus.py
deepspeech/decoders/scores/length_bonus.py
+21
-10
deepspeech/decoders/scores/ngram.py
deepspeech/decoders/scores/ngram.py
+17
-5
deepspeech/decoders/scores/score_interface.py
deepspeech/decoders/scores/score_interface.py
+33
-22
deepspeech/decoders/utils.py
deepspeech/decoders/utils.py
+18
-3
deepspeech/modules/ctc.py
deepspeech/modules/ctc.py
+1
-1
未找到文件。
deepspeech/decoders/README.md
浏览文件 @
10a2da68
...
...
@@ -10,4 +10,4 @@
*
[
Vectorized Beam Search for CTC-Attention-based Speech Recognition
](
https://www.isca-speech.org/archive/pdfs/interspeech_2019/seki19b_interspeech.pdf
)
### Streaming Join CTC/ATT Beam Search
*
[
STREAMING TRANSFORMER ASR WITH BLOCKWISE SYNCHRONOUS BEAM SEARCH
](
https://arxiv.org/abs/2006.14941
)
\ No newline at end of file
*
[
STREAMING TRANSFORMER ASR WITH BLOCKWISE SYNCHRONOUS BEAM SEARCH
](
https://arxiv.org/abs/2006.14941
)
deepspeech/decoders/__init__.py
浏览文件 @
10a2da68
from
.ctcdecoder
import
swig_wrapper
\ No newline at end of file
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
.ctcdecoder
import
swig_wrapper
deepspeech/decoders/scores/__init__.py
浏览文件 @
10a2da68
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
deepspeech/decoders/scores/ctc.py
浏览文件 @
10a2da68
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""ScorerInterface implementation for CTC."""
import
numpy
as
np
import
paddle
...
...
@@ -81,8 +93,7 @@ class CTCPrefixScorer(BatchPartialScorerInterface):
prev_score
,
state
=
state
presub_score
,
new_st
=
self
.
impl
(
y
.
cpu
(),
ids
.
cpu
(),
state
)
tscore
=
paddle
.
to_tensor
(
presub_score
-
prev_score
,
place
=
x
.
place
,
dtype
=
x
.
dtype
)
presub_score
-
prev_score
,
place
=
x
.
place
,
dtype
=
x
.
dtype
)
return
tscore
,
(
presub_score
,
new_st
)
def
batch_init_state
(
self
,
x
:
paddle
.
Tensor
):
...
...
@@ -115,15 +126,9 @@ class CTCPrefixScorer(BatchPartialScorerInterface):
"""
batch_state
=
(
(
paddle
.
stack
([
s
[
0
]
for
s
in
state
],
axis
=
2
),
paddle
.
stack
([
s
[
1
]
for
s
in
state
]),
state
[
0
][
2
],
state
[
0
][
3
],
)
if
state
[
0
]
is
not
None
else
None
)
(
paddle
.
stack
([
s
[
0
]
for
s
in
state
],
axis
=
2
),
paddle
.
stack
([
s
[
1
]
for
s
in
state
]),
state
[
0
][
2
],
state
[
0
][
3
],
)
if
state
[
0
]
is
not
None
else
None
)
return
self
.
impl
(
y
,
batch_state
,
ids
)
def
extend_prob
(
self
,
x
:
paddle
.
Tensor
):
...
...
deepspeech/decoders/scores/ctc_prefix_score.py
浏览文件 @
10a2da68
#!/usr/bin/env python3
# Copyright 2018 Mitsubishi Electric Research Labs (Takaaki Hori)
# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
import
paddle
import
numpy
as
np
import
paddle
import
six
...
...
@@ -49,9 +46,10 @@ class CTCPrefixScorePD():
x
[
i
,
l
:,
blank
]
=
0
# Reshape input x
xn
=
x
.
transpose
([
1
,
0
,
2
])
# (B, T, O) -> (T, B, O)
xb
=
xn
[:,
:,
self
.
blank
].
unsqueeze
(
2
).
expand
(
-
1
,
-
1
,
self
.
odim
)
# (T,B,O)
xb
=
xn
[:,
:,
self
.
blank
].
unsqueeze
(
2
).
expand
(
-
1
,
-
1
,
self
.
odim
)
# (T,B,O)
self
.
x
=
paddle
.
stack
([
xn
,
xb
])
# (2, T, B, O)
self
.
end_frames
=
paddle
.
to_tensor
(
xlens
)
-
1
# (B,)
self
.
end_frames
=
paddle
.
to_tensor
(
xlens
)
-
1
# (B,)
# Setup CTC windowing
self
.
margin
=
margin
...
...
@@ -59,7 +57,7 @@ class CTCPrefixScorePD():
self
.
frame_ids
=
paddle
.
arange
(
self
.
input_length
,
dtype
=
self
.
dtype
)
# Base indices for index conversion
# B idx, hyp idx. shape (B*W, 1)
self
.
idx_bh
=
None
self
.
idx_bh
=
None
# B idx. shape (B,)
self
.
idx_b
=
paddle
.
arange
(
self
.
batch
)
# B idx, O idx. shape (B, 1)
...
...
@@ -78,56 +76,59 @@ class CTCPrefixScorePD():
last_ids
=
[
yi
[
-
1
]
for
yi
in
y
]
# last output label ids
n_bh
=
len
(
last_ids
)
# batch * hyps
n_hyps
=
n_bh
//
self
.
batch
# assuming each utterance has the same # of hyps
self
.
scoring_num
=
scoring_ids
.
size
(
-
1
)
if
scoring_ids
is
not
None
else
0
self
.
scoring_num
=
scoring_ids
.
size
(
-
1
)
if
scoring_ids
is
not
None
else
0
# prepare state info
if
state
is
None
:
r_prev
=
paddle
.
full
(
(
self
.
input_length
,
2
,
self
.
batch
,
n_hyps
),
self
.
logzero
,
dtype
=
self
.
dtype
,
)
# (T, 2, B, W)
r_prev
[:,
1
]
=
paddle
.
cumsum
(
self
.
x
[
0
,
:,
:,
self
.
blank
],
0
).
unsqueeze
(
2
)
r_prev
=
r_prev
.
view
(
-
1
,
2
,
n_bh
)
# (T, 2, BW)
dtype
=
self
.
dtype
,
)
# (T, 2, B, W)
r_prev
[:,
1
]
=
paddle
.
cumsum
(
self
.
x
[
0
,
:,
:,
self
.
blank
],
0
).
unsqueeze
(
2
)
r_prev
=
r_prev
.
view
(
-
1
,
2
,
n_bh
)
# (T, 2, BW)
s_prev
=
0.0
# score
f_min_prev
=
0
# eq. 22-23
f_max_prev
=
1
# eq. 22-23
f_min_prev
=
0
# eq. 22-23
f_max_prev
=
1
# eq. 22-23
else
:
r_prev
,
s_prev
,
f_min_prev
,
f_max_prev
=
state
# select input dimensions for scoring
if
self
.
scoring_num
>
0
:
# (BW, O)
scoring_idmap
=
paddle
.
full
((
n_bh
,
self
.
odim
),
-
1
,
dtype
=
paddle
.
long
)
scoring_idmap
=
paddle
.
full
(
(
n_bh
,
self
.
odim
),
-
1
,
dtype
=
paddle
.
long
)
snum
=
self
.
scoring_num
if
self
.
idx_bh
is
None
or
n_bh
>
len
(
self
.
idx_bh
):
self
.
idx_bh
=
paddle
.
arange
(
n_bh
).
view
(
-
1
,
1
)
# (BW, 1)
self
.
idx_bh
=
paddle
.
arange
(
n_bh
).
view
(
-
1
,
1
)
# (BW, 1)
scoring_idmap
[
self
.
idx_bh
[:
n_bh
],
scoring_ids
]
=
paddle
.
arange
(
snum
)
scoring_idx
=
(
scoring_ids
+
self
.
idx_bo
.
repeat
(
1
,
n_hyps
).
view
(
-
1
,
1
)
# (BW,1)
).
view
(
-
1
)
# (BWO)
scoring_ids
+
self
.
idx_bo
.
repeat
(
1
,
n_hyps
).
view
(
-
1
,
1
)
# (BW,1)
).
view
(
-
1
)
# (BWO)
# x_ shape (2, T, B*W, O)
x_
=
paddle
.
index_select
(
self
.
x
.
view
(
2
,
-
1
,
self
.
batch
*
self
.
odim
),
scoring_idx
,
2
).
view
(
2
,
-
1
,
n_bh
,
snum
)
self
.
x
.
view
(
2
,
-
1
,
self
.
batch
*
self
.
odim
),
scoring_idx
,
2
).
view
(
2
,
-
1
,
n_bh
,
snum
)
else
:
scoring_ids
=
None
scoring_idmap
=
None
snum
=
self
.
odim
# x_ shape (2, T, B*W, O)
x_
=
self
.
x
.
unsqueeze
(
3
).
repeat
(
1
,
1
,
1
,
n_hyps
,
1
).
view
(
2
,
-
1
,
n_bh
,
snum
)
x_
=
self
.
x
.
unsqueeze
(
3
).
repeat
(
1
,
1
,
1
,
n_hyps
,
1
).
view
(
2
,
-
1
,
n_bh
,
snum
)
# new CTC forward probs are prepared as a (T x 2 x BW x S) tensor
# that corresponds to r_t^n(h) and r_t^b(h) in a batch.
r
=
paddle
.
full
(
(
self
.
input_length
,
2
,
n_bh
,
snum
),
self
.
logzero
,
dtype
=
self
.
dtype
,
)
dtype
=
self
.
dtype
,
)
if
output_length
==
0
:
r
[
0
,
0
]
=
x_
[
0
,
0
]
r_sum
=
paddle
.
logsumexp
(
r_prev
,
1
)
#(T,BW)
log_phi
=
r_sum
.
unsqueeze
(
2
).
repeat
(
1
,
1
,
snum
)
# (T, BW, O)
r_sum
=
paddle
.
logsumexp
(
r_prev
,
1
)
#(T,BW)
log_phi
=
r_sum
.
unsqueeze
(
2
).
repeat
(
1
,
1
,
snum
)
# (T, BW, O)
if
scoring_ids
is
not
None
:
for
idx
in
range
(
n_bh
):
pos
=
scoring_idmap
[
idx
,
last_ids
[
idx
]]
...
...
@@ -152,27 +153,30 @@ class CTCPrefixScorePD():
# compute forward probabilities log(r_t^n(h)) and log(r_t^b(h))
for
t
in
range
(
start
,
end
):
rp
=
r
[
t
-
1
]
# (2 x BW x O')
rp
=
r
[
t
-
1
]
# (2 x BW x O')
rr
=
paddle
.
stack
([
rp
[
0
],
log_phi
[
t
-
1
],
rp
[
0
],
rp
[
1
]]).
view
(
2
,
2
,
n_bh
,
snum
)
# (2,2,BW,O')
2
,
2
,
n_bh
,
snum
)
# (2,2,BW,O')
r
[
t
]
=
paddle
.
logsumexp
(
rr
,
1
)
+
x_
[:,
t
]
# compute log prefix probabilities log(psi)
log_phi_x
=
paddle
.
concat
((
log_phi
[
0
].
unsqueeze
(
0
),
log_phi
[:
-
1
]),
axis
=
0
)
+
x_
[
0
]
log_phi_x
=
paddle
.
concat
(
(
log_phi
[
0
].
unsqueeze
(
0
),
log_phi
[:
-
1
]),
axis
=
0
)
+
x_
[
0
]
if
scoring_ids
is
not
None
:
log_psi
=
paddle
.
full
((
n_bh
,
self
.
odim
),
self
.
logzero
,
dtype
=
self
.
dtype
)
log_psi
=
paddle
.
full
(
(
n_bh
,
self
.
odim
),
self
.
logzero
,
dtype
=
self
.
dtype
)
log_psi_
=
paddle
.
logsumexp
(
paddle
.
concat
((
log_phi_x
[
start
:
end
],
r
[
start
-
1
,
0
].
unsqueeze
(
0
)),
axis
=
0
),
axis
=
0
,
)
paddle
.
concat
(
(
log_phi_x
[
start
:
end
],
r
[
start
-
1
,
0
].
unsqueeze
(
0
)),
axis
=
0
),
axis
=
0
,
)
for
si
in
range
(
n_bh
):
log_psi
[
si
,
scoring_ids
[
si
]]
=
log_psi_
[
si
]
else
:
log_psi
=
paddle
.
logsumexp
(
paddle
.
concat
((
log_phi_x
[
start
:
end
],
r
[
start
-
1
,
0
].
unsqueeze
(
0
)),
axis
=
0
),
axis
=
0
,
)
paddle
.
concat
(
(
log_phi_x
[
start
:
end
],
r
[
start
-
1
,
0
].
unsqueeze
(
0
)),
axis
=
0
),
axis
=
0
,
)
for
si
in
range
(
n_bh
):
log_psi
[
si
,
self
.
eos
]
=
r_sum
[
self
.
end_frames
[
si
//
n_hyps
],
si
]
...
...
@@ -193,16 +197,16 @@ class CTCPrefixScorePD():
# convert ids to BHO space
n_bh
=
len
(
s
)
n_hyps
=
n_bh
//
self
.
batch
vidx
=
(
best_ids
+
(
self
.
idx_b
*
(
n_hyps
*
self
.
odim
)).
view
(
-
1
,
1
)).
view
(
-
1
)
vidx
=
(
best_ids
+
(
self
.
idx_b
*
(
n_hyps
*
self
.
odim
)).
view
(
-
1
,
1
)).
view
(
-
1
)
# select hypothesis scores
s_new
=
paddle
.
index_select
(
s
.
view
(
-
1
),
vidx
,
0
)
s_new
=
s_new
.
view
(
-
1
,
1
).
repeat
(
1
,
self
.
odim
).
view
(
n_bh
,
self
.
odim
)
# convert ids to BHS space (S: scoring_num)
if
scoring_idmap
is
not
None
:
snum
=
self
.
scoring_num
hyp_idx
=
(
best_ids
//
self
.
odim
+
(
self
.
idx_b
*
n_hyps
).
view
(
-
1
,
1
)).
view
(
-
1
)
hyp_idx
=
(
best_ids
//
self
.
odim
+
(
self
.
idx_b
*
n_hyps
).
view
(
-
1
,
1
)).
view
(
-
1
)
label_ids
=
paddle
.
fmod
(
best_ids
,
self
.
odim
).
view
(
-
1
)
score_idx
=
scoring_idmap
[
hyp_idx
,
label_ids
]
score_idx
[
score_idx
==
-
1
]
=
0
...
...
@@ -211,8 +215,7 @@ class CTCPrefixScorePD():
snum
=
self
.
odim
# select forward probabilities
r_new
=
paddle
.
index_select
(
r
.
view
(
-
1
,
2
,
n_bh
*
snum
),
vidx
,
2
).
view
(
-
1
,
2
,
n_bh
)
-
1
,
2
,
n_bh
)
return
r_new
,
s_new
,
f_min
,
f_max
def
extend_prob
(
self
,
x
):
...
...
@@ -233,7 +236,7 @@ class CTCPrefixScorePD():
xn
=
x
.
transpose
([
1
,
0
,
2
])
# (B, T, O) -> (T, B, O)
xb
=
xn
[:,
:,
self
.
blank
].
unsqueeze
(
2
).
expand
(
-
1
,
-
1
,
self
.
odim
)
self
.
x
=
paddle
.
stack
([
xn
,
xb
])
# (2, T, B, O)
self
.
x
[:,
:
tmp_x
.
shape
[
1
],
:,
:]
=
tmp_x
self
.
x
[:,
:
tmp_x
.
shape
[
1
],
:,
:]
=
tmp_x
self
.
input_length
=
x
.
size
(
1
)
self
.
end_frames
=
paddle
.
to_tensor
(
xlens
)
-
1
...
...
@@ -254,12 +257,12 @@ class CTCPrefixScorePD():
r_prev_new
=
paddle
.
full
(
(
self
.
input_length
,
2
),
self
.
logzero
,
dtype
=
self
.
dtype
,
)
dtype
=
self
.
dtype
,
)
start
=
max
(
r_prev
.
shape
[
0
],
1
)
r_prev_new
[
0
:
start
]
=
r_prev
for
t
in
range
(
start
,
self
.
input_length
):
r_prev_new
[
t
,
1
]
=
r_prev_new
[
t
-
1
,
1
]
+
self
.
x
[
0
,
t
,
:,
self
.
blank
]
r_prev_new
[
t
,
1
]
=
r_prev_new
[
t
-
1
,
1
]
+
self
.
x
[
0
,
t
,
:,
self
.
blank
]
return
(
r_prev_new
,
s_prev
,
f_min_prev
,
f_max_prev
)
...
...
@@ -279,7 +282,7 @@ class CTCPrefixScore():
self
.
blank
=
blank
self
.
eos
=
eos
self
.
input_length
=
len
(
x
)
self
.
x
=
x
# (T, O)
self
.
x
=
x
# (T, O)
def
initial_state
(
self
):
"""Obtain an initial CTC state
...
...
@@ -318,12 +321,12 @@ class CTCPrefixScore():
r
[
output_length
-
1
]
=
self
.
logzero
# prepare forward probabilities for the last label
r_sum
=
self
.
xp
.
logaddexp
(
r_prev
[:,
0
],
r_prev
[:,
1
]
)
# log(r_t^n(g) + r_t^b(g))
r_sum
=
self
.
xp
.
logaddexp
(
r_prev
[:,
0
],
r_prev
[:,
1
])
# log(r_t^n(g) + r_t^b(g))
last
=
y
[
-
1
]
if
output_length
>
0
and
last
in
cs
:
log_phi
=
self
.
xp
.
ndarray
((
self
.
input_length
,
len
(
cs
)),
dtype
=
np
.
float32
)
log_phi
=
self
.
xp
.
ndarray
(
(
self
.
input_length
,
len
(
cs
)),
dtype
=
np
.
float32
)
for
i
in
six
.
moves
.
range
(
len
(
cs
)):
log_phi
[:,
i
]
=
r_sum
if
cs
[
i
]
!=
last
else
r_prev
[:,
1
]
else
:
...
...
@@ -335,9 +338,8 @@ class CTCPrefixScore():
log_psi
=
r
[
start
-
1
,
0
]
for
t
in
six
.
moves
.
range
(
start
,
self
.
input_length
):
r
[
t
,
0
]
=
self
.
xp
.
logaddexp
(
r
[
t
-
1
,
0
],
log_phi
[
t
-
1
])
+
xs
[
t
]
r
[
t
,
1
]
=
(
self
.
xp
.
logaddexp
(
r
[
t
-
1
,
0
],
r
[
t
-
1
,
1
])
+
self
.
x
[
t
,
self
.
blank
]
)
r
[
t
,
1
]
=
(
self
.
xp
.
logaddexp
(
r
[
t
-
1
,
0
],
r
[
t
-
1
,
1
])
+
self
.
x
[
t
,
self
.
blank
])
log_psi
=
self
.
xp
.
logaddexp
(
log_psi
,
log_phi
[
t
-
1
]
+
xs
[
t
])
# get P(...eos|X) that ends with the prefix itself
...
...
deepspeech/decoders/scores/length_bonus.py
浏览文件 @
10a2da68
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Length bonus module."""
from
typing
import
Any
from
typing
import
List
...
...
@@ -34,11 +47,13 @@ class LengthBonus(BatchScorerInterface):
and None
"""
return
paddle
.
to_tensor
([
1.0
],
place
=
x
.
place
,
dtype
=
x
.
dtype
).
expand
(
self
.
n
),
None
return
paddle
.
to_tensor
(
[
1.0
],
place
=
x
.
place
,
dtype
=
x
.
dtype
).
expand
(
self
.
n
),
None
def
batch_score
(
self
,
ys
:
paddle
.
Tensor
,
states
:
List
[
Any
],
xs
:
paddle
.
Tensor
)
->
Tuple
[
paddle
.
Tensor
,
List
[
Any
]]:
def
batch_score
(
self
,
ys
:
paddle
.
Tensor
,
states
:
List
[
Any
],
xs
:
paddle
.
Tensor
)
->
Tuple
[
paddle
.
Tensor
,
List
[
Any
]]:
"""Score new token batch.
Args:
...
...
@@ -53,9 +68,5 @@ class LengthBonus(BatchScorerInterface):
and next state list for ys.
"""
return
(
paddle
.
to_tensor
([
1.0
],
place
=
xs
.
place
,
dtype
=
xs
.
dtype
).
expand
(
ys
.
shape
[
0
],
self
.
n
),
None
,
)
return
(
paddle
.
to_tensor
([
1.0
],
place
=
xs
.
place
,
dtype
=
xs
.
dtype
).
expand
(
ys
.
shape
[
0
],
self
.
n
),
None
,
)
deepspeech/decoders/scores/ngram.py
浏览文件 @
10a2da68
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Ngram lm implement."""
from
abc
import
ABC
import
kenlm
...
...
@@ -51,9 +63,8 @@ class Ngrambase(ABC):
self
.
lm
.
BaseScore
(
state
,
ys
,
out_state
)
scores
=
paddle
.
empty_like
(
next_token
,
dtype
=
x
.
dtype
)
for
i
,
j
in
enumerate
(
next_token
):
scores
[
i
]
=
self
.
lm
.
BaseScore
(
out_state
,
self
.
chardict
[
j
],
self
.
tmpkenlmstate
)
scores
[
i
]
=
self
.
lm
.
BaseScore
(
out_state
,
self
.
chardict
[
j
],
self
.
tmpkenlmstate
)
return
scores
,
out_state
...
...
@@ -74,7 +85,8 @@ class NgramFullScorer(Ngrambase, BatchScorerInterface):
and next state list for ys.
"""
return
self
.
score_partial_
(
y
,
paddle
.
to_tensor
(
range
(
self
.
charlen
)),
state
,
x
)
return
self
.
score_partial_
(
y
,
paddle
.
to_tensor
(
range
(
self
.
charlen
)),
state
,
x
)
class
NgramPartScorer
(
Ngrambase
,
PartialScorerInterface
):
...
...
deepspeech/decoders/scores/score_interface.py
浏览文件 @
10a2da68
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Scorer interface module."""
import
warnings
from
typing
import
Any
from
typing
import
List
from
typing
import
Tuple
import
paddle
import
warnings
class
ScorerInterface
:
...
...
@@ -37,7 +49,7 @@ class ScorerInterface:
"""
return
None
def
select_state
(
self
,
state
:
Any
,
i
:
int
,
new_id
:
int
=
None
)
->
Any
:
def
select_state
(
self
,
state
:
Any
,
i
:
int
,
new_id
:
int
=
None
)
->
Any
:
"""Select state with relative ids in the main beam search.
Args:
...
...
@@ -51,9 +63,8 @@ class ScorerInterface:
"""
return
None
if
state
is
None
else
state
[
i
]
def
score
(
self
,
y
:
paddle
.
Tensor
,
state
:
Any
,
x
:
paddle
.
Tensor
)
->
Tuple
[
paddle
.
Tensor
,
Any
]:
def
score
(
self
,
y
:
paddle
.
Tensor
,
state
:
Any
,
x
:
paddle
.
Tensor
)
->
Tuple
[
paddle
.
Tensor
,
Any
]:
"""Score new token (required).
Args:
...
...
@@ -96,9 +107,10 @@ class BatchScorerInterface(ScorerInterface):
"""
return
self
.
init_state
(
x
)
def
batch_score
(
self
,
ys
:
paddle
.
Tensor
,
states
:
List
[
Any
],
xs
:
paddle
.
Tensor
)
->
Tuple
[
paddle
.
Tensor
,
List
[
Any
]]:
def
batch_score
(
self
,
ys
:
paddle
.
Tensor
,
states
:
List
[
Any
],
xs
:
paddle
.
Tensor
)
->
Tuple
[
paddle
.
Tensor
,
List
[
Any
]]:
"""Score new token batch (required).
Args:
...
...
@@ -114,10 +126,8 @@ class BatchScorerInterface(ScorerInterface):
"""
warnings
.
warn
(
"{} batch score is implemented through for loop not parallelized"
.
format
(
self
.
__class__
.
__name__
)
)
"{} batch score is implemented through for loop not parallelized"
.
format
(
self
.
__class__
.
__name__
))
scores
=
list
()
outstates
=
list
()
for
i
,
(
y
,
state
,
x
)
in
enumerate
(
zip
(
ys
,
states
,
xs
)):
...
...
@@ -141,9 +151,11 @@ class PartialScorerInterface(ScorerInterface):
"""
def
score_partial
(
self
,
y
:
paddle
.
Tensor
,
next_tokens
:
paddle
.
Tensor
,
state
:
Any
,
x
:
paddle
.
Tensor
)
->
Tuple
[
paddle
.
Tensor
,
Any
]:
def
score_partial
(
self
,
y
:
paddle
.
Tensor
,
next_tokens
:
paddle
.
Tensor
,
state
:
Any
,
x
:
paddle
.
Tensor
)
->
Tuple
[
paddle
.
Tensor
,
Any
]:
"""Score new token (required).
Args:
...
...
@@ -165,12 +177,11 @@ class BatchPartialScorerInterface(BatchScorerInterface, PartialScorerInterface):
"""Batch partial scorer interface for beam search."""
def
batch_score_partial
(
self
,
ys
:
paddle
.
Tensor
,
next_tokens
:
paddle
.
Tensor
,
states
:
List
[
Any
],
xs
:
paddle
.
Tensor
,
)
->
Tuple
[
paddle
.
Tensor
,
Any
]:
self
,
ys
:
paddle
.
Tensor
,
next_tokens
:
paddle
.
Tensor
,
states
:
List
[
Any
],
xs
:
paddle
.
Tensor
,
)
->
Tuple
[
paddle
.
Tensor
,
Any
]:
"""Score new token (required).
Args:
...
...
deepspeech/decoders/utils.py
浏览文件 @
10a2da68
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__
=
[
"end_detect"
]
def
end_detect
(
ended_hyps
,
i
,
M
=
3
,
D_end
=
np
.
log
(
1
*
np
.
exp
(
-
10
))):
"""End detection.
...
...
@@ -20,11 +34,12 @@ def end_detect(ended_hyps, i, M=3, D_end=np.log(1 * np.exp(-10))):
for
m
in
range
(
M
):
# get ended_hyps with their length is i - m
hyp_length
=
i
-
m
hyps_same_length
=
[
x
for
x
in
ended_hyps
if
len
(
x
[
"yseq"
])
==
hyp_length
]
hyps_same_length
=
[
x
for
x
in
ended_hyps
if
len
(
x
[
"yseq"
])
==
hyp_length
]
if
len
(
hyps_same_length
)
>
0
:
best_hyp_same_length
=
sorted
(
hyps_same_length
,
key
=
lambda
x
:
x
[
"score"
],
reverse
=
True
)[
0
]
hyps_same_length
,
key
=
lambda
x
:
x
[
"score"
],
reverse
=
True
)[
0
]
if
best_hyp_same_length
[
"score"
]
-
best_hyp
[
"score"
]
<
D_end
:
count
+=
1
...
...
deepspeech/modules/ctc.py
浏览文件 @
10a2da68
...
...
@@ -125,7 +125,7 @@ class CTCDecoderBase(nn.Layer):
class
CTCDecoder
(
CTCDecoderBase
):
def
__init__
(
self
,
*
args
,
**
kwargs
):
def
__init__
(
self
,
*
args
,
**
kwargs
):
super
().
__init__
(
*
args
,
**
kwargs
)
# CTCDecoder LM Score handle
self
.
_ext_scorer
=
None
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录