Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
ddf184be
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
12 个月 前同步成功
通知
204
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
ddf184be
编写于
1月 06, 2022
作者:
B
billishyahao
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix some typos
上级
3568bb62
变更
14
隐藏空白更改
内联
并排
Showing
14 changed file
with
24 addition
and
24 deletion
+24
-24
examples/other/1xt2x/src_deepspeech2x/models/ds2/deepspeech2.py
...es/other/1xt2x/src_deepspeech2x/models/ds2/deepspeech2.py
+2
-2
paddlespeech/s2t/decoders/ctcdecoder/scorer_deprecated.py
paddlespeech/s2t/decoders/ctcdecoder/scorer_deprecated.py
+1
-1
paddlespeech/s2t/decoders/ctcdecoder/swig/ctc_beam_search_decoder.cpp
.../s2t/decoders/ctcdecoder/swig/ctc_beam_search_decoder.cpp
+1
-1
paddlespeech/s2t/decoders/ctcdecoder/swig/decoder_utils.cpp
paddlespeech/s2t/decoders/ctcdecoder/swig/decoder_utils.cpp
+1
-1
paddlespeech/s2t/decoders/ctcdecoder/swig/scorer.cpp
paddlespeech/s2t/decoders/ctcdecoder/swig/scorer.cpp
+1
-1
paddlespeech/s2t/decoders/scorers/ctc.py
paddlespeech/s2t/decoders/scorers/ctc.py
+1
-1
paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py
paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py
+2
-2
paddlespeech/s2t/models/ds2/deepspeech2.py
paddlespeech/s2t/models/ds2/deepspeech2.py
+2
-2
paddlespeech/s2t/models/ds2_online/deepspeech2.py
paddlespeech/s2t/models/ds2_online/deepspeech2.py
+2
-2
paddlespeech/s2t/models/u2/u2.py
paddlespeech/s2t/models/u2/u2.py
+2
-2
paddlespeech/s2t/models/u2_st/u2_st.py
paddlespeech/s2t/models/u2_st/u2_st.py
+2
-2
paddlespeech/s2t/modules/ctc.py
paddlespeech/s2t/modules/ctc.py
+4
-4
paddlespeech/s2t/modules/mask.py
paddlespeech/s2t/modules/mask.py
+1
-1
paddlespeech/s2t/utils/dynamic_import.py
paddlespeech/s2t/utils/dynamic_import.py
+2
-2
未找到文件。
examples/other/1xt2x/src_deepspeech2x/models/ds2/deepspeech2.py
浏览文件 @
ddf184be
...
...
@@ -149,13 +149,13 @@ class DeepSpeech2Model(nn.Layer):
"""Compute Model loss
Args:
audio (Ten
os
r): [B, T, D]
audio (Ten
so
r): [B, T, D]
audio_len (Tensor): [B]
text (Tensor): [B, U]
text_len (Tensor): [B]
Returns:
loss (Ten
os
r): [1]
loss (Ten
so
r): [1]
"""
eouts
,
eouts_len
=
self
.
encoder
(
audio
,
audio_len
)
loss
=
self
.
decoder
(
eouts
,
eouts_len
,
text
,
text_len
)
...
...
paddlespeech/s2t/decoders/ctcdecoder/scorer_deprecated.py
浏览文件 @
ddf184be
...
...
@@ -62,7 +62,7 @@ class Scorer(object):
"""Evaluation function, gathering all the different scores
and return the final one.
:param sentence: The input sentence for evalu
t
ation
:param sentence: The input sentence for evaluation
:type sentence: str
:param log: Whether return the score in log representation.
:type log: bool
...
...
paddlespeech/s2t/decoders/ctcdecoder/swig/ctc_beam_search_decoder.cpp
浏览文件 @
ddf184be
...
...
@@ -183,7 +183,7 @@ std::vector<std::pair<double, std::string>> ctc_beam_search_decoder(
std
::
sort
(
prefixes
.
begin
(),
prefixes
.
begin
()
+
num_prefixes
,
prefix_compare
);
// compute aproximate ctc score as the return score, without affecting the
// compute ap
p
roximate ctc score as the return score, without affecting the
// return order of decoding result. To delete when decoder gets stable.
for
(
size_t
i
=
0
;
i
<
beam_size
&&
i
<
prefixes
.
size
();
++
i
)
{
double
approx_ctc
=
prefixes
[
i
]
->
score
;
...
...
paddlespeech/s2t/decoders/ctcdecoder/swig/decoder_utils.cpp
浏览文件 @
ddf184be
...
...
@@ -26,7 +26,7 @@ std::vector<std::pair<size_t, float>> get_pruned_log_probs(
for
(
size_t
i
=
0
;
i
<
prob_step
.
size
();
++
i
)
{
prob_idx
.
push_back
(
std
::
pair
<
int
,
double
>
(
i
,
prob_step
[
i
]));
}
// pruning of v
aco
bulary
// pruning of v
oca
bulary
size_t
cutoff_len
=
prob_step
.
size
();
if
(
cutoff_prob
<
1.0
||
cutoff_top_n
<
cutoff_len
)
{
std
::
sort
(
prob_idx
.
begin
(),
...
...
paddlespeech/s2t/decoders/ctcdecoder/swig/scorer.cpp
浏览文件 @
ddf184be
...
...
@@ -223,7 +223,7 @@ void Scorer::fill_dictionary(bool add_space) {
* This gets rid of "epsilon" transitions in the FST.
* These are transitions that don't require a string input to be taken.
* Getting rid of them is necessary to make the FST determinis
it
c, but
* Getting rid of them is necessary to make the FST determinis
ti
c, but
* can greatly increase the size of the FST
*/
fst
::
RmEpsilon
(
&
dictionary
);
...
...
paddlespeech/s2t/decoders/scorers/ctc.py
浏览文件 @
ddf184be
...
...
@@ -154,7 +154,7 @@ class CTCPrefixScorer(BatchPartialScorerInterface):
Args:
state: The states of hyps
Returns: exteded state
Returns: exte
n
ded state
"""
new_state
=
[]
...
...
paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py
浏览文件 @
ddf184be
...
...
@@ -11,7 +11,7 @@ class CTCPrefixScorePD():
which is based on Algorithm 2 in WATANABE et al.
"HYBRID CTC/ATTENTION ARCHITECTURE FOR END-TO-END SPEECH RECOGNITION,"
but extended to efficiently compute the label probablities for multiple
but extended to efficiently compute the label probab
i
lities for multiple
hypotheses simultaneously
See also Seki et al. "Vectorized Beam Search for CTC-Attention-Based
Speech Recognition," In INTERSPEECH (pp. 3825-3829), 2019.
...
...
@@ -272,7 +272,7 @@ class CTCPrefixScore():
which is based on Algorithm 2 in WATANABE et al.
"HYBRID CTC/ATTENTION ARCHITECTURE FOR END-TO-END SPEECH RECOGNITION,"
but extended to efficiently compute the probablities of multiple labels
but extended to efficiently compute the probab
i
lities of multiple labels
simultaneously
"""
...
...
paddlespeech/s2t/models/ds2/deepspeech2.py
浏览文件 @
ddf184be
...
...
@@ -151,13 +151,13 @@ class DeepSpeech2Model(nn.Layer):
"""Compute Model loss
Args:
audio (Ten
osr
): [B, T, D]
audio (Ten
sors
): [B, T, D]
audio_len (Tensor): [B]
text (Tensor): [B, U]
text_len (Tensor): [B]
Returns:
loss (Ten
os
r): [1]
loss (Ten
so
r): [1]
"""
eouts
,
eouts_len
=
self
.
encoder
(
audio
,
audio_len
)
loss
=
self
.
decoder
(
eouts
,
eouts_len
,
text
,
text_len
)
...
...
paddlespeech/s2t/models/ds2_online/deepspeech2.py
浏览文件 @
ddf184be
...
...
@@ -279,13 +279,13 @@ class DeepSpeech2ModelOnline(nn.Layer):
"""Compute Model loss
Args:
audio (Ten
os
r): [B, T, D]
audio (Ten
so
r): [B, T, D]
audio_len (Tensor): [B]
text (Tensor): [B, U]
text_len (Tensor): [B]
Returns:
loss (Ten
os
r): [1]
loss (Ten
so
r): [1]
"""
eouts
,
eouts_len
,
final_state_h_box
,
final_state_c_box
=
self
.
encoder
(
audio
,
audio_len
,
None
,
None
)
...
...
paddlespeech/s2t/models/u2/u2.py
浏览文件 @
ddf184be
...
...
@@ -680,8 +680,8 @@ class U2BaseModel(ASRInterface, nn.Layer):
"""u2 decoding.
Args:
feats (Ten
os
r): audio features, (B, T, D)
feats_lengths (Ten
os
r): (B)
feats (Ten
so
r): audio features, (B, T, D)
feats_lengths (Ten
so
r): (B)
text_feature (TextFeaturizer): text feature object.
decoding_method (str): decoding mode, e.g.
'attention', 'ctc_greedy_search',
...
...
paddlespeech/s2t/models/u2_st/u2_st.py
浏览文件 @
ddf184be
...
...
@@ -478,8 +478,8 @@ class U2STBaseModel(nn.Layer):
"""u2 decoding.
Args:
feats (Ten
os
r): audio features, (B, T, D)
feats_lengths (Ten
os
r): (B)
feats (Ten
so
r): audio features, (B, T, D)
feats_lengths (Ten
so
r): (B)
text_feature (TextFeaturizer): text feature object.
decoding_method (str): decoding mode, e.g.
'fullsentence',
...
...
paddlespeech/s2t/modules/ctc.py
浏览文件 @
ddf184be
...
...
@@ -81,10 +81,10 @@ class CTCDecoderBase(nn.Layer):
Args:
hs_pad (Tensor): batch of padded hidden state sequences (B, Tmax, D)
hlens (Tensor): batch of lengths of hidden state sequences (B)
ys_pad (Ten
os
r): batch of padded character id sequence tensor (B, Lmax)
ys_pad (Ten
so
r): batch of padded character id sequence tensor (B, Lmax)
ys_lens (Tensor): batch of lengths of character sequence (B)
Returns:
loss (Ten
os
r): ctc loss value, scalar.
loss (Ten
so
r): ctc loss value, scalar.
"""
logits
=
self
.
ctc_lo
(
self
.
dropout
(
hs_pad
))
loss
=
self
.
criterion
(
logits
,
ys_pad
,
hlens
,
ys_lens
)
...
...
@@ -252,8 +252,8 @@ class CTCDecoder(CTCDecoderBase):
"""ctc decoding with probs.
Args:
probs (Ten
os
r): activation after softmax
logits_lens (Ten
os
r): audio output lens
probs (Ten
so
r): activation after softmax
logits_lens (Ten
so
r): audio output lens
vocab_list ([type]): [description]
decoding_method ([type]): [description]
lang_model_path ([type]): [description]
...
...
paddlespeech/s2t/modules/mask.py
浏览文件 @
ddf184be
...
...
@@ -54,7 +54,7 @@ def make_pad_mask(lengths: paddle.Tensor) -> paddle.Tensor:
[0, 0, 0, 1, 1],
[0, 0, 1, 1, 1]]
"""
# (TODO: Hui Zhang): jit not support Ten
os
r.dim() and Tensor.ndim
# (TODO: Hui Zhang): jit not support Ten
so
r.dim() and Tensor.ndim
# assert lengths.dim() == 1
batch_size
=
int
(
lengths
.
shape
[
0
])
max_len
=
int
(
lengths
.
max
())
...
...
paddlespeech/s2t/utils/dynamic_import.py
浏览文件 @
ddf184be
...
...
@@ -57,7 +57,7 @@ def filter_valid_args(args: Dict[Text, Any], valid_keys: List[Text]):
return
new_args
def
filter_out_ten
os
r
(
args
:
Dict
[
Text
,
Any
]):
def
filter_out_ten
so
r
(
args
:
Dict
[
Text
,
Any
]):
return
{
key
:
val
for
key
,
val
in
args
.
items
()
if
not
has_tensor
(
val
)}
...
...
@@ -65,5 +65,5 @@ def instance_class(module_class, args: Dict[Text, Any]):
valid_keys
=
inspect
.
signature
(
module_class
).
parameters
.
keys
()
new_args
=
filter_valid_args
(
args
,
valid_keys
)
logger
.
info
(
f
"Instance:
{
module_class
.
__name__
}
{
filter_out_ten
os
r
(
new_args
)
}
."
)
f
"Instance:
{
module_class
.
__name__
}
{
filter_out_ten
so
r
(
new_args
)
}
."
)
return
module_class
(
**
new_args
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录