PaddlePaddle / DeepSpeech
Commit 00b2c1c8
Authored on Sep 21, 2022 by Hui Zhang
fix forward attention decoder caller
Parent: 309c8d70
Showing 3 changed files with 10 additions and 9 deletions (+10 -9)
paddlespeech/s2t/exps/u2/bin/test_wav.py    +1 -1
paddlespeech/s2t/models/u2/u2.py            +8 -7
paddlespeech/s2t/modules/decoder.py         +1 -1
paddlespeech/s2t/exps/u2/bin/test_wav.py
@@ -79,7 +79,7 @@ class U2Infer():
             ilen = paddle.to_tensor(feat.shape[0])
             xs = paddle.to_tensor(feat, dtype='float32').unsqueeze(0)
             decode_config = self.config.decode
-            logger.debug(f"decode cfg: {decode_config}")
+            logger.info(f"decode cfg: {decode_config}")
             result_transcripts = self.model.decode(
                 xs,
                 ilen,
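The context lines of this hunk show how U2Infer prepares the decode call: the frame count goes in as a scalar tensor and the feature matrix as a batch of one. A minimal sketch of that input preparation, assuming paddle and numpy are available and using a random array in place of the real fbank features (its 200 x 80 shape is purely illustrative):

import numpy as np
import paddle

# Stand-in for the fbank features the script extracts from a wav file
# (hypothetical shape: 200 frames x 80 filterbank bins).
feat = np.random.rand(200, 80).astype(np.float32)

# Mirror the hunk: length as a tensor, features as a (1, T, D) batch.
ilen = paddle.to_tensor(feat.shape[0])
xs = paddle.to_tensor(feat, dtype='float32').unsqueeze(0)

print(ilen.item(), xs.shape)  # 200 [1, 200, 80]

These two tensors are what the surrounding lines pass to self.model.decode(xs, ilen, ...), together with the decode config that the changed line now logs.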
paddlespeech/s2t/models/u2/u2.py
@@ -565,18 +565,18 @@ class U2BaseModel(ASRInterface, nn.Layer):
             [len(hyp[0]) for hyp in hyps], place=device,
             dtype=paddle.long)  # (beam_size,)
         hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id)
-        logger.debug(
+        logger.info(
             f"hyps pad: {hyps_pad} {self.sos} {self.eos} {self.ignore_id}")
         hyps_lens = hyps_lens + 1  # Add <sos> at begining

         # ctc score in ln domain
         # (beam_size, max_hyps_len, vocab_size)
         decoder_out, r_decoder_out = self.forward_attention_decoder(hyps_pad, hyps_lens,
                                                                     encoder_out, reverse_weight)
-        decoder_out = decoder_out.numpy()
         # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a
         # conventional transformer decoder.
+        r_decoder_out = paddle.nn.functional.log_softmax(r_decoder_out, axis=-1)
         r_decoder_out = r_decoder_out.numpy()
         # Only use decoder score for rescoring
@@ -590,15 +590,16 @@ class U2BaseModel(ASRInterface, nn.Layer):
             # last decoder output token is `eos`, for laste decoder input token.
             score += decoder_out[i][len(hyp[0])][self.eos]
-            logger.debug(
-                f"hyp {i} len {len(hyp[0])} l2r score: {score} ctc_score: {hyp[1]} reverse_weight: {reverse_weight}")
+            logger.info(
+                f"hyp {i} len {len(hyp[0])} l2r rescore_score: {score} ctc_score: {hyp[1]}")
             if reverse_weight > 0:
                 r_score = 0.0
                 for j, w in enumerate(hyp[0]):
                     r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w]
                 r_score += r_decoder_out[i][len(hyp[0])][self.eos]
+                logger.info(f"hyp {i} len {len(hyp[0])} r2l score: {score} ctc_score: {hyp[1]} reverse_weight: {reverse_weight}")
                 score = score * (1 - reverse_weight) + r_score * reverse_weight
             # add ctc score (which in ln domain)
@@ -607,7 +608,7 @@ class U2BaseModel(ASRInterface, nn.Layer):
                 best_score = score
                 best_index = i
-        logger.debug(f"result: {hyps[best_index]}")
+        logger.info(f"result: {hyps[best_index]}")
         return hyps[best_index][0]

     @jit.to_static(property=True)
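Taken together, these hunks touch the attention-rescoring path: the left-to-right decoder log-probabilities score each CTC hypothesis, an optional right-to-left pass scores it in reverse, the two are blended with reverse_weight, and the CTC score is added on top. A compact plain-Python sketch of that scoring loop, assuming decoder_out and r_decoder_out are log-softmax-normalized (beam_size, max_len + 1, vocab_size) arrays, hyps is a list of (token_ids, ctc_score) pairs as in the surrounding code, and ctc_weight is a hypothetical weight not visible in this hunk:

def rescore(hyps, decoder_out, r_decoder_out, eos, reverse_weight=0.3, ctc_weight=0.5):
    """Pick the best hypothesis by mixing l2r, r2l and CTC scores (all in ln domain)."""
    best_score, best_index = -float("inf"), 0
    for i, (tokens, ctc_score) in enumerate(hyps):
        # left-to-right attention score, ending on <eos>
        score = sum(decoder_out[i][j][w] for j, w in enumerate(tokens))
        score += decoder_out[i][len(tokens)][eos]
        if reverse_weight > 0:
            # right-to-left pass reads the hypothesis backwards
            r_score = sum(r_decoder_out[i][len(tokens) - j - 1][w]
                          for j, w in enumerate(tokens))
            r_score += r_decoder_out[i][len(tokens)][eos]
            score = score * (1 - reverse_weight) + r_score * reverse_weight
        score += ctc_score * ctc_weight  # add CTC score, also in ln domain
        if score > best_score:
            best_score, best_index = score, i
    return hyps[best_index][0]

(The decoder.py hunk below initializes r_x to a 1-D zero tensor, so the caller still receives a real tensor when the right-to-left branch never runs.)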
paddlespeech/s2t/modules/decoder.py
@@ -343,7 +343,7 @@ class BiTransformerDecoder(BatchScorerInterface, nn.Layer):
         """
         l_x, _, olens = self.left_decoder(memory, memory_mask, ys_in_pad,
                                           ys_in_lens)
-        r_x = paddle.to_tensor(0.0)
+        r_x = paddle.zeros([1])
         if reverse_weight > 0.0:
             r_x, _, olens = self.right_decoder(memory, memory_mask, r_ys_in_pad,
                                                ys_in_lens)
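The single changed line swaps a 0-D scalar for a 1-D zero tensor as the default r_x when the right-to-left decoder is skipped. A minimal sketch, assuming paddle is installed, of why a 1-D placeholder is convenient for the caller's usual post-processing (the reverse_weight value here is just an example):

import paddle
import paddle.nn.functional as F

reverse_weight = 0.0  # example: right-to-left decoder disabled

# Mirror the hunk: start from a 1-D zero placeholder rather than a 0-D scalar.
r_x = paddle.zeros([1])
if reverse_weight > 0.0:
    pass  # the real forward would overwrite r_x with the right_decoder output

# The caller-side post-processing seen in u2.py still works on the placeholder:
r_x = F.log_softmax(r_x, axis=-1)  # log-softmax of a single logit -> 0.0
print(r_x.numpy())                 # [0.]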