Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
0a956894
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
0a956894
编写于
9月 20, 2022
作者:
T
tianhao zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
support bitransformer decoder
上级
455379b8
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
11 addition
and
12 deletion
+11
-12
paddlespeech/s2t/exps/u2/bin/test_wav.py
paddlespeech/s2t/exps/u2/bin/test_wav.py
+2
-2
paddlespeech/s2t/exps/u2/model.py
paddlespeech/s2t/exps/u2/model.py
+1
-1
paddlespeech/s2t/models/u2/u2.py
paddlespeech/s2t/models/u2/u2.py
+6
-6
paddlespeech/s2t/modules/decoder.py
paddlespeech/s2t/modules/decoder.py
+2
-3
未找到文件。
paddlespeech/s2t/exps/u2/bin/test_wav.py
浏览文件 @
0a956894
...
@@ -40,7 +40,7 @@ class U2Infer():
...
@@ -40,7 +40,7 @@ class U2Infer():
self
.
preprocess_conf
=
config
.
preprocess_config
self
.
preprocess_conf
=
config
.
preprocess_config
self
.
preprocess_args
=
{
"train"
:
False
}
self
.
preprocess_args
=
{
"train"
:
False
}
self
.
preprocessing
=
Transformation
(
self
.
preprocess_conf
)
self
.
preprocessing
=
Transformation
(
self
.
preprocess_conf
)
self
.
reverse_weight
=
getattr
(
config
.
model_conf
,
'reverse_weight'
,
0.0
)
self
.
text_feature
=
TextFeaturizer
(
self
.
text_feature
=
TextFeaturizer
(
unit_type
=
config
.
unit_type
,
unit_type
=
config
.
unit_type
,
vocab
=
config
.
vocab_filepath
,
vocab
=
config
.
vocab_filepath
,
...
@@ -90,7 +90,7 @@ class U2Infer():
...
@@ -90,7 +90,7 @@ class U2Infer():
decoding_chunk_size
=
decode_config
.
decoding_chunk_size
,
decoding_chunk_size
=
decode_config
.
decoding_chunk_size
,
num_decoding_left_chunks
=
decode_config
.
num_decoding_left_chunks
,
num_decoding_left_chunks
=
decode_config
.
num_decoding_left_chunks
,
simulate_streaming
=
decode_config
.
simulate_streaming
,
simulate_streaming
=
decode_config
.
simulate_streaming
,
reverse_weight
=
self
.
config
.
model_conf
.
reverse_weight
)
reverse_weight
=
self
.
reverse_weight
)
rsl
=
result_transcripts
[
0
][
0
]
rsl
=
result_transcripts
[
0
][
0
]
utt
=
Path
(
self
.
audio_file
).
name
utt
=
Path
(
self
.
audio_file
).
name
logger
.
info
(
f
"hyp:
{
utt
}
{
result_transcripts
[
0
][
0
]
}
"
)
logger
.
info
(
f
"hyp:
{
utt
}
{
result_transcripts
[
0
][
0
]
}
"
)
...
...
paddlespeech/s2t/exps/u2/model.py
浏览文件 @
0a956894
...
@@ -316,7 +316,7 @@ class U2Tester(U2Trainer):
...
@@ -316,7 +316,7 @@ class U2Tester(U2Trainer):
vocab
=
self
.
config
.
vocab_filepath
,
vocab
=
self
.
config
.
vocab_filepath
,
spm_model_prefix
=
self
.
config
.
spm_model_prefix
)
spm_model_prefix
=
self
.
config
.
spm_model_prefix
)
self
.
vocab_list
=
self
.
text_feature
.
vocab_list
self
.
vocab_list
=
self
.
text_feature
.
vocab_list
self
.
reverse_weight
=
getattr
(
config
,
'reverse_weight'
,
'0.0'
)
self
.
reverse_weight
=
getattr
(
config
.
model_conf
,
'reverse_weight'
,
0.0
)
def
id2token
(
self
,
texts
,
texts_len
,
text_feature
):
def
id2token
(
self
,
texts
,
texts_len
,
text_feature
):
""" ord() id to chr() chr """
""" ord() id to chr() chr """
...
...
paddlespeech/s2t/models/u2/u2.py
浏览文件 @
0a956894
...
@@ -689,24 +689,24 @@ class U2BaseModel(ASRInterface, nn.Layer):
...
@@ -689,24 +689,24 @@ class U2BaseModel(ASRInterface, nn.Layer):
"""
"""
return
self
.
ctc
.
log_softmax
(
xs
)
return
self
.
ctc
.
log_softmax
(
xs
)
@
jit
.
to_static
#
@jit.to_static
def
is_bidirectional_decoder
(
self
)
->
bool
:
def
is_bidirectional_decoder
(
self
)
->
bool
:
"""
"""
Returns:
Returns:
torch
.Tensor: decoder output
paddle
.Tensor: decoder output
"""
"""
if
hasattr
(
self
.
decoder
,
'right_decoder'
):
if
hasattr
(
self
.
decoder
,
'right_decoder'
):
return
True
return
True
else
:
else
:
return
False
return
False
@
jit
.
to_static
#
@jit.to_static
def
forward_attention_decoder
(
def
forward_attention_decoder
(
self
,
self
,
hyps
:
paddle
.
Tensor
,
hyps
:
paddle
.
Tensor
,
hyps_lens
:
paddle
.
Tensor
,
hyps_lens
:
paddle
.
Tensor
,
encoder_out
:
paddle
.
Tensor
,
encoder_out
:
paddle
.
Tensor
,
reverse_weight
:
float
=
0
,
)
->
paddle
.
Tensor
:
reverse_weight
:
float
=
0
.0
,
)
->
paddle
.
Tensor
:
""" Export interface for c++ call, forward decoder with multiple
""" Export interface for c++ call, forward decoder with multiple
hypothesis from ctc prefix beam search and one encoder output
hypothesis from ctc prefix beam search and one encoder output
Args:
Args:
...
@@ -783,7 +783,7 @@ class U2BaseModel(ASRInterface, nn.Layer):
...
@@ -783,7 +783,7 @@ class U2BaseModel(ASRInterface, nn.Layer):
# >>> tensor([[3, 2, 1],
# >>> tensor([[3, 2, 1],
# >>> [4, 8, 9],
# >>> [4, 8, 9],
# >>> [2, eos, eos]])
# >>> [2, eos, eos]])
r_hyps
=
torch
.
concat
([
hyps
[:,
0
:
1
],
r_hyps
],
axis
=
1
)
r_hyps
=
paddle
.
concat
([
hyps
[:,
0
:
1
],
r_hyps
],
axis
=
1
)
# >>> r_hyps
# >>> r_hyps
# >>> tensor([[sos, 3, 2, 1],
# >>> tensor([[sos, 3, 2, 1],
# >>> [sos, 4, 8, 9],
# >>> [sos, 4, 8, 9],
...
@@ -791,7 +791,7 @@ class U2BaseModel(ASRInterface, nn.Layer):
...
@@ -791,7 +791,7 @@ class U2BaseModel(ASRInterface, nn.Layer):
decoder_out
,
_
=
self
.
decoder
(
encoder_out
,
encoder_mask
,
hyps
,
decoder_out
,
_
=
self
.
decoder
(
encoder_out
,
encoder_mask
,
hyps
,
hyps_lens
,
r_hyps
,
reverse_weight
)
hyps_lens
,
r_hyps
,
reverse_weight
)
decoder_out
=
paddle
.
nn
.
functional
.
log_softmax
(
decoder_out
,
axis
=-
1
)
decoder_out
=
paddle
.
nn
.
functional
.
log_softmax
(
decoder_out
,
axis
=-
1
)
r_decoder_out
=
torch
.
nn
.
functional
.
log_softmax
(
r_decoder_out
,
dim
=-
1
)
r_decoder_out
=
paddle
.
nn
.
functional
.
log_softmax
(
r_decoder_out
,
axis
=-
1
)
return
decoder_out
,
r_decoder_out
return
decoder_out
,
r_decoder_out
@
paddle
.
no_grad
()
@
paddle
.
no_grad
()
...
...
paddlespeech/s2t/modules/decoder.py
浏览文件 @
0a956894
...
@@ -363,9 +363,8 @@ class BiTransformerDecoder(BatchScorerInterface, nn.Layer):
...
@@ -363,9 +363,8 @@ class BiTransformerDecoder(BatchScorerInterface, nn.Layer):
memory: encoded memory, float32 (batch, maxlen_in, feat)
memory: encoded memory, float32 (batch, maxlen_in, feat)
memory_mask: encoded memory mask, (batch, 1, maxlen_in)
memory_mask: encoded memory mask, (batch, 1, maxlen_in)
tgt: input token ids, int64 (batch, maxlen_out)
tgt: input token ids, int64 (batch, maxlen_out)
tgt_mask: input token mask, (batch, maxlen_out)
tgt_mask: input token mask, (batch, maxlen_out, maxlen_out)
dtype=torch.uint8 in PyTorch 1.2-
dtype=paddle.bool
dtype=torch.bool in PyTorch 1.2+ (include 1.2)
cache: cached output list of (batch, max_time_out-1, size)
cache: cached output list of (batch, max_time_out-1, size)
Returns:
Returns:
y, cache: NN output value and cache per `self.decoders`.
y, cache: NN output value and cache per `self.decoders`.
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录