Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
1fb23815
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
1fb23815
编写于
7月 05, 2021
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add export function for decoder
上级
660efceb
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
74 additions
and
2 deletions
+74
-2
deepspeech/exps/u2/model.py
deepspeech/exps/u2/model.py
+3
-2
deepspeech/models/u2.py
deepspeech/models/u2.py
+29
-0
deepspeech/modules/decoder.py
deepspeech/modules/decoder.py
+42
-0
未找到文件。
deepspeech/exps/u2/model.py
浏览文件 @
1fb23815
...
@@ -610,10 +610,11 @@ class U2Tester(U2Trainer):
...
@@ -610,10 +610,11 @@ class U2Tester(U2Trainer):
input_spec
=
[
input_spec
=
[
paddle
.
static
.
InputSpec
(
paddle
.
static
.
InputSpec
(
shape
=
[
1
,
decoder_max_time
],
dtype
=
'int32'
),
# tgt
shape
=
[
1
,
decoder_max_time
],
dtype
=
'int32'
),
# tgt
paddle
.
static
.
InputSpec
(
shape
=
[
1
],
dtype
=
'int32'
),
# tgt_len
paddle
.
static
.
InputSpec
(
shape
=
[
1
,
decoder_max_time
],
dtype
=
'bool'
),
# tgt_mask
paddle
.
static
.
InputSpec
(
paddle
.
static
.
InputSpec
(
shape
=
[
1
,
encoder_max_time
,
encoder_model_size
],
shape
=
[
1
,
encoder_max_time
,
encoder_model_size
],
dtype
=
'
in
t32'
),
# encoder_out
dtype
=
'
floa
t32'
),
# encoder_out
])
])
logger
.
info
(
f
"Export code:
{
static_model
}
"
)
logger
.
info
(
f
"Export code:
{
static_model
}
"
)
...
...
deepspeech/models/u2.py
浏览文件 @
1fb23815
...
@@ -927,3 +927,32 @@ class U2InferModel(U2Model):
...
@@ -927,3 +927,32 @@ class U2InferModel(U2Model):
decoding_chunk_size
=
decoding_chunk_size
,
decoding_chunk_size
=
decoding_chunk_size
,
num_decoding_left_chunks
=
num_decoding_left_chunks
,
num_decoding_left_chunks
=
num_decoding_left_chunks
,
simulate_streaming
=
simulate_streaming
)
simulate_streaming
=
simulate_streaming
)
def forward_attention_decoder(
        self,
        hyps: paddle.Tensor,
        hyps_masks: paddle.Tensor,
        encoder_out: paddle.Tensor, ) -> paddle.Tensor:
    """Export interface for C++ call: forward the attention decoder with
    multiple hypotheses from CTC prefix beam search and one encoder output.

    Args:
        hyps (paddle.Tensor): hyps from ctc prefix beam search, already
            padded with sos at the beginning, (B, U).
        hyps_masks (paddle.Tensor): mask over each hyp in hyps, (B, U).
            NOTE(review): the original doc called this "length of each hyp",
            but the (B, U) shape indicates a per-token boolean mask — confirm
            against the C++ caller.
        encoder_out (paddle.Tensor): corresponding encoder output, (B=1, T, D).

    Returns:
        paddle.Tensor: decoder log-probabilities after log_softmax over the
            vocabulary axis — (B, U, V) per the shape comment below
            (the original docstring said (B, V); verify which the caller
            expects).
    """
    # Only a single utterance's encoder output is supported.
    assert encoder_out.shape[0] == 1
    num_hyps = hyps.shape[0]
    assert hyps_masks.shape[0] == num_hyps
    # Broadcast the single encoder output to every hypothesis.
    # (torch equivalent: encoder_out.repeat(num_hyps, 1, 1))
    encoder_out = encoder_out.tile([num_hyps, 1, 1])
    # (B, 1, T): every encoder frame is valid for every hypothesis.
    encoder_mask = paddle.ones(
        [num_hyps, 1, encoder_out.shape[1]], dtype=paddle.bool)
    # (num_hyps, max_hyps_len, vocab_size)
    decoder_out, _ = self.decoder.export(encoder_out, encoder_mask, hyps,
                                         hyps_masks)
    # BUG FIX: paddle.nn.functional.log_softmax takes `axis`, not `dim`
    # (`dim` is the torch keyword and raises TypeError in paddle); the
    # sibling code in decoder.py already uses `axis=-1`.
    decoder_out = paddle.nn.functional.log_softmax(decoder_out, axis=-1)
    return decoder_out
deepspeech/modules/decoder.py
浏览文件 @
1fb23815
...
@@ -185,3 +185,45 @@ class TransformerDecoder(nn.Module):
...
@@ -185,3 +185,45 @@ class TransformerDecoder(nn.Module):
y
=
paddle
.
log_softmax
(
self
.
output_layer
(
y
),
axis
=-
1
)
y
=
paddle
.
log_softmax
(
self
.
output_layer
(
y
),
axis
=-
1
)
return
y
,
new_cache
return
y
,
new_cache
def export(
        self,
        memory: paddle.Tensor,
        memory_mask: paddle.Tensor,
        ys_in_pad: paddle.Tensor,
        ys_in_mask: paddle.Tensor, ) -> Tuple[paddle.Tensor, paddle.Tensor]:
    """Forward decoder (static-graph export path).

    Args:
        memory: encoded memory, float32 (batch, maxlen_in, feat)
        memory_mask: encoder memory mask, (batch, 1, maxlen_in)
        ys_in_pad: padded input token ids, int64 (batch, maxlen_out)
        ys_in_mask: input mask of this batch (batch, maxlen_out)
    Returns:
        (tuple): tuple containing:
            x: decoded token score before softmax
                (batch, maxlen_out, vocab_size) if use_output_layer is True
            olens: (batch, )
    """
    tokens = ys_in_pad
    # Expand the padding mask to (B, 1, L) so it can be combined with the
    # causal mask below.
    pad_mask = ys_in_mask.unsqueeze(1)
    # Causal (lower-triangular) mask over target positions: (1, L, L).
    causal_mask = subsequent_mask(pad_mask.shape[-1]).unsqueeze(0)
    # Combined target mask (B, L, L).
    # TODO(Hui Zhang): `&` is not supported for tensors here, so use
    # logical_and instead of `pad_mask & causal_mask`.
    combined_mask = pad_mask.logical_and(causal_mask)
    out, _ = self.embed(tokens)
    for decoder_layer in self.decoders:
        out, combined_mask, memory, memory_mask = decoder_layer(
            out, combined_mask, memory, memory_mask)
    if self.normalize_before:
        out = self.after_norm(out)
    if self.use_output_layer:
        out = self.output_layer(out)
    # TODO(Hui Zhang): reduce_sum does not support bool, so cast before
    # summing; this would otherwise simply be `combined_mask.sum(1)`.
    olens = combined_mask.astype(paddle.int).sum(1)
    return out, olens
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录