Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
42a5bdf8
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
42a5bdf8
编写于
7月 13, 2021
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
encoder offline export
上级
1bc4acfd
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
91 addition
and
11 deletion
+91
-11
deepspeech/exps/u2/model.py
deepspeech/exps/u2/model.py
+48
-11
deepspeech/modules/encoder.py
deepspeech/modules/encoder.py
+43
-0
未找到文件。
deepspeech/exps/u2/model.py
浏览文件 @
42a5bdf8
...
...
@@ -602,24 +602,61 @@ class U2Tester(U2Trainer):
infer_model
.
eval
()
#static_model = paddle.jit.to_static(infer_model., input_spec=input_spec)
decoder_max_time
=
100
encoder_max_time
=
None
encoder_model_size
=
256
logger
.
info
(
f
"subsampling_rate:
{
infer_model
.
subsampling_rate
}
"
)
logger
.
info
(
f
"right_context:
{
infer_model
.
right_context
}
"
)
logger
.
info
(
f
"sos_symbol:
{
infer_model
.
sos_symbol
}
"
)
logger
.
info
(
f
"eos_symbol:
{
infer_model
.
eos_symbol
}
"
)
logger
.
info
(
f
"model_size:
{
infer_model
.
encoder
.
output_size
}
"
)
encoder_model_size
=
infer_model
.
encoder
.
output_size
# export encoder
# speech (paddle.Tensor): [B, Tmax, D]
# speech_lengths (paddle.Tensor): [B]
# decoding_chunk_size (int, optional): chuck size. Defaults to -1.
# num_decoding_left_chunks (int, optional): nums chunks. Defaults to -1.
# simulate_streaming (bool, optional): streaming or not. Defaults to False.
#encoder hiddens (B, Tmax, D),
#encoder hiddens mask (B, 1, Tmax).
encoder_input_dim
=
80
static_model
=
paddle
.
jit
.
to_static
(
infer_model
.
forward_attention_decoder
,
infer_model
.
encoder
.
forward_export
,
input_spec
=
[
paddle
.
static
.
InputSpec
(
shape
=
[
1
,
decoder_max_time
],
dtype
=
'int32'
),
# tgt
paddle
.
static
.
InputSpec
(
shape
=
[
1
,
decoder_max_time
],
dtype
=
'bool'
),
# tgt_mask
paddle
.
static
.
InputSpec
(
shape
=
[
1
,
encoder_max_time
,
encoder_model_size
]
,
dtype
=
'float32'
),
# encoder_out
shape
=
[
1
,
None
,
encoder_input_dim
],
dtype
=
'float32'
),
# speech, [B, U]
paddle
.
static
.
InputSpec
(
shape
=
[
1
,
None
],
dtype
=
'bool'
),
# speech_mask, [B, U],
-
1
,
-
1
])
logger
.
info
(
f
"Export code:
{
static_model
.
main_program
}
"
)
logger
.
debug
(
f
"Export Code:
{
dir
(
static_model
)
}
"
)
logger
.
debug
(
f
"Export Porgram:
{
static_model
.
main_program
}
"
)
paddle
.
jit
.
save
(
static_model
,
self
.
args
.
export_path
)
# # export decoder
# decoder_max_time = 100
# encoder_max_time = None
# static_model = paddle.jit.to_static(
# infer_model.forward_attention_decoder,
# input_spec=[
# paddle.static.InputSpec(
# shape=[1, decoder_max_time], dtype='int32'), # tgt, [B, U]
# paddle.static.InputSpec(
# shape=[1, decoder_max_time],
# dtype='bool'), # tgt_mask, [B, U]
# paddle.static.InputSpec(
# shape=[1, encoder_max_time, encoder_model_size],
# dtype='float32'), # encoder_out, [B, T, D]
# ])
# logger.debug(f"Export Code: {static_model.code}")
# logger.debug(f"Export Porgram: {static_model.main_program}")
# paddle.jit.save(static_model, self.args.export_path)
def
run_export
(
self
):
try
:
self
.
export
()
...
...
deepspeech/modules/encoder.py
浏览文件 @
42a5bdf8
...
...
@@ -451,3 +451,46 @@ class ConformerEncoder(BaseEncoder):
normalize_before
=
normalize_before
,
concat_after
=
concat_after
)
for
_
in
range
(
num_blocks
)
])
def
forward_export
(
self
,
xs
:
paddle
.
Tensor
,
masks
:
paddle
.
Tensor
,
decoding_chunk_size
:
int
=
0
,
num_decoding_left_chunks
:
int
=-
1
,
)
->
Tuple
[
paddle
.
Tensor
,
paddle
.
Tensor
]:
"""Embed positions in tensor.
Args:
xs: padded input tensor (B, L, D)
masks: input length (B, L)
decoding_chunk_size: decoding chunk size for dynamic chunk
0: default for training, use random dynamic chunk.
<0: for decoding, use full chunk.
>0: for decoding, use fixed chunk size as set.
num_decoding_left_chunks: number of left chunks, this is for decoding,
the chunk size is decoding_chunk_size.
>=0: use num_decoding_left_chunks
<0: use all left chunks
Returns:
encoder output tensor, lens and mask
"""
masks
=
masks
.
unsqueeze
(
1
)
# (B, 1, L)
if
self
.
global_cmvn
is
not
None
:
xs
=
self
.
global_cmvn
(
xs
)
#TODO(Hui Zhang): self.embed(xs, masks, offset=0), stride_slice not support bool tensor
xs
,
pos_emb
,
masks
=
self
.
embed
(
xs
,
masks
.
astype
(
xs
.
dtype
),
offset
=
0
)
#TODO(Hui Zhang): remove mask.astype, stride_slice not support bool tensor
masks
=
masks
.
astype
(
paddle
.
bool
)
#TODO(Hui Zhang): mask_pad = ~masks
mask_pad
=
masks
.
logical_not
()
chunk_masks
=
masks
for
layer
in
self
.
encoders
:
xs
,
chunk_masks
,
_
=
layer
(
xs
,
chunk_masks
,
pos_emb
,
mask_pad
)
if
self
.
normalize_before
:
xs
=
self
.
after_norm
(
xs
)
# Here we assume the mask is not changed in encoder layers, so just
# return the masks before encoder layers, and the masks will be used
# for cross attention with decoder later
return
xs
,
masks
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录