Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
258956c9
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
1 年多 前同步成功
通知
207
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
258956c9
编写于
7月 01, 2021
作者:
Z
zhangyinhui
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Added some paddle.jit.save debug code cases
上级
43b52082
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
49 additions
and
46 deletions
+49
-46
deepspeech/models/u2.py
deepspeech/models/u2.py
+34
-36
deepspeech/modules/attention.py
deepspeech/modules/attention.py
+11
-7
deepspeech/modules/embedding.py
deepspeech/modules/embedding.py
+1
-1
deepspeech/modules/encoder.py
deepspeech/modules/encoder.py
+1
-1
deepspeech/modules/subsampling.py
deepspeech/modules/subsampling.py
+2
-1
未找到文件。
deepspeech/models/u2.py
浏览文件 @
258956c9
...
...
@@ -599,30 +599,30 @@ class U2BaseModel(nn.Module):
best_index
=
i
return
hyps
[
best_index
][
0
]
@
jit
.
export
def
subsampling_rate
(
self
)
->
int
:
""" Export interface for c++ call, return subsampling_rate of the
model
"""
return
self
.
encoder
.
embed
.
subsampling_rate
@
jit
.
export
def
right_context
(
self
)
->
int
:
""" Export interface for c++ call, return right_context of the model
"""
return
self
.
encoder
.
embed
.
right_context
@
jit
.
export
def
sos_symbol
(
self
)
->
int
:
""" Export interface for c++ call, return sos symbol id of the model
"""
return
self
.
sos
@
jit
.
export
def
eos_symbol
(
self
)
->
int
:
""" Export interface for c++ call, return eos symbol id of the model
"""
return
self
.
eos
#
@jit.export
#
def subsampling_rate(self) -> int:
#
""" Export interface for c++ call, return subsampling_rate of the
#
model
#
"""
#
return self.encoder.embed.subsampling_rate
#
@jit.export
#
def right_context(self) -> int:
#
""" Export interface for c++ call, return right_context of the model
#
"""
#
return self.encoder.embed.right_context
#
@jit.export
#
def sos_symbol(self) -> int:
#
""" Export interface for c++ call, return sos symbol id of the model
#
"""
#
return self.sos
#
@jit.export
#
def eos_symbol(self) -> int:
#
""" Export interface for c++ call, return eos symbol id of the model
#
"""
#
return self.eos
@
jit
.
export
def
forward_encoder_chunk
(
...
...
@@ -654,16 +654,16 @@ class U2BaseModel(nn.Module):
xs
,
offset
,
required_cache_size
,
subsampling_cache
,
elayers_output_cache
,
conformer_cnn_cache
)
@
jit
.
export
def
ctc_activation
(
self
,
xs
:
paddle
.
Tensor
)
->
paddle
.
Tensor
:
""" Export interface for c++ call, apply linear transform and log
softmax before ctc
Args:
xs (paddle.Tensor): encoder output
Returns:
paddle.Tensor: activation before ctc
"""
return
self
.
ctc
.
log_softmax
(
xs
)
#
@jit.export
#
def ctc_activation(self, xs: paddle.Tensor) -> paddle.Tensor:
#
""" Export interface for c++ call, apply linear transform and log
#
softmax before ctc
#
Args:
#
xs (paddle.Tensor): encoder output
#
Returns:
#
paddle.Tensor: activation before ctc
#
"""
#
return self.ctc.log_softmax(xs)
@
jit
.
export
def
forward_attention_decoder
(
...
...
@@ -878,12 +878,10 @@ class U2Model(U2BaseModel):
@
classmethod
def
from_pretrained
(
cls
,
dataloader
,
config
,
checkpoint_path
):
"""Build a DeepSpeech2Model model from a pretrained model.
Args:
dataloader (paddle.io.DataLoader): not used.
config (yacs.config.CfgNode): model configs
checkpoint_path (Path or str): the path of pretrained model checkpoint, without extension name
Returns:
DeepSpeech2Model: The model built from pretrained result.
"""
...
...
deepspeech/modules/attention.py
浏览文件 @
258956c9
...
...
@@ -70,10 +70,11 @@ class MultiHeadedAttention(nn.Layer):
paddle.Tensor: Transformed value tensor, size
(#batch, n_head, time2, d_k).
"""
n_batch
=
query
.
size
(
0
)
q
=
self
.
linear_q
(
query
).
view
(
n_batch
,
-
1
,
self
.
h
,
self
.
d_k
)
k
=
self
.
linear_k
(
key
).
view
(
n_batch
,
-
1
,
self
.
h
,
self
.
d_k
)
v
=
self
.
linear_v
(
value
).
view
(
n_batch
,
-
1
,
self
.
h
,
self
.
d_k
)
# n_batch = query.size(0)
n_batch
=
query
.
shape
[
0
]
q
=
self
.
linear_q
(
query
).
reshape
([
n_batch
,
-
1
,
self
.
h
,
self
.
d_k
])
k
=
self
.
linear_k
(
key
).
reshape
([
n_batch
,
-
1
,
self
.
h
,
self
.
d_k
])
v
=
self
.
linear_v
(
value
).
reshape
([
n_batch
,
-
1
,
self
.
h
,
self
.
d_k
])
q
=
q
.
transpose
([
0
,
2
,
1
,
3
])
# (batch, head, time1, d_k)
k
=
k
.
transpose
([
0
,
2
,
1
,
3
])
# (batch, head, time2, d_k)
v
=
v
.
transpose
([
0
,
2
,
1
,
3
])
# (batch, head, time2, d_k)
...
...
@@ -96,7 +97,8 @@ class MultiHeadedAttention(nn.Layer):
paddle.Tensor: Transformed value weighted
by the attention score, (#batch, time1, d_model).
"""
n_batch
=
value
.
size
(
0
)
# n_batch = value.size(0)
n_batch
=
value
.
shape
[
0
]
if
mask
is
not
None
:
mask
=
mask
.
unsqueeze
(
1
).
eq
(
0
)
# (batch, 1, *, time2)
scores
=
scores
.
masked_fill
(
mask
,
-
float
(
'inf'
))
...
...
@@ -205,8 +207,10 @@ class RelPositionMultiHeadedAttention(MultiHeadedAttention):
q
,
k
,
v
=
self
.
forward_qkv
(
query
,
key
,
value
)
q
=
q
.
transpose
([
0
,
2
,
1
,
3
])
# (batch, time1, head, d_k)
n_batch_pos
=
pos_emb
.
size
(
0
)
p
=
self
.
linear_pos
(
pos_emb
).
view
(
n_batch_pos
,
-
1
,
self
.
h
,
self
.
d_k
)
#n_batch_pos = pos_emb.size(0)
n_batch_pos
=
pos_emb
.
shape
[
0
]
# p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k)
p
=
self
.
linear_pos
(
pos_emb
).
reshape
([
n_batch_pos
,
-
1
,
self
.
h
,
self
.
d_k
])
p
=
p
.
transpose
([
0
,
2
,
1
,
3
])
# (batch, head, time1, d_k)
# (batch, head, time1, d_k)
...
...
deepspeech/modules/embedding.py
浏览文件 @
258956c9
...
...
@@ -114,7 +114,7 @@ class RelPositionalEncoding(PositionalEncoding):
paddle.Tensor: Encoded tensor (batch, time, `*`).
paddle.Tensor: Positional embedding tensor (1, time, `*`).
"""
assert
offset
+
x
.
size
(
1
)
<
self
.
max_len
assert
offset
+
x
.
shape
[
1
]
<
self
.
max_len
x
=
x
*
self
.
xscale
#TODO(Hui Zhang): using x.size(1), __getitem__ not support Tensor
pos_emb
=
self
.
pe
[:,
offset
:
offset
+
x
.
shape
[
1
]]
...
...
deepspeech/modules/encoder.py
浏览文件 @
258956c9
...
...
@@ -159,7 +159,7 @@ class BaseEncoder(nn.Layer):
if
self
.
global_cmvn
is
not
None
:
xs
=
self
.
global_cmvn
(
xs
)
#TODO(Hui Zhang): self.embed(xs, masks, offset=0), stride_slice not support bool tensor
xs
,
pos_emb
,
masks
=
self
.
embed
(
xs
,
masks
.
type_as
(
xs
),
offset
=
0
)
xs
,
pos_emb
,
masks
=
self
.
embed
(
xs
,
masks
.
astype
(
xs
.
dtype
),
offset
=
0
)
#TODO(Hui Zhang): remove mask.astype, stride_slice not support bool tensor
masks
=
masks
.
astype
(
paddle
.
bool
)
#TODO(Hui Zhang): mask_pad = ~masks
...
...
deepspeech/modules/subsampling.py
浏览文件 @
258956c9
...
...
@@ -128,7 +128,8 @@ class Conv2dSubsampling4(BaseSubsampling):
"""
x
=
x
.
unsqueeze
(
1
)
# (b, c=1, t, f)
x
=
self
.
conv
(
x
)
b
,
c
,
t
,
f
=
paddle
.
shape
(
x
)
#import pdb;pdb.set_trace()
b
,
c
,
t
,
f
=
x
.
shape
x
=
self
.
out
(
x
.
transpose
([
0
,
2
,
1
,
3
]).
reshape
([
b
,
t
,
c
*
f
]))
x
,
pos_emb
=
self
.
pos_enc
(
x
,
offset
)
return
x
,
pos_emb
,
x_mask
[:,
:,
:
-
2
:
2
][:,
:,
:
-
2
:
2
]
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录