Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
797ca389
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
1 年多 前同步成功
通知
207
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
797ca389
编写于
9月 01, 2021
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
paddle support some bool op
上级
a463f940
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
25 addition
and
53 deletion
+25
-53
deepspeech/models/u2.py
deepspeech/models/u2.py
+3
-9
deepspeech/models/u2_st.py
deepspeech/models/u2_st.py
+2
-6
deepspeech/modules/attention.py
deepspeech/modules/attention.py
+2
-2
deepspeech/modules/decoder.py
deepspeech/modules/decoder.py
+2
-6
deepspeech/modules/encoder.py
deepspeech/modules/encoder.py
+1
-2
deepspeech/modules/loss.py
deepspeech/modules/loss.py
+4
-6
deepspeech/modules/mask.py
deepspeech/modules/mask.py
+4
-12
deepspeech/utils/tensor_utils.py
deepspeech/utils/tensor_utils.py
+2
-8
doc/src/reference.md
doc/src/reference.md
+3
-0
tests/mask_test.py
tests/mask_test.py
+2
-2
未找到文件。
deepspeech/models/u2.py
浏览文件 @
797ca389
...
...
@@ -162,10 +162,7 @@ class U2BaseModel(nn.Layer):
encoder_out
,
encoder_mask
=
self
.
encoder
(
speech
,
speech_lengths
)
encoder_time
=
time
.
time
()
-
start
#logger.debug(f"encoder time: {encoder_time}")
#TODO(Hui Zhang): sum not support bool type
#encoder_out_lens = encoder_mask.squeeze(1).sum(1) #[B, 1, T] -> [B]
encoder_out_lens
=
encoder_mask
.
squeeze
(
1
).
cast
(
paddle
.
int64
).
sum
(
1
)
#[B, 1, T] -> [B]
encoder_out_lens
=
encoder_mask
.
squeeze
(
1
).
sum
(
1
)
#[B, 1, T] -> [B]
# 2a. Attention-decoder branch
loss_att
=
None
...
...
@@ -320,8 +317,7 @@ class U2BaseModel(nn.Layer):
# 2. Decoder forward step by step
for
i
in
range
(
1
,
maxlen
+
1
):
# Stop if all batch and all beam produce eos
# TODO(Hui Zhang): if end_flag.sum() == running_size:
if
end_flag
.
cast
(
paddle
.
int64
).
sum
()
==
running_size
:
if
end_flag
.
sum
()
==
running_size
:
break
# 2.1 Forward decoder step
...
...
@@ -407,9 +403,7 @@ class U2BaseModel(nn.Layer):
speech
,
speech_lengths
,
decoding_chunk_size
,
num_decoding_left_chunks
,
simulate_streaming
)
maxlen
=
encoder_out
.
size
(
1
)
# (TODO Hui Zhang): bool no support reduce_sum
# encoder_out_lens = encoder_mask.squeeze(1).sum(1)
encoder_out_lens
=
encoder_mask
.
squeeze
(
1
).
astype
(
paddle
.
int
).
sum
(
1
)
encoder_out_lens
=
encoder_mask
.
squeeze
(
1
).
sum
(
1
)
ctc_probs
=
self
.
ctc
.
log_softmax
(
encoder_out
)
# (B, maxlen, vocab_size)
topk_prob
,
topk_index
=
ctc_probs
.
topk
(
1
,
axis
=
2
)
# (B, maxlen, 1)
...
...
deepspeech/models/u2_st.py
浏览文件 @
797ca389
...
...
@@ -163,10 +163,7 @@ class U2STBaseModel(nn.Layer):
encoder_out
,
encoder_mask
=
self
.
encoder
(
speech
,
speech_lengths
)
encoder_time
=
time
.
time
()
-
start
#logger.debug(f"encoder time: {encoder_time}")
#TODO(Hui Zhang): sum not support bool type
#encoder_out_lens = encoder_mask.squeeze(1).sum(1) #[B, 1, T] -> [B]
encoder_out_lens
=
encoder_mask
.
squeeze
(
1
).
cast
(
paddle
.
int64
).
sum
(
1
)
#[B, 1, T] -> [B]
encoder_out_lens
=
encoder_mask
.
squeeze
(
1
).
sum
(
1
)
#[B, 1, T] -> [B]
# 2a. ST-decoder branch
start
=
time
.
time
()
...
...
@@ -363,8 +360,7 @@ class U2STBaseModel(nn.Layer):
# 2. Decoder forward step by step
for
i
in
range
(
1
,
maxlen
+
1
):
# Stop if all batch and all beam produce eos
# TODO(Hui Zhang): if end_flag.sum() == running_size:
if
end_flag
.
cast
(
paddle
.
int64
).
sum
()
==
running_size
:
if
end_flag
.
sum
()
==
running_size
:
break
# 2.1 Forward decoder step
...
...
deepspeech/modules/attention.py
浏览文件 @
797ca389
...
...
@@ -109,8 +109,8 @@ class MultiHeadedAttention(nn.Layer):
p_attn
=
self
.
dropout
(
attn
)
x
=
paddle
.
matmul
(
p_attn
,
value
)
# (batch, head, time1, d_k)
x
=
x
.
transpose
([
0
,
2
,
1
,
3
]).
contiguous
().
view
(
n_batch
,
-
1
,
self
.
h
*
self
.
d_k
)
# (batch, time1, d_model)
x
=
x
.
transpose
([
0
,
2
,
1
,
3
]).
view
(
n_batch
,
-
1
,
self
.
h
*
self
.
d_k
)
# (batch, time1, d_model)
return
self
.
linear_out
(
x
)
# (batch, time1, d_model)
...
...
deepspeech/modules/decoder.py
浏览文件 @
797ca389
...
...
@@ -124,9 +124,7 @@ class TransformerDecoder(nn.Layer):
# m: (1, L, L)
m
=
subsequent_mask
(
tgt_mask
.
size
(
-
1
)).
unsqueeze
(
0
)
# tgt_mask: (B, L, L)
# TODO(Hui Zhang): not support & for tensor
# tgt_mask = tgt_mask & m
tgt_mask
=
tgt_mask
.
logical_and
(
m
)
tgt_mask
=
tgt_mask
&
m
x
,
_
=
self
.
embed
(
tgt
)
for
layer
in
self
.
decoders
:
...
...
@@ -137,9 +135,7 @@ class TransformerDecoder(nn.Layer):
if
self
.
use_output_layer
:
x
=
self
.
output_layer
(
x
)
# TODO(Hui Zhang): reduce_sum not support bool type
# olens = tgt_mask.sum(1)
olens
=
tgt_mask
.
astype
(
paddle
.
int
).
sum
(
1
)
olens
=
tgt_mask
.
sum
(
1
)
return
x
,
olens
def
forward_one_step
(
...
...
deepspeech/modules/encoder.py
浏览文件 @
797ca389
...
...
@@ -162,8 +162,7 @@ class BaseEncoder(nn.Layer):
xs
,
pos_emb
,
masks
=
self
.
embed
(
xs
,
masks
.
type_as
(
xs
),
offset
=
0
)
#TODO(Hui Zhang): remove mask.astype, stride_slice not support bool tensor
masks
=
masks
.
astype
(
paddle
.
bool
)
#TODO(Hui Zhang): mask_pad = ~masks
mask_pad
=
masks
.
logical_not
()
mask_pad
=
~
masks
chunk_masks
=
add_optional_chunk_mask
(
xs
,
masks
,
self
.
use_dynamic_chunk
,
self
.
use_dynamic_left_chunk
,
decoding_chunk_size
,
self
.
static_chunk_size
,
...
...
deepspeech/modules/loss.py
浏览文件 @
797ca389
...
...
@@ -124,9 +124,9 @@ class LabelSmoothingLoss(nn.Layer):
# use zeros_like instead of torch.no_grad() for true_dist,
# since no_grad() can not be exported by JIT
true_dist
=
paddle
.
full_like
(
x
,
self
.
smoothing
/
(
self
.
size
-
1
))
ignore
=
target
==
self
.
padding_idx
# (B,)
ignore
=
(
target
==
self
.
padding_idx
)
# (B,)
# target = target * (1 - ignore) # avoid -1 index
#
TODO(Hui Zhang):
target = target * (1 - ignore) # avoid -1 index
target
=
target
.
masked_fill
(
ignore
,
0
)
# avoid -1 index
# true_dist.scatter_(1, target.unsqueeze(1), self.confidence)
target_mask
=
F
.
one_hot
(
target
,
self
.
size
)
...
...
@@ -135,10 +135,8 @@ class LabelSmoothingLoss(nn.Layer):
kl
=
self
.
criterion
(
F
.
log_softmax
(
x
,
axis
=
1
),
true_dist
)
#TODO(Hui Zhang): sum not support bool type
#total = len(target) - int(ignore.sum())
total
=
len
(
target
)
-
int
(
ignore
.
type_as
(
target
).
sum
())
total
=
len
(
target
)
-
int
(
ignore
.
sum
())
denom
=
total
if
self
.
normalize_length
else
B
#numer = (kl * (1 - ignore)).sum()
#
TODO(Hui Zhang):
numer = (kl * (1 - ignore)).sum()
numer
=
kl
.
masked_fill
(
ignore
.
unsqueeze
(
1
),
0
).
sum
()
return
numer
/
denom
deepspeech/modules/mask.py
浏览文件 @
797ca389
...
...
@@ -69,8 +69,7 @@ def make_non_pad_mask(lengths: paddle.Tensor) -> paddle.Tensor:
[1, 1, 1, 0, 0],
[1, 1, 0, 0, 0]]
"""
#TODO(Hui Zhang): return ~make_pad_mask(lengths), not support ~
return
make_pad_mask
(
lengths
).
logical_not
()
return
~
make_pad_mask
(
lengths
)
def
subsequent_mask
(
size
:
int
)
->
paddle
.
Tensor
:
...
...
@@ -92,12 +91,7 @@ def subsequent_mask(size: int) -> paddle.Tensor:
[1, 1, 1]]
"""
ret
=
paddle
.
ones
([
size
,
size
],
dtype
=
paddle
.
bool
)
#TODO(Hui Zhang): tril not support bool
#return paddle.tril(ret)
ret
=
ret
.
astype
(
paddle
.
float
)
ret
=
paddle
.
tril
(
ret
)
ret
=
ret
.
astype
(
paddle
.
bool
)
return
ret
return
paddle
.
tril
(
ret
)
def
subsequent_chunk_mask
(
...
...
@@ -186,15 +180,13 @@ def add_optional_chunk_mask(xs: paddle.Tensor,
chunk_masks
=
subsequent_chunk_mask
(
xs
.
shape
[
1
],
chunk_size
,
num_left_chunks
)
# (L, L)
chunk_masks
=
chunk_masks
.
unsqueeze
(
0
)
# (1, L, L)
# chunk_masks = masks & chunk_masks # (B, L, L)
chunk_masks
=
masks
.
logical_and
(
chunk_masks
)
# (B, L, L)
chunk_masks
=
masks
&
chunk_masks
# (B, L, L)
elif
static_chunk_size
>
0
:
num_left_chunks
=
num_decoding_left_chunks
chunk_masks
=
subsequent_chunk_mask
(
xs
.
shape
[
1
],
static_chunk_size
,
num_left_chunks
)
# (L, L)
chunk_masks
=
chunk_masks
.
unsqueeze
(
0
)
# (1, L, L)
# chunk_masks = masks & chunk_masks # (B, L, L)
chunk_masks
=
masks
.
logical_and
(
chunk_masks
)
# (B, L, L)
chunk_masks
=
masks
&
chunk_masks
# (B, L, L)
else
:
chunk_masks
=
masks
return
chunk_masks
...
...
deepspeech/utils/tensor_utils.py
浏览文件 @
797ca389
...
...
@@ -168,13 +168,7 @@ def th_accuracy(pad_outputs: paddle.Tensor,
pad_pred
=
pad_outputs
.
view
(
pad_targets
.
size
(
0
),
pad_targets
.
size
(
1
),
pad_outputs
.
size
(
1
)).
argmax
(
2
)
mask
=
pad_targets
!=
ignore_label
#TODO(Hui Zhang): sum not support bool type
# numerator = paddle.sum(
# pad_pred.masked_select(mask) == pad_targets.masked_select(mask))
numerator
=
(
numerator
=
paddle
.
sum
(
pad_pred
.
masked_select
(
mask
)
==
pad_targets
.
masked_select
(
mask
))
numerator
=
paddle
.
sum
(
numerator
.
type_as
(
pad_targets
))
#TODO(Hui Zhang): sum not support bool type
# denominator = paddle.sum(mask)
denominator
=
paddle
.
sum
(
mask
.
type_as
(
pad_targets
))
denominator
=
paddle
.
sum
(
mask
)
return
float
(
numerator
)
/
float
(
denominator
)
doc/src/reference.md
浏览文件 @
797ca389
# Reference
*
[
delta
](
https://github.com/Delta-ML/delta.git
)
*
[
espnet
](
https://github.com/espnet/espnet.git
)
*
[
kaldi
](
https://github.com/kaldi-asr/kaldi.git
)
*
[
wenet
](
https://github.com/mobvoi/wenet
)
tests/mask_test.py
浏览文件 @
797ca389
...
...
@@ -37,13 +37,13 @@ class TestU2Model(unittest.TestCase):
def
test_make_non_pad_mask
(
self
):
res
=
make_non_pad_mask
(
self
.
lengths
)
res2
=
make_pad_mask
(
self
.
lengths
).
logical_not
(
)
res2
=
~
make_pad_mask
(
self
.
lengths
)
self
.
assertSequenceEqual
(
res
.
numpy
().
tolist
(),
self
.
masks
.
tolist
())
self
.
assertSequenceEqual
(
res
.
numpy
().
tolist
(),
res2
.
numpy
().
tolist
())
def
test_make_pad_mask
(
self
):
res
=
make_pad_mask
(
self
.
lengths
)
res1
=
make_non_pad_mask
(
self
.
lengths
).
logical_not
(
)
res1
=
~
make_non_pad_mask
(
self
.
lengths
)
self
.
assertSequenceEqual
(
res
.
numpy
().
tolist
(),
self
.
pad_masks
.
tolist
())
self
.
assertSequenceEqual
(
res
.
numpy
().
tolist
(),
res1
.
tolist
())
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录