Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
e1888f9a
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
206
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
e1888f9a
编写于
5月 24, 2022
作者:
H
huangyuxin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
remove size,test=asr
上级
1cdd41bd
变更
8
隐藏空白更改
内联
并排
Showing
8 changed files
with
27 additions
and
47 deletions
+27
-47
paddlespeech/s2t/__init__.py
paddlespeech/s2t/__init__.py
+0
-19
paddlespeech/s2t/decoders/beam_search/beam_search.py
paddlespeech/s2t/decoders/beam_search/beam_search.py
+5
-5
paddlespeech/s2t/decoders/scorers/ctc.py
paddlespeech/s2t/decoders/scorers/ctc.py
+2
-2
paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py
paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py
+13
-14
paddlespeech/s2t/models/u2/u2.py
paddlespeech/s2t/models/u2/u2.py
+1
-1
paddlespeech/s2t/modules/decoder.py
paddlespeech/s2t/modules/decoder.py
+1
-1
paddlespeech/s2t/modules/embedding.py
paddlespeech/s2t/modules/embedding.py
+2
-2
paddlespeech/s2t/utils/tensor_utils.py
paddlespeech/s2t/utils/tensor_utils.py
+3
-3
未找到文件。
paddlespeech/s2t/__init__.py
浏览文件 @
e1888f9a
...
...
@@ -189,25 +189,6 @@ if not hasattr(paddle.Tensor, 'contiguous'):
paddle
.
static
.
Variable
.
contiguous
=
contiguous
def
size
(
xs
:
paddle
.
Tensor
,
*
args
:
int
)
->
paddle
.
Tensor
:
nargs
=
len
(
args
)
assert
(
nargs
<=
1
)
s
=
paddle
.
shape
(
xs
)
if
nargs
==
1
:
return
s
[
args
[
0
]]
else
:
return
s
# `to_static` does not process the `size` property; some `paddle` APIs may depend on it.
logger
.
debug
(
"override size of paddle.Tensor "
"(`to_static` do not process `size` property, maybe some `paddle` api dependent on it), remove this when fixed!"
)
paddle
.
Tensor
.
size
=
size
paddle
.
static
.
Variable
.
size
=
size
def
view
(
xs
:
paddle
.
Tensor
,
*
args
:
int
)
->
paddle
.
Tensor
:
return
xs
.
reshape
(
args
)
...
...
paddlespeech/s2t/decoders/beam_search/beam_search.py
浏览文件 @
e1888f9a
...
...
@@ -194,7 +194,7 @@ class BeamSearch(paddle.nn.Layer):
Args:
hyp (Hypothesis): Hypothesis with prefix tokens to score
ids (paddle.Tensor): 1D tensor of new partial tokens to score,
ids (paddle.Tensor): 1D tensor of new partial tokens to score,
len(ids) < n_vocab
x (paddle.Tensor): Corresponding input feature, (T, D)
...
...
@@ -224,14 +224,14 @@ class BeamSearch(paddle.nn.Layer):
ids (paddle.Tensor): The partial token ids(Global) to compute topk.
Returns:
Tuple[paddle.Tensor, paddle.Tensor]:
Tuple[paddle.Tensor, paddle.Tensor]:
The topk full token ids and partial token ids.
Their shapes are `(self.beam_size,)`.
i.e. (global ids, global relative local ids).
"""
# no pre beam performed, `ids` equal to `weighted_scores`
if
weighted_scores
.
s
ize
(
0
)
==
ids
.
size
(
0
)
:
if
weighted_scores
.
s
hape
[
0
]
==
ids
.
shape
[
0
]
:
top_ids
=
weighted_scores
.
topk
(
self
.
beam_size
)[
1
]
# index in n_vocab
return
top_ids
,
top_ids
...
...
@@ -374,8 +374,8 @@ class BeamSearch(paddle.nn.Layer):
elif
maxlenratio
<
0
:
maxlen
=
-
1
*
int
(
maxlenratio
)
else
:
maxlen
=
max
(
1
,
int
(
maxlenratio
*
x
.
s
ize
(
0
)
))
minlen
=
int
(
minlenratio
*
x
.
s
ize
(
0
)
)
maxlen
=
max
(
1
,
int
(
maxlenratio
*
x
.
s
hape
[
0
]
))
minlen
=
int
(
minlenratio
*
x
.
s
hape
[
0
]
)
logger
.
info
(
"decoder input length: "
+
str
(
x
.
shape
[
0
]))
logger
.
info
(
"max output length: "
+
str
(
maxlen
))
logger
.
info
(
"min output length: "
+
str
(
minlen
))
...
...
paddlespeech/s2t/decoders/scorers/ctc.py
浏览文件 @
e1888f9a
...
...
@@ -69,7 +69,7 @@ class CTCPrefixScorer(BatchPartialScorerInterface):
return
sc
[
i
],
st
[
i
]
else
:
# for CTCPrefixScorePD (need new_id > 0)
r
,
log_psi
,
f_min
,
f_max
,
scoring_idmap
=
state
s
=
log_psi
[
i
,
new_id
].
expand
(
log_psi
.
s
ize
(
1
)
)
s
=
log_psi
[
i
,
new_id
].
expand
(
log_psi
.
s
hape
[
1
]
)
if
scoring_idmap
is
not
None
:
return
r
[:,
:,
i
,
scoring_idmap
[
i
,
new_id
]],
s
,
f_min
,
f_max
else
:
...
...
@@ -107,7 +107,7 @@ class CTCPrefixScorer(BatchPartialScorerInterface):
"""
logp
=
self
.
ctc
.
log_softmax
(
x
.
unsqueeze
(
0
))
# assuming batch_size = 1
xlen
=
paddle
.
to_tensor
([
logp
.
s
ize
(
1
)
])
xlen
=
paddle
.
to_tensor
([
logp
.
s
hape
[
1
]
])
self
.
impl
=
CTCPrefixScorePD
(
logp
,
xlen
,
0
,
self
.
eos
)
return
None
...
...
paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py
浏览文件 @
e1888f9a
...
...
@@ -33,9 +33,9 @@ class CTCPrefixScorePD():
self
.
logzero
=
-
10000000000.0
self
.
blank
=
blank
self
.
eos
=
eos
self
.
batch
=
x
.
s
ize
(
0
)
self
.
input_length
=
x
.
s
ize
(
1
)
self
.
odim
=
x
.
s
ize
(
2
)
self
.
batch
=
x
.
s
hape
[
0
]
self
.
input_length
=
x
.
s
hape
[
1
]
self
.
odim
=
x
.
s
hape
[
2
]
self
.
dtype
=
x
.
dtype
# Pad the rest of posteriors in the batch
...
...
@@ -76,8 +76,7 @@ class CTCPrefixScorePD():
last_ids
=
[
yi
[
-
1
]
for
yi
in
y
]
# last output label ids
n_bh
=
len
(
last_ids
)
# batch * hyps
n_hyps
=
n_bh
//
self
.
batch
# assuming each utterance has the same # of hyps
self
.
scoring_num
=
scoring_ids
.
size
(
-
1
)
if
scoring_ids
is
not
None
else
0
self
.
scoring_num
=
scoring_ids
.
shape
[
-
1
]
if
scoring_ids
is
not
None
else
0
# prepare state info
if
state
is
None
:
r_prev
=
paddle
.
full
(
...
...
@@ -153,7 +152,7 @@ class CTCPrefixScorePD():
# compute forward probabilities log(r_t^n(h)) and log(r_t^b(h))
for
t
in
range
(
start
,
end
):
rp
=
r
[
t
-
1
]
# (2 x BW x O')
rp
=
r
[
t
-
1
]
# (2 x BW x O')
rr
=
paddle
.
stack
([
rp
[
0
],
log_phi
[
t
-
1
],
rp
[
0
],
rp
[
1
]]).
view
(
2
,
2
,
n_bh
,
snum
)
# (2,2,BW,O')
r
[
t
]
=
paddle
.
logsumexp
(
rr
,
1
)
+
x_
[:,
t
]
...
...
@@ -227,7 +226,7 @@ class CTCPrefixScorePD():
if
self
.
x
.
shape
[
1
]
<
x
.
shape
[
1
]:
# self.x (2,T,B,O); x (B,T,O)
# Pad the rest of posteriors in the batch
# TODO(takaaki-hori): need a better way without for-loops
xlens
=
[
x
.
s
ize
(
1
)
]
xlens
=
[
x
.
s
hape
[
1
]
]
for
i
,
l
in
enumerate
(
xlens
):
if
l
<
self
.
input_length
:
x
[
i
,
l
:,
:]
=
self
.
logzero
...
...
@@ -237,7 +236,7 @@ class CTCPrefixScorePD():
xb
=
xn
[:,
:,
self
.
blank
].
unsqueeze
(
2
).
expand
(
-
1
,
-
1
,
self
.
odim
)
self
.
x
=
paddle
.
stack
([
xn
,
xb
])
# (2, T, B, O)
self
.
x
[:,
:
tmp_x
.
shape
[
1
],
:,
:]
=
tmp_x
self
.
input_length
=
x
.
s
ize
(
1
)
self
.
input_length
=
x
.
s
hape
[
1
]
self
.
end_frames
=
paddle
.
to_tensor
(
xlens
)
-
1
def
extend_state
(
self
,
state
):
...
...
@@ -318,16 +317,16 @@ class CTCPrefixScore():
r
[
0
,
0
]
=
xs
[
0
]
r
[
0
,
1
]
=
self
.
logzero
else
:
# Although the code does not exactly follow Algorithm 2,
# we don't have to change it because we can assume
# r_t(h)=0 for t < |h| in CTC forward computation
# Although the code does not exactly follow Algorithm 2,
# we don't have to change it because we can assume
# r_t(h)=0 for t < |h| in CTC forward computation
# (Note: we assume here that index t starts with 0).
# The purpose of this difference is to reduce the number of for-loops.
# https://github.com/espnet/espnet/pull/3655
# where we start to accumulate r_t(h) from t=|h|
# and iterate r_t(h) = (r_{t-1}(h) + ...) to T-1,
# where we start to accumulate r_t(h) from t=|h|
# and iterate r_t(h) = (r_{t-1}(h) + ...) to T-1,
# avoiding accumulating zeros for t=1~|h|-1.
# Thus, we need to set r_{|h|-1}(h) = 0,
# Thus, we need to set r_{|h|-1}(h) = 0,
# i.e., r[output_length-1] = logzero, for initialization.
# This is just for reducing the computation.
r
[
output_length
-
1
]
=
self
.
logzero
...
...
paddlespeech/s2t/models/u2/u2.py
浏览文件 @
e1888f9a
...
...
@@ -775,7 +775,7 @@ class U2DecodeModel(U2BaseModel):
"""
self
.
eval
()
x
=
paddle
.
to_tensor
(
x
).
unsqueeze
(
0
)
ilen
=
x
.
s
ize
(
1
)
ilen
=
x
.
s
hape
[
1
]
enc_output
,
_
=
self
.
_forward_encoder
(
x
,
ilen
)
return
enc_output
.
squeeze
(
0
)
...
...
paddlespeech/s2t/modules/decoder.py
浏览文件 @
e1888f9a
...
...
@@ -242,7 +242,7 @@ class TransformerDecoder(BatchScorerInterface, nn.Layer):
]
# batch decoding
ys_mask
=
subsequent_mask
(
ys
.
s
ize
(
-
1
)
).
unsqueeze
(
0
)
# (B,L,L)
ys_mask
=
subsequent_mask
(
ys
.
s
hape
[
-
1
]
).
unsqueeze
(
0
)
# (B,L,L)
xs_mask
=
make_xs_mask
(
xs
).
unsqueeze
(
1
)
# (B,1,T)
logp
,
states
=
self
.
forward_one_step
(
xs
,
xs_mask
,
ys
,
ys_mask
,
cache
=
batch_state
)
...
...
paddlespeech/s2t/modules/embedding.py
浏览文件 @
e1888f9a
...
...
@@ -115,7 +115,7 @@ class PositionalEncoding(nn.Layer, PositionalEncodingInterface):
assert
offset
+
x
.
shape
[
1
]
<
self
.
max_len
,
"offset: {} + x.shape[1]: {} is larger than the max_len: {}"
.
format
(
offset
,
x
.
shape
[
1
],
self
.
max_len
)
#TODO(Hui Zhang): using T = x.size(1), __getitem__ not support Tensor
#TODO(Hui Zhang): using T = x.shape[1], __getitem__ not support Tensor
pos_emb
=
self
.
pe
[:,
offset
:
offset
+
T
]
x
=
x
*
self
.
xscale
+
pos_emb
return
self
.
dropout
(
x
),
self
.
dropout
(
pos_emb
)
...
...
@@ -165,6 +165,6 @@ class RelPositionalEncoding(PositionalEncoding):
1
]
<
self
.
max_len
,
"offset: {} + x.shape[1]: {} is larger than the max_len: {}"
.
format
(
offset
,
x
.
shape
[
1
],
self
.
max_len
)
x
=
x
*
self
.
xscale
#TODO(Hui Zhang): using x.size(1), __getitem__ not support Tensor
#TODO(Hui Zhang): using x.shape[1], __getitem__ not support Tensor
pos_emb
=
self
.
pe
[:,
offset
:
offset
+
x
.
shape
[
1
]]
return
self
.
dropout
(
x
),
self
.
dropout
(
pos_emb
)
paddlespeech/s2t/utils/tensor_utils.py
浏览文件 @
e1888f9a
...
...
@@ -58,8 +58,8 @@ def pad_sequence(sequences: List[paddle.Tensor],
>>> a = paddle.ones(25, 300)
>>> b = paddle.ones(22, 300)
>>> c = paddle.ones(15, 300)
>>> pad_sequence([a, b, c]).size()
paddle.Tensor([25, 3, 300])
>>> pad_sequence([a, b, c]).shape
[25, 3, 300]
Note:
This function returns a Tensor of size ``T x B x *`` or ``B x T x *``
...
...
@@ -79,7 +79,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
# assuming trailing dimensions and type of all the Tensors
# in sequences are same and fetching those from sequences[0]
max_size
=
sequences
[
0
].
s
ize
()
max_size
=
sequences
[
0
].
s
hape
# (TODO Hui Zhang): slice does not support `end==start`
# trailing_dims = max_size[1:]
trailing_dims
=
tuple
(
max_size
[
1
:].
numpy
().
tolist
())
if
sequences
[
0
].
ndim
>=
2
else
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录