PaddlePaddle / DeepSpeech
Commit b23bde8e
Authored May 25, 2022 by huangyuxin
tensor.shape => paddle.shape(tensor)
Parent: 4c09927f
Showing 10 changed files with 24 additions and 24 deletions (+24, -24)
paddlespeech/s2t/__init__.py                            +1 -1
paddlespeech/s2t/decoders/beam_search/beam_search.py    +5 -5
paddlespeech/s2t/decoders/scorers/ctc.py                +2 -2
paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py   +6 -6
paddlespeech/s2t/models/lm/transformer.py               +3 -3
paddlespeech/s2t/models/u2/u2.py                        +1 -1
paddlespeech/s2t/modules/decoder.py                     +1 -1
paddlespeech/s2t/modules/embedding.py                   +2 -2
paddlespeech/s2t/modules/encoder.py                     +1 -1
paddlespeech/s2t/utils/tensor_utils.py                  +2 -2
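The commit message is terse, so a word of orientation: the change replaces eager `tensor.shape` indexing with the `paddle.shape(tensor)` operator along the decoding path. The commit does not state its motivation, but the usual reason for this pattern is dynamic-to-static export (`paddle.jit.to_static` / `paddle.jit.save`), where `tensor.shape` can be frozen into the exported graph as constants, while `paddle.shape(tensor)` remains a runtime op that keeps tracking variable-length inputs. A minimal sketch of the difference in eager (dygraph) mode, illustrative only and not part of the commit:

import paddle

x = paddle.ones([4, 7])

# tensor.shape is a plain Python list, resolved eagerly; under
# dynamic-to-static tracing it may be captured as constants.
print(x.shape)           # [4, 7]
print(type(x.shape[1]))  # <class 'int'>

# paddle.shape(tensor) is an operator that returns the shape as a
# 1-D integer Tensor, evaluated at run time.
s = paddle.shape(x)
print(s[1])              # a Tensor holding 7, not a Python int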
paddlespeech/s2t/__init__.py
@@ -200,7 +200,7 @@ if not hasattr(paddle.Tensor, 'view'):

 def view_as(xs: paddle.Tensor, ys: paddle.Tensor) -> paddle.Tensor:
-    return xs.reshape(ys.shape)
+    return xs.reshape(paddle.shape(ys))

 if not hasattr(paddle.Tensor, 'view_as'):
paddlespeech/s2t/decoders/beam_search/beam_search.py
@@ -231,7 +231,7 @@ class BeamSearch(paddle.nn.Layer):
         """
         # no pre beam performed, `ids` equal to `weighted_scores`
-        if weighted_scores.shape[0] == ids.shape[0]:
+        if paddle.shape(weighted_scores)[0] == paddle.shape(ids)[0]:
             top_ids = weighted_scores.topk(self.beam_size)[1]  # index in n_vocab
             return top_ids, top_ids
@@ -370,13 +370,13 @@ class BeamSearch(paddle.nn.Layer):
         """
         # set length bounds
         if maxlenratio == 0:
-            maxlen = x.shape[0]
+            maxlen = paddle.shape(x)[0]
         elif maxlenratio < 0:
             maxlen = -1 * int(maxlenratio)
         else:
-            maxlen = max(1, int(maxlenratio * x.shape[0]))
-        minlen = int(minlenratio * x.shape[0])
-        logger.info("decoder input length: " + str(x.shape[0]))
+            maxlen = max(1, int(maxlenratio * paddle.shape(x)[0]))
+        minlen = int(minlenratio * paddle.shape(x)[0])
+        logger.info("decoder input length: " + str(paddle.shape(x)[0]))
         logger.info("max output length: " + str(maxlen))
         logger.info("min output length: " + str(minlen))
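The second hunk computes the decoding length bounds from ratios of the encoder output length: maxlenratio == 0 means 'use the full input length', and a negative maxlenratio is taken as an absolute length. A standalone sketch of that logic with hypothetical values, illustrative only and not part of the commit:

import paddle

x = paddle.ones([83, 256])  # hypothetical encoder output (T, D)
maxlenratio, minlenratio = 0.5, 0.1

T = int(paddle.shape(x)[0])  # int() collapses the 1-element shape Tensor
if maxlenratio == 0:
    maxlen = T
elif maxlenratio < 0:
    maxlen = -1 * int(maxlenratio)
else:
    maxlen = max(1, int(maxlenratio * T))
minlen = int(minlenratio * T)
print(maxlen, minlen)  # 41 8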
paddlespeech/s2t/decoders/scorers/ctc.py
@@ -69,7 +69,7 @@ class CTCPrefixScorer(BatchPartialScorerInterface):
             return sc[i], st[i]
         else:  # for CTCPrefixScorePD (need new_id > 0)
             r, log_psi, f_min, f_max, scoring_idmap = state
-            s = log_psi[i, new_id].expand(log_psi.shape[1])
+            s = log_psi[i, new_id].expand(paddle.shape(log_psi)[1])
             if scoring_idmap is not None:
                 return r[:, :, i, scoring_idmap[i, new_id]], s, f_min, f_max
             else:
@@ -107,7 +107,7 @@ class CTCPrefixScorer(BatchPartialScorerInterface):
         """
         logp = self.ctc.log_softmax(x.unsqueeze(0))  # assuming batch_size = 1
-        xlen = paddle.to_tensor([logp.shape[1]])
+        xlen = paddle.to_tensor([paddle.shape(logp)[1]])
         self.impl = CTCPrefixScorePD(logp, xlen, 0, self.eos)
         return None
paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py
@@ -33,9 +33,9 @@ class CTCPrefixScorePD():
         self.logzero = -10000000000.0
         self.blank = blank
         self.eos = eos
-        self.batch = x.shape[0]
-        self.input_length = x.shape[1]
-        self.odim = x.shape[2]
+        self.batch = paddle.shape(x)[0]
+        self.input_length = paddle.shape(x)[1]
+        self.odim = paddle.shape(x)[2]
         self.dtype = x.dtype
         # Pad the rest of posteriors in the batch
@@ -76,7 +76,7 @@ class CTCPrefixScorePD():
         last_ids = [yi[-1] for yi in y]  # last output label ids
         n_bh = len(last_ids)  # batch * hyps
         n_hyps = n_bh // self.batch  # assuming each utterance has the same # of hyps
-        self.scoring_num = scoring_ids.shape[-1] if scoring_ids is not None else 0
+        self.scoring_num = paddle.shape(scoring_ids)[-1] if scoring_ids is not None else 0
         # prepare state info
         if state is None:
             r_prev = paddle.full(
@@ -226,7 +226,7 @@ class CTCPrefixScorePD():
         if self.x.shape[1] < x.shape[1]:  # self.x (2,T,B,O); x (B,T,O)
             # Pad the rest of posteriors in the batch
             # TODO(takaaki-hori): need a better way without for-loops
-            xlens = [x.shape[1]]
+            xlens = [paddle.shape(x)[1]]
             for i, l in enumerate(xlens):
                 if l < self.input_length:
                     x[i, l:, :] = self.logzero
@@ -236,7 +236,7 @@ class CTCPrefixScorePD():
             xb = xn[:, :, self.blank].unsqueeze(2).expand(-1, -1, self.odim)
             self.x = paddle.stack([xn, xb])  # (2, T, B, O)
             self.x[:, :tmp_x.shape[1], :, :] = tmp_x
-            self.input_length = x.shape[1]
+            self.input_length = paddle.shape(x)[1]
         self.end_frames = paddle.to_tensor(xlens) - 1

     def extend_state(self, state):
paddlespeech/s2t/models/lm/transformer.py
@@ -90,7 +90,7 @@ class TransformerLM(nn.Layer, LMInterface, BatchScorerInterface):
     def _target_mask(self, ys_in_pad):
         ys_mask = ys_in_pad != 0
-        m = subsequent_mask(ys_mask.shape[-1]).unsqueeze(0)
+        m = subsequent_mask(paddle.shape(ys_mask)[-1]).unsqueeze(0)
         return ys_mask.unsqueeze(-2) & m

     def forward(self, x: paddle.Tensor, t: paddle.Tensor
@@ -112,7 +112,7 @@ class TransformerLM(nn.Layer, LMInterface, BatchScorerInterface):
             in perplexity: p(t)^{-n} = exp(-log p(t) / n)
         """
-        batch_size = x.shape[0]
+        batch_size = paddle.shape(x)[0]
         xm = x != 0
         xlen = xm.sum(axis=1)
         if self.embed_drop is not None:
@@ -122,7 +122,7 @@ class TransformerLM(nn.Layer, LMInterface, BatchScorerInterface):
         h, _ = self.encoder(emb, xlen)
         y = self.decoder(h)
         loss = F.cross_entropy(
-            y.view(-1, y.shape[-1]), t.view(-1), reduction="none")
+            y.view(-1, paddle.shape(y)[-1]), t.view(-1), reduction="none")
         mask = xm.to(loss.dtype)
         logp = loss * mask.view(-1)
         nll = logp.view(batch_size, -1).sum(-1)
paddlespeech/s2t/models/u2/u2.py
@@ -775,7 +775,7 @@ class U2DecodeModel(U2BaseModel):
         """
         self.eval()
         x = paddle.to_tensor(x).unsqueeze(0)
-        ilen = x.shape[1]
+        ilen = paddle.shape(x)[1]
         enc_output, _ = self._forward_encoder(x, ilen)
         return enc_output.squeeze(0)
paddlespeech/s2t/modules/decoder.py
@@ -242,7 +242,7 @@ class TransformerDecoder(BatchScorerInterface, nn.Layer):
         ]
         # batch decoding
-        ys_mask = subsequent_mask(ys.shape[-1]).unsqueeze(0)  # (B,L,L)
+        ys_mask = subsequent_mask(paddle.shape(ys)[-1]).unsqueeze(0)  # (B,L,L)
         xs_mask = make_xs_mask(xs).unsqueeze(1)  # (B,1,T)
         logp, states = self.forward_one_step(
             xs, xs_mask, ys, ys_mask, cache=batch_state)
paddlespeech/s2t/modules/embedding.py
@@ -115,7 +115,7 @@ class PositionalEncoding(nn.Layer, PositionalEncodingInterface):
         assert offset + x.shape[1] < self.max_len, "offset: {} + x.shape[1]: {} is larger than the max_len: {}".format(
             offset, x.shape[1], self.max_len)
-        #TODO(Hui Zhang): using T = x.shape[1], __getitem__ not support Tensor
+        #TODO(Hui Zhang): using T = paddle.shape(x)[1], __getitem__ not support Tensor
         pos_emb = self.pe[:, offset:offset + T]
         x = x * self.xscale + pos_emb
         return self.dropout(x), self.dropout(pos_emb)
@@ -165,6 +165,6 @@ class RelPositionalEncoding(PositionalEncoding):
             1] < self.max_len, "offset: {} + x.shape[1]: {} is larger than the max_len: {}".format(
                 offset, x.shape[1], self.max_len)
         x = x * self.xscale
-        #TODO(Hui Zhang): using x.shape[1], __getitem__ not support Tensor
+        #TODO(Hui Zhang): using paddle.shape(x)[1], __getitem__ not support Tensor
         pos_emb = self.pe[:, offset:offset + x.shape[1]]
         return self.dropout(x), self.dropout(pos_emb)
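Note that in both embedding hunks only the TODO comments change. Per the comments themselves, `__getitem__` did not support Tensor indices at the time, so the actual slice keeps a Python-int bound such as `x.shape[1]`. A small illustration of the constraint, with hypothetical sizes; the behaviour depends on the Paddle version:

import paddle

pe = paddle.ones([1, 5000, 256])  # hypothetical positional-encoding table
x = paddle.ones([1, 100, 256])
offset = 0

# Works: x.shape[1] is a Python int, so the slice bound is static.
pos_emb = pe[:, offset:offset + x.shape[1]]

# What the TODO wants but could not use at the time of the commit,
# because slicing by a Tensor bound was unsupported:
# pos_emb = pe[:, offset:offset + paddle.shape(x)[1]]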
paddlespeech/s2t/modules/encoder.py
@@ -218,7 +218,7 @@ class BaseEncoder(nn.Layer):
         assert xs.shape[0] == 1  # batch size must be one
         # tmp_masks is just for interface compatibility
         # TODO(Hui Zhang): stride_slice not support bool tensor
-        # tmp_masks = paddle.ones([1, xs.shape[1]], dtype=paddle.bool)
+        # tmp_masks = paddle.ones([1, paddle.shape(xs)[1]], dtype=paddle.bool)
         tmp_masks = paddle.ones([1, xs.shape[1]], dtype=paddle.int32)
         tmp_masks = tmp_masks.unsqueeze(1)  #[B=1, C=1, T]
paddlespeech/s2t/utils/tensor_utils.py
@@ -59,7 +59,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
         >>> b = paddle.ones(22, 300)
         >>> c = paddle.ones(15, 300)
         >>> pad_sequence([a, b, c]).shape
-        [25, 3, 300]
+        paddle.Tensor([25, 3, 300])

     Note:
         This function returns a Tensor of size ``T x B x *`` or ``B x T x *``
@@ -79,7 +79,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
     # assuming trailing dimensions and type of all the Tensors
     # in sequences are same and fetching those from sequences[0]
-    max_size = sequences[0].shape
+    max_size = paddle.shape(sequences[0])
     # (TODO Hui Zhang): slice not supprot `end==start`
     # trailing_dims = max_size[1:]
     trailing_dims = tuple(
         max_size[1:].numpy().tolist()) if sequences[0].ndim >= 2 else ()
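One consequence visible in the last hunk: `paddle.shape()` returns a Tensor, and slicing that Tensor still yields a Tensor, so code that needs plain Python ints (here, trailing dimensions used to build a new shape) converts explicitly via `.numpy().tolist()`. A minimal standalone sketch; the helper name is hypothetical:

import paddle

def trailing_dims_of(t: paddle.Tensor) -> tuple:
    # paddle.shape() yields a 1-D integer Tensor; convert the sliced
    # tail to a Python tuple before using it as a static shape.
    size = paddle.shape(t)
    return tuple(size[1:].numpy().tolist()) if t.ndim >= 2 else ()

a = paddle.ones([25, 300])
print(trailing_dims_of(a))  # (300,)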