Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
271112ca
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
206
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
271112ca
编写于
3月 13, 2023
作者:
小湉湉
提交者:
GitHub
3月 13, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix vits reduce_sum's input/output dtype, test=tts (#3028)
上级
34f2995b
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
23 addition
and
15 deletion
+23
-15
paddlespeech/t2s/models/vits/duration_predictor.py
paddlespeech/t2s/models/vits/duration_predictor.py
+6
-8
paddlespeech/t2s/models/vits/generator.py
paddlespeech/t2s/models/vits/generator.py
+8
-5
paddlespeech/t2s/models/vits/transform.py
paddlespeech/t2s/models/vits/transform.py
+9
-2
未找到文件。
paddlespeech/t2s/models/vits/duration_predictor.py
浏览文件 @
271112ca
...
...
@@ -155,12 +155,10 @@ class StochasticDurationPredictor(nn.Layer):
z_u
,
z1
=
paddle
.
split
(
z_q
,
[
1
,
1
],
1
)
u
=
F
.
sigmoid
(
z_u
)
*
x_mask
z0
=
(
w
-
u
)
*
x_mask
logdet_tot_q
+=
paddle
.
sum
(
(
F
.
log_sigmoid
(
z_u
)
+
F
.
log_sigmoid
(
-
z_u
))
*
x_mask
,
[
1
,
2
])
logq
=
(
paddle
.
sum
(
-
0.5
*
(
math
.
log
(
2
*
math
.
pi
)
+
(
e_q
**
2
))
*
x_mask
,
[
1
,
2
])
-
logdet_tot_q
)
tmp1
=
(
F
.
log_sigmoid
(
z_u
)
+
F
.
log_sigmoid
(
-
z_u
))
*
x_mask
logdet_tot_q
+=
paddle
.
sum
(
tmp1
,
[
1
,
2
])
tmp2
=
-
0.5
*
(
math
.
log
(
2
*
math
.
pi
)
+
(
e_q
**
2
))
*
x_mask
logq
=
(
paddle
.
sum
(
tmp2
,
[
1
,
2
])
-
logdet_tot_q
)
logdet_tot
=
0
z0
,
logdet
=
self
.
log_flow
(
z0
,
x_mask
)
logdet_tot
+=
logdet
...
...
@@ -168,8 +166,8 @@ class StochasticDurationPredictor(nn.Layer):
for
flow
in
self
.
flows
:
z
,
logdet
=
flow
(
z
,
x_mask
,
g
=
x
,
inverse
=
inverse
)
logdet_tot
=
logdet_tot
+
logdet
nll
=
(
paddle
.
sum
(
0.5
*
(
math
.
log
(
2
*
math
.
pi
)
+
(
z
**
2
))
*
x_mask
,
[
1
,
2
])
-
logdet_tot
)
tmp3
=
0.5
*
(
math
.
log
(
2
*
math
.
pi
)
+
(
z
**
2
))
*
x_mask
nll
=
(
paddle
.
sum
(
tmp3
,
[
1
,
2
])
-
logdet_tot
)
# (B,)
return
nll
+
logq
else
:
...
...
paddlespeech/t2s/models/vits/generator.py
浏览文件 @
271112ca
...
...
@@ -371,8 +371,9 @@ class VITSGenerator(nn.Layer):
# (B, H, T_text)
s_p_sq_r
=
paddle
.
exp
(
-
2
*
logs_p
)
# (B, 1, T_text)
tmp1
=
-
0.5
*
math
.
log
(
2
*
math
.
pi
)
-
logs_p
neg_x_ent_1
=
paddle
.
sum
(
-
0.5
*
math
.
log
(
2
*
math
.
pi
)
-
logs_p
,
tmp1
,
[
1
],
keepdim
=
True
,
)
# (B, T_feats, H) x (B, H, T_text) = (B, T_feats, T_text)
...
...
@@ -384,8 +385,9 @@ class VITSGenerator(nn.Layer):
z_p
.
transpose
([
0
,
2
,
1
]),
(
m_p
*
s_p_sq_r
),
)
# (B, 1, T_text)
tmp2
=
-
0.5
*
(
m_p
**
2
)
*
s_p_sq_r
neg_x_ent_4
=
paddle
.
sum
(
-
0.5
*
(
m_p
**
2
)
*
s_p_sq_r
,
tmp2
,
[
1
],
keepdim
=
True
,
)
# (B, T_feats, T_text)
...
...
@@ -403,7 +405,6 @@ class VITSGenerator(nn.Layer):
w
=
attn
.
sum
(
2
)
dur_nll
=
self
.
duration_predictor
(
x
,
x_mask
,
w
=
w
,
g
=
g
)
dur_nll
=
dur_nll
/
paddle
.
sum
(
x_mask
)
# expand the length to match with the feature sequence
# (B, T_feats, T_text) x (B, T_text, H) -> (B, H, T_feats)
m_p
=
paddle
.
matmul
(
attn
.
squeeze
(
1
),
...
...
@@ -511,8 +512,9 @@ class VITSGenerator(nn.Layer):
# (B, H, T_text)
s_p_sq_r
=
paddle
.
exp
(
-
2
*
logs_p
)
# (B, 1, T_text)
tmp3
=
-
0.5
*
math
.
log
(
2
*
math
.
pi
)
-
logs_p
neg_x_ent_1
=
paddle
.
sum
(
-
0.5
*
math
.
log
(
2
*
math
.
pi
)
-
logs_p
,
tmp3
,
[
1
],
keepdim
=
True
,
)
# (B, T_feats, H) x (B, H, T_text) = (B, T_feats, T_text)
...
...
@@ -524,8 +526,9 @@ class VITSGenerator(nn.Layer):
z_p
.
transpose
([
0
,
2
,
1
]),
(
m_p
*
s_p_sq_r
),
)
# (B, 1, T_text)
tmp4
=
-
0.5
*
(
m_p
**
2
)
*
s_p_sq_r
neg_x_ent_4
=
paddle
.
sum
(
-
0.5
*
(
m_p
**
2
)
*
s_p_sq_r
,
tmp4
,
[
1
],
keepdim
=
True
,
)
# (B, T_feats, T_text)
...
...
paddlespeech/t2s/models/vits/transform.py
浏览文件 @
271112ca
...
...
@@ -61,8 +61,12 @@ def piecewise_rational_quadratic_transform(
def
mask_preprocess
(
x
,
mask
):
# bins.dtype = int32
B
,
C
,
T
,
bins
=
paddle
.
shape
(
x
)
new_x
=
paddle
.
zeros
([
mask
.
sum
(),
bins
])
mask_int
=
paddle
.
cast
(
mask
,
dtype
=
'int64'
)
# paddle.sum 输入是 int32 或 bool 的时候,输出是 int64
# paddle.zeros (fill_constant) 的 shape 会被强制转成 int32 类型
new_x
=
paddle
.
zeros
([
paddle
.
sum
(
mask_int
),
bins
])
for
i
in
range
(
bins
):
new_x
[:,
i
]
=
x
[:,
:,
:,
i
][
mask
]
return
new_x
...
...
@@ -240,4 +244,7 @@ def rational_quadratic_spline(
def
_searchsorted
(
bin_locations
,
inputs
,
eps
=
1e-6
):
bin_locations
[...,
-
1
]
+=
eps
return
paddle
.
sum
(
inputs
[...,
None
]
>=
bin_locations
,
axis
=-
1
)
-
1
mask
=
inputs
[...,
None
]
>=
bin_locations
mask_int
=
paddle
.
cast
(
mask
,
'int64'
)
out
=
paddle
.
sum
(
mask_int
,
axis
=-
1
)
-
1
return
out
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录