Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Parakeet
提交
0fb927d1
P
Parakeet
项目概览
PaddlePaddle
/
Parakeet
通知
8
Star
3
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
19
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Parakeet
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
19
Issue
19
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
0fb927d1
编写于
2月 17, 2020
作者:
L
lifuchen
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'master' of upstream into add_readme
上级
0cca7a68
5b442aaa
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
55 addition
and
31 deletion
+55
-31
examples/deepvoice3/train.py
examples/deepvoice3/train.py
+7
-2
examples/deepvoice3/utils.py
examples/deepvoice3/utils.py
+2
-1
parakeet/models/deepvoice3/attention.py
parakeet/models/deepvoice3/attention.py
+17
-4
parakeet/models/deepvoice3/conv1dglu.py
parakeet/models/deepvoice3/conv1dglu.py
+1
-2
parakeet/models/deepvoice3/converter.py
parakeet/models/deepvoice3/converter.py
+18
-17
parakeet/models/deepvoice3/encoder.py
parakeet/models/deepvoice3/encoder.py
+1
-1
parakeet/models/deepvoice3/loss.py
parakeet/models/deepvoice3/loss.py
+8
-3
parakeet/models/deepvoice3/position_embedding.py
parakeet/models/deepvoice3/position_embedding.py
+1
-1
未找到文件。
examples/deepvoice3/train.py
浏览文件 @
0fb927d1
...
@@ -227,8 +227,12 @@ if __name__ == "__main__":
...
@@ -227,8 +227,12 @@ if __name__ == "__main__":
lin_specs
,
done_flags
,
text_lengths
,
frames
)
lin_specs
,
done_flags
,
text_lengths
,
frames
)
l
=
criterion
.
compose_loss
(
losses
)
l
=
criterion
.
compose_loss
(
losses
)
l
.
backward
()
l
.
backward
()
# record learning rate before updating
writer
.
add_scalar
(
"learning_rate"
,
optim
.
_learning_rate
.
step
().
numpy
(),
global_step
)
optim
.
minimize
(
l
,
grad_clip
=
gradient_clipper
)
optim
.
minimize
(
l
,
grad_clip
=
gradient_clipper
)
dv3
.
clear_gradients
()
optim
.
clear_gradients
()
# ==================all kinds of tedious things=================
# ==================all kinds of tedious things=================
for
k
in
epoch_loss
.
keys
():
for
k
in
epoch_loss
.
keys
():
...
@@ -237,6 +241,7 @@ if __name__ == "__main__":
...
@@ -237,6 +241,7 @@ if __name__ == "__main__":
# record step loss into tensorboard
# record step loss into tensorboard
step_loss
=
{
k
:
v
.
numpy
()[
0
]
for
k
,
v
in
losses
.
items
()}
step_loss
=
{
k
:
v
.
numpy
()[
0
]
for
k
,
v
in
losses
.
items
()}
print
(
step_loss
)
for
k
,
v
in
step_loss
.
items
():
for
k
,
v
in
step_loss
.
items
():
writer
.
add_scalar
(
k
,
v
,
global_step
)
writer
.
add_scalar
(
k
,
v
,
global_step
)
...
@@ -276,7 +281,7 @@ if __name__ == "__main__":
...
@@ -276,7 +281,7 @@ if __name__ == "__main__":
"Please call Stella."
,
"Please call Stella."
,
"Some have accepted this as a miracle without any physical explanation."
,
"Some have accepted this as a miracle without any physical explanation."
,
]
]
for
idx
,
sent
in
sentences
:
for
idx
,
sent
in
enumerate
(
sentences
)
:
wav
,
attn
=
eval_model
(
dv3
,
sent
,
wav
,
attn
=
eval_model
(
dv3
,
sent
,
replace_pronounciation_prob
,
replace_pronounciation_prob
,
min_level_db
,
ref_level_db
,
min_level_db
,
ref_level_db
,
...
...
examples/deepvoice3/utils.py
浏览文件 @
0fb927d1
...
@@ -50,7 +50,7 @@ def make_model(n_speakers, speaker_dim, speaker_embed_std, embed_dim,
...
@@ -50,7 +50,7 @@ def make_model(n_speakers, speaker_dim, speaker_embed_std, embed_dim,
embed_dim
,
embed_dim
,
n_speakers
,
n_speakers
,
speaker_dim
,
speaker_dim
,
padding_idx
=
padding_idx
,
padding_idx
=
None
,
embedding_weight_std
=
embedding_std
,
embedding_weight_std
=
embedding_std
,
convolutions
=
encoder_convolutions
,
convolutions
=
encoder_convolutions
,
max_positions
=
max_positions
,
max_positions
=
max_positions
,
...
@@ -122,6 +122,7 @@ def eval_model(model, text, replace_pronounciation_prob, min_level_db,
...
@@ -122,6 +122,7 @@ def eval_model(model, text, replace_pronounciation_prob, min_level_db,
text
=
np
.
expand_dims
(
text
,
0
)
text
=
np
.
expand_dims
(
text
,
0
)
text_positions
=
np
.
expand_dims
(
text_positions
,
0
)
text_positions
=
np
.
expand_dims
(
text_positions
,
0
)
model
.
eval
()
mel_outputs
,
linear_outputs
,
alignments
,
done
=
model
.
transduce
(
mel_outputs
,
linear_outputs
,
alignments
,
done
=
model
.
transduce
(
dg
.
to_variable
(
text
),
dg
.
to_variable
(
text_positions
))
dg
.
to_variable
(
text
),
dg
.
to_variable
(
text_positions
))
linear_outputs_np
=
linear_outputs
.
numpy
()[
0
].
T
# (C, T)
linear_outputs_np
=
linear_outputs
.
numpy
()[
0
].
T
# (C, T)
...
...
parakeet/models/deepvoice3/attention.py
浏览文件 @
0fb927d1
...
@@ -3,6 +3,7 @@ from collections import namedtuple
...
@@ -3,6 +3,7 @@ from collections import namedtuple
from
paddle
import
fluid
from
paddle
import
fluid
import
paddle.fluid.dygraph
as
dg
import
paddle.fluid.dygraph
as
dg
import
paddle.fluid.layers
as
F
import
paddle.fluid.layers
as
F
import
paddle.fluid.initializer
as
I
from
parakeet.modules.weight_norm
import
Linear
from
parakeet.modules.weight_norm
import
Linear
WindowRange
=
namedtuple
(
"WindowRange"
,
[
"backward"
,
"ahead"
])
WindowRange
=
namedtuple
(
"WindowRange"
,
[
"backward"
,
"ahead"
])
...
@@ -17,12 +18,24 @@ class Attention(dg.Layer):
...
@@ -17,12 +18,24 @@ class Attention(dg.Layer):
key_projection
=
True
,
key_projection
=
True
,
value_projection
=
True
):
value_projection
=
True
):
super
(
Attention
,
self
).
__init__
()
super
(
Attention
,
self
).
__init__
()
self
.
query_proj
=
Linear
(
query_dim
,
embed_dim
)
std
=
np
.
sqrt
(
1
/
query_dim
)
self
.
query_proj
=
Linear
(
query_dim
,
embed_dim
,
param_attr
=
I
.
Normal
(
scale
=
std
))
if
key_projection
:
if
key_projection
:
self
.
key_proj
=
Linear
(
embed_dim
,
embed_dim
)
std
=
np
.
sqrt
(
1
/
embed_dim
)
self
.
key_proj
=
Linear
(
embed_dim
,
embed_dim
,
param_attr
=
I
.
Normal
(
scale
=
std
))
if
value_projection
:
if
value_projection
:
self
.
value_proj
=
Linear
(
embed_dim
,
embed_dim
)
std
=
np
.
sqrt
(
1
/
embed_dim
)
self
.
out_proj
=
Linear
(
embed_dim
,
query_dim
)
self
.
value_proj
=
Linear
(
embed_dim
,
embed_dim
,
param_attr
=
I
.
Normal
(
scale
=
std
))
std
=
np
.
sqrt
(
1
/
embed_dim
)
self
.
out_proj
=
Linear
(
embed_dim
,
query_dim
,
param_attr
=
I
.
Normal
(
scale
=
std
))
self
.
key_projection
=
key_projection
self
.
key_projection
=
key_projection
self
.
value_projection
=
value_projection
self
.
value_projection
=
value_projection
...
...
parakeet/models/deepvoice3/conv1dglu.py
浏览文件 @
0fb927d1
...
@@ -42,8 +42,6 @@ class Conv1DGLU(dg.Layer):
...
@@ -42,8 +42,6 @@ class Conv1DGLU(dg.Layer):
# weight init and dropout
# weight init and dropout
self
.
std_mul
=
std_mul
self
.
std_mul
=
std_mul
self
.
dropout
=
dropout
self
.
dropout
=
dropout
c_in
=
filter_size
*
in_channels
std
=
np
.
sqrt
(
std_mul
*
(
1
-
dropout
)
/
c_in
)
self
.
residual
=
residual
self
.
residual
=
residual
if
residual
:
if
residual
:
...
@@ -51,6 +49,7 @@ class Conv1DGLU(dg.Layer):
...
@@ -51,6 +49,7 @@ class Conv1DGLU(dg.Layer):
in_channels
==
num_filters
in_channels
==
num_filters
),
"this block uses residual connection"
\
),
"this block uses residual connection"
\
"the input_channes should equals num_filters"
"the input_channes should equals num_filters"
std
=
np
.
sqrt
(
std_mul
*
(
1
-
dropout
)
/
(
filter_size
*
in_channels
))
self
.
conv
=
Conv1DCell
(
in_channels
,
self
.
conv
=
Conv1DCell
(
in_channels
,
2
*
num_filters
,
2
*
num_filters
,
filter_size
,
filter_size
,
...
...
parakeet/models/deepvoice3/converter.py
浏览文件 @
0fb927d1
...
@@ -13,11 +13,12 @@ from parakeet.models.deepvoice3.encoder import ConvSpec
...
@@ -13,11 +13,12 @@ from parakeet.models.deepvoice3.encoder import ConvSpec
def
upsampling_4x_blocks
(
n_speakers
,
speaker_dim
,
target_channels
,
dropout
):
def
upsampling_4x_blocks
(
n_speakers
,
speaker_dim
,
target_channels
,
dropout
):
# upsampling convolitions
# upsampling convolitions
upsampling_convolutions
=
[
upsampling_convolutions
=
[
Conv1DTranspose
(
target_channels
,
Conv1DTranspose
(
target_channels
,
target_channels
,
2
,
target_channels
,
stride
=
2
,
2
,
param_attr
=
I
.
Normal
(
np
.
sqrt
(
1
/
target_channels
))),
stride
=
2
,
param_attr
=
I
.
Normal
(
scale
=
np
.
sqrt
(
1
/
(
2
*
target_channels
)))),
Conv1DGLU
(
n_speakers
,
Conv1DGLU
(
n_speakers
,
speaker_dim
,
speaker_dim
,
target_channels
,
target_channels
,
...
@@ -34,12 +35,12 @@ def upsampling_4x_blocks(n_speakers, speaker_dim, target_channels, dropout):
...
@@ -34,12 +35,12 @@ def upsampling_4x_blocks(n_speakers, speaker_dim, target_channels, dropout):
dilation
=
3
,
dilation
=
3
,
std_mul
=
4.
,
std_mul
=
4.
,
dropout
=
dropout
),
dropout
=
dropout
),
Conv1DTranspose
(
target_channels
,
Conv1DTranspose
(
target_channels
,
target_channels
,
2
,
target_channels
,
stride
=
2
,
2
,
param_attr
=
I
.
Normal
(
scale
=
np
.
sqrt
(
4.
/
stride
=
2
,
target_channels
))),
param_attr
=
I
.
Normal
(
scale
=
np
.
sqrt
(
4.
/
(
2
*
target_channels
)
))),
Conv1DGLU
(
n_speakers
,
Conv1DGLU
(
n_speakers
,
speaker_dim
,
speaker_dim
,
target_channels
,
target_channels
,
...
@@ -62,12 +63,12 @@ def upsampling_4x_blocks(n_speakers, speaker_dim, target_channels, dropout):
...
@@ -62,12 +63,12 @@ def upsampling_4x_blocks(n_speakers, speaker_dim, target_channels, dropout):
def
upsampling_2x_blocks
(
n_speakers
,
speaker_dim
,
target_channels
,
dropout
):
def
upsampling_2x_blocks
(
n_speakers
,
speaker_dim
,
target_channels
,
dropout
):
upsampling_convolutions
=
[
upsampling_convolutions
=
[
Conv1DTranspose
(
target_channels
,
Conv1DTranspose
(
target_channels
,
target_channels
,
2
,
target_channels
,
stride
=
2
,
2
,
param_attr
=
I
.
Normal
(
scale
=
np
.
sqrt
(
1.
/
stride
=
2
,
target_channels
))),
param_attr
=
I
.
Normal
(
scale
=
np
.
sqrt
(
1.
/
(
2
*
target_channels
)
))),
Conv1DGLU
(
n_speakers
,
Conv1DGLU
(
n_speakers
,
speaker_dim
,
speaker_dim
,
target_channels
,
target_channels
,
...
...
parakeet/models/deepvoice3/encoder.py
浏览文件 @
0fb927d1
...
@@ -32,7 +32,7 @@ class Encoder(dg.Layer):
...
@@ -32,7 +32,7 @@ class Encoder(dg.Layer):
self
.
dropout
=
dropout
self
.
dropout
=
dropout
if
n_speakers
>
1
:
if
n_speakers
>
1
:
std
=
np
.
sqrt
((
1
-
dropout
)
/
speaker_dim
)
# CAUTION: keep_prob
std
=
np
.
sqrt
((
1
-
dropout
)
/
speaker_dim
)
self
.
sp_proj1
=
Linear
(
speaker_dim
,
self
.
sp_proj1
=
Linear
(
speaker_dim
,
embed_dim
,
embed_dim
,
param_attr
=
I
.
Normal
(
scale
=
std
))
param_attr
=
I
.
Normal
(
scale
=
std
))
...
...
parakeet/models/deepvoice3/loss.py
浏览文件 @
0fb927d1
...
@@ -203,16 +203,21 @@ class TTSLoss(object):
...
@@ -203,16 +203,21 @@ class TTSLoss(object):
result
=
{
result
=
{
"mel"
:
mel_loss
if
compute_mel_loss
else
None
,
"mel"
:
mel_loss
if
compute_mel_loss
else
None
,
"mel_l1_loss"
:
mel_l1_loss
if
compute_mel_loss
else
None
,
"mel_bce_loss"
:
mel_bce_loss
if
compute_mel_loss
else
None
,
"lin"
:
lin_loss
if
compute_lin_loss
else
None
,
"lin"
:
lin_loss
if
compute_lin_loss
else
None
,
"lin_l1_loss"
:
lin_l1_loss
if
compute_lin_loss
else
None
,
"lin_bce_loss"
:
lin_bce_loss
if
compute_lin_loss
else
None
,
"done"
:
done_loss
if
compute_done_loss
else
None
,
"done"
:
done_loss
if
compute_done_loss
else
None
,
"attn"
:
attn_loss
if
compute_attn_loss
else
None
,
"attn"
:
attn_loss
if
compute_attn_loss
else
None
,
}
}
return
result
return
result
@
staticmethod
@
staticmethod
def
compose_loss
(
result
):
def
compose_loss
(
result
):
total_loss
=
0.
total_loss
=
0.
for
v
in
result
.
values
()
:
for
k
in
[
"mel"
,
"lin"
,
"done"
,
"attn"
]
:
if
v
is
not
None
:
if
result
[
k
]
is
not
None
:
total_loss
+=
v
total_loss
+=
result
[
k
]
return
total_loss
return
total_loss
\ No newline at end of file
parakeet/models/deepvoice3/position_embedding.py
浏览文件 @
0fb927d1
...
@@ -42,7 +42,7 @@ def position_encoding_init(n_position,
...
@@ -42,7 +42,7 @@ def position_encoding_init(n_position,
embed_range
=
2
*
(
np
.
arange
(
d_pos_vec
)
//
2
)
embed_range
=
2
*
(
np
.
arange
(
d_pos_vec
)
//
2
)
radians
=
position_rate
\
radians
=
position_rate
\
*
indices_range
\
*
indices_range
\
*
np
.
power
(
1
e4
,
embed_range
/
d_pos_vec
)
/
np
.
power
(
1.
e4
,
embed_range
/
d_pos_vec
)
if
padding_idx
is
not
None
:
if
padding_idx
is
not
None
:
radians
[
padding_idx
]
=
0.
radians
[
padding_idx
]
=
0.
return
radians
return
radians
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录