Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Parakeet
提交
610181d4
P
Parakeet
项目概览
PaddlePaddle
/
Parakeet
通知
10
Star
3
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
19
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Parakeet
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
19
Issue
19
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
610181d4
编写于
7月 29, 2020
作者:
L
liuyibing01
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' into 'master'
dv3 miscellaneous enhancements. See merge request !67
上级
47915461
ddf1c4f7
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
36 addition
and
139 deletion
+36
-139
examples/deepvoice3/clip.py
examples/deepvoice3/clip.py
+11
-108
examples/deepvoice3/train.py
examples/deepvoice3/train.py
+6
-6
parakeet/models/deepvoice3/model.py
parakeet/models/deepvoice3/model.py
+19
-25
未找到文件。
examples/deepvoice3/clip.py
浏览文件 @
610181d4
...
...
@@ -13,109 +13,6 @@ from paddle.fluid.dygraph import base as imperative_base
from
paddle.fluid.clip
import
GradientClipBase
,
_correct_clip_op_role_var
class
DoubleClip
(
GradientClipBase
):
"""
:alias_main: paddle.nn.GradientClipByGlobalNorm
:alias: paddle.nn.GradientClipByGlobalNorm,paddle.nn.clip.GradientClipByGlobalNorm
:old_api: paddle.fluid.clip.GradientClipByGlobalNorm
Given a list of Tensor :math:`t\_list` , calculate the global norm for the elements of all tensors in
:math:`t\_list` , and limit it to ``clip_norm`` .
- If the global norm is greater than ``clip_norm`` , all elements of :math:`t\_list` will be compressed by a ratio.
- If the global norm is less than or equal to ``clip_norm`` , nothing will be done.
The list of Tensors :math:`t\_list` is not passed to this class directly; it consists of the gradients of all parameters in ``Program`` . If ``need_clip``
is not None, only the gradients of the parameters it selects are clipped.
Gradient clipping takes effect after it is set in ``optimizer`` ; see the documentation of ``optimizer``
(for example: :ref:`api_fluid_optimizer_SGDOptimizer`).
The clipping formula is:
.. math::
t\_list[i] = t\_list[i] * \frac{clip\_norm}{\max(global\_norm, clip\_norm)}
where:
.. math::
global\_norm = \sqrt{\sum_{i=0}^{N-1}(l2norm(t\_list[i]))^2}
Args:
clip_norm (float): The maximum norm value.
group_name (str, optional): The group name for this clip. Default value is ``default_group``
need_clip (function, optional): A function that accepts a ``Parameter`` and returns ``bool``
(True: the gradient of this ``Parameter`` needs to be clipped; False: it does not). Default: None,
in which case the gradients of all parameters in the network are clipped.
Examples:
.. code-block:: python
# use for Static mode
import paddle
import paddle.fluid as fluid
import numpy as np
main_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(
main_program=main_prog, startup_program=startup_prog):
image = fluid.data(
name='x', shape=[-1, 2], dtype='float32')
predict = fluid.layers.fc(input=image, size=3, act='relu') # Trainable parameters: fc_0.w.0, fc_0.b.0
loss = fluid.layers.mean(predict)
# Clip all parameters in network:
clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
# Clip a part of parameters in network: (e.g. fc_0.w_0)
# pass a function (filter_func) to need_clip; filter_func receives a Parameter and returns bool
# def filter_func(Parameter):
# # It can be easily filtered by Parameter.name (name can be set in fluid.ParamAttr, and the default name is fc_0.w_0, fc_0.b_0)
# return Parameter.name=="fc_0.w_0"
# clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0, need_clip=filter_func)
sgd_optimizer = fluid.optimizer.SGDOptimizer(learning_rate=0.1, grad_clip=clip)
sgd_optimizer.minimize(loss)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
x = np.random.uniform(-100, 100, (10, 2)).astype('float32')
exe.run(startup_prog)
out = exe.run(main_prog, feed={'x': x}, fetch_list=loss)
# use for Dygraph mode
import paddle
import paddle.fluid as fluid
with fluid.dygraph.guard():
linear = fluid.dygraph.Linear(10, 10) # Trainable: linear_0.w.0, linear_0.b.0
inputs = fluid.layers.uniform_random([32, 10]).astype('float32')
out = linear(fluid.dygraph.to_variable(inputs))
loss = fluid.layers.reduce_mean(out)
loss.backward()
# Clip all parameters in network:
clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
# Clip a part of parameters in network: (e.g. linear_0.w_0)
# pass a function (filter_func) to need_clip; filter_func receives a ParamBase and returns bool
# def filter_func(ParamBase):
# # It can be easily filtered by ParamBase.name (name can be set in fluid.ParamAttr, and the default name is linear_0.w_0, linear_0.b_0)
# return ParamBase.name == "linear_0.w_0"
# # Note: linear.weight and linear.bias return the weight and bias of dygraph.Linear, respectively, and can be used for filtering
# return ParamBase.name == linear.weight.name
# clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0, need_clip=filter_func)
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=0.1, parameter_list=linear.parameters(), grad_clip=clip)
sgd_optimizer.minimize(loss)
"""
def
__init__
(
self
,
clip_value
,
clip_norm
,
group_name
=
"default_group"
,
need_clip
=
None
):
super
(
DoubleClip
,
self
).
__init__
(
need_clip
)
self
.
clip_value
=
float
(
clip_value
)
...
...
@@ -128,8 +25,13 @@ class DoubleClip(GradientClipBase):
@
imperative_base
.
no_grad
def
_dygraph_clip
(
self
,
params_grads
):
params_grads
=
self
.
_dygraph_clip_by_value
(
params_grads
)
params_grads
=
self
.
_dygraph_clip_by_global_norm
(
params_grads
)
return
params_grads
@
imperative_base
.
no_grad
def
_dygraph_clip_by_value
(
self
,
params_grads
):
params_and_grads
=
[]
# clip by value first
for
p
,
g
in
params_grads
:
if
g
is
None
:
continue
...
...
@@ -138,9 +40,10 @@ class DoubleClip(GradientClipBase):
continue
new_grad
=
layers
.
clip
(
x
=
g
,
min
=-
self
.
clip_value
,
max
=
self
.
clip_value
)
params_and_grads
.
append
((
p
,
new_grad
))
params_grads
=
params_and_grads
# clip by global norm
return
params_and_grads
@
imperative_base
.
no_grad
def
_dygraph_clip_by_global_norm
(
self
,
params_grads
):
params_and_grads
=
[]
sum_square_list
=
[]
for
p
,
g
in
params_grads
:
...
...
@@ -178,4 +81,4 @@ class DoubleClip(GradientClipBase):
new_grad
=
layers
.
elementwise_mul
(
x
=
g
,
y
=
clip_var
)
params_and_grads
.
append
((
p
,
new_grad
))
return
params_and_grads
return
params_and_grads
\ No newline at end of file
examples/deepvoice3/train.py
浏览文件 @
610181d4
...
...
@@ -7,12 +7,13 @@ import tqdm
import
paddle
from
paddle
import
fluid
from
paddle.fluid
import
layers
as
F
from
paddle.fluid
import
initializer
as
I
from
paddle.fluid
import
dygraph
as
dg
from
paddle.fluid.io
import
DataLoader
from
tensorboardX
import
SummaryWriter
from
parakeet.models.deepvoice3
import
Encoder
,
Decoder
,
PostNet
,
SpectraNet
from
parakeet.data
import
SliceDataset
,
DataCargo
,
PartialyRandomizedSimilarTimeLengthSampler
,
Sequential
Sampler
from
parakeet.data
import
SliceDataset
,
DataCargo
,
SequentialSampler
,
Random
Sampler
from
parakeet.utils.io
import
save_parameters
,
load_parameters
from
parakeet.g2p
import
en
...
...
@@ -22,9 +23,9 @@ from clip import DoubleClip
def
create_model
(
config
):
char_embedding
=
dg
.
Embedding
((
en
.
n_vocab
,
config
[
"char_dim"
]))
char_embedding
=
dg
.
Embedding
((
en
.
n_vocab
,
config
[
"char_dim"
])
,
param_attr
=
I
.
Normal
(
scale
=
0.1
)
)
multi_speaker
=
config
[
"n_speakers"
]
>
1
speaker_embedding
=
dg
.
Embedding
((
config
[
"n_speakers"
],
config
[
"speaker_dim"
]))
\
speaker_embedding
=
dg
.
Embedding
((
config
[
"n_speakers"
],
config
[
"speaker_dim"
])
,
param_attr
=
I
.
Normal
(
scale
=
0.1
)
)
\
if
multi_speaker
else
None
encoder
=
Encoder
(
config
[
"encoder_layers"
],
config
[
"char_dim"
],
config
[
"encoder_dim"
],
config
[
"kernel_size"
],
...
...
@@ -51,8 +52,7 @@ def create_data(config, data_path):
train_dataset
=
SliceDataset
(
dataset
,
config
[
"valid_size"
],
len
(
dataset
))
train_collator
=
DataCollector
(
config
[
"p_pronunciation"
])
train_sampler
=
PartialyRandomizedSimilarTimeLengthSampler
(
dataset
.
num_frames
()[
config
[
"valid_size"
]:])
train_sampler
=
RandomSampler
(
train_dataset
)
train_cargo
=
DataCargo
(
train_dataset
,
train_collator
,
batch_size
=
config
[
"batch_size"
],
sampler
=
train_sampler
)
train_loader
=
DataLoader
\
...
...
@@ -81,7 +81,7 @@ def train(args, config):
optim
=
create_optimizer
(
model
,
config
)
global
global_step
max_iteration
=
2
000000
max_iteration
=
1
000000
iterator
=
iter
(
tqdm
.
tqdm
(
train_loader
))
while
global_step
<=
max_iteration
:
...
...
parakeet/models/deepvoice3/model.py
浏览文件 @
610181d4
...
...
@@ -39,15 +39,15 @@ class ConvBlock(dg.Layer):
self
.
has_bias
=
has_bias
std
=
np
.
sqrt
(
4
*
keep_prob
/
(
kernel_size
*
in_channel
))
initializer
=
I
.
NormalInitializer
(
loc
=
0.
,
scale
=
std
)
padding
=
"valid"
if
causal
else
"same"
conv
=
Conv1D
(
in_channel
,
2
*
in_channel
,
(
kernel_size
,
),
padding
=
padding
,
data_format
=
"NTC"
,
param_attr
=
initializer
)
param_attr
=
I
.
Normal
(
scale
=
std
)
)
self
.
conv
=
weight_norm
(
conv
)
if
has_bias
:
self
.
bias_affine
=
dg
.
Linear
(
bias_dim
,
2
*
in_channel
)
std
=
np
.
sqrt
(
1
/
bias_dim
)
self
.
bias_affine
=
dg
.
Linear
(
bias_dim
,
2
*
in_channel
,
param_attr
=
I
.
Normal
(
scale
=
std
))
def
forward
(
self
,
input
,
bias
=
None
,
padding
=
None
):
"""
...
...
@@ -82,11 +82,11 @@ class AffineBlock1(dg.Layer):
def
__init__
(
self
,
in_channel
,
out_channel
,
has_bias
=
False
,
bias_dim
=
0
):
super
(
AffineBlock1
,
self
).
__init__
()
std
=
np
.
sqrt
(
1.0
/
in_channel
)
initializer
=
I
.
NormalInitializer
(
loc
=
0.
,
scale
=
std
)
affine
=
dg
.
Linear
(
in_channel
,
out_channel
,
param_attr
=
initializer
)
affine
=
dg
.
Linear
(
in_channel
,
out_channel
,
param_attr
=
I
.
Normal
(
scale
=
std
))
self
.
affine
=
weight_norm
(
affine
,
dim
=-
1
)
if
has_bias
:
self
.
bias_affine
=
dg
.
Linear
(
bias_dim
,
out_channel
)
std
=
np
.
sqrt
(
1
/
bias_dim
)
self
.
bias_affine
=
dg
.
Linear
(
bias_dim
,
out_channel
,
param_attr
=
I
.
Normal
(
scale
=
std
))
self
.
has_bias
=
has_bias
self
.
bias_dim
=
bias_dim
...
...
@@ -110,10 +110,10 @@ class AffineBlock2(dg.Layer):
has_bias
=
False
,
bias_dim
=
0
,
dropout
=
False
,
keep_prob
=
1.
):
super
(
AffineBlock2
,
self
).
__init__
()
if
has_bias
:
self
.
bias_affine
=
dg
.
Linear
(
bias_dim
,
in_channel
)
std
=
np
.
sqrt
(
1
/
bias_dim
)
self
.
bias_affine
=
dg
.
Linear
(
bias_dim
,
in_channel
,
param_attr
=
I
.
Normal
(
scale
=
std
))
std
=
np
.
sqrt
(
1.0
/
in_channel
)
initializer
=
I
.
NormalInitializer
(
loc
=
0.
,
scale
=
std
)
affine
=
dg
.
Linear
(
in_channel
,
out_channel
,
param_attr
=
initializer
)
affine
=
dg
.
Linear
(
in_channel
,
out_channel
,
param_attr
=
I
.
Normal
(
scale
=
std
))
self
.
affine
=
weight_norm
(
affine
,
dim
=-
1
)
self
.
has_bias
=
has_bias
...
...
@@ -171,9 +171,8 @@ class AttentionBlock(dg.Layer):
# multispeaker case
if
has_bias
:
std
=
np
.
sqrt
(
1.0
/
bias_dim
)
initializer
=
I
.
NormalInitializer
(
loc
=
0.
,
scale
=
std
)
self
.
q_pos_affine
=
dg
.
Linear
(
bias_dim
,
1
,
param_attr
=
initializer
)
self
.
k_pos_affine
=
dg
.
Linear
(
bias_dim
,
1
,
param_attr
=
initializer
)
self
.
q_pos_affine
=
dg
.
Linear
(
bias_dim
,
1
,
param_attr
=
I
.
Normal
(
scale
=
std
))
self
.
k_pos_affine
=
dg
.
Linear
(
bias_dim
,
1
,
param_attr
=
I
.
Normal
(
scale
=
std
))
self
.
omega_initial
=
self
.
create_parameter
(
shape
=
[
1
],
attr
=
I
.
ConstantInitializer
(
value
=
omega_default
))
...
...
@@ -184,21 +183,17 @@ class AttentionBlock(dg.Layer):
scale
=
np
.
sqrt
(
1.
/
input_dim
))
initializer
=
I
.
NumpyArrayInitializer
(
init_weight
.
astype
(
np
.
float32
))
# 3 affine transformation to project q, k, v into attention_dim
q_affine
=
dg
.
Linear
(
input_dim
,
attention_dim
,
param_attr
=
initializer
)
q_affine
=
dg
.
Linear
(
input_dim
,
attention_dim
,
param_attr
=
initializer
)
self
.
q_affine
=
weight_norm
(
q_affine
,
dim
=-
1
)
k_affine
=
dg
.
Linear
(
input_dim
,
attention_dim
,
param_attr
=
initializer
)
k_affine
=
dg
.
Linear
(
input_dim
,
attention_dim
,
param_attr
=
initializer
)
self
.
k_affine
=
weight_norm
(
k_affine
,
dim
=-
1
)
std
=
np
.
sqrt
(
1.0
/
input_dim
)
initializer
=
I
.
NormalInitializer
(
loc
=
0.
,
scale
=
std
)
v_affine
=
dg
.
Linear
(
input_dim
,
attention_dim
,
param_attr
=
initializer
)
v_affine
=
dg
.
Linear
(
input_dim
,
attention_dim
,
param_attr
=
I
.
Normal
(
scale
=
std
))
self
.
v_affine
=
weight_norm
(
v_affine
,
dim
=-
1
)
std
=
np
.
sqrt
(
1.0
/
attention_dim
)
initializer
=
I
.
NormalInitializer
(
loc
=
0.
,
scale
=
std
)
out_affine
=
dg
.
Linear
(
attention_dim
,
input_dim
,
param_attr
=
initializer
)
out_affine
=
dg
.
Linear
(
attention_dim
,
input_dim
,
param_attr
=
I
.
Normal
(
scale
=
std
))
self
.
out_affine
=
weight_norm
(
out_affine
,
dim
=-
1
)
self
.
keep_prob
=
keep_prob
...
...
@@ -289,11 +284,11 @@ class Decoder(dg.Layer):
# output mel spectrogram
output_dim
=
reduction_factor
*
in_channels
# r * mel_dim
std
=
np
.
sqrt
(
1.0
/
decoder_dim
)
initializer
=
I
.
NormalInitializer
(
loc
=
0.
,
scale
=
std
)
out_affine
=
dg
.
Linear
(
decoder_dim
,
output_dim
,
param_attr
=
initializer
)
out_affine
=
dg
.
Linear
(
decoder_dim
,
output_dim
,
param_attr
=
I
.
Normal
(
scale
=
std
))
self
.
out_affine
=
weight_norm
(
out_affine
,
dim
=-
1
)
if
has_bias
:
self
.
out_sp_affine
=
dg
.
Linear
(
bias_dim
,
output_dim
)
std
=
np
.
sqrt
(
1
/
bias_dim
)
self
.
out_sp_affine
=
dg
.
Linear
(
bias_dim
,
output_dim
,
param_attr
=
I
.
Normal
(
scale
=
std
))
self
.
has_bias
=
has_bias
self
.
kernel_size
=
kernel_size
...
...
@@ -351,8 +346,7 @@ class PostNet(dg.Layer):
ConvBlock
(
postnet_dim
,
kernel_size
,
False
,
has_bias
,
bias_dim
,
keep_prob
)
for
_
in
range
(
layers
)
])
std
=
np
.
sqrt
(
1.0
/
postnet_dim
)
initializer
=
I
.
NormalInitializer
(
loc
=
0.
,
scale
=
std
)
post_affine
=
dg
.
Linear
(
postnet_dim
,
out_channels
,
param_attr
=
initializer
)
post_affine
=
dg
.
Linear
(
postnet_dim
,
out_channels
,
param_attr
=
I
.
Normal
(
scale
=
std
))
self
.
post_affine
=
weight_norm
(
post_affine
,
dim
=-
1
)
self
.
upsample_factor
=
upsample_factor
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录