提交 ca8432f6 编写于 作者: W WongLaw

Vits initialize method, test=tts

上级 e793d267
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet) # Modified from espnet(https://github.com/espnet/espnet)
"""VITS module""" """VITS module"""
import math
from typing import Any from typing import Any
from typing import Dict from typing import Dict
from typing import Optional from typing import Optional
...@@ -27,7 +28,12 @@ from paddlespeech.t2s.models.hifigan import HiFiGANMultiScaleMultiPeriodDiscrimi ...@@ -27,7 +28,12 @@ from paddlespeech.t2s.models.hifigan import HiFiGANMultiScaleMultiPeriodDiscrimi
from paddlespeech.t2s.models.hifigan import HiFiGANPeriodDiscriminator from paddlespeech.t2s.models.hifigan import HiFiGANPeriodDiscriminator
from paddlespeech.t2s.models.hifigan import HiFiGANScaleDiscriminator from paddlespeech.t2s.models.hifigan import HiFiGANScaleDiscriminator
from paddlespeech.t2s.models.vits.generator import VITSGenerator from paddlespeech.t2s.models.vits.generator import VITSGenerator
from paddlespeech.t2s.modules.nets_utils import initialize from paddlespeech.utils.initialize import _calculate_fan_in_and_fan_out
from paddlespeech.utils.initialize import kaiming_normal_
from paddlespeech.utils.initialize import normal_
from paddlespeech.utils.initialize import ones_
from paddlespeech.utils.initialize import uniform_
from paddlespeech.utils.initialize import zeros_
AVAILABLE_GENERATERS = { AVAILABLE_GENERATERS = {
"vits_generator": VITSGenerator, "vits_generator": VITSGenerator,
...@@ -180,7 +186,7 @@ class VITS(nn.Layer): ...@@ -180,7 +186,7 @@ class VITS(nn.Layer):
super().__init__() super().__init__()
# initialize parameters # initialize parameters
initialize(self, init_type) # initialize(self, init_type)
# define modules # define modules
generator_class = AVAILABLE_GENERATERS[generator_type] generator_class = AVAILABLE_GENERATERS[generator_type]
...@@ -196,7 +202,7 @@ class VITS(nn.Layer): ...@@ -196,7 +202,7 @@ class VITS(nn.Layer):
self.discriminator = discriminator_class( self.discriminator = discriminator_class(
**discriminator_params, ) **discriminator_params, )
nn.initializer.set_global_initializer(None) # nn.initializer.set_global_initializer(None)
# cache # cache
self.cache_generator_outputs = cache_generator_outputs self.cache_generator_outputs = cache_generator_outputs
...@@ -214,6 +220,8 @@ class VITS(nn.Layer): ...@@ -214,6 +220,8 @@ class VITS(nn.Layer):
self.reuse_cache_gen = True self.reuse_cache_gen = True
self.reuse_cache_dis = True self.reuse_cache_dis = True
self.reset_parameters()
def forward( def forward(
self, self,
text: paddle.Tensor, text: paddle.Tensor,
...@@ -243,7 +251,7 @@ class VITS(nn.Layer): ...@@ -243,7 +251,7 @@ class VITS(nn.Layer):
forward_generator (bool): forward_generator (bool):
Whether to forward generator. Whether to forward generator.
Returns: Returns:
""" """
if forward_generator: if forward_generator:
return self._forward_generator( return self._forward_generator(
...@@ -497,3 +505,40 @@ class VITS(nn.Layer): ...@@ -497,3 +505,40 @@ class VITS(nn.Layer):
lids, ) lids, )
return dict(wav=paddle.reshape(wav, [-1])) return dict(wav=paddle.reshape(wav, [-1]))
def reset_parameters(self):
    """Re-initialize the parameters of every sublayer of this model.

    The rules are applied through ``self.apply``:

    * Conv1D / Conv1DTranspose / Conv2D / Conv2DTranspose: Kaiming-normal
      weights using ``fan_out``; bias uniform in ``±1/sqrt(fan_in)``.
    * BatchNorm1D / BatchNorm2D / GroupNorm / LayerNorm: weight filled with
      ones and bias with zeros (each skipped when the parameter is ``None``).
    * Linear: Kaiming-normal weights with ``a=sqrt(5)``; bias uniform in
      ``±1/sqrt(fan_in)``.
    * Embedding: standard-normal weights, with the padding row zeroed.
    """

    def _reset_bias(module, reverse=False):
        # The bias bound is derived from the fan-in of the matching weight.
        if module.bias is None:
            return
        fan_in, _ = _calculate_fan_in_and_fan_out(
            module.weight, reverse=reverse)
        bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
        uniform_(module.bias, -bound, bound)

    def _reset_parameters(module):
        if isinstance(module, (nn.Conv1D, nn.Conv1DTranspose, nn.Conv2D,
                               nn.Conv2DTranspose)):
            kaiming_normal_(module.weight, mode="fan_out")
            _reset_bias(module)
        if isinstance(module, (nn.BatchNorm1D, nn.BatchNorm2D, nn.GroupNorm,
                               nn.LayerNorm)):
            # Affine parameters may be disabled; only touch the ones present.
            if module.weight is not None:
                ones_(module.weight)
            if module.bias is not None:
                zeros_(module.bias)
        if isinstance(module, nn.Linear):
            # Linear weights are laid out [in, out], so the fan helpers need
            # reverse=True to pick the input dimension as fan_in.
            kaiming_normal_(module.weight, a=math.sqrt(5), reverse=True)
            _reset_bias(module, reverse=True)
        if isinstance(module, nn.Embedding):
            normal_(module.weight)
            padding_idx = module._padding_idx
            # NOTE(review): paddle.nn.Embedding appears to store a negative
            # sentinel (-1) when no padding index was given -- confirm. A
            # negative value is therefore treated as "no padding row" so the
            # last embedding row is not zeroed by accident.
            if padding_idx is not None and padding_idx >= 0:
                with paddle.no_grad():
                    module.weight[padding_idx] = 0

    self.apply(_reset_parameters)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py
The copyright of pytorch/pytorch is under a BSD-style license, as found in the LICENSE file.
"""
import math
import numpy as np
import paddle
import paddle.nn as nn
# Names exported by a star-import of this module. Note that ``vector_`` and
# ``bias_init_with_prob`` are defined below but are not listed here.
__all__ = [
"uniform_",
"normal_",
"constant_",
"ones_",
"zeros_",
"xavier_uniform_",
"xavier_normal_",
"kaiming_uniform_",
"kaiming_normal_",
"linear_init_",
"conv_init_",
"reset_initialized_parameter",
"_calculate_fan_in_and_fan_out",
]
def _no_grad_uniform_(tensor, a, b):
    """Overwrite ``tensor`` with values sampled by ``paddle.uniform``.

    Sampling and assignment both happen inside ``paddle.no_grad()``.

    Args:
        tensor (paddle.Tensor): tensor whose value is replaced.
        a (float|int): passed to ``paddle.uniform`` as ``min``.
        b (float|int): passed to ``paddle.uniform`` as ``max``.
    Return:
        The same ``tensor`` object.
    """
    with paddle.no_grad():
        samples = paddle.uniform(
            shape=tensor.shape, dtype=tensor.dtype, min=a, max=b)
        tensor.set_value(samples)
    return tensor
def _no_grad_normal_(tensor, mean=0.0, std=1.0):
    """Overwrite ``tensor`` with values sampled by ``paddle.normal``.

    Sampling and assignment both happen inside ``paddle.no_grad()``.

    Args:
        tensor (paddle.Tensor): tensor whose value is replaced.
        mean (float|int): mean passed to ``paddle.normal``.
        std (float|int): standard deviation passed to ``paddle.normal``.
    Return:
        The same ``tensor`` object.
    """
    with paddle.no_grad():
        samples = paddle.normal(mean=mean, std=std, shape=tensor.shape)
        tensor.set_value(samples)
    return tensor
def _no_grad_fill_(tensor, value=0.0):
    """Overwrite every element of ``tensor`` with ``value``.

    The fill tensor is built with ``paddle.full_like`` using the dtype of
    ``tensor`` and assigned inside ``paddle.no_grad()``.

    Args:
        tensor (paddle.Tensor): tensor whose value is replaced.
        value (float|int): constant to fill with.
    Return:
        The same ``tensor`` object.
    """
    with paddle.no_grad():
        filled = paddle.full_like(tensor, value, dtype=tensor.dtype)
        tensor.set_value(filled)
    return tensor
def uniform_(tensor, a, b):
    """
    Fill a tensor in place with uniformly sampled values.
    Args:
        tensor (paddle.Tensor): tensor to overwrite.
        a (float|int): lower bound of the sampling range.
        b (float|int): upper bound of the sampling range.
    Return:
        tensor
    """
    return _no_grad_uniform_(tensor, a=a, b=b)
def normal_(tensor, mean=0.0, std=1.0):
    """
    Fill a tensor in place with normally distributed values.
    Args:
        tensor (paddle.Tensor): tensor to overwrite.
        mean (float|int): mean of the distribution.
        std (float|int): standard deviation of the distribution.
    Return:
        tensor
    """
    return _no_grad_normal_(tensor, mean=mean, std=std)
def constant_(tensor, value=0.0):
    """
    Fill a tensor in place with a single constant.
    Args:
        tensor (paddle.Tensor): tensor to overwrite.
        value (float|int): constant written to every element.
    Return:
        tensor
    """
    return _no_grad_fill_(tensor, value=value)
def ones_(tensor):
    """
    Fill a tensor in place with ones.
    Args:
        tensor (paddle.Tensor): tensor to overwrite.
    Return:
        tensor
    """
    return _no_grad_fill_(tensor, value=1)
def zeros_(tensor):
    """
    Fill a tensor in place with zeros.
    Args:
        tensor (paddle.Tensor): tensor to overwrite.
    Return:
        tensor
    """
    return _no_grad_fill_(tensor, value=0)
def vector_(tensor, vector):
    """
    Overwrite a tensor in place with the given values.
    Args:
        tensor (paddle.Tensor): tensor to overwrite.
        vector: data converted with ``paddle.to_tensor`` using the dtype of
            ``tensor`` before being assigned.
    Return:
        tensor
    """
    with paddle.no_grad():
        replacement = paddle.to_tensor(vector, dtype=tensor.dtype)
        tensor.set_value(replacement)
    return tensor
def _calculate_fan_in_and_fan_out(tensor, reverse=False):
    """
    Compute (fan_in, fan_out) for a weight tensor.
    Args:
        tensor (Tensor): paddle.Tensor with at least 2 dimensions.
        reverse (bool: False): layout of the first two axes. False means
            [fout, fin, ...] (e.g. conv.weight [cout, cin, kh, kw]); True
            means [fin, fout, ...] (e.g. linear.weight [cin, cout]).
    Return:
        Tuple[fan_in, fan_out]
    Raises:
        ValueError: if the tensor has fewer than 2 dimensions.
    """
    if tensor.ndim < 2:
        raise ValueError(
            "Fan in and fan out can not be computed for tensor with fewer than 2 dimensions"
        )

    dim0, dim1 = tensor.shape[0], tensor.shape[1]
    in_maps, out_maps = (dim0, dim1) if reverse else (dim1, dim0)

    # Every axis after the first two contributes to the receptive field.
    field = np.prod(tensor.shape[2:]) if tensor.ndim > 2 else 1

    return in_maps * field, out_maps * field
def xavier_uniform_(tensor, gain=1.0, reverse=False):
    """
    Fill a tensor in place using Xavier (Glorot) uniform initialization.
    Args:
        tensor (paddle.Tensor): tensor to overwrite.
        gain (float): scaling factor, 1.0 by default.
        reverse (bool: False): layout of the first two axes; False means
            [fout, fin, ...].
    Return:
        tensor
    """
    fans = _calculate_fan_in_and_fan_out(tensor, reverse=reverse)
    fan_total = float(fans[0] + fans[1])
    std = gain * math.sqrt(2.0 / fan_total)
    # A uniform range of +/- sqrt(3) * std has standard deviation std.
    limit = math.sqrt(3.0) * std
    return _no_grad_uniform_(tensor, -limit, limit)
def xavier_normal_(tensor, gain=1.0, reverse=False):
    """
    Fill a tensor in place using Xavier (Glorot) normal initialization.
    Args:
        tensor (paddle.Tensor): tensor to overwrite.
        gain (float): scaling factor, 1.0 by default.
        reverse (bool: False): layout of the first two axes; False means
            [fout, fin, ...].
    Return:
        tensor
    """
    fans = _calculate_fan_in_and_fan_out(tensor, reverse=reverse)
    fan_total = float(fans[0] + fans[1])
    sigma = gain * math.sqrt(2.0 / fan_total)
    return _no_grad_normal_(tensor, 0, sigma)
# reference: https://pytorch.org/docs/stable/_modules/torch/nn/init.html
def _calculate_correct_fan(tensor, mode, reverse=False):
    """
    Return the fan value selected by ``mode`` for a weight tensor.
    Args:
        tensor (paddle.Tensor): weight tensor.
        mode (str): "fan_in" or "fan_out" (case-insensitive).
        reverse (bool: False): layout of the first two axes; False means
            [fout, fin, ...].
    Return:
        fan_in or fan_out of ``tensor``.
    Raises:
        ValueError: if ``mode`` is not one of the supported values.
    """
    mode = mode.lower()
    valid_modes = ["fan_in", "fan_out"]
    # Validate the mode before touching the tensor.
    if mode not in valid_modes:
        raise ValueError("Mode {} not supported, please use one of {}".format(
            mode, valid_modes))

    fans = _calculate_fan_in_and_fan_out(tensor, reverse)
    if mode == "fan_in":
        return fans[0]
    return fans[1]
def _calculate_gain(nonlinearity, param=None):
    """
    Return the gain associated with a nonlinearity.
    Args:
        nonlinearity (str): name of the nonlinearity. Linear/conv layer names
            and "sigmoid" give 1; "tanh", "relu", "selu" give fixed values;
            "leaky_relu" depends on ``param``.
        param (int|float|None): negative slope for "leaky_relu"; 0.01 is
            used when None. Ignored for other nonlinearities.
    Return:
        The gain value.
    Raises:
        ValueError: for an unknown nonlinearity, or when ``param`` is not an
            int/float (bool is rejected) for "leaky_relu".
    """
    unit_gain_names = (
        "linear", "conv1d", "conv2d", "conv3d", "conv_transpose1d",
        "conv_transpose2d", "conv_transpose3d", "sigmoid")
    for name in unit_gain_names:
        if nonlinearity == name:
            return 1

    if nonlinearity == "tanh":
        return 5.0 / 3
    if nonlinearity == "relu":
        return math.sqrt(2.0)
    if nonlinearity == "selu":
        return 3.0 / 4
    if nonlinearity != "leaky_relu":
        raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))

    if param is None:
        slope = 0.01
    else:
        # bool is a subclass of int, so it has to be excluded explicitly.
        is_plain_int = isinstance(param, int) and not isinstance(param, bool)
        if not (is_plain_int or isinstance(param, float)):
            raise ValueError(
                "negative_slope {} not a valid number".format(param))
        slope = param
    return math.sqrt(2.0 / (1 + slope**2))
def kaiming_uniform_(tensor,
                     a=0,
                     mode="fan_in",
                     nonlinearity="leaky_relu",
                     reverse=False):
    """
    Fill a tensor in place using Kaiming (He) uniform initialization.
    Args:
        tensor (paddle.Tensor): tensor to overwrite.
        a (float|int): parameter forwarded to the gain computation (the
            negative slope when nonlinearity is "leaky_relu").
        mode (str): "fan_in" (default) or "fan_out".
        nonlinearity (str): name of the nonlinearity used for the gain.
        reverse (bool: False): layout of the first two axes; False means
            [fout, fin, ...].
    Return:
        tensor
    """
    fan = _calculate_correct_fan(tensor, mode, reverse)
    std = _calculate_gain(nonlinearity, a) / math.sqrt(fan)
    # A uniform range of +/- sqrt(3) * std has standard deviation std.
    limit = math.sqrt(3.0) * std
    return _no_grad_uniform_(tensor, -limit, limit)
def kaiming_normal_(tensor,
                    a=0,
                    mode="fan_in",
                    nonlinearity="leaky_relu",
                    reverse=False):
    """
    Fill a tensor in place using Kaiming (He) normal initialization.
    Args:
        tensor (paddle.Tensor): tensor to overwrite.
        a (float|int): parameter forwarded to the gain computation (the
            negative slope when nonlinearity is "leaky_relu").
        mode (str): "fan_in" (default) or "fan_out".
        nonlinearity (str): name of the nonlinearity used for the gain.
        reverse (bool: False): layout of the first two axes; False means
            [fout, fin, ...].
    Return:
        tensor
    """
    fan = _calculate_correct_fan(tensor, mode, reverse)
    sigma = _calculate_gain(nonlinearity, a) / math.sqrt(fan)
    return _no_grad_normal_(tensor, 0, sigma)
def linear_init_(module):
    """
    Initialize a linear layer's weight and bias uniformly in place.

    Both parameters are drawn from ``[-bound, bound]`` where
    ``bound = 1 / sqrt(module.weight.shape[0])``.
    Args:
        module: layer exposing ``weight`` and ``bias`` attributes; ``bias``
            may be None, in which case only the weight is initialized.
    Return:
        None
    """
    bound = 1 / math.sqrt(module.weight.shape[0])
    uniform_(module.weight, -bound, bound)
    # A layer may be built without a bias; skip it the same way conv_init_ does.
    if module.bias is not None:
        uniform_(module.bias, -bound, bound)
def conv_init_(module):
    """
    Initialize a convolution layer's weight and bias uniformly in place.

    Both parameters are drawn from ``[-limit, limit]`` where ``limit`` is
    ``1 / sqrt(prod(module.weight.shape[1:]))``.
    Args:
        module: layer exposing ``weight`` and ``bias`` attributes; ``bias``
            may be None, in which case only the weight is initialized.
    Return:
        None
    """
    limit = 1 / np.sqrt(np.prod(module.weight.shape[1:]))
    uniform_(module.weight, -limit, limit)
    if module.bias is None:
        return
    uniform_(module.bias, -limit, limit)
def bias_init_with_prob(prior_prob=0.01):
    """Return the conv/fc bias value that corresponds to a prior probability.

    The result is ``-log((1 - prior_prob) / prior_prob)`` as a Python float.
    """
    odds_against = (1 - prior_prob) / prior_prob
    return float(-np.log(odds_against))
@paddle.no_grad()
def reset_initialized_parameter(model, include_self=True):
    """
    Re-initialize the parameters of conv, linear, embedding and norm sublayers.

    * nn.Conv2D: weight and bias uniform in ``±sqrt(groups / (cin * kh * kw))``.
    * nn.Linear: weight and bias uniform in ``±sqrt(1 / weight.shape[0])``.
    * nn.Embedding: weight drawn from a normal with mean 0 and std 1.
    * nn.BatchNorm2D / nn.LayerNorm: weight filled with 1, bias with 0.

    Other layer types are left untouched.
    Args:
        model (paddle.Layer): layer whose sublayers are visited.
        include_self (bool: True): forwarded to ``named_sublayers``; whether
            ``model`` itself is visited too.
    Return:
        None
    """

    def _bias_present(layer):
        return hasattr(layer, "bias") and getattr(layer, "bias") is not None

    for _, layer in model.named_sublayers(include_self=include_self):
        if isinstance(layer, nn.Conv2D):
            kernel_h = layer._kernel_size[0]
            kernel_w = layer._kernel_size[1]
            ratio = float(layer._groups) / (
                layer._in_channels * kernel_h * kernel_w)
            bound = math.sqrt(ratio)
            _no_grad_uniform_(layer.weight, -bound, bound)
            if _bias_present(layer):
                _no_grad_uniform_(layer.bias, -bound, bound)
            continue

        if isinstance(layer, nn.Linear):
            bound = math.sqrt(1.0 / layer.weight.shape[0])
            _no_grad_uniform_(layer.weight, -bound, bound)
            if _bias_present(layer):
                _no_grad_uniform_(layer.bias, -bound, bound)
            continue

        if isinstance(layer, nn.Embedding):
            _no_grad_normal_(layer.weight, mean=0.0, std=1.0)
            continue

        if isinstance(layer, (nn.BatchNorm2D, nn.LayerNorm)):
            _no_grad_fill_(layer.weight, 1.0)
            if _bias_present(layer):
                _no_grad_fill_(layer.bias, 0)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册