# parallel_wavenet.py
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import division
import math
import time
import itertools
import numpy as np

import paddle.fluid.layers as F
import paddle.fluid.dygraph as dg
import paddle.fluid.initializer as I
import paddle.fluid.layers.distributions as D

from parakeet.modules.weight_norm import Linear, Conv1D, Conv1DCell, Conv2DTranspose
from parakeet.models.wavenet import WaveNet


class ParallelWaveNet(dg.Layer):
    """Inverse autoregressive flow model built from a stack of WaveNets.

    Every flow predicts a per-step shift (``mu``) and log scale (``log_std``)
    which are applied to the flow's input as ``z * exp(log_std) + mu``. The
    output of one flow is the input of the next one.
    """

    def __init__(self, n_loops, n_layers, residual_channels, condition_dim,
                 filter_size):
        """ParallelWaveNet, an inverse autoregressive flow model, it contains several flows(WaveNets).

        Args:
            n_loops (List[int]): `n_loop` for each flow.
            n_layers (List[int]): `n_layer` for each flow.
            residual_channels (int): `residual_channels` for every flow.
            condition_dim (int): `condition_dim` for every flow.
            filter_size (int): `filter_size` for every flow.

        Note:
            Flows are created by zipping `n_loops` with `n_layers`, so when
            the two lists differ in length the surplus entries of the longer
            one are ignored.
        """
        super(ParallelWaveNet, self).__init__()
        self.flows = dg.LayerList()
        for n_loop, n_layer in zip(n_loops, n_layers):
            # Each flow emits 3 values per step, split in `forward` into
            # (w, mu, log_std). Per the original author's note, log_scale_min
            # does not matter here; -100 is a dummy value.
            self.flows.append(
                WaveNet(n_loop, n_layer, residual_channels, 3, condition_dim,
                        filter_size, "mog", -100.0))

    def forward(self, z, condition=None):
        """Transform a random noise sampled from a standard Gaussian distribution into sample from the target distribution. And output the mean and log standard deviation of the output distribution.

        Args:
            z (Variable): shape(B, T), random noise sampled from a standard gaussian distribution.
            condition (Variable, optional): shape(B, F, T), dtype float, the upsampled condition. Defaults to None.

        Returns:
            (z, out_mu, out_log_std)
            z (Variable): shape(B, T), dtype float, transformed noise, it is the synthesized waveform.
            out_mu (Variable): shape(B, T), dtype float, means of the output distributions.
            out_log_std (Variable): shape(B, T), dtype float, log standard deviations of the output distributions.

        Raises:
            ValueError: if the model was built without any flow.
        """
        out_mu = None
        out_log_std = None
        for flow in self.flows:
            theta = flow(z, condition)  # w, mu, log_std [0: T]
            # (B, T, 1) for each; the first slice (w) is not used here.
            _, mu, log_std = F.split(theta, 3, dim=-1)
            mu = F.squeeze(mu, [-1])  # [0: T]
            log_std = F.squeeze(log_std, [-1])  # [0: T]
            scale = F.exp(log_std)
            z = z * scale + mu  # [0: T]

            if out_mu is None:
                # First flow: its parameters are the running totals.
                out_mu = mu
                out_log_std = log_std
            else:
                # Compose the affine maps: the new flow rescales and shifts
                # the accumulated mean, and log scales add up.
                out_mu = out_mu * scale + mu
                # Out-of-place on purpose: after the first flow, out_log_std
                # is the very same object as that flow's log_std, so an
                # in-place accumulate could overwrite it.
                out_log_std = out_log_std + log_std

        if out_mu is None:
            raise ValueError(
                "ParallelWaveNet has no flows; n_loops and n_layers must be "
                "non-empty.")
        return z, out_mu, out_log_std