# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
import time
import itertools
import numpy as np

import paddle.fluid.layers as F
import paddle.fluid.dygraph as dg
import paddle.fluid.initializer as I
import paddle.fluid.layers.distributions as D

from parakeet.modules.weight_norm import Linear, Conv1D, Conv1DCell, Conv2DTranspose
from parakeet.models.wavenet import WaveNet


class ParallelWaveNet(dg.Layer):
    """A stack of WaveNet flows applied one after another to a latent z.

    Each flow is a ``WaveNet`` built with 3 output channels and the "mog"
    loss type; ``forward`` uses two of those channels (mu and log_std) to
    apply an affine transform to its input.
    """

    def __init__(self, n_loops, n_layers, residual_channels, condition_dim,
                 filter_size):
        """Build one WaveNet flow per (n_loop, n_layer) pair.

        Arguments:
            n_loops {sequence of int} -- ``n_loop`` passed to each flow's WaveNet.
            n_layers {sequence of int} -- ``n_layer`` passed to each flow's WaveNet,
                must have the same length as ``n_loops``.
            residual_channels {int} -- forwarded to every WaveNet.
            condition_dim {int} -- forwarded to every WaveNet.
            filter_size {int} -- forwarded to every WaveNet.

        Raises:
            ValueError -- if ``n_loops`` and ``n_layers`` differ in length, or
                if they are empty (``forward`` needs at least one flow).
        """
        super(ParallelWaveNet, self).__init__()

        # Materialize both so that generators are accepted and lengths can be
        # compared; a bare zip() would silently drop the unmatched tail.
        n_loops = list(n_loops)
        n_layers = list(n_layers)
        if len(n_loops) != len(n_layers):
            raise ValueError(
                "n_loops and n_layers must have the same length, got {} and {}".
                format(len(n_loops), len(n_layers)))
        if not n_loops:
            raise ValueError("ParallelWaveNet requires at least one flow")

        self.flows = dg.LayerList()
        for n_loop, n_layer in zip(n_loops, n_layers):
            # Positional arguments after residual_channels are, in order:
            # output_dim=3, condition_dim, filter_size, loss_type="mog",
            # log_scale_min=-100.0.
            # teacher's log_scale_min does not matter here, -100 is a dummy value
            self.flows.append(
                WaveNet(n_loop, n_layer, residual_channels, 3, condition_dim,
                        filter_size, "mog", -100.0))

    def forward(self, z, condition=None):
        """Inverse Autoregressive Flow. Several wavenets.

        Arguments:
            z {Variable} -- shape(batch_size, time_steps), hidden variable, sampled from a standard normal distribution.

        Keyword Arguments:
            condition {Variable} -- shape(batch_size, condition_dim, time_steps), condition, basically upsampled mel spectrogram. (default: {None})

        Returns:
            Variable -- shape(batch_size, time_steps), transformed z.
            Variable -- shape(batch_size, time_steps), output distribution's mu.
            Variable -- shape(batch_size, time_steps), output distribution's log_std.
        """
        out_mu = None
        out_log_std = None

        for flow in self.flows:
            theta = flow(z, condition)  # w, mu, log_std [0: T]
            # The first chunk (w) is not used by the flow transform.
            _, mu, log_std = F.split(theta, 3, dim=-1)  # (B, T, 1) for each
            mu = F.squeeze(mu, [-1])  # [0: T]
            log_std = F.squeeze(log_std, [-1])  # [0: T]

            # Computed once and shared by the update of z and of out_mu.
            scale = F.exp(log_std)
            z = z * scale + mu  # [0: T]

            if out_mu is None:
                # First flow: its parameters are the running totals.
                out_mu = mu
                out_log_std = log_std
            else:
                # Compose the affine transforms: the running mean is rescaled
                # and shifted, the running log_std accumulates additively.
                out_mu = out_mu * scale + mu
                # Out-of-place add: out_log_std may still alias the first
                # flow's log_std, which must not be modified.
                out_log_std = out_log_std + log_std

        return z, out_mu, out_log_std
dilated_channels = 2 * residual_channels # following clarinet's implementation, we do not have parametric residual # & skip connection. @@ -135,7 +135,7 @@ class ResidualBlock(dg.Layer): class ResidualNet(dg.Layer): def __init__(self, n_loop, n_layer, residual_channels, condition_dim, filter_size): - super().__init__() + super(ResidualNet, self).__init__() # double the dilation at each layer in a loop(n_loop layers) dilations = [2**i for i in range(n_loop)] * n_layer self.context_size = 1 + sum(dilations) @@ -198,7 +198,7 @@ class ResidualNet(dg.Layer): class WaveNet(dg.Layer): def __init__(self, n_loop, n_layer, residual_channels, output_dim, condition_dim, filter_size, loss_type, log_scale_min): - super().__init__() + super(WaveNet, self).__init__() if loss_type not in ["softmax", "mog"]: raise ValueError("loss_type {} is not supported".format(loss_type)) if loss_type == "softmax":