diff --git a/README.md b/README.md index 2a5f298..b5f61fd 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ sudo apt-get install libsndfile1 ### Install PaddlePaddle -See [install](https://www.paddlepaddle.org.cn/install/quick) for more details. This repo requires PaddlePaddle **1.7.0** or above. +See [install](https://www.paddlepaddle.org.cn/install/quick) for more details. This repo requires PaddlePaddle **1.7.1** or above. ### Install Parakeet @@ -82,7 +82,7 @@ Parakeet also releases some well-trained parameters for the example models, whic - diff --git a/parakeet/models/waveflow/waveflow_modules.py b/parakeet/models/waveflow/waveflow_modules.py index 633f92d..15a7fd2 100644 --- a/parakeet/models/waveflow/waveflow_modules.py +++ b/parakeet/models/waveflow/waveflow_modules.py @@ -96,7 +96,7 @@ class Conditioner(dg.Layer): x = fluid.layers.cast(x, "float16") x = fluid.layers.leaky_relu(x, alpha=0.4) - return fluid.layers.reshape(x, [x.shape[0], x.shape[2], x.shape[3]]) + return fluid.layers.squeeze(x, [1]) def infer(self, x): x = fluid.layers.unsqueeze(x, 1) @@ -111,7 +111,7 @@ class Conditioner(dg.Layer): time_cutoff = layer._filter_size[1] - layer._stride[1] x = fluid.layers.leaky_relu(x[:, :, :, :-time_cutoff], alpha=0.4) - return fluid.layers.reshape(x, [x.shape[0], x.shape[2], x.shape[3]]) + return fluid.layers.squeeze(x, [1]) class Flow(dg.Layer): @@ -220,10 +220,9 @@ class Flow(dg.Layer): # Pad width dim (time): dialated non-causal convolution pad_top, pad_bottom = (self.kernel_h - 1) * dilation_h, 0 pad_left = pad_right = int((self.kernel_w - 1) * dilation_w / 2) - self.in_layers[i].layer._padding = [ - pad_top, pad_bottom, pad_left, pad_right - ] - hidden = self.in_layers[i](audio) + audio_pad = fluid.layers.pad2d( + audio, paddings=[pad_top, pad_bottom, pad_left, pad_right]) + hidden = self.in_layers[i](audio_pad) cond_hidden = self.cond_layers[i](mel) in_acts = hidden + cond_hidden out_acts = fluid.layers.tanh(in_acts[:, :self.n_channels, :]) * \ @@ -268,9 +267,8 @@ class Flow(dg.Layer): pad_top, pad_bottom = 0, 0 pad_left = int((self.kernel_w - 1) * dilation_w / 2) pad_right = int((self.kernel_w - 1) * dilation_w / 2) - self.in_layers[i].layer._padding = [ - pad_top, pad_bottom, pad_left, pad_right - ] + state = fluid.layers.pad2d( + state, paddings=[pad_top, pad_bottom, pad_left, pad_right]) hidden = self.in_layers[i](state) cond_hidden = self.cond_layers[i](mel) in_acts = hidden + cond_hidden @@ -301,6 +299,7 @@ class WaveFlowModule(dg.Layer): Returns: WaveFlowModule """ + def __init__(self, config): super(WaveFlowModule, self).__init__() self.n_flows = config.n_flows @@ -380,8 +379,7 @@ class WaveFlowModule(dg.Layer): mel_slices = [mel[:, :, j, :] for j in self.perms[i]] mel = fluid.layers.stack(mel_slices, axis=2) - z = fluid.layers.reshape( - audio, [audio.shape[0], audio.shape[2], audio.shape[3]]) + z = fluid.layers.squeeze(audio, [1]) return z, log_s_list def synthesize(self, mel, sigma=1.0): @@ -442,8 +440,7 @@ class WaveFlowModule(dg.Layer): audio = fluid.layers.concat(audio_list, axis=2) # audio: [bs, n_group, time/n_group] - audio = fluid.layers.reshape( - audio, [audio.shape[0], audio.shape[2], audio.shape[3]]) + audio = fluid.layers.squeeze(audio, [1]) # audio: [bs, time] audio = fluid.layers.reshape( fluid.layers.transpose(audio, [0, 2, 1]), [audio.shape[0], -1]) diff --git a/parakeet/modules/weight_norm.py b/parakeet/modules/weight_norm.py index 92f1085..27616bf 100644 --- a/parakeet/modules/weight_norm.py +++ b/parakeet/modules/weight_norm.py @@ -65,15 +65,6 @@ def compute_weight(v, g, dim, power): return weight -def assign_by_cast(i, o): - fluid.default_main_program().current_block().append_op( - type="cast", - inputs={"X": i}, - outputs={"Out": o}, - attrs={"in_dtype": i.dtype, - "out_dtype": o.dtype}) - - class WeightNormWrapper(dg.Layer): def __init__(self, layer, param_name="weight", dim=0, power=2): super(WeightNormWrapper, self).__init__() @@ -93,13 +84,13 @@ class WeightNormWrapper(dg.Layer): w_v, self.create_parameter( shape=original_weight.shape, dtype=original_weight.dtype)) - assign_by_cast(original_weight, getattr(self, w_v)) + F.assign(original_weight, getattr(self, w_v)) delattr(layer, param_name) temp = norm_except(getattr(self, w_v), self.dim, self.power) self.add_parameter( w_g, self.create_parameter( shape=temp.shape, dtype=temp.dtype)) - assign_by_cast(temp, getattr(self, w_g)) + F.assign(temp, getattr(self, w_g)) # also set this when setting up setattr(self.layer, self.param_name,
+ WaveFlow @@ -109,7 +109,7 @@ Parakeet also releases some well-trained parameters for the example models, whic
- +