Upgrade to paddle 1.7.1
This commit is contained in:
parent
2664398127
commit
bfbc5e2046
|
@ -40,7 +40,7 @@ sudo apt-get install libsndfile1
|
||||||
|
|
||||||
### Install PaddlePaddle
|
### Install PaddlePaddle
|
||||||
|
|
||||||
See [install](https://www.paddlepaddle.org.cn/install/quick) for more details. This repo requires PaddlePaddle **1.7.0** or above.
|
See [install](https://www.paddlepaddle.org.cn/install/quick) for more details. This repo requires PaddlePaddle **1.7.1** or above.
|
||||||
|
|
||||||
### Install Parakeet
|
### Install Parakeet
|
||||||
|
|
||||||
|
@ -82,7 +82,7 @@ Parakeet also releases some well-trained parameters for the example models, whic
|
||||||
<table>
|
<table>
|
||||||
<thead>
|
<thead>
|
||||||
<tr>
|
<tr>
|
||||||
<th style="width: 250px">
|
<th style="width: 250px">
|
||||||
WaveFlow
|
WaveFlow
|
||||||
</th>
|
</th>
|
||||||
<th style="width: 250px">
|
<th style="width: 250px">
|
||||||
|
@ -109,7 +109,7 @@ Parakeet also releases some well-trained parameters for the example models, whic
|
||||||
<a href="https://paddlespeech.bj.bcebos.com/Parakeet/clarinet_ljspeech_samples_1.0/step_500000_sentence_3.wav">
|
<a href="https://paddlespeech.bj.bcebos.com/Parakeet/clarinet_ljspeech_samples_1.0/step_500000_sentence_3.wav">
|
||||||
<img src="images/audio_icon.png" width=250 /></a><br>
|
<img src="images/audio_icon.png" width=250 /></a><br>
|
||||||
<a href="https://paddlespeech.bj.bcebos.com/Parakeet/clarinet_ljspeech_samples_1.0/step_500000_sentence_4.wav">
|
<a href="https://paddlespeech.bj.bcebos.com/Parakeet/clarinet_ljspeech_samples_1.0/step_500000_sentence_4.wav">
|
||||||
<img src="images/audio_icon.png" width=250 /></a>
|
<img src="images/audio_icon.png" width=250 /></a>
|
||||||
</th>
|
</th>
|
||||||
</tr>
|
</tr>
|
||||||
</tbody>
|
</tbody>
|
||||||
|
|
|
@ -96,7 +96,7 @@ class Conditioner(dg.Layer):
|
||||||
x = fluid.layers.cast(x, "float16")
|
x = fluid.layers.cast(x, "float16")
|
||||||
x = fluid.layers.leaky_relu(x, alpha=0.4)
|
x = fluid.layers.leaky_relu(x, alpha=0.4)
|
||||||
|
|
||||||
return fluid.layers.reshape(x, [x.shape[0], x.shape[2], x.shape[3]])
|
return fluid.layers.squeeze(x, [1])
|
||||||
|
|
||||||
def infer(self, x):
|
def infer(self, x):
|
||||||
x = fluid.layers.unsqueeze(x, 1)
|
x = fluid.layers.unsqueeze(x, 1)
|
||||||
|
@ -111,7 +111,7 @@ class Conditioner(dg.Layer):
|
||||||
time_cutoff = layer._filter_size[1] - layer._stride[1]
|
time_cutoff = layer._filter_size[1] - layer._stride[1]
|
||||||
x = fluid.layers.leaky_relu(x[:, :, :, :-time_cutoff], alpha=0.4)
|
x = fluid.layers.leaky_relu(x[:, :, :, :-time_cutoff], alpha=0.4)
|
||||||
|
|
||||||
return fluid.layers.reshape(x, [x.shape[0], x.shape[2], x.shape[3]])
|
return fluid.layers.squeeze(x, [1])
|
||||||
|
|
||||||
|
|
||||||
class Flow(dg.Layer):
|
class Flow(dg.Layer):
|
||||||
|
@ -220,10 +220,9 @@ class Flow(dg.Layer):
|
||||||
# Pad width dim (time): dilated non-causal convolution
|
# Pad width dim (time): dilated non-causal convolution
|
||||||
pad_top, pad_bottom = (self.kernel_h - 1) * dilation_h, 0
|
pad_top, pad_bottom = (self.kernel_h - 1) * dilation_h, 0
|
||||||
pad_left = pad_right = int((self.kernel_w - 1) * dilation_w / 2)
|
pad_left = pad_right = int((self.kernel_w - 1) * dilation_w / 2)
|
||||||
self.in_layers[i].layer._padding = [
|
audio_pad = fluid.layers.pad2d(
|
||||||
pad_top, pad_bottom, pad_left, pad_right
|
audio, paddings=[pad_top, pad_bottom, pad_left, pad_right])
|
||||||
]
|
hidden = self.in_layers[i](audio_pad)
|
||||||
hidden = self.in_layers[i](audio)
|
|
||||||
cond_hidden = self.cond_layers[i](mel)
|
cond_hidden = self.cond_layers[i](mel)
|
||||||
in_acts = hidden + cond_hidden
|
in_acts = hidden + cond_hidden
|
||||||
out_acts = fluid.layers.tanh(in_acts[:, :self.n_channels, :]) * \
|
out_acts = fluid.layers.tanh(in_acts[:, :self.n_channels, :]) * \
|
||||||
|
@ -268,9 +267,8 @@ class Flow(dg.Layer):
|
||||||
pad_top, pad_bottom = 0, 0
|
pad_top, pad_bottom = 0, 0
|
||||||
pad_left = int((self.kernel_w - 1) * dilation_w / 2)
|
pad_left = int((self.kernel_w - 1) * dilation_w / 2)
|
||||||
pad_right = int((self.kernel_w - 1) * dilation_w / 2)
|
pad_right = int((self.kernel_w - 1) * dilation_w / 2)
|
||||||
self.in_layers[i].layer._padding = [
|
state = fluid.layers.pad2d(
|
||||||
pad_top, pad_bottom, pad_left, pad_right
|
state, paddings=[pad_top, pad_bottom, pad_left, pad_right])
|
||||||
]
|
|
||||||
hidden = self.in_layers[i](state)
|
hidden = self.in_layers[i](state)
|
||||||
cond_hidden = self.cond_layers[i](mel)
|
cond_hidden = self.cond_layers[i](mel)
|
||||||
in_acts = hidden + cond_hidden
|
in_acts = hidden + cond_hidden
|
||||||
|
@ -301,6 +299,7 @@ class WaveFlowModule(dg.Layer):
|
||||||
Returns:
|
Returns:
|
||||||
WaveFlowModule
|
WaveFlowModule
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, config):
|
def __init__(self, config):
|
||||||
super(WaveFlowModule, self).__init__()
|
super(WaveFlowModule, self).__init__()
|
||||||
self.n_flows = config.n_flows
|
self.n_flows = config.n_flows
|
||||||
|
@ -380,8 +379,7 @@ class WaveFlowModule(dg.Layer):
|
||||||
mel_slices = [mel[:, :, j, :] for j in self.perms[i]]
|
mel_slices = [mel[:, :, j, :] for j in self.perms[i]]
|
||||||
mel = fluid.layers.stack(mel_slices, axis=2)
|
mel = fluid.layers.stack(mel_slices, axis=2)
|
||||||
|
|
||||||
z = fluid.layers.reshape(
|
z = fluid.layers.squeeze(audio, [1])
|
||||||
audio, [audio.shape[0], audio.shape[2], audio.shape[3]])
|
|
||||||
return z, log_s_list
|
return z, log_s_list
|
||||||
|
|
||||||
def synthesize(self, mel, sigma=1.0):
|
def synthesize(self, mel, sigma=1.0):
|
||||||
|
@ -442,8 +440,7 @@ class WaveFlowModule(dg.Layer):
|
||||||
audio = fluid.layers.concat(audio_list, axis=2)
|
audio = fluid.layers.concat(audio_list, axis=2)
|
||||||
|
|
||||||
# audio: [bs, n_group, time/n_group]
|
# audio: [bs, n_group, time/n_group]
|
||||||
audio = fluid.layers.reshape(
|
audio = fluid.layers.squeeze(audio, [1])
|
||||||
audio, [audio.shape[0], audio.shape[2], audio.shape[3]])
|
|
||||||
# audio: [bs, time]
|
# audio: [bs, time]
|
||||||
audio = fluid.layers.reshape(
|
audio = fluid.layers.reshape(
|
||||||
fluid.layers.transpose(audio, [0, 2, 1]), [audio.shape[0], -1])
|
fluid.layers.transpose(audio, [0, 2, 1]), [audio.shape[0], -1])
|
||||||
|
|
|
@ -65,15 +65,6 @@ def compute_weight(v, g, dim, power):
|
||||||
return weight
|
return weight
|
||||||
|
|
||||||
|
|
||||||
def assign_by_cast(i, o):
|
|
||||||
fluid.default_main_program().current_block().append_op(
|
|
||||||
type="cast",
|
|
||||||
inputs={"X": i},
|
|
||||||
outputs={"Out": o},
|
|
||||||
attrs={"in_dtype": i.dtype,
|
|
||||||
"out_dtype": o.dtype})
|
|
||||||
|
|
||||||
|
|
||||||
class WeightNormWrapper(dg.Layer):
|
class WeightNormWrapper(dg.Layer):
|
||||||
def __init__(self, layer, param_name="weight", dim=0, power=2):
|
def __init__(self, layer, param_name="weight", dim=0, power=2):
|
||||||
super(WeightNormWrapper, self).__init__()
|
super(WeightNormWrapper, self).__init__()
|
||||||
|
@ -93,13 +84,13 @@ class WeightNormWrapper(dg.Layer):
|
||||||
w_v,
|
w_v,
|
||||||
self.create_parameter(
|
self.create_parameter(
|
||||||
shape=original_weight.shape, dtype=original_weight.dtype))
|
shape=original_weight.shape, dtype=original_weight.dtype))
|
||||||
assign_by_cast(original_weight, getattr(self, w_v))
|
F.assign(original_weight, getattr(self, w_v))
|
||||||
delattr(layer, param_name)
|
delattr(layer, param_name)
|
||||||
temp = norm_except(getattr(self, w_v), self.dim, self.power)
|
temp = norm_except(getattr(self, w_v), self.dim, self.power)
|
||||||
self.add_parameter(
|
self.add_parameter(
|
||||||
w_g, self.create_parameter(
|
w_g, self.create_parameter(
|
||||||
shape=temp.shape, dtype=temp.dtype))
|
shape=temp.shape, dtype=temp.dtype))
|
||||||
assign_by_cast(temp, getattr(self, w_g))
|
F.assign(temp, getattr(self, w_g))
|
||||||
|
|
||||||
# also set this when setting up
|
# also set this when setting up
|
||||||
setattr(self.layer, self.param_name,
|
setattr(self.layer, self.param_name,
|
||||||
|
|
Loading…
Reference in New Issue