Upgrade WaveFlow to the PaddlePaddle 1.8.2 API
This commit is contained in:
parent
b7c584e2f7
commit
33ed693ccf
|
@ -40,7 +40,7 @@ sudo apt-get install libsndfile1
|
|||
|
||||
### Install PaddlePaddle
|
||||
|
||||
See [install](https://www.paddlepaddle.org.cn/install/quick) for more details. This repo requires PaddlePaddle **1.8.0** or above.
|
||||
See [install](https://www.paddlepaddle.org.cn/install/quick) for more details. This repo requires PaddlePaddle **1.8.2** or above.
|
||||
|
||||
### Install Parakeet
|
||||
|
||||
|
@ -177,7 +177,7 @@ We provide the model checkpoints of WaveFlow with 64 and 128 residual channels,
|
|||
We also provide checkpoints for different end-to-end TTS models, and present the synthesized audio examples for some randomly chosen famous quotes. The corresponding texts are displayed as follows.
|
||||
|
||||
||Text | From |
|
||||
|:-:|:-- | :--: |
|
||||
|:-:|:-- | :--: |
|
||||
0|*Life was like a box of chocolates, you never know what you're gonna get.* | *Forrest Gump* |
|
||||
1|*With great power there must come great responsibility.* | *Spider-Man*|
|
||||
2|*To be or not to be, that’s a question.*|*Hamlet*|
|
||||
|
@ -232,7 +232,7 @@ Users have the option to use different vocoders to convert the linear/mel spectr
|
|||
<a href="https://paddlespeech.bj.bcebos.com/Parakeet/transformer_tts_ljspeech_griffin-lim_samples_1.0/step_120000_sentence_3.wav">
|
||||
<img src="images/audio_icon.png" width=250 /></a><br>
|
||||
<a href="https://paddlespeech.bj.bcebos.com/Parakeet/transformer_tts_ljspeech_griffin-lim_samples_1.0/step_120000_sentence_4.wav">
|
||||
<img src="images/audio_icon.png" width=250 /></a>
|
||||
<img src="images/audio_icon.png" width=250 /></a>
|
||||
</th>
|
||||
<th >
|
||||
<a href="https://paddlespeech.bj.bcebos.com/Parakeet/fastspeech_ljspeech_griffin-lim_samples_1.0/step_130000_sentence_0.wav">
|
||||
|
@ -244,7 +244,7 @@ Users have the option to use different vocoders to convert the linear/mel spectr
|
|||
<a href="https://paddlespeech.bj.bcebos.com/Parakeet/fastspeech_ljspeech_griffin-lim_samples_1.0/step_130000_sentence_3.wav">
|
||||
<img src="images/audio_icon.png" width=250 /></a><br>
|
||||
<a href="https://paddlespeech.bj.bcebos.com/Parakeet/fastspeech_ljspeech_griffin-lim_samples_1.0/step_130000_sentence_4.wav">
|
||||
<img src="images/audio_icon.png" width=250 /></a>
|
||||
<img src="images/audio_icon.png" width=250 /></a>
|
||||
</th>
|
||||
</tr>
|
||||
</tbody>
|
||||
|
|
|
@ -79,7 +79,7 @@ class Conditioner(dg.Layer):
|
|||
stride=(1, s),
|
||||
param_attr=param_attr,
|
||||
bias_attr=bias_attr,
|
||||
dtype="float32")
|
||||
dtype=dtype)
|
||||
self.upsample_conv2d.append(conv_trans2d)
|
||||
|
||||
for i, layer in enumerate(self.upsample_conv2d):
|
||||
|
@ -88,12 +88,7 @@ class Conditioner(dg.Layer):
|
|||
def forward(self, x):
|
||||
x = fluid.layers.unsqueeze(x, 1)
|
||||
for layer in self.upsample_conv2d:
|
||||
in_dtype = x.dtype
|
||||
if in_dtype == fluid.core.VarDesc.VarType.FP16:
|
||||
x = fluid.layers.cast(x, "float32")
|
||||
x = layer(x)
|
||||
if in_dtype == fluid.core.VarDesc.VarType.FP16:
|
||||
x = fluid.layers.cast(x, "float16")
|
||||
x = fluid.layers.leaky_relu(x, alpha=0.4)
|
||||
|
||||
return fluid.layers.squeeze(x, [1])
|
||||
|
@ -101,12 +96,7 @@ class Conditioner(dg.Layer):
|
|||
def infer(self, x):
|
||||
x = fluid.layers.unsqueeze(x, 1)
|
||||
for layer in self.upsample_conv2d:
|
||||
in_dtype = x.dtype
|
||||
if in_dtype == fluid.core.VarDesc.VarType.FP16:
|
||||
x = fluid.layers.cast(x, "float32")
|
||||
x = layer(x)
|
||||
if in_dtype == fluid.core.VarDesc.VarType.FP16:
|
||||
x = fluid.layers.cast(x, "float16")
|
||||
# Trim conv artifacts.
|
||||
time_cutoff = layer._filter_size[1] - layer._stride[1]
|
||||
x = fluid.layers.leaky_relu(x[:, :, :, :-time_cutoff], alpha=0.4)
|
||||
|
|
Loading…
Reference in New Issue