From 33ed693ccff88935a62739d564c67efa8a1a4a5f Mon Sep 17 00:00:00 2001
From: Yibing Liu <liuyibing01@baidu.com>
Date: Fri, 12 Jun 2020 08:45:55 +0000
Subject: [PATCH] Upgrade WaveFlow to the PaddlePaddle 1.8.2 API

---
 README.md                                    |  8 ++++----
 parakeet/models/waveflow/waveflow_modules.py | 12 +-----------
 setup.py                                     |  2 +-
 3 files changed, 6 insertions(+), 16 deletions(-)
diff --git a/README.md b/README.md
index 4e84664..812c8a6 100644
--- a/README.md
+++ b/README.md
@@ -40,7 +40,7 @@ sudo apt-get install libsndfile1
 
 ### Install PaddlePaddle
 
-See [install](https://www.paddlepaddle.org.cn/install/quick) for more details. This repo requires PaddlePaddle **1.8.0** or above.
+See [install](https://www.paddlepaddle.org.cn/install/quick) for more details. This repo requires PaddlePaddle **1.8.2** or above.
 
 ### Install Parakeet
 
@@ -177,7 +177,7 @@ We provide the model checkpoints of WaveFlow with 64 and 128 residual channels,
 We also provide checkpoints for different end-to-end TTS models, and present the synthesized audio examples for some randomly chosen famous quotes. The corresponding texts are displayed as follows.
 
 ||Text | From |
-|:-:|:-- | :--: | 
+|:-:|:-- | :--: |
 0|*Life was like a box of chocolates, you never know what you're gonna get.* | *Forrest Gump* |  
 1|*With great power there must come great responsibility.* | *Spider-Man*|
 2|*To be or not to be, that’s a question.*|*Hamlet*|
@@ -232,7 +232,7 @@ Users have the option to use different vocoders to convert the linear/mel spectr
             <a href="https://paddlespeech.bj.bcebos.com/Parakeet/transformer_tts_ljspeech_griffin-lim_samples_1.0/step_120000_sentence_3.wav">
             <img src="images/audio_icon.png" width=250 /></a><br>
             <a href="https://paddlespeech.bj.bcebos.com/Parakeet/transformer_tts_ljspeech_griffin-lim_samples_1.0/step_120000_sentence_4.wav">
-            <img src="images/audio_icon.png" width=250 /></a> 
+            <img src="images/audio_icon.png" width=250 /></a>
             </th>
             <th >
             <a href="https://paddlespeech.bj.bcebos.com/Parakeet/fastspeech_ljspeech_griffin-lim_samples_1.0/step_130000_sentence_0.wav">
@@ -244,7 +244,7 @@ Users have the option to use different vocoders to convert the linear/mel spectr
             <a href="https://paddlespeech.bj.bcebos.com/Parakeet/fastspeech_ljspeech_griffin-lim_samples_1.0/step_130000_sentence_3.wav">
             <img src="images/audio_icon.png" width=250 /></a><br>
             <a href="https://paddlespeech.bj.bcebos.com/Parakeet/fastspeech_ljspeech_griffin-lim_samples_1.0/step_130000_sentence_4.wav">
-            <img src="images/audio_icon.png" width=250 /></a> 
+            <img src="images/audio_icon.png" width=250 /></a>
             </th>
         </tr>
     </tbody>
diff --git a/parakeet/models/waveflow/waveflow_modules.py b/parakeet/models/waveflow/waveflow_modules.py
index 03f873b..31b29dc 100644
--- a/parakeet/models/waveflow/waveflow_modules.py
+++ b/parakeet/models/waveflow/waveflow_modules.py
@@ -79,7 +79,7 @@ class Conditioner(dg.Layer):
                 stride=(1, s),
                 param_attr=param_attr,
                 bias_attr=bias_attr,
-                dtype="float32")
+                dtype=dtype)
             self.upsample_conv2d.append(conv_trans2d)
 
         for i, layer in enumerate(self.upsample_conv2d):
@@ -88,12 +88,7 @@ class Conditioner(dg.Layer):
     def forward(self, x):
         x = fluid.layers.unsqueeze(x, 1)
         for layer in self.upsample_conv2d:
-            in_dtype = x.dtype
-            if in_dtype == fluid.core.VarDesc.VarType.FP16:
-                x = fluid.layers.cast(x, "float32")
             x = layer(x)
-            if in_dtype == fluid.core.VarDesc.VarType.FP16:
-                x = fluid.layers.cast(x, "float16")
             x = fluid.layers.leaky_relu(x, alpha=0.4)
 
         return fluid.layers.squeeze(x, [1])
@@ -101,12 +96,7 @@ class Conditioner(dg.Layer):
     def infer(self, x):
         x = fluid.layers.unsqueeze(x, 1)
         for layer in self.upsample_conv2d:
-            in_dtype = x.dtype
-            if in_dtype == fluid.core.VarDesc.VarType.FP16:
-                x = fluid.layers.cast(x, "float32")
             x = layer(x)
-            if in_dtype == fluid.core.VarDesc.VarType.FP16:
-                x = fluid.layers.cast(x, "float16")
             # Trim conv artifacts.
             time_cutoff = layer._filter_size[1] - layer._stride[1]
             x = fluid.layers.leaky_relu(x[:, :, :, :-time_cutoff], alpha=0.4)
diff --git a/setup.py b/setup.py
index 244eef8..693534b 100644
--- a/setup.py
+++ b/setup.py
@@ -55,7 +55,7 @@ setup_info = dict(
         'inflect',
         'librosa',
         'unidecode',
-        'numba==0.48.0',
+        'numba==0.47.0',
         'tqdm==4.19.8',
         'matplotlib',
         'tensorboardX',