From 23095bf99297102dbb12f0d4222fad649894c84d Mon Sep 17 00:00:00 2001
From: liuyibing01
Date: Wed, 25 Mar 2020 14:48:54 +0000
Subject: [PATCH 1/2] Release waveflow & wavenet ckpts and audio samples

---
 README.md | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 81 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index b5f61fd..aacf6c3 100644
--- a/README.md
+++ b/README.md
@@ -76,29 +76,61 @@ Entries to the introduction, and the launch of training and synthsis for differe
 Parakeet also releases some well-trained parameters for the example models, which
 can be accessed in the following tables. Each column of these tables lists resources
 for one model, including the url link to the pre-trained model, the dataset that
 the model is trained on and the total training steps, and several synthesized audio
 samples based on the pre-trained model.
 
-- Vocoders
-
-<table>
-    <tr>
-        <th>WaveFlow</th>
-        <th>ClariNet</th>
-    </tr>
-    <tr>
-        <td>LJSpeech, 2M</td>
-        <td>LJSpeech, 500K</td>
-    </tr>
-    <tr>
-        <td>To be added soon</td>
-        <td>To be added soon</td>
-    </tr>
-</table>
+#### Vocoders
+
+We provide the model checkpoints of WaveFlow with 64 and 128 residual channels, ClariNet and WaveNet.
+
+<table>
+    <tr>
+        <th>WaveFlow (res. channels 64)</th>
+        <th>WaveFlow (res. channels 128)</th>
+    </tr>
+    <tr>
+        <td>LJSpeech, 3020 K</td>
+        <td>LJSpeech</td>
+    </tr>
+    <tr>
+        <td>audio samples</td>
+        <td>audio samples</td>
+    </tr>
+</table>
+
@@ -111,15 +143,57 @@
+<table>
+    <tr>
+        <th>ClariNet</th>
+        <th>WaveNet</th>
+    </tr>
+    <tr>
+        <td>LJSpeech, 500 K</td>
+        <td>LJSpeech, 2450 K</td>
+    </tr>
+    <tr>
+        <td>audio samples</td>
+        <td>audio samples</td>
+    </tr>
+</table>
+
-    **Note:** The input mel spectrogams are from validation dataset, which are not seen during training.
+      **Note:** The input mel spectrograms are from the validation dataset and are not seen during training.
 
-- TTS models
+#### TTS models
+
+<table>
+    <tr>
+        <th>Deep Voice 3</th>
+        <th>Transformer TTS</th>
+    </tr>
+    <tr>
+        <td>LJSpeech</td>
+        <td>LJSpeech</td>
+    </tr>
+    <tr>
+        <td>To be added soon</td>
+        <td>To be added soon</td>
+    </tr>
+</table>
+
 Click each link to download, then you can get the compressed package which contains the pre-trained model and the `yaml` config describing how to train the model.


From c845fbd51d734cf6f47ad73cd233470dc3a01fc3 Mon Sep 17 00:00:00 2001
From: chenfeiyu
Date: Thu, 26 Mar 2020 09:48:47 +0800
Subject: [PATCH 2/2] change interface for io.py

---
 parakeet/utils/io.py | 108 +++++++++++++++++++++++++------------------
 1 file changed, 63 insertions(+), 45 deletions(-)

diff --git a/parakeet/utils/io.py b/parakeet/utils/io.py
index e612400..e9e1240 100644
--- a/parakeet/utils/io.py
+++ b/parakeet/utils/io.py
@@ -20,6 +20,11 @@ import numpy as np
 import paddle.fluid.dygraph as dg
 
 
+def is_main_process():
+    local_rank = dg.parallel.Env().local_rank
+    return local_rank == 0
+
+
 def add_yaml_config_to_args(config):
     """ Add args in yaml config to the args parsed by argparse. The argument in
     yaml config will be overwritten by the same argument in argparse if they
@@ -41,7 +46,7 @@ def add_yaml_config_to_args(config):
     return config
 
 
-def load_latest_checkpoint(checkpoint_dir, rank=0):
+def _load_latest_checkpoint(checkpoint_dir):
     """Get the iteration number corresponding to the latest saved checkpoint
 
     Args:
@@ -52,26 +57,20 @@
     Returns:
         int: the latest iteration number.
     """
-    checkpoint_path = os.path.join(checkpoint_dir, "checkpoint")
+    checkpoint_record = os.path.join(checkpoint_dir, "checkpoint")
     # Create checkpoint index file if not exist.
-    if (not os.path.isfile(checkpoint_path)) and rank == 0:
-        with open(checkpoint_path, "w") as handle:
-            handle.write("model_checkpoint_path: step-0")
-
-    # Make sure that other process waits until checkpoint file is created
-    # by process 0.
-    while not os.path.isfile(checkpoint_path):
-        time.sleep(1)
+    if (not os.path.isfile(checkpoint_record)):
+        return 0
 
     # Fetch the latest checkpoint index.
-    with open(checkpoint_path, "r") as handle:
+    with open(checkpoint_record, "r") as handle:
         latest_checkpoint = handle.readline().split()[-1]
     iteration = int(latest_checkpoint.split("-")[-1])
 
     return iteration
 
 
-def save_latest_checkpoint(checkpoint_dir, iteration):
+def _save_checkpoint(checkpoint_dir, iteration):
     """Save the iteration number of the latest model to be checkpointed.
 
     Args:
@@ -81,60 +80,76 @@
     Returns:
         None
     """
-    checkpoint_path = os.path.join(checkpoint_dir, "checkpoint")
+    checkpoint_record = os.path.join(checkpoint_dir, "checkpoint")
     # Update the latest checkpoint index.
-    with open(checkpoint_path, "w") as handle:
+    with open(checkpoint_record, "w") as handle:
         handle.write("model_checkpoint_path: step-{}".format(iteration))
 
 
-def load_parameters(checkpoint_dir,
-                    rank,
-                    model,
+def load_parameters(model,
                     optimizer=None,
+                    checkpoint_dir=None,
                     iteration=None,
-                    file_path=None,
+                    checkpoint_path=None,
                     dtype="float32"):
     """Load a specific model checkpoint from disk.
 
     Args:
-        checkpoint_dir (str): the directory where checkpoint is saved.
-        rank (int): the rank of the process in multi-process setting.
         model (obj): model to load parameters.
         optimizer (obj, optional): optimizer to load states if needed.
             Defaults to None.
+        checkpoint_dir (str, optional): the directory where checkpoint is saved.
         iteration (int, optional): if specified, load the specific checkpoint,
            if not specified, load the latest one. Defaults to None.
-        file_path (str, optional): if specified, load the checkpoint
-            stored in the file_path. Defaults to None.
+        checkpoint_path (str, optional): if specified, load the checkpoint
+            stored in the checkpoint_path. Defaults to None.
         dtype (str, optional): precision of the model parameters.
             Defaults to float32.
 
     Returns:
-        None
+        iteration (int): number of iterations that the loaded checkpoint
+            has been trained for.
     """
-    if file_path is None:
-        if iteration is None:
-            iteration = load_latest_checkpoint(checkpoint_dir, rank)
-        if iteration == 0:
-            return
-        file_path = "{}/step-{}".format(checkpoint_dir, iteration)
+    if checkpoint_dir is not None and checkpoint_path is not None:
+        raise ValueError(
+            "Load from either (checkpoint_dir and iteration)\n"
+            "or checkpoint_path. Do not pass both.")
+    if iteration is not None and checkpoint_dir is None:
+        raise ValueError(
+            "When iteration is specified, checkpoint_dir should not be None")
+
+    if checkpoint_dir is not None:
+        if iteration is None:
+            iteration = _load_latest_checkpoint(checkpoint_dir)
+        checkpoint_path = os.path.join(checkpoint_dir,
+                                       "step-{}".format(iteration))
+        if iteration == 0 and not os.path.exists(checkpoint_path):
+            # if step-0 exists, it is also loaded
+            return iteration
+    else:
+        # checkpoint_path is not None
+        iteration = int(os.path.basename(checkpoint_path).split("-")[-1])
+
+    local_rank = dg.parallel.Env().local_rank
+    model_dict, optimizer_dict = dg.load_dygraph(checkpoint_path)
+
+    # cast to desired data type
+    for k, v in model_dict.items():
+        model_dict[k] = v.astype(dtype)
 
-    model_dict, optimizer_dict = dg.load_dygraph(file_path)
-    if dtype == "float16":
-        for k, v in model_dict.items():
-            if "conv2d_transpose" in k:
-                model_dict[k] = v.astype("float32")
-            else:
-                model_dict[k] = v.astype(dtype)
     model.set_dict(model_dict)
-    print("[checkpoint] Rank {}: loaded model from {}".format(rank, file_path))
+    print("[checkpoint] Rank {}: loaded model from {}.pdparams".format(
+        local_rank, checkpoint_path))
+
     if optimizer and optimizer_dict:
         optimizer.set_dict(optimizer_dict)
-        print("[checkpoint] Rank {}: loaded optimizer state from {}".format(
-            rank, file_path))
+        print("[checkpoint] Rank {}: loaded optimizer state from {}.pdopt".
+              format(local_rank, checkpoint_path))
+
+    return iteration
 
 
-def save_latest_parameters(checkpoint_dir, iteration, model, optimizer=None):
+def save_parameters(checkpoint_dir, iteration, model, optimizer=None):
     """Checkpoint the latest trained model parameters.
 
     Args:
@@ -147,12 +162,15 @@
     Returns:
         None
     """
-    file_path = "{}/step-{}".format(checkpoint_dir, iteration)
+    checkpoint_path = os.path.join(checkpoint_dir, "step-{}".format(iteration))
     model_dict = model.state_dict()
-    dg.save_dygraph(model_dict, file_path)
-    print("[checkpoint] Saved model to {}".format(file_path))
+    dg.save_dygraph(model_dict, checkpoint_path)
+    print("[checkpoint] Saved model to {}.pdparams".format(checkpoint_path))
 
     if optimizer:
         opt_dict = optimizer.state_dict()
-        dg.save_dygraph(opt_dict, file_path)
-        print("[checkpoint] Saved optimzier state to {}".format(file_path))
+        dg.save_dygraph(opt_dict, checkpoint_path)
+        print("[checkpoint] Saved optimizer state to {}.pdopt".format(
+            checkpoint_path))
+
+    _save_checkpoint(checkpoint_dir, iteration)
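**Usage note.** Taken together, `load_parameters`, `save_parameters`, and `is_main_process` form a small save/resume API. Below is a minimal sketch (not part of the patch) of how a training script might drive it after this change; the `Linear` stand-in model, the Adam optimizer, the directory name, and the iteration counts are illustrative assumptions only.

```python
# Hypothetical usage sketch for the reworked parakeet.utils.io interface.
# Everything except the io.* calls is a placeholder.
import os

import paddle.fluid as fluid
import paddle.fluid.dygraph as dg

from parakeet.utils import io

checkpoint_dir = "runs/checkpoints"  # placeholder output directory
max_iterations = 10000               # placeholder training length

os.makedirs(checkpoint_dir, exist_ok=True)

with dg.guard():
    model = dg.Linear(80, 80)  # stand-in for a real Parakeet model
    optimizer = fluid.optimizer.AdamOptimizer(
        learning_rate=1e-3, parameter_list=model.parameters())

    # Resume: given checkpoint_dir, load_parameters consults the "checkpoint"
    # record file, loads the newest step-N checkpoint into model/optimizer,
    # and returns N (0 when nothing has been saved yet).
    iteration = io.load_parameters(
        model, optimizer, checkpoint_dir=checkpoint_dir)

    while iteration < max_iterations:
        iteration += 1
        # ... run one training step here ...

        if iteration % 1000 == 0 and io.is_main_process():
            # Only rank 0 saves: this writes step-N.pdparams (and .pdopt)
            # and updates the record file via _save_checkpoint.
            io.save_parameters(checkpoint_dir, iteration, model, optimizer)

    # Loading an explicit file (e.g. for synthesis) is the other mode; the
    # iteration number is parsed from the basename ("step-9000" -> 9000):
    # io.load_parameters(model, checkpoint_path="runs/checkpoints/step-9000")
```

The `checkpoint` record file written by `_save_checkpoint` is a single line of the form `model_checkpoint_path: step-10000`; `_load_latest_checkpoint` parses the step number back out of it and returns 0 when the file is absent, so removing the file effectively restarts training from scratch.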